e2store: add era format (#2382)

Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.

This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.

All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.

Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).

```
-rw-rw-r--. 1 arnetheduck arnetheduck   16  5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M  5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck  65K  5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck  18M  5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck  65K  5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck  68M  5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck  61K  5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck  62M  5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
This commit is contained in:
Jacek Sieka 2021-03-15 11:31:39 +01:00 committed by GitHub
parent 4fcff4cd27
commit aabdd34704
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 300 additions and 44 deletions

View File

@ -1082,8 +1082,7 @@ proc getGenesisBlockData*(dag: ChainDAGRef): BlockData =
dag.get(dag.genesis)
proc getGenesisBlockSlot*(dag: ChainDAGRef): BlockSlot =
let blockData = dag.getGenesisBlockData()
BlockSlot(blck: blockData.refs, slot: GENESIS_SLOT)
BlockSlot(blck: dag.genesis, slot: GENESIS_SLOT)
proc getProposer*(
dag: ChainDAGRef, head: BlockRef, slot: Slot):

View File

@ -6,9 +6,9 @@ The `e2store` (extension: `.e2s`) is a simple linear [TLV](https://en.wikipedia.
`e2s` files consist of repeated type-length-value records. Each record is variable-length, and unknown records can easily be skipped. In particular, `e2s` files are designed to:
* allow trivial implementations that are easy to analyze
* allow trivial implementations that are easy to audit
* allow append-only implementations
* allow future record types to be added
* allow future record types to be added, such as when the chain forks
The type and length are encoded in an 8-byte header which is directly followed by data.
@ -21,35 +21,48 @@ length = Vector[byte, 6]
The `length` is the first 6 bytes of a little-endian encoded `uint64`, not including the header itself. For example, the entry with header type `[0x22, 0x32]`, the length `4` and the bytes `[0x01, 0x02, 0x03, 0x04]` will be stored as the byte sequence `[0x22, 0x32, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04]`.
`.e2s` files may freely be concatenated, and may contain out-of-order records.
Types that have the high bit in the first byte set (those in the range `[0x80-0xff]`) are application and/or vendor specific.
## Reading
In a loop, the following pseudocode can be used to read the file:
The following python code can be used to read an e2 file:
```
while file.bytesRemaining > 0:
if file.bytesRemaining < 8:
abort("Header missing")
```python
import sys, struct
header = read(file, 8)
type = header[0:2]
length = fromLittleEndian(header[2:8])
with open(sys.argv[1], "rb") as f:
header = f.read(8)
typ = header[0:2] # First 2 bytes for type
if file.bytesRemaining < length:
abort("Not enough data")
if typ != b"e2":
raise RuntimeError("this is not an e2store file")
data = read(file, length)
while True:
header = f.read(8) # Header is 8 bytes
if not header: break
if type == ...:
# process the data
else:
# Unknown record type, skip
typ = header[0:2] # First 2 bytes for type
dlen = struct.unpack("<q", header[2:8] + b"\0\0")[0] # 6 bytes of little-endian length
print("typ:", "".join("{:02x}".format(x) for x in typ), "len:", dlen)
data = f.read(dlen)
if len(data) != dlen: # Don't trust the given length, especially when pre-allocating
print("Missing data", len(data), dlen)
break
if typ == b"i2":
print("Index header")
break
elif typ == b"e2":
print("e2 header") # May appear
```
## Writing
`e2s` files are linear and append-only. To write a new entry, simply append it to the end of the file. In a separate transaction, the index file may be updated also.
Since the files are append-only, `e2s` files are suitable in particular for finalized blocks only.
`e2s` files are by design intended to be append-only, making them suitable for cold storage of finalized chain data.
# Known types
@ -57,52 +70,91 @@ Since the files are append-only, `e2s` files are suitable in particular for fina
```
type: [0x65, 0x32]
data: Vector[byte, 0]
```
The `version` type must be the first record in the file. Its type is `[0x65, 0x32]` (`e2` in ascii) and the length of its data field is always 0, thus the first 8 bytes of an `e2s` file are always `[0x65, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]`.
The `version` type must be the first record in the file. Its type is `[0x65, 0x32]` (`e2` in ascii) and the length of its data field is always 0, thus the first 8 bytes of an `e2s` file are always `[0x65, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]`. When a new version record is encountered, it applies to all records following the version entry - this can happen when two e2s files are concatenated.
## CompressedSignedBeaconBlock
```
type: [0x01, 0x00]
data: snappyFramed(length-varint | ssz(SignedBeaconBlock))
data: snappyFramed(ssz(SignedBeaconBlock))
```
`CompressedSignedBeaconBlock` entries are entries whose data field matches the payload of `BeaconBlocksByRange` and `BeaconBlocksByRoot` chunks in the phase0 p2p specification. In particular, the SignedBeaconBlock is serialized using SSZ, prefixed with a varint-length, then compressed using the snappy [framing format](https://github.com/google/snappy/blob/master/framing_format.txt).
`CompressedSignedBeaconBlock` entries are entries whose data field matches the payload of `BeaconBlocksByRange` and `BeaconBlocksByRoot` chunks in the phase0 p2p specification. In particular, the SignedBeaconBlock is serialized using SSZ, then compressed using the snappy [framing format](https://github.com/google/snappy/blob/master/framing_format.txt).
## CompressedBeaconState
```
type: [0x02, 0x00]
data: snappyFramed(ssz(BeaconState))
```
`CompressedBeaconState` entries are entries whose data field match that of `CompressedSignedBeaconBlock` but carry a `BeaconState` instead.
## Empty
```
type: [0x00, 0x00]
```
The `Empty` type contains no data, but may have a length. The corresponding amount of data should be skipped while reading the file.
# Slot Index files
Index files are files that store indices to linear histories of entries. They consist of offsets that point to the beginning of the corresponding record. Index files start with an 8-byte header, followed by a series of `uint64` encoded as little endian bytes. An index of 0 indicates that there is no data for the given slot.
Index files are files that store indices to linear histories of entries. They consist of offsets that point to the beginning of the corresponding record. Index files start with an 8-byte header and a starting offset followed by a series of `uint64` encoded as little endian bytes. An index of 0 indicates that there is no data for the given slot.
Each entry in the slot index is fixed-length, meaning that the entry for slot `N` can be found at index `(N * 8) + 8` in the index file. Index files only support linear histories, meaning that the blocks that they point to must have passed finalization.
Each entry in the slot index is fixed-length, meaning that the entry for slot `N` can be found at index `(N * 8) + 16` in the index file. Index files only support linear histories.
By convention, slot index files have the name `.e2i`.
```
header | index | index | index ...
header | starting-slot | index | index | index ...
```
## IndexVersion
```
type: [0x69, 0x32]
```
The `version` header of an index file consists of the bytes `[0x69, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]`.
## Index
Index entries are `uint64` offsets, encoded as little-endian, from the beginning of the store file to the corresponding entry.
## Reading
```
if failed(setpos(indexfile, slot * 8 + 8)):
abort("no data for the given slot")
```python
def find_offset(name, slot):
# Find the offset of a given slot
with open(name, "rb") as f:
header = f.read(8)
typ = header[0:2] # First 2 bytes for type
offset = fromLittleEndian(read(indexfile, 8))
if offset == 0:
abort("no data for the given slot")
if typ != b"i2":
raise RuntimeError("this is not an e2store file")
start_slot = struct.unpack("<q", f.read(8))[0]
f.seek(8 * (slot - start_slot) + 16)
return struct.unpack("<q", f.read(8))[0]
if failed(setpos(datafile, offset)):
abort("index file corrupt, data not found at offset")
header = read(datafile, 8)
# as above
```
# Era files
`.era` files are special instances of `.e2s` files that follow a more strict content format optimised for reading and long-term storage and distribution. Era files contain groups consisting of a state and the blocks that led up to it, limited to `SLOTS_PER_HISTORICAL_ROOT` slots each, allowing quick verification of the data contained in the file.
Each era is identified by when it ends. Thus, the genesis era is era 0, followed by era 1 which ends before slot 8192 etc.
`.era` files MAY follow a simple naming convention: `eth2-<network>-<era-number>-<era-count>.era` with era and count hex-encoded to 8 digits.
An `.era` file is structured in the following way:
```
era := group+
group := canonical-state | blocks*
```
The `canonical-state` is the state of the slot that immediately follows the end of the era without applying blocks from the next era. For example, the era that covers the first 8192 slots will have all blocks applied up to slot 8191 and will `process_slots` up to 8192. The genesis group contains only the genesis state but no blocks.
Era files place the state first for a number of reasons: the state is then guaranteed to contain all public keys and block roots needed to verify the blocks in the file. A special case is the genesis era file - this file contains only the genesis state.

96
ncli/e2store.nim Normal file
View File

@ -0,0 +1,96 @@
{.push raises: [Defect].}
import
stew/[endians2, results],
snappy, snappy/framing,
../beacon_chain/spec/datatypes,
../beacon_chain/ssz/ssz_serialization
const
  # Two-byte record type tags, per the e2store spec in docs/e2store.md
  E2Version = [byte 0x65, 0x32]          # "e2" - version record, first in a .e2s file
  E2Index = [byte 0x69, 0x32]            # "i2" - version record, first in a .e2i file
  SnappyBeaconBlock = [byte 0x01, 0x00]  # snappy-framed ssz(SignedBeaconBlock)
  SnappyBeaconState = [byte 0x02, 0x00]  # snappy-framed ssz(BeaconState)

type
  E2Store* = object
    data: File   # .e2s file holding the TLV records
    index: File  # .e2i file holding per-slot offsets into `data`
    slot: Slot   # next slot for which an index entry will be written

  Header* = object
    typ*: array[2, byte]  # record type tag
    len*: uint64          # payload length, excluding the 8-byte header
proc append(f: File, data: openArray[byte]): Result[void, string] =
  ## Writes all of `data` to `f`, turning short writes and I/O exceptions
  ## into a string error.
  try:
    let written = writeBytes(f, data, 0, data.len)
    if written == data.len:
      ok()
    else:
      err("Cannot write to file")
  except CatchableError as e:
    err(e.msg)
proc readHeader(f: File): Result[Header, string] =
  ## Reads and decodes one 8-byte e2store record header from the current
  ## position of `f`: bytes 0-1 are the record type, bytes 2-7 the
  ## little-endian payload length (the top two bytes of the uint64 are 0).
  ##
  ## Fix: the previous version read the bytes but never returned a decoded
  ## `Header` on success, leaving the result default-initialized.
  var buf: array[8, byte]
  try:
    if system.readBuffer(f, addr buf[0], 8) != 8:
      return err("Not enough bytes for header")
  except CatchableError:
    return err("Cannot read header")

  # Zero-extend the 6 length bytes to a full little-endian uint64
  var lenLE: array[8, byte]
  for i in 0..5:
    lenLE[i] = buf[2 + i]

  ok(Header(typ: [buf[0], buf[1]], len: uint64.fromBytesLE(lenLE)))
proc appendRecord(f: File, typ: array[2, byte], data: openArray[byte]): Result[int64, string] =
  ## Appends one TLV record (2-byte type, 6-byte little-endian length,
  ## then the payload) to `f`, returning the offset where the record starts.
  try:
    let
      startPos = getFilePos(f)
      lenBytes = toBytesLE(uint64 data.len)
    ? f.append(typ)
    ? f.append(lenBytes.toOpenArray(0, 5))  # only the low 6 length bytes
    ? f.append(data)
    ok(startPos)
  except CatchableError as e:
    err(e.msg)
proc open*(T: type E2Store, path: string, name: string, firstSlot: Slot): Result[E2Store, string] =
  ## Creates a fresh store under `path`, opening `name`.e2s (data) and
  ## `name`.e2i (index) for writing and emitting their version records plus
  ## the index start slot. Existing files are truncated (fmWrite).
  ## NOTE(review): `/` is presumably the std/os path-join operator imported
  ## elsewhere in the file - confirm.
  let
    data =
      try: open(path / name & ".e2s", fmWrite)
      except CatchableError as e: return err(e.msg)
    index =
      try: system.open(path / name & ".e2i", fmWrite)
      except CatchableError as e:
        # Don't leak the data handle if the index file cannot be opened
        close(data)
        return err(e.msg)

  # Both files begin with a zero-length version record
  discard ? appendRecord(data, E2Version, [])
  discard ? appendRecord(index, E2Index, [])
  # The index stores the starting slot so entry N lives at
  # (N - firstSlot) * 8 + 16
  ? append(index, toBytesLE(firstSlot.uint64))

  ok(E2Store(data: data, index: index, slot: firstSlot))
proc close*(store: var E2Store) =
  ## Releases both backing file handles (.e2s data, then .e2i index).
  close(store.data)
  close(store.index)
proc toCompressedBytes(item: auto): seq[byte] =
  ## SSZ-serializes `item` and compresses it with the snappy framing
  ## format - the on-disk payload representation of e2store records.
  try:
    let
      payload = SSZ.encode(item)
    framingFormatCompress(payload)
  except CatchableError as exc:
    # Serializing in-memory data should not fail
    raiseAssert exc.msg # shouldn't happen
proc appendRecord*(store: var E2Store, v: SomeSignedBeaconBlock): Result[void, string] =
  ## Appends a compressed signed block to the data file and records its
  ## offset in the slot index. Blocks must be written in increasing slot
  ## order; slots without a block get a zero index entry.
  if v.message.slot < store.slot:
    return err("Blocks must be written in order")

  # Fix: propagate data-file write failures via `?` instead of panicking
  # with `get()` inside a Result-returning proc
  let start = ? store.data.appendRecord(SnappyBeaconBlock, toCompressedBytes(v))

  # Zero-fill index entries for empty slots - offset 0 means "no data"
  while store.slot < v.message.slot:
    ? append(store.index, toBytesLE(0'u64))
    store.slot += 1

  ? append(store.index, toBytesLE(start.uint64))
  store.slot += 1

  ok()
proc appendRecord*(store: var E2Store, v: BeaconState): Result[void, string] =
  ## Appends a compressed BeaconState to the data file. States get no slot
  ## index entry, so the returned offset is discarded.
  discard ? store.data.appendRecord(SnappyBeaconState, toCompressedBytes(v))
  ok()

49
ncli/e2store.py Normal file
View File

@ -0,0 +1,49 @@
import sys, struct
def read_e2store(name):
    """Iterate over the records of an e2s file, yielding (type, data) tuples.

    Verifies the leading "e2" version header, then walks the TLV records
    until EOF. Raises RuntimeError on a bad magic, truncated payloads, or
    an embedded "i2" index header.
    """
    with open(name, "rb") as f:
        magic = f.read(8)
        if magic[0:2] != b"e2":
            raise RuntimeError("this is not an e2store file")
        while True:
            header = f.read(8)  # 2 bytes type + 6 bytes little-endian length
            if not header:
                break
            typ = header[0:2]
            (dlen,) = struct.unpack("<q", header[2:8] + b"\0\0")
            payload = f.read(dlen)
            if len(payload) != dlen:  # don't trust the stated length
                raise RuntimeError("File is missing data")
            if typ == b"i2":
                raise RuntimeError("Cannot switch to index mode")
            # extra "e2" version records (concatenated files) pass through
            yield (typ, payload)
def find_offset(name, slot):
    """Return the e2s data-file offset stored for `slot` in an .e2i index.

    Layout: 8-byte "i2" header, 8-byte little-endian start slot, then one
    8-byte little-endian offset per slot.
    """
    with open(name, "rb") as f:
        if f.read(8)[0:2] != b"i2":
            raise RuntimeError("this is not an e2store file")
        (start_slot,) = struct.unpack("<q", f.read(8))
        # fixed-size entries: entry for `slot` sits at 8*(slot-start)+16
        f.seek(16 + 8 * (slot - start_slot))
        return struct.unpack("<q", f.read(8))[0]
# CLI entry point:
#   e2store.py file.e2i <slot>  -> print the data-file offset for <slot>
#   e2store.py file.e2s         -> dump record types and payload sizes
name = sys.argv[1]
if name.endswith(".e2i"):
    print(find_offset(name, int(sys.argv[2])))
else:
    for typ, data in read_e2store(name):
        print("typ", typ, "data", len(data))

View File

@ -7,7 +7,7 @@ import
../beacon_chain/spec/[crypto, datatypes, digest, helpers,
state_transition, presets],
../beacon_chain/ssz, ../beacon_chain/ssz/sszdump,
../research/simutils
../research/simutils, ./e2store
type Timers = enum
tInit = "Initialize DB"
@ -26,6 +26,7 @@ type
dumpBlock
pruneDatabase
rewindState
exportEra
# TODO:
# This should probably allow specifying a run-time preset
@ -90,6 +91,14 @@ type
argument
desc: "Slot".}: uint64
of exportEra:
era* {.
defaultValue: 0
desc: "The era number to write".}: uint64
eraCount* {.
defaultValue: 1
desc: "Number of eras to write".}: uint64
proc cmdBench(conf: DbConf, runtimePreset: RuntimePreset) =
var timers: array[Timers, RunningStat]
@ -307,6 +316,55 @@ proc cmdRewindState(conf: DbConf, preset: RuntimePreset) =
echo "Writing state..."
dump("./", hashedState, blck)
proc atCanonicalSlot(blck: BlockRef, slot: Slot): BlockSlot =
  ## Returns the BlockSlot pairing `slot` with its last applied ancestor
  ## block: the chain is resolved at `slot - 1` first so that no block
  ## from `slot` itself is included (empty-slot advancement only).
  if slot == 0:
    blck.atSlot(slot)
  else:
    blck.atSlot(slot - 1).blck.atSlot(slot)
proc cmdExportEra(conf: DbConf, preset: RuntimePreset) =
  ## Exports eras [conf.era, conf.era + conf.eraCount) from the database as
  ## .e2s/.e2i file pairs: each era file holds the era-end state followed by
  ## the blocks of the era (genesis era holds only the genesis state).
  let db = BeaconChainDB.init(preset, conf.databaseDir.string)
  defer: db.close()

  if not ChainDAGRef.isInitialized(db):
    echo "Database not initialized"
    quit 1

  echo "Initializing block pool..."
  let
    dag = init(ChainDAGRef, preset, db)

  for era in conf.era..<conf.era + conf.eraCount:
    let
      # Era 0 is genesis-only; era N > 0 covers slots
      # [(N-1)*SLOTS_PER_HISTORICAL_ROOT, N*SLOTS_PER_HISTORICAL_ROOT)
      firstSlot = if era == 0: Slot(0) else: Slot((era - 1) * SLOTS_PER_HISTORICAL_ROOT)
      endSlot = Slot(era * SLOTS_PER_HISTORICAL_ROOT)
      slotCount = endSlot - firstSlot  # NOTE(review): currently unused
      name = &"ethereum2-mainnet-{era.int:08x}-{1:08x}"
      canonical = dag.head.atCanonicalSlot(endSlot)

    if endSlot > dag.head.slot:
      echo "Written all complete eras"
      break

    var e2s = E2Store.open(".", name, firstSlot).get()
    defer: e2s.close()

    # State first - it carries the keys and roots needed to verify blocks
    dag.withState(dag.tmpState, canonical):
      e2s.appendRecord(state).get()

    var
      ancestors: seq[BlockRef]
      cur = canonical.blck
    if era != 0:
      # Collect the era's blocks newest-first, then write them oldest-first
      while cur != nil and cur.slot >= firstSlot:
        ancestors.add(cur)
        cur = cur.parent

      for i in 0..<ancestors.len():
        let
          ancestor = ancestors[ancestors.len - 1 - i]
        e2s.appendRecord(db.getBlock(ancestor.root).get()).get()
when isMainModule:
var
conf = DbConf.load()
@ -323,3 +381,5 @@ when isMainModule:
cmdPrune(conf, runtimePreset)
of rewindState:
cmdRewindState(conf, runtimePreset)
of exportEra:
cmdExportEra(conf, runtimePreset)