e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, takes up 3.5 GB - in each era file,
the BeaconState contributes about 9 MB at current validator set sizes, up
from ~3 MB in the early blocks, for a grand total of ~558 MB for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
{.push raises: [Defect].}
|
|
|
|
|
|
|
|
import
|
|
|
|
stew/[endians2, results],
|
|
|
|
snappy, snappy/framing,
|
2021-08-12 15:08:20 +02:00
|
|
|
../beacon_chain/spec/datatypes/phase0,
|
2021-08-18 20:57:58 +02:00
|
|
|
../beacon_chain/spec/eth2_ssz_serialization
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, takes up 3.5 GB - in each era file,
the BeaconState contributes about 9 MB at current validator set sizes, up
from ~3 MB in the early blocks, for a grand total of ~558 MB for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
|
|
|
const
  # Two-byte type tags written at the start of every record header.
  E2Version = [0x65'u8, 0x32'u8]
    ## Tag of the first record written to the data (`.e2s`) file.
  E2Index = [0x69'u8, 0x32'u8]
    ## Tag of the first record written to the index (`.e2i`) file.
  SnappyBeaconBlock = [0x01'u8, 0x00'u8]
    ## Tag of a record holding a compressed signed beacon block.
  SnappyBeaconState = [0x02'u8, 0x00'u8]
    ## Tag of a record holding a compressed beacon state.
|
|
|
|
|
|
|
|
type
  E2Store* = object
    ## Write handle for a pair of files: the record data and its slot index.
    data: File
      ## Record file (opened with the `.e2s` extension).
    index: File
      ## Index file (opened with the `.e2i` extension).
    slot: Slot
      ## Slot that the next index entry will describe.

  Header* = object
    ## Decoded record header.
    typ*: array[2, byte]
      ## Two-byte record type tag.
    len*: uint64
      ## Length of the record payload in bytes.
|
|
|
|
|
|
|
|
proc append(f: File, data: openArray[byte]): Result[void, string] =
  ## Writes all of `data` to `f` at its current position.
  ##
  ## Returns an error when fewer bytes than requested were written, or when
  ## the write raises a catchable error (its message becomes the error).
  try:
    let written = writeBytes(f, data, 0, data.len())
    if written == data.len:
      ok()
    else:
      err("Cannot write to file")
  except CatchableError as exc:
    err(exc.msg)
|
|
|
|
|
|
|
|
proc readHeader(f: File): Result[Header, string] =
  ## Reads one 8-byte record header from the current position of `f`.
  ##
  ## The layout mirrors what the file-level `appendRecord` writes: a 2-byte
  ## type tag followed by the 6 low-order bytes of the little-endian payload
  ## length.
  ##
  ## Returns an error when fewer than 8 bytes are available or the read fails.
  try:
    var buf: array[8, byte]
    if system.readBuffer(f, addr buf[0], 8) != 8:
      return err("Not enough bytes for header")

    # Only 6 length bytes are stored on disk; widen them to a full uint64
    # (upper two bytes stay zero) before decoding.
    var lenBytes: array[8, byte]
    for i in 0 ..< 6:
      lenBytes[i] = buf[2 + i]

    ok(Header(
      typ: [buf[0], buf[1]],
      len: uint64.fromBytesLE(lenBytes)))
  except CatchableError:
    err("Cannot read header")
|
|
|
|
|
|
|
|
proc appendRecord(f: File, typ: array[2, byte], data: openArray[byte]): Result[int64, string] =
  ## Appends one record to `f`: the 2-byte `typ` tag, the 6 low-order bytes
  ## of the little-endian payload length, then the payload itself.
  ##
  ## Returns the file position at which the record starts, or an error if
  ## any of the writes fails.
  try:
    let
      recordPos = getFilePos(f)
      lenLE = toBytesLE(uint64(data.len()))

    ? f.append(typ)
    ? f.append(lenLE.toOpenArray(0, 5))
    ? f.append(data)

    ok(recordPos)
  except CatchableError as exc:
    err(exc.msg)
|
|
|
|
|
|
|
|
proc open*(T: type E2Store, path: string, name: string, firstSlot: Slot): Result[E2Store, string] =
  ## Creates `name & ".e2s"` (data) and `name & ".e2i"` (index) under `path`
  ## for writing and emits their initial records.
  ##
  ## The data file starts with an empty version record; the index file starts
  ## with an empty index record followed by `firstSlot` as a little-endian
  ## uint64.
  ##
  ## Returns the opened store, or an error message. On any failure, every
  ## file that was already opened is closed again before returning.
  let
    data =
      try: system.open(path / name & ".e2s", fmWrite)
      except CatchableError as e: return err(e.msg)
    index =
      try: system.open(path / name & ".e2i", fmWrite)
      except CatchableError as e:
        close(data)
        return err(e.msg)

  proc writeHeaders(data, index: File, firstSlot: Slot): Result[void, string] =
    ## Writes the initial records of both files, stopping at the first error.
    discard ? appendRecord(data, E2Version, [])
    discard ? appendRecord(index, E2Index, [])
    ? append(index, toBytesLE(firstSlot.uint64))
    ok()

  # Check the outcome explicitly rather than using `?` here, so that both
  # handles are released when a header write fails.
  let headers = writeHeaders(data, index, firstSlot)
  if headers.isErr:
    close(index)
    close(data)
    return err(headers.error)

  ok(E2Store(data: data, index: index, slot: firstSlot))
|
|
|
|
|
2021-08-25 14:51:52 +00:00
|
|
|
func close*(store: var E2Store) =
  ## Closes the data file and then the index file of `store`.
  close(store.data)
  close(store.index)
|
|
|
|
|
|
|
|
proc toCompressedBytes(item: auto): seq[byte] =
  ## SSZ-encodes `item` and returns the encoding compressed with
  ## `framingFormatCompress`.
  ##
  ## A catchable error during encoding or compression is turned into an
  ## assertion failure.
  try:
    framingFormatCompress(SSZ.encode(item))
  except CatchableError as exc:
    raiseAssert exc.msg # shouldn't happen
|
|
|
|
|
2021-08-12 15:08:20 +02:00
|
|
|
proc appendRecord*(store: var E2Store, v: phase0.TrustedSignedBeaconBlock): Result[void, string] =
  ## Appends the compressed block `v` to the data file and records its
  ## position in the index file.
  ##
  ## Blocks must arrive in non-decreasing slot order relative to the store's
  ## current slot; otherwise an error is returned and nothing is written.
  ## Any write failure is returned as an error.
  if v.message.slot < store.slot:
    return err("Blocks must be written in order")

  # Propagate a failed data write to the caller; `get` would raise a Defect.
  let start = ? store.data.appendRecord(SnappyBeaconBlock, toCompressedBytes(v))

  # One index entry is written per slot: slots skipped before this block get
  # a zero offset.
  while store.slot < v.message.slot:
    ? append(store.index, toBytesLE(0'u64))
    store.slot += 1

  # Index entry for the block's own slot: where its record starts in the
  # data file.
  ? append(store.index, toBytesLE(start.uint64))
  store.slot += 1

  ok()
|
|
|
|
|
2021-08-12 15:08:20 +02:00
|
|
|
proc appendRecord*(store: var E2Store, v: phase0.BeaconState): Result[void, string] =
  ## Appends the compressed state `v` to the data file as a
  ## `SnappyBeaconState` record. The index file is not touched.
  ##
  ## Returns an error if writing the record fails.
  let compressed = toCompressedBytes(v)
  discard ? appendRecord(store.data, SnappyBeaconState, compressed)
  ok()
|