2023-07-04 14:29:26 +00:00
|
|
|
# beacon_chain
|
2024-01-06 15:26:56 +01:00
|
|
|
# Copyright (c) 2021-2024 Status Research & Development GmbH
|
2023-07-04 14:29:26 +00:00
|
|
|
# Licensed and distributed under either of
|
|
|
|
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
|
|
|
|
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
|
|
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
|
2023-01-20 14:14:37 +00:00
|
|
|
{.push raises: [].}
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
|
|
|
import
|
2024-01-16 22:37:14 +00:00
|
|
|
results,
|
2024-02-09 08:59:36 +01:00
|
|
|
stew/[arrayops, endians2, io2]
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
era: load blocks and states (#3394)
* era: load blocks and states
Era files contain finalized history and can be thought of as an
alternative source for block and state data that allows clients to avoid
syncing this information from the P2P network - the P2P network is then
used to "top up" the client with the most recent data. They can be
freely shared in the community via whatever means (http, torrent, etc)
and serve as a permanent cold store of consensus data (and, after the
merge, execution data) for history buffs and bean counters alike.
This PR gently introduces support for loading blocks and states in two
cases: block requests from rest/p2p and frontfilling when doing
checkpoint sync.
The era files are used as a secondary source if the information is not
found in the database - compared to the database, there are a few key
differences:
* the database stores the block indexed by block root while the era file
indexes by slot - the former is used only in rest, while the latter is
used both by p2p and rest.
* when loading blocks from era files, the root is no longer trivially
available - if it is needed, it must either be computed (slow) or cached
(messy) - the good news is that for p2p requests, it is not needed
* in era files, "framed" snappy encoding is used while in the database
we store unframed snappy - for p2p2 requests, the latter requires
recompression while the former could avoid it
* front-filling is the process of using era files to replace backfilling
- in theory this front-filling could happen from any block and
front-fills with gaps could also be entertained, but our backfilling
algorithm cannot take advantage of this because there's no (simple) way
to tell it to "skip" a range.
* front-filling, as implemented, is a bit slow (10s to load mainnet): we
load the full BeaconState for every era to grab the roots of the blocks
- it would be better to partially load the state - as such, it would
also be good to be able to partially decompress snappy blobs
* lookups from REST via root are served by first looking up a block
summary in the database, then using the slot to load the block data from
the era file - however, there needs to be an option to create the
summary table from era files to fully support historical queries
To test this, `ncli_db` has an era file exporter: the files it creates
should be placed in an `era` folder next to `db` in the data directory.
What's interesting in particular about this setup is that `db` remains
as the source of truth for security purposes - it stores the latest
synced head root which in turn determines where a node "starts" its
consensus participation - the era directory however can be freely shared
between nodes / people without any (significant) security implications,
assuming the era files are consistent / not broken.
There's lots of future improvements to be had:
* we can drop the in-memory `BlockRef` index almost entirely - at this
point, resident memory usage of Nimbus should drop to a cool 500-600 mb
* we could serve era files via REST trivially: this would drop backfill
times to whatever time it takes to download the files - unlike the
current implementation that downloads block by block, downloading an era
at a time almost entirely cuts out request overhead
* we can "reasonably" recreate detailed state history from almost any
point in time, turning an O(slot) process into O(1) effectively - we'll
still need caches and indices to do this with sufficient efficiency for
the rest api, but at least it cuts the whole process down to minutes
instead of hours, for arbitrary points in time
* CI: ignore failures with Nim-1.6 (temporary)
* test fixes
Co-authored-by: Ștefan Talpalaru <stefantalpalaru@yahoo.com>
2022-03-23 09:58:17 +01:00
|
|
|
export io2
|
|
|
|
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
const
|
2022-01-07 11:13:19 +01:00
|
|
|
E2Version* = [byte 0x65, 0x32]
|
|
|
|
E2Index* = [byte 0x69, 0x32]
|
|
|
|
|
|
|
|
SnappyBeaconBlock* = [byte 0x01, 0x00]
|
|
|
|
SnappyBeaconState* = [byte 0x02, 0x00]
|
|
|
|
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
type
|
2022-01-07 11:13:19 +01:00
|
|
|
Type* = array[2, byte]
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
|
|
|
Header* = object
|
2022-01-07 11:13:19 +01:00
|
|
|
typ*: Type
|
|
|
|
len*: int
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
2024-02-09 08:59:36 +01:00
|
|
|
proc toString*(v: IoErrorCode): string =
|
2022-01-07 11:13:19 +01:00
|
|
|
try: ioErrorMsg(v)
|
|
|
|
except Exception as e: raiseAssert e.msg
|
|
|
|
|
2024-02-09 08:59:36 +01:00
|
|
|
proc append*(f: IoHandle, data: openArray[byte]): Result[void, string] =
|
2022-01-07 11:13:19 +01:00
|
|
|
if (? writeFile(f, data).mapErr(toString)) != data.len.uint:
|
|
|
|
return err("could not write data")
|
|
|
|
ok()
|
|
|
|
|
2024-02-09 08:59:36 +01:00
|
|
|
proc appendHeader*(f: IoHandle, typ: Type, dataLen: int): Result[int64, string] =
|
2023-02-15 14:51:17 +01:00
|
|
|
if dataLen.uint64 > uint32.high:
|
|
|
|
return err("entry does not fit 32-bit length")
|
|
|
|
|
2022-01-07 11:13:19 +01:00
|
|
|
let start = ? getFilePos(f).mapErr(toString)
|
|
|
|
|
|
|
|
? append(f, typ)
|
2023-02-15 14:51:17 +01:00
|
|
|
? append(f, toBytesLE(dataLen.uint32))
|
|
|
|
? append(f, [0'u8, 0'u8])
|
2022-01-07 11:13:19 +01:00
|
|
|
|
|
|
|
ok(start)
|
|
|
|
|
2022-05-10 02:28:46 +02:00
|
|
|
proc appendRecord*(
|
|
|
|
f: IoHandle, typ: Type, data: openArray[byte]): Result[int64, string] =
|
2022-01-07 11:13:19 +01:00
|
|
|
let start = ? appendHeader(f, typ, data.len())
|
|
|
|
? append(f, data)
|
|
|
|
ok(start)
|
e2store: add era format (#2382)
Era files contain 8192 blocks and a state corresponding to the length of
the array holding block roots in the state, meaning that each block is
verifiable using the pubkeys and block roots from the state. Of course,
one would need to know the root of the state as well, which is available
in the first block of the _next_ file - or known from outside.
This PR also adds an implementation to write e2s, e2i and era files, as
well as a python script to inspect them.
All in all, the format is very similar to what goes on in the network
requests meaning it can trivially serve as a backing format for serving
said requests.
Mainnet, up to the first 671k slots, take up 3.5gb - in each era file,
the BeaconState contributes about 9mb at current validator set sizes, up
from ~3mb in the early blocks, for a grand total of ~558mb for the 82 eras
tested - this overhead could potentially be calculated but one would lose
the ability to verify individual blocks (eras could still be verified using
historical roots).
```
-rw-rw-r--. 1 arnetheduck arnetheduck 16 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 1,8M 5 mar 11.47 ethereum2-mainnet-00000000-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 18M 5 mar 11.47 ethereum2-mainnet-00000001-00000001.e2s
...
-rw-rw-r--. 1 arnetheduck arnetheduck 65K 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 68M 5 mar 11.52 ethereum2-mainnet-00000051-00000001.e2s
-rw-rw-r--. 1 arnetheduck arnetheduck 61K 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2i
-rw-rw-r--. 1 arnetheduck arnetheduck 62M 5 mar 11.11 ethereum2-mainnet-00000052-00000001.e2s
```
2021-03-15 11:31:39 +01:00
|
|
|
|
2022-01-07 11:13:19 +01:00
|
|
|
proc checkBytesLeft(f: IoHandle, expected: int64): Result[void, string] =
|
|
|
|
let size = ? getFileSize(f).mapErr(toString)
|
|
|
|
if expected > size:
|
|
|
|
return err("Record extends past end of file")
|
|
|
|
|
|
|
|
let pos = ? getFilePos(f).mapErr(toString)
|
|
|
|
if expected > size - pos:
|
|
|
|
return err("Record extends past end of file")
|
|
|
|
|
|
|
|
ok()
|
|
|
|
|
2024-02-09 08:59:36 +01:00
|
|
|
proc readFileExact*(f: IoHandle, buf: var openArray[byte]): Result[void, string] =
|
2022-01-07 11:13:19 +01:00
|
|
|
if (? f.readFile(buf).mapErr(toString)) != buf.len().uint:
|
|
|
|
return err("missing data")
|
|
|
|
ok()
|
|
|
|
|
2024-02-09 08:59:36 +01:00
|
|
|
proc readHeader*(f: IoHandle): Result[Header, string] =
|
2022-01-07 11:13:19 +01:00
|
|
|
var buf: array[10, byte]
|
|
|
|
? readFileExact(f, buf.toOpenArray(0, 7))
|
|
|
|
|
|
|
|
var
|
|
|
|
typ: Type
|
|
|
|
discard typ.copyFrom(buf)
|
|
|
|
|
2023-07-04 14:29:26 +00:00
|
|
|
# Conversion safe because we had only 4 bytes of length data
|
|
|
|
let len = (uint32.fromBytesLE(buf.toOpenArray(2, 5))).int64
|
2022-01-07 11:13:19 +01:00
|
|
|
|
|
|
|
# No point reading these..
|
|
|
|
if len > int.high(): return err("header length exceeds int.high")
|
|
|
|
|
|
|
|
# Must have at least that much data, or header is invalid
|
|
|
|
? f.checkBytesLeft(len)
|
|
|
|
|
|
|
|
ok(Header(typ: typ, len: int(len)))
|
|
|
|
|
2022-01-25 09:28:26 +01:00
|
|
|
proc readRecord*(f: IoHandle, data: var seq[byte]): Result[Header, string] =
|
2022-01-07 11:13:19 +01:00
|
|
|
let header = ? readHeader(f)
|
|
|
|
if header.len > 0:
|
|
|
|
? f.checkBytesLeft(header.len)
|
|
|
|
|
2023-07-26 09:47:46 +02:00
|
|
|
if data.len != header.len:
|
|
|
|
data = newSeqUninitialized[byte](header.len)
|
2022-01-07 11:13:19 +01:00
|
|
|
|
|
|
|
? readFileExact(f, data)
|
|
|
|
|
|
|
|
ok(header)
|
|
|
|
|
|
|
|
proc readIndexCount*(f: IoHandle): Result[int, string] =
|
|
|
|
var bytes: array[8, byte]
|
|
|
|
? f.readFileExact(bytes)
|
|
|
|
|
|
|
|
let count = uint64.fromBytesLE(bytes)
|
|
|
|
if count > (int.high() div 8) - 3: return err("count: too large")
|
|
|
|
|
|
|
|
let size = uint64(? f.getFileSize().mapErr(toString))
|
|
|
|
# Need to have at least this much data in the file to read an index with
|
|
|
|
# this count
|
|
|
|
if count > (size div 8 + 3): return err("count: too large")
|
|
|
|
|
|
|
|
ok(int(count)) # Sizes checked against int above
|
|
|
|
|
|
|
|
proc findIndexStartOffset*(f: IoHandle): Result[int64, string] =
|
|
|
|
? f.setFilePos(-8, SeekPosition.SeekCurrent).mapErr(toString)
|
|
|
|
|
|
|
|
let
|
|
|
|
count = ? f.readIndexCount() # Now we're back at the end of the index
|
|
|
|
bytes = count.int64 * 8 + 24
|
|
|
|
|
|
|
|
ok(-bytes)
|
|
|
|
|