mirror of
https://github.com/status-im/nimbus-eth2.git
synced 2025-01-10 14:26:26 +00:00
5cab17dc1a
* database state storage benchmarking via ncli_db * more cleanups from immutable validator state branch * unexport some eth2_network constants and remove unused variables/templates * make two PeerScore constants public
501 lines
19 KiB
Nim
501 lines
19 KiB
Nim
{.push raises: [Defect].}
|
|
|
|
import
|
|
typetraits, tables,
|
|
stew/[endians2, io2, objects, results],
|
|
serialization, chronicles, snappy,
|
|
eth/db/[kvstore, kvstore_sqlite3],
|
|
./network_metadata,
|
|
./spec/[crypto, datatypes, digest, state_transition],
|
|
./ssz/[ssz_serialization, merkleization],
|
|
merkle_minimal, filepath
|
|
|
|
type
|
|
DbSeq*[T] = object
|
|
insertStmt: SqliteStmt[openArray[byte], void]
|
|
selectStmt: SqliteStmt[int64, openArray[byte]]
|
|
recordCount: int64
|
|
|
|
DbMap*[K, V] = object
|
|
db: SqStoreRef
|
|
keyspace: int
|
|
|
|
DepositsSeq = DbSeq[DepositData]
|
|
|
|
DepositsMerkleizer* = SszMerkleizer[depositContractLimit]
|
|
|
|
DepositContractSnapshot* = object
|
|
eth1Block*: Eth2Digest
|
|
depositContractState*: DepositContractState
|
|
|
|
BeaconChainDB* = ref object
|
|
## Database storing resolved blocks and states - resolved blocks are such
|
|
## blocks that form a chain back to the tail block.
|
|
##
|
|
## We assume that the database backend is working / not corrupt - as such,
|
|
## we will raise a Defect any time there is an issue. This should be
|
|
## revisited in the future, when/if the calling code safely can handle
|
|
## corruption of this kind.
|
|
##
|
|
## We do however make an effort not to crash on invalid data inside the
|
|
## database - this may have a number of "natural" causes such as switching
|
|
## between different versions of the client and accidentally using an old
|
|
## database.
|
|
backend: KvStoreRef
|
|
preset*: RuntimePreset
|
|
genesisDeposits*: DepositsSeq
|
|
checkpoint*: proc() {.gcsafe.}
|
|
|
|
Keyspaces* = enum
|
|
defaultKeyspace = "kvstore"
|
|
validatorIndexFromPubKey
|
|
|
|
DbKeyKind = enum
|
|
kHashToState
|
|
kHashToBlock
|
|
kHeadBlock
|
|
## Pointer to the most recent block selected by the fork choice
|
|
kTailBlock
|
|
## Pointer to the earliest finalized block - this is the genesis block when
|
|
## the chain starts, but might advance as the database gets pruned
|
|
## TODO: determine how aggressively the database should be pruned. For a
|
|
## healthy network sync, we probably need to store blocks at least
|
|
## past the weak subjectivity period.
|
|
kBlockSlotStateRoot
|
|
## BlockSlot -> state_root mapping
|
|
kGenesisBlockRoot
|
|
## Immutable reference to the network genesis state
|
|
## (needed for satisfying requests to the beacon node API).
|
|
kEth1PersistedTo
|
|
## (Obsolete) Used to point to the the latest ETH1 block hash which
|
|
## satisfied the follow distance and had its deposits persisted to disk.
|
|
kDepositsFinalizedByEth1
|
|
## A merkleizer checkpoint which can be used for computing the
|
|
## `deposit_root` of all eth1 finalized deposits (i.e. deposits
|
|
## confirmed by ETH1_FOLLOW_DISTANCE blocks). The `deposit_root`
|
|
## is acknowledged and confirmed by the attached web3 provider.
|
|
kDepositsFinalizedByEth2
|
|
## A merkleizer checkpoint used for computing merkle proofs of
|
|
## deposits added to Eth2 blocks (it may lag behind the finalized
|
|
## eth1 deposits checkpoint).
|
|
kHashToBlockSummary
|
|
## Cache of beacon block summaries - during startup when we construct the
|
|
## chain dag, loading full blocks takes a lot of time - the block
|
|
## summary contains a minimal snapshot of what's needed to instanciate
|
|
## the BlockRef tree.
|
|
kSpeculativeDeposits
|
|
## A merkelizer checkpoint created on the basis of deposit events
|
|
## that we were not able to verify against a `deposit_root` served
|
|
## by the web3 provider. This may happen on Geth nodes that serve
|
|
## only recent contract state data (i.e. only recent `deposit_roots`).
|
|
kHashToStateDiff
|
|
## Instead of storing full BeaconStates, one can store only the diff from
|
|
## a different state. As 75% of a typical BeaconState's serialized form's
|
|
## the validators, which are mostly immutable and append-only, just using
|
|
## a simple append-diff representation helps significantly. Various roots
|
|
## are stored in a mod-increment pattern across fixed-sized arrays, which
|
|
## addresses most of the rest of the BeaconState sizes.
|
|
|
|
BeaconBlockSummary* = object
|
|
slot*: Slot
|
|
parent_root*: Eth2Digest
|
|
|
|
const
|
|
# The largest object we're saving is the BeaconState, and by far, the largest
|
|
# part of it is the validator - each validator takes up at least 129 bytes
|
|
# in phase0, which means 100k validators is >12mb - in addition to this,
|
|
# there are several MB of hashes.
|
|
maxDecompressedDbRecordSize = 64*1024*1024
|
|
|
|
# Subkeys essentially create "tables" within the key-value store by prefixing
|
|
# each entry with a table id
|
|
|
|
func subkey(kind: DbKeyKind): array[1, byte] =
|
|
result[0] = byte ord(kind)
|
|
|
|
func subkey[N: static int](kind: DbKeyKind, key: array[N, byte]):
|
|
array[N + 1, byte] =
|
|
result[0] = byte ord(kind)
|
|
result[1 .. ^1] = key
|
|
|
|
func subkey(kind: type BeaconState, key: Eth2Digest): auto =
|
|
subkey(kHashToState, key.data)
|
|
|
|
func subkey(kind: type SignedBeaconBlock, key: Eth2Digest): auto =
|
|
subkey(kHashToBlock, key.data)
|
|
|
|
func subkey(kind: type BeaconBlockSummary, key: Eth2Digest): auto =
|
|
subkey(kHashToBlockSummary, key.data)
|
|
|
|
func subkey(kind: type BeaconStateDiff, key: Eth2Digest): auto =
|
|
subkey(kHashToStateDiff, key.data)
|
|
|
|
func subkey(root: Eth2Digest, slot: Slot): array[40, byte] =
|
|
var ret: array[40, byte]
|
|
# big endian to get a naturally ascending order on slots in sorted indices
|
|
ret[0..<8] = toBytesBE(slot.uint64)
|
|
# .. but 7 bytes should be enough for slots - in return, we get a nicely
|
|
# rounded key length
|
|
ret[0] = byte ord(kBlockSlotStateRoot)
|
|
ret[8..<40] = root.data
|
|
|
|
ret
|
|
|
|
template panic =
|
|
# TODO(zah): Could we recover from a corrupted database?
|
|
# Review all usages.
|
|
raiseAssert "The database should not be corrupted"
|
|
|
|
proc init*[T](Seq: type DbSeq[T], db: SqStoreRef, name: string): Seq =
|
|
db.exec("""
|
|
CREATE TABLE IF NOT EXISTS """ & name & """(
|
|
id INTEGER PRIMARY KEY,
|
|
value BLOB
|
|
);
|
|
""").expect "working database (disk broken/full?)"
|
|
|
|
let
|
|
insertStmt = db.prepareStmt(
|
|
"INSERT INTO " & name & "(value) VALUES (?);",
|
|
openArray[byte], void).expect("this is a valid statement")
|
|
|
|
selectStmt = db.prepareStmt(
|
|
"SELECT value FROM " & name & " WHERE id = ?;",
|
|
int64, openArray[byte]).expect("this is a valid statement")
|
|
|
|
countStmt = db.prepareStmt(
|
|
"SELECT COUNT(1) FROM " & name & ";",
|
|
NoParams, int64).expect("this is a valid statement")
|
|
|
|
var recordCount = int64 0
|
|
let countQueryRes = countStmt.exec do (res: int64):
|
|
recordCount = res
|
|
|
|
let found = countQueryRes.expect("working database (disk broken/full?)")
|
|
if not found: panic()
|
|
|
|
Seq(insertStmt: insertStmt,
|
|
selectStmt: selectStmt,
|
|
recordCount: recordCount)
|
|
|
|
proc add*[T](s: var DbSeq[T], val: T) =
|
|
var bytes = SSZ.encode(val)
|
|
s.insertStmt.exec(bytes).expect "working database (disk broken/full?)"
|
|
inc s.recordCount
|
|
|
|
template len*[T](s: DbSeq[T]): uint64 =
|
|
s.recordCount.uint64
|
|
|
|
proc get*[T](s: DbSeq[T], idx: uint64): T =
|
|
# This is used only locally
|
|
let resultAddr = addr result
|
|
|
|
let queryRes = s.selectStmt.exec(int64(idx) + 1) do (recordBytes: openArray[byte]):
|
|
try:
|
|
resultAddr[] = decode(SSZ, recordBytes, T)
|
|
except SerializationError:
|
|
panic()
|
|
|
|
let found = queryRes.expect("working database (disk broken/full?)")
|
|
if not found: panic()
|
|
|
|
proc createMap*(db: SqStoreRef, keyspace: int;
|
|
K, V: distinct type): DbMap[K, V] =
|
|
DbMap[K, V](db: db, keyspace: keyspace)
|
|
|
|
proc insert*[K, V](m: var DbMap[K, V], key: K, value: V) =
|
|
m.db.put(m.keyspace, SSZ.encode key, SSZ.encode value).expect("working database (disk broken/full?)")
|
|
|
|
proc contains*[K, V](m: DbMap[K, V], key: K): bool =
|
|
contains(m.db, SSZ.encode key).expect("working database (disk broken/full?)")
|
|
|
|
template insert*[K, V](t: var Table[K, V], key: K, value: V) =
|
|
add(t, key, value)
|
|
|
|
proc init*(T: type BeaconChainDB,
|
|
preset: RuntimePreset,
|
|
dir: string,
|
|
inMemory = false): BeaconChainDB =
|
|
if inMemory:
|
|
# TODO
|
|
# To support testing, the inMemory store should offer the complete
|
|
# functionalityof the database-backed one (i.e. tracking of deposits
|
|
# and validators)
|
|
T(backend: kvStore MemStoreRef.init(),
|
|
preset: preset)
|
|
else:
|
|
let s = secureCreatePath(dir)
|
|
doAssert s.isOk # TODO(zah) Handle this in a better way
|
|
|
|
let sqliteStore = SqStoreRef.init(
|
|
dir, "nbc", Keyspaces, manualCheckpoint = true).expect("working database (disk broken/full?)")
|
|
|
|
# Remove the deposits table we used before we switched
|
|
# to storing only deposit contract checkpoints
|
|
if sqliteStore.exec("DROP TABLE IF EXISTS deposits;").isErr:
|
|
debug "Failed to drop the deposits table"
|
|
|
|
var genesisDepositsSeq =
|
|
DbSeq[DepositData].init(sqliteStore, "genesis_deposits")
|
|
|
|
T(backend: kvStore sqliteStore,
|
|
preset: preset,
|
|
genesisDeposits: genesisDepositsSeq,
|
|
checkpoint: proc() = sqliteStore.checkpoint()
|
|
)
|
|
|
|
proc snappyEncode(inp: openArray[byte]): seq[byte] =
|
|
try:
|
|
snappy.encode(inp)
|
|
except CatchableError as err:
|
|
raiseAssert err.msg
|
|
|
|
proc put(db: BeaconChainDB, key: openArray[byte], v: Eth2Digest) =
|
|
db.backend.put(key, v.data).expect("working database (disk broken/full?)")
|
|
|
|
proc put(db: BeaconChainDB, key: openArray[byte], v: auto) =
|
|
db.backend.put(key, snappyEncode(SSZ.encode(v))).expect("working database (disk broken/full?)")
|
|
|
|
proc get(db: BeaconChainDB, key: openArray[byte], T: type Eth2Digest): Opt[T] =
|
|
var res: Opt[T]
|
|
proc decode(data: openArray[byte]) =
|
|
if data.len == 32:
|
|
res.ok Eth2Digest(data: toArray(32, data))
|
|
else:
|
|
# If the data can't be deserialized, it could be because it's from a
|
|
# version of the software that uses a different SSZ encoding
|
|
warn "Unable to deserialize data, old database?",
|
|
typ = name(T), dataLen = data.len
|
|
discard
|
|
|
|
discard db.backend.get(key, decode).expect("working database (disk broken/full?)")
|
|
|
|
res
|
|
|
|
type GetResult = enum
|
|
found = "Found"
|
|
notFound = "Not found"
|
|
corrupted = "Corrupted"
|
|
|
|
proc get[T](db: BeaconChainDB, key: openArray[byte], output: var T): GetResult =
|
|
var status = GetResult.notFound
|
|
|
|
# TODO address is needed because there's no way to express lifetimes in nim
|
|
# we'll use unsafeAddr to find the code later
|
|
var outputPtr = unsafeAddr output # callback is local, ptr wont escape
|
|
proc decode(data: openArray[byte]) =
|
|
try:
|
|
let decompressed = snappy.decode(data, maxDecompressedDbRecordSize)
|
|
if decompressed.len > 0:
|
|
outputPtr[] = SSZ.decode(decompressed, T, updateRoot = false)
|
|
status = GetResult.found
|
|
else:
|
|
warn "Corrupt snappy record found in database", typ = name(T)
|
|
status = GetResult.corrupted
|
|
except SerializationError as e:
|
|
# If the data can't be deserialized, it could be because it's from a
|
|
# version of the software that uses a different SSZ encoding
|
|
warn "Unable to deserialize data, old database?",
|
|
err = e.msg, typ = name(T), dataLen = data.len
|
|
status = GetResult.corrupted
|
|
|
|
discard db.backend.get(key, decode).expect("working database (disk broken/full?)")
|
|
|
|
status
|
|
|
|
proc close*(db: BeaconChainDB) =
|
|
discard db.backend.close()
|
|
|
|
func toBeaconBlockSummary(v: SomeBeaconBlock): BeaconBlockSummary =
|
|
BeaconBlockSummary(
|
|
slot: v.slot,
|
|
parent_root: v.parent_root,
|
|
)
|
|
|
|
# TODO: we should only store TrustedSignedBeaconBlock in the DB.
|
|
proc putBlock*(db: BeaconChainDB, value: SignedBeaconBlock) =
|
|
db.put(subkey(type value, value.root), value)
|
|
db.put(subkey(BeaconBlockSummary, value.root), value.message.toBeaconBlockSummary())
|
|
proc putBlock*(db: BeaconChainDB, value: TrustedSignedBeaconBlock) =
|
|
db.put(subkey(SignedBeaconBlock, value.root), value)
|
|
db.put(subkey(BeaconBlockSummary, value.root), value.message.toBeaconBlockSummary())
|
|
proc putBlock*(db: BeaconChainDB, value: SigVerifiedSignedBeaconBlock) =
|
|
db.put(subkey(SignedBeaconBlock, value.root), value)
|
|
db.put(subkey(BeaconBlockSummary, value.root), value.message.toBeaconBlockSummary())
|
|
|
|
proc putState*(db: BeaconChainDB, key: Eth2Digest, value: BeaconState) =
|
|
# TODO prune old states - this is less easy than it seems as we never know
|
|
# when or if a particular state will become finalized.
|
|
|
|
db.put(subkey(type value, key), value)
|
|
|
|
proc putState*(db: BeaconChainDB, value: BeaconState) =
|
|
db.putState(hash_tree_root(value), value)
|
|
|
|
proc putStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot,
|
|
value: Eth2Digest) =
|
|
db.put(subkey(root, slot), value)
|
|
|
|
proc putStateDiff*(db: BeaconChainDB, root: Eth2Digest, value: BeaconStateDiff) =
|
|
db.put(subkey(BeaconStateDiff, root), value)
|
|
|
|
proc delBlock*(db: BeaconChainDB, key: Eth2Digest) =
|
|
db.backend.del(subkey(SignedBeaconBlock, key)).expect("working database (disk broken/full?)")
|
|
db.backend.del(subkey(BeaconBlockSummary, key)).expect("working database (disk broken/full?)")
|
|
|
|
proc delState*(db: BeaconChainDB, key: Eth2Digest) =
|
|
db.backend.del(subkey(BeaconState, key)).expect("working database (disk broken/full?)")
|
|
|
|
proc delStateRoot*(db: BeaconChainDB, root: Eth2Digest, slot: Slot) =
|
|
db.backend.del(subkey(root, slot)).expect("working database (disk broken/full?)")
|
|
|
|
proc delStateDiff*(db: BeaconChainDB, root: Eth2Digest) =
|
|
db.backend.del(subkey(BeaconStateDiff, root)).expect("working database (disk broken/full?)")
|
|
|
|
proc putHeadBlock*(db: BeaconChainDB, key: Eth2Digest) =
|
|
db.put(subkey(kHeadBlock), key)
|
|
|
|
proc putTailBlock*(db: BeaconChainDB, key: Eth2Digest) =
|
|
db.put(subkey(kTailBlock), key)
|
|
|
|
proc putGenesisBlockRoot*(db: BeaconChainDB, key: Eth2Digest) =
|
|
db.put(subkey(kGenesisBlockRoot), key)
|
|
|
|
proc putEth1FinalizedTo*(db: BeaconChainDB,
|
|
eth1Checkpoint: DepositContractSnapshot) =
|
|
db.put(subkey(kDepositsFinalizedByEth1), eth1Checkpoint)
|
|
|
|
proc putEth2FinalizedTo*(db: BeaconChainDB,
|
|
eth1Checkpoint: DepositContractSnapshot) =
|
|
db.put(subkey(kDepositsFinalizedByEth2), eth1Checkpoint)
|
|
|
|
proc putSpeculativeDeposits*(db: BeaconChainDB,
|
|
eth1Checkpoint: DepositContractSnapshot) =
|
|
db.put(subkey(kSpeculativeDeposits), eth1Checkpoint)
|
|
|
|
proc getBlock*(db: BeaconChainDB, key: Eth2Digest): Opt[TrustedSignedBeaconBlock] =
|
|
# We only store blocks that we trust in the database
|
|
result.ok(TrustedSignedBeaconBlock())
|
|
if db.get(subkey(SignedBeaconBlock, key), result.get) != GetResult.found:
|
|
result.err()
|
|
else:
|
|
# set root after deserializing (so it doesn't get zeroed)
|
|
result.get().root = key
|
|
|
|
proc getBlockSummary*(db: BeaconChainDB, key: Eth2Digest): Opt[BeaconBlockSummary] =
|
|
# We only store blocks that we trust in the database
|
|
result.ok(BeaconBlockSummary())
|
|
if db.get(subkey(BeaconBlockSummary, key), result.get) != GetResult.found:
|
|
result.err()
|
|
|
|
proc getState*(
|
|
db: BeaconChainDB, key: Eth2Digest, output: var BeaconState,
|
|
rollback: RollbackProc): bool =
|
|
## Load state into `output` - BeaconState is large so we want to avoid
|
|
## re-allocating it if possible
|
|
## Return `true` iff the entry was found in the database and `output` was
|
|
## overwritten.
|
|
## Rollback will be called only if output was partially written - if it was
|
|
## not found at all, rollback will not be called
|
|
# TODO rollback is needed to deal with bug - use `noRollback` to ignore:
|
|
# https://github.com/nim-lang/Nim/issues/14126
|
|
# TODO RVO is inefficient for large objects:
|
|
# https://github.com/nim-lang/Nim/issues/13879
|
|
case db.get(subkey(BeaconState, key), output)
|
|
of GetResult.found:
|
|
true
|
|
of GetResult.notFound:
|
|
false
|
|
of GetResult.corrupted:
|
|
rollback(output)
|
|
false
|
|
|
|
proc getStateRoot*(db: BeaconChainDB,
|
|
root: Eth2Digest,
|
|
slot: Slot): Opt[Eth2Digest] =
|
|
db.get(subkey(root, slot), Eth2Digest)
|
|
|
|
proc getStateDiff*(db: BeaconChainDB,
|
|
root: Eth2Digest): Opt[BeaconStateDiff] =
|
|
result.ok(BeaconStateDiff())
|
|
if db.get(subkey(BeaconStateDiff, root), result.get) != GetResult.found:
|
|
result.err
|
|
|
|
proc getHeadBlock*(db: BeaconChainDB): Opt[Eth2Digest] =
|
|
db.get(subkey(kHeadBlock), Eth2Digest)
|
|
|
|
proc getTailBlock*(db: BeaconChainDB): Opt[Eth2Digest] =
|
|
db.get(subkey(kTailBlock), Eth2Digest)
|
|
|
|
proc getGenesisBlockRoot*(db: BeaconChainDB): Eth2Digest =
|
|
db.get(subkey(kGenesisBlockRoot), Eth2Digest).expect(
|
|
"The database must be seeded with the genesis state")
|
|
|
|
proc getEth1FinalizedTo*(db: BeaconChainDB): Opt[DepositContractSnapshot] =
|
|
result.ok(DepositContractSnapshot())
|
|
let r = db.get(subkey(kDepositsFinalizedByEth1), result.get)
|
|
if r != found: result.err()
|
|
|
|
proc getEth2FinalizedTo*(db: BeaconChainDB): Opt[DepositContractSnapshot] =
|
|
result.ok(DepositContractSnapshot())
|
|
let r = db.get(subkey(kDepositsFinalizedByEth2), result.get)
|
|
if r != found: result.err()
|
|
|
|
proc getSpeculativeDeposits*(db: BeaconChainDB): Opt[DepositContractSnapshot] =
|
|
result.ok(DepositContractSnapshot())
|
|
let r = db.get(subkey(kSpeculativeDeposits), result.get)
|
|
if r != found: result.err()
|
|
|
|
proc delSpeculativeDeposits*(db: BeaconChainDB) =
|
|
db.backend.del(subkey(kSpeculativeDeposits)).expect("working database (disk broken/full?)")
|
|
|
|
proc containsBlock*(db: BeaconChainDB, key: Eth2Digest): bool =
|
|
db.backend.contains(subkey(SignedBeaconBlock, key)).expect("working database (disk broken/full?)")
|
|
|
|
proc containsState*(db: BeaconChainDB, key: Eth2Digest): bool =
|
|
db.backend.contains(subkey(BeaconState, key)).expect("working database (disk broken/full?)")
|
|
|
|
proc containsStateDiff*(db: BeaconChainDB, key: Eth2Digest): bool =
|
|
db.backend.contains(subkey(BeaconStateDiff, key)).expect("working database (disk broken/full?)")
|
|
|
|
iterator getAncestors*(db: BeaconChainDB, root: Eth2Digest):
|
|
TrustedSignedBeaconBlock =
|
|
## Load a chain of ancestors for blck - returns a list of blocks with the
|
|
## oldest block last (blck will be at result[0]).
|
|
##
|
|
## The search will go on until the ancestor cannot be found.
|
|
|
|
var
|
|
res: TrustedSignedBeaconBlock
|
|
root = root
|
|
while db.get(subkey(SignedBeaconBlock, root), res) == GetResult.found:
|
|
res.root = root
|
|
yield res
|
|
root = res.message.parent_root
|
|
|
|
iterator getAncestorSummaries*(db: BeaconChainDB, root: Eth2Digest):
|
|
tuple[root: Eth2Digest, summary: BeaconBlockSummary] =
|
|
## Load a chain of ancestors for blck - returns a list of blocks with the
|
|
## oldest block last (blck will be at result[0]).
|
|
##
|
|
## The search will go on until the ancestor cannot be found.
|
|
|
|
var
|
|
res: tuple[root: Eth2Digest, summary: BeaconBlockSummary]
|
|
tmp: TrustedSignedBeaconBlock
|
|
root = root
|
|
|
|
while true:
|
|
if db.get(subkey(BeaconBlockSummary, root), res.summary) == GetResult.found:
|
|
res.root = root
|
|
yield res
|
|
elif db.get(subkey(SignedBeaconBlock, root), tmp) == GetResult.found:
|
|
res.summary = tmp.message.toBeaconBlockSummary()
|
|
db.put(subkey(BeaconBlockSummary, root), res.summary)
|
|
res.root = root
|
|
yield res
|
|
else:
|
|
break
|
|
|
|
root = res.summary.parent_root
|