nimbus-eth2/beacon_chain/spec/eth2_merkleization.nim
Jacek Sieka 1ef7d237cc
Shared validator pubkey (#5883)
This PR allows sharing the pubkey data between validators by using a
thread-local cache for pubkey data, netting about a 400mb mem usage
reduction on holesky due to us keeping 3 permanent + several ephemeral
state copies in memory at all times and each state copy holding a full
validator set.

The PR also introduces a hash cache for the key which gives ~14% speedup
for a full state `hash_tree_root` - the key makes up for a large part of
the `Validator` htr time.

Finally, the time it takes to copy a state goes down as well, from ~80 ms
to ~60 ms, for reasons similar to htr.

We use a `ptr` even if a `ref` could in theory have been used - there is
not much practical benefit to a `ref` (given it's mutable) while a `ptr`
is cheaper and easier to copy (when copying temporary states).

We could go further and cache a cooked pubkey but it turns out this is
quite intrusive - in all the relevant places, we're already using a
cooked key from the immutable validator data so there are no immediate
performance gains of doing so while managing the compressed -> cooked
key mapping would become more difficult - something for a future PR
perhaps.

Co-authored-by: Etan Kissling <etan@status.im>
2024-02-21 20:06:19 +01:00

108 lines
3.8 KiB
Nim

# beacon_chain
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [].}
# Import this module to get access to `hash_tree_root` for spec types
import
stew/endians2,
std/sets,
ssz_serialization/[merkleization, proofs],
./ssz_codec
from ./datatypes/base import HashedValidatorPubKeyItem
from ./datatypes/phase0 import HashedBeaconState, SignedBeaconBlock
from ./datatypes/altair import HashedBeaconState, SignedBeaconBlock
from ./datatypes/bellatrix import HashedBeaconState, SignedBeaconBlock
from ./datatypes/capella import HashedBeaconState, SignedBeaconBlock
from ./datatypes/deneb import HashedBeaconState, SignedBeaconBlock
export ssz_codec, merkleization, proofs
# Merkleizer specialization used for the deposit tree: an `SszMerkleizer2`
# instantiated with `DEPOSIT_CONTRACT_TREE_DEPTH + 1` as its static parameter.
type DepositsMerkleizer* = SszMerkleizer2[DEPOSIT_CONTRACT_TREE_DEPTH + 1]
# `ForkyHashedBeaconState`/`ForkyHashedSignedBeaconBlock` cannot be used here:
# importing them from `forks` would create a recursive module dependency, so
# every fork's type is listed explicitly instead.

# Compile-time guard: any attempt to call `hash_tree_root` on a
# `HashedBeaconState` is rejected with the error message below.
func hash_tree_root*(
    x: phase0.HashedBeaconState |
       altair.HashedBeaconState |
       bellatrix.HashedBeaconState |
       capella.HashedBeaconState |
       deneb.HashedBeaconState
) {.error: "HashedBeaconState should not be hashed".}
# Compile-time guard: any attempt to call `hash_tree_root` on a
# `SignedBeaconBlock` is rejected with the error message below.
func hash_tree_root*(
    x: phase0.SignedBeaconBlock |
       altair.SignedBeaconBlock |
       bellatrix.SignedBeaconBlock |
       capella.SignedBeaconBlock |
       deneb.SignedBeaconBlock
) {.error: "SignedBeaconBlock should not be hashed".}
func depositCountBytes*(x: uint64): array[32, byte] =
  ## Encode `x` as a 32-byte big-endian value.
  ##
  ## The four least-significant bytes of `x` are written to positions
  ## 28..31; every other byte of the result stays zero. Asserts that `x`
  ## fits in 32 bits.
  doAssert x <= uint64(high(uint32))
  var remaining = x
  # Walk from the last (least-significant) byte towards the front.
  for pos in countdown(31, 28):
    result[pos] = byte(remaining and 0xff'u64)
    remaining = remaining shr 8
func depositCountU64*(xs: openArray[byte]): uint64 =
  ## Decode a deposit count stored as a 32-byte big-endian integer.
  ##
  ## Only the last 4 bytes (`xs[28..31]`) may be non-zero, as
  ## MAX_DEPOSIT_COUNT is defined as 2^32 - 1; the leading 28 bytes are
  ## asserted to be zero. `xs` must hold at least 32 bytes.
  doAssert xs.len >= 32
  for i in 0 .. 27:
    doAssert xs[i] == 0
  # `toOpenArray` hands over a view of the trailing 8 bytes, avoiding the
  # temporary `seq` that the slice expression `xs[24..31]` would allocate.
  uint64.fromBytesBE(xs.toOpenArray(24, 31))
func init*(T: type DepositsMerkleizer, s: DepositContractState): DepositsMerkleizer =
  ## Build a `DepositsMerkleizer` from the `branch` of `s` and the deposit
  ## count decoded from `s.deposit_count`.
  DepositsMerkleizer.init(s.branch, depositCountU64(s.deposit_count))
func toDepositContractState*(merkleizer: DepositsMerkleizer): DepositContractState =
  ## Export the merkleizer as a `DepositContractState`: the first 32 combined
  ## chunks become `branch`, and the chunk count is written big-endian into
  ## the trailing 8 bytes of `deposit_count`.
  # TODO There is an off by one discrepancy in the size of the arrays here that
  #      need to be investigated. It shouldn't matter as long as the tree is
  #      not populated to its maximum size.
  let
    chunks = merkleizer.getCombinedChunks
    countBytes = merkleizer.getChunkCount().toBytesBE
  result.branch[0 .. 31] = chunks[0 .. 31]
  result.deposit_count[24 .. 31] = countBytes
func getDepositsRoot*(m: var DepositsMerkleizer): Eth2Digest =
  ## Return the merkleizer's final hash with its total chunk count mixed in
  ## as the length.
  let
    treeRoot = m.getFinalHash
    chunkCount = int(m.totalChunks)
  mixInLength(treeRoot, chunkCount)
func hash*(v: ref HashedValidatorPubKeyItem): Hash =
  ## Hash a cached pubkey item by its `key` only; a nil reference hashes to
  ## the default `Hash` value.
  if v.isNil:
    default(Hash)
  else:
    hash(v[].key)
func `==`*(a, b: ref HashedValidatorPubKeyItem): bool =
  ## Compare two cached pubkey items by `key`. Two nil references are equal;
  ## a nil reference never equals a non-nil one.
  if a.isNil or b.isNil:
    a.isNil and b.isNil
  else:
    a[].key == b[].key
func init*(T: type HashedValidatorPubKey, key: ValidatorPubKey): HashedValidatorPubKey =
  ## Return a `HashedValidatorPubKey` whose `value` points at an item holding
  ## `key` and its `hash_tree_root`, kept in a thread-local set so that
  ## repeated calls with the same key on one thread share a single item.
  # The `noSideEffect` block lets this `func` access the thread-local cache.
  {.noSideEffect.}:
    var keys {.threadvar.}: HashSet[ref HashedValidatorPubKeyItem]

    # `HashSet` can only be queried with a complete element, so a candidate
    # item is always built first, even if an equal one is already cached.
    let candidate = (ref HashedValidatorPubKeyItem)(
      key: key,
      root: hash_tree_root(key)
    )

    let shared =
      if not keys.containsOrIncl(candidate):
        # Not seen before: the candidate itself was just added to the set.
        candidate
      else:
        # Already cached: hand out the stored instance and let the
        # temporary candidate be discarded.
        try:
          keys[candidate]
        except KeyError:
          raiseAssert "just checked"

    HashedValidatorPubKey(value: addr shared[])