From 43d93bcdabe39e10751a42a984ad02aea76085e1 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Fri, 16 Aug 2024 08:22:51 +0200 Subject: [PATCH] Don't write slot hashes on import (#2564) The reverse slot hash mechanism causes quite a bit of database traffic but is broadly not useful except for iterating the storage of an account, something that a validator never does (it's used by the tracers). This flag adds one more thing that is not stored in the database, to be explored more comprehensively when designing full, validator and archive modes with different pruning options in the future. `ldb` says this is 60gb of data (!): ``` ldb --db=. --ignore_unknown_options --column_family=KvtGen approxsize --hex --from=0x05 --to=0x05ffffffffffffffffffffffffffffffffffffffffffffff 66488353954 ``` --- hive_integration/nodocker/engine/node.nim | 2 +- nimbus/config.nim | 6 ++++ nimbus/core/chain/persist_blocks.nim | 10 +++++-- nimbus/db/ledger.nim | 12 ++++---- nimbus/db/ledger/backend/accounts_ledger.nim | 6 ++-- nimbus/evm/state.nim | 30 +++++++++++++------- nimbus/nimbus_import.nim | 3 +- nimbus/tracer.nim | 13 +++++---- tests/test_ledger.nim | 2 +- 9 files changed, 52 insertions(+), 32 deletions(-) diff --git a/hive_integration/nodocker/engine/node.nim b/hive_integration/nodocker/engine/node.nim index 02ccaed31..6010dbd63 100644 --- a/hive_integration/nodocker/engine/node.nim +++ b/hive_integration/nodocker/engine/node.nim @@ -81,7 +81,7 @@ proc getVmState(c: ChainRef, header: BlockHeader): return ok(c.vmState) let vmState = BaseVMState() - if not vmState.init(header, c.com): + if not vmState.init(header, c.com, storeSlotHash = storeSlotHash): debug "Cannot initialise VmState", number = header.number return err() diff --git a/nimbus/config.nim b/nimbus/config.nim index c16c8badf..c1d7270c6 100644 --- a/nimbus/config.nim +++ b/nimbus/config.nim @@ -550,6 +550,12 @@ type defaultValue: false name: "debug-store-receipts".}: bool + storeSlotHashes* {. + hidden + desc: "Store reverse slot hashes in database" + defaultValue: false + name: "debug-store-slot-hashes".}: bool + func parseCmdArg(T: type NetworkId, p: string): T {.gcsafe, raises: [ValueError].} = parseInt(p).T diff --git a/nimbus/core/chain/persist_blocks.nim b/nimbus/core/chain/persist_blocks.nim index ee97996b7..7ba8d0209 100644 --- a/nimbus/core/chain/persist_blocks.nim +++ b/nimbus/core/chain/persist_blocks.nim @@ -37,6 +37,7 @@ type NoPersistUncles NoPersistWithdrawals NoPersistReceipts + NoPersistSlotHashes PersistBlockFlags* = set[PersistBlockFlag] @@ -54,12 +55,14 @@ const # Private # ------------------------------------------------------------------------------ -proc getVmState(c: ChainRef, header: BlockHeader): Result[BaseVMState, string] = +proc getVmState( + c: ChainRef, header: BlockHeader, storeSlotHash = false +): Result[BaseVMState, string] = if not c.vmState.isNil: return ok(c.vmState) let vmState = BaseVMState() - if not vmState.init(header, c.com): + if not vmState.init(header, c.com, storeSlotHash = storeSlotHash): return err("Could not initialise VMState") ok(vmState) @@ -86,7 +89,8 @@ proc persistBlocksImpl( # Note that `0 < headers.len`, assured when called from `persistBlocks()` let - vmState = ?c.getVmState(blocks[0].header) + vmState = + ?c.getVmState(blocks[0].header, storeSlotHash = NoPersistSlotHashes notin flags) fromBlock = blocks[0].header.number toBlock = blocks[blocks.high()].header.number trace "Persisting blocks", fromBlock, toBlock diff --git a/nimbus/db/ledger.nim b/nimbus/db/ledger.nim index b54a53660..b5774e0fd 100644 --- a/nimbus/db/ledger.nim +++ b/nimbus/db/ledger.nim @@ -22,18 +22,16 @@ import ./ledger/base/[base_config, base_desc, base_helpers], ./ledger/[base, base_iterators] -export - AccountsLedgerRef, - base, - base_config, - base_iterators +export AccountsLedgerRef, base, base_config, base_iterators # ------------------------------------------------------------------------------ # Public constructor # ------------------------------------------------------------------------------ -proc init*(_: type LedgerRef, db: CoreDbRef; root: Hash256): LedgerRef = - LedgerRef(ac: AccountsLedgerRef.init(db, root)).bless(db) +proc init*( + _: type LedgerRef, db: CoreDbRef, root: Hash256, storeSlotHash: bool = false +): LedgerRef = + LedgerRef(ac: AccountsLedgerRef.init(db, root, storeSlotHash)).bless(db) # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/ledger/backend/accounts_ledger.nim b/nimbus/db/ledger/backend/accounts_ledger.nim index ab4fb1175..efcabef17 100644 --- a/nimbus/db/ledger/backend/accounts_ledger.nim +++ b/nimbus/db/ledger/backend/accounts_ledger.nim @@ -65,6 +65,7 @@ type witnessCache: Table[EthAddress, WitnessData] isDirty: bool ripemdSpecial: bool + storeSlotHash*: bool cache: Table[EthAddress, AccountRef] # Second-level cache for the ledger save point, which is cleared on every # persist @@ -149,11 +150,12 @@ proc resetCoreDbAccount(ac: AccountsLedgerRef, acc: AccountRef) = # The AccountsLedgerRef is modeled after TrieDatabase for it's transaction style proc init*(x: typedesc[AccountsLedgerRef], db: CoreDbRef, - root: KeccakHash): AccountsLedgerRef = + root: KeccakHash, storeSlotHash: bool): AccountsLedgerRef = new result result.ledger = db.ctx.getAccounts() result.kvt = db.ctx.getKvt() result.witnessCache = Table[EthAddress, WitnessData]() + result.storeSlotHash = storeSlotHash discard result.beginSavepoint proc init*(x: typedesc[AccountsLedgerRef], db: CoreDbRef): AccountsLedgerRef = @@ -400,7 +402,7 @@ proc persistStorage(acc: AccountRef, ac: AccountsLedgerRef) = discard acc.originalStorage.del(slot) - if not cached: + if ac.storeSlotHash and not cached: # Write only if it was not cached to avoid writing the same data over and # over.. let diff --git a/nimbus/evm/state.nim b/nimbus/evm/state.nim index 4d0e0a128..06a661341 100644 --- a/nimbus/evm/state.nim +++ b/nimbus/evm/state.nim @@ -73,7 +73,8 @@ proc new*( parent: BlockHeader; ## parent header, account sync position blockCtx: BlockContext; com: CommonRef; ## block chain config - tracer: TracerRef = nil): T = + tracer: TracerRef = nil, + storeSlotHash = false): T = ## Create a new `BaseVMState` descriptor from a parent block header. This ## function internally constructs a new account state cache rooted at ## `parent.stateRoot` @@ -83,7 +84,7 @@ proc new*( ## with the `parent` block header. new result result.init( - ac = LedgerRef.init(com.db, parent.stateRoot), + ac = LedgerRef.init(com.db, parent.stateRoot, storeSlotHash), parent = parent, blockCtx = blockCtx, com = com, @@ -109,7 +110,7 @@ proc reinit*(self: BaseVMState; ## Object descriptor com = self.com db = com.db ac = if linear or self.stateDB.rootHash == parent.stateRoot: self.stateDB - else: LedgerRef.init(db, parent.stateRoot) + else: LedgerRef.init(db, parent.stateRoot, self.stateDB.ac.storeSlotHash) flags = self.flags self[].reset self.init( @@ -157,7 +158,8 @@ proc init*( parent: BlockHeader; ## parent header, account sync position header: BlockHeader; ## header with tx environment data fields com: CommonRef; ## block chain config - tracer: TracerRef = nil) = + tracer: TracerRef = nil, + storeSlotHash = false) = ## Variant of `new()` constructor above for in-place initalisation. The ## `parent` argument is used to sync the accounts cache and the `header` ## is used as a container to pass the `timestamp`, `gasLimit`, and `fee` @@ -166,7 +168,7 @@ proc init*( ## It requires the `header` argument properly initalised so that for PoA ## networks, the miner address is retrievable via `ecRecover()`. self.init( - ac = LedgerRef.init(com.db, parent.stateRoot), + ac = LedgerRef.init(com.db, parent.stateRoot, storeSlotHash), parent = parent, blockCtx = com.blockCtx(header), com = com, @@ -177,7 +179,8 @@ proc new*( parent: BlockHeader; ## parent header, account sync position header: BlockHeader; ## header with tx environment data fields com: CommonRef; ## block chain config - tracer: TracerRef = nil): T = + tracer: TracerRef = nil, + storeSlotHash = false): T = ## This is a variant of the `new()` constructor above where the `parent` ## argument is used to sync the accounts cache and the `header` is used ## as a container to pass the `timestamp`, `gasLimit`, and `fee` values. @@ -189,13 +192,15 @@ proc new*( parent = parent, header = header, com = com, - tracer = tracer) + tracer = tracer, + storeSlotHash = storeSlotHash) proc new*( T: type BaseVMState; header: BlockHeader; ## header with tx environment data fields com: CommonRef; ## block chain config - tracer: TracerRef = nil): EvmResult[T] = + tracer: TracerRef = nil, + storeSlotHash = false): EvmResult[T] = ## This is a variant of the `new()` constructor above where the field ## `header.parentHash`, is used to fetch the `parent` BlockHeader to be ## used in the `new()` variant, above. @@ -205,7 +210,8 @@ proc new*( parent = parent, header = header, com = com, - tracer = tracer)) + tracer = tracer, + storeSlotHash = storeSlotHash)) else: err(evmErr(EvmHeaderNotFound)) @@ -213,7 +219,8 @@ proc init*( vmState: BaseVMState; header: BlockHeader; ## header with tx environment data fields com: CommonRef; ## block chain config - tracer: TracerRef = nil): bool = + tracer: TracerRef = nil, + storeSlotHash = false): bool = ## Variant of `new()` which does not throw an exception on a dangling ## `BlockHeader` parent hash reference. var parent: BlockHeader @@ -222,7 +229,8 @@ proc init*( parent = parent, header = header, com = com, - tracer = tracer) + tracer = tracer, + storeSlotHash = storeSlotHash) return true func coinbase*(vmState: BaseVMState): EthAddress = diff --git a/nimbus/nimbus_import.nim b/nimbus/nimbus_import.nim index 8b7018139..6b16325f1 100644 --- a/nimbus/nimbus_import.nim +++ b/nimbus/nimbus_import.nim @@ -103,7 +103,8 @@ proc importBlocks*(conf: NimbusConf, com: CommonRef) = boolFlag({PersistBlockFlag.NoValidation}, conf.noValidation) + boolFlag({PersistBlockFlag.NoFullValidation}, not conf.fullValidation) + boolFlag(NoPersistBodies, not conf.storeBodies) + - boolFlag({PersistBlockFlag.NoPersistReceipts}, not conf.storeReceipts) + boolFlag({PersistBlockFlag.NoPersistReceipts}, not conf.storeReceipts) + + boolFlag({PersistBlockFlag.NoPersistSlotHashes}, not conf.storeSlotHashes) blocks: seq[EthBlock] clConfig: Eth2NetworkMetadata genesis_validators_root: Eth2Digest diff --git a/nimbus/tracer.nim b/nimbus/tracer.nim index 440cc511f..1f328fde9 100644 --- a/nimbus/tracer.nim +++ b/nimbus/tracer.nim @@ -58,7 +58,7 @@ template safeTracer(info: string; code: untyped) = raiseAssert info & " name=" & $e.name & " msg=" & e.msg # ------------------- - + proc init( T: type CaptCtxRef; com: CommonRef; @@ -169,7 +169,7 @@ proc traceTransactionImpl( let tracerInst = newLegacyTracer(tracerFlags) cc = activate CaptCtxRef.init(com, header) - vmState = BaseVMState.new(header, com).valueOr: return newJNull() + vmState = BaseVMState.new(header, com, storeSlotHash = true).valueOr: return newJNull() stateDb = vmState.stateDB defer: cc.release() @@ -217,7 +217,7 @@ proc traceTransactionImpl( # internal transactions: let cx = activate stateCtx - ldgBefore = LedgerRef.init(com.db, cx.root) + ldgBefore = LedgerRef.init(com.db, cx.root, storeSlotHash = true) defer: cx.release() for idx, acc in tracedAccountsPairs(tracerInst): @@ -252,7 +252,7 @@ proc dumpBlockStateImpl( # only need a stack dump when scanning for internal transaction address captureFlags = {DisableMemory, DisableStorage, EnableAccount} tracerInst = newLegacyTracer(captureFlags) - vmState = BaseVMState.new(header, com, tracerInst).valueOr: + vmState = BaseVMState.new(header, com, tracerInst, storeSlotHash = true).valueOr: return newJNull() miner = vmState.coinbase() @@ -261,7 +261,7 @@ proc dumpBlockStateImpl( var before = newJArray() after = newJArray() - stateBefore = LedgerRef.init(com.db, parent.stateRoot) + stateBefore = LedgerRef.init(com.db, parent.stateRoot, storeSlotHash = true) for idx, tx in blk.transactions: let sender = tx.getSender @@ -316,7 +316,8 @@ proc traceBlockImpl( let cc = activate CaptCtxRef.init(com, header) tracerInst = newLegacyTracer(tracerFlags) - vmState = BaseVMState.new(header, com, tracerInst).valueOr: + # Tracer needs a database where the reverse slot hash table has been set up + vmState = BaseVMState.new(header, com, tracerInst, storeSlotHash = true).valueOr: return newJNull() defer: cc.release() diff --git a/tests/test_ledger.nim b/tests/test_ledger.nim index 5308f8804..b86a03983 100644 --- a/tests/test_ledger.nim +++ b/tests/test_ledger.nim @@ -675,7 +675,7 @@ proc runLedgerBasicOperationsTests() = check ac.contractCollision(addr4) == true test "Ledger storage iterator": - var ac = LedgerRef.init(memDB, EMPTY_ROOT_HASH) + var ac = LedgerRef.init(memDB, EMPTY_ROOT_HASH, storeSlotHash = true) let addr2 = initAddr(2) ac.setStorage(addr2, 1.u256, 2.u256) ac.setStorage(addr2, 2.u256, 3.u256)