From 242bbf03fcc1046aeef494cab168e9779cc4f1a6 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sat, 15 Jun 2024 11:22:37 +0200 Subject: [PATCH] Light verification and storage mode for import (#2367) When performing block import, we can batch state root verifications and header checks, doing them only once per chunk of blocks, assuming that the other blocks in the batch are valid by extension. When we're not generating receipts, we can also skip per-transaction state root computation pre-byzantium, which is what provides a ~20% speedup in this PR, at least on those early blocks :) We also stop storing transactions, receipts and uncles redundantly when importing from era1 - there is no need to waste database storage on this when we can load it from the era1 file (eventually). --- nimbus/config.nim | 23 +++++++ nimbus/core/chain/persist_blocks.nim | 71 +++++++++++++++------ nimbus/core/executor/process_block.nim | 87 ++++++++++++++++---------- nimbus/core/validate.nim | 7 +-- nimbus/db/core_db/core_apps.nim | 4 -- nimbus/nimbus_import.nim | 13 +++- 6 files changed, 144 insertions(+), 61 deletions(-) diff --git a/nimbus/config.nim b/nimbus/config.nim index 5cf3cd10b..a5487f4a1 100644 --- a/nimbus/config.nim +++ b/nimbus/config.nim @@ -516,6 +516,29 @@ type desc: "Save performance statistics to CSV" name: "debug-csv-stats".}: Option[string] + # TODO validation and storage options should be made non-hidden when the + # UX has stabilised and era1 storage is in the app + fullValidation* {. + hidden + desc: "Enable full per-block validation (slow)" + defaultValue: false + name: "debug-full-validation".}: bool + + storeBodies* {. + hidden + desc: "Store block blodies in database" + defaultValue: false + name: "debug-store-bodies".}: bool + + # TODO this option should probably only cover the redundant parts, ie + # those that are in era1 files - era files presently do not store + # receipts + storeReceipts* {. + hidden + desc: "Store receipts in database" + defaultValue: false + name: "debug-store-receipts".}: bool + func parseCmdArg(T: type NetworkId, p: string): T {.gcsafe, raises: [ValueError].} = parseInt(p).T diff --git a/nimbus/core/chain/persist_blocks.nim b/nimbus/core/chain/persist_blocks.nim index 12982cea6..791b3bef4 100644 --- a/nimbus/core/chain/persist_blocks.nim +++ b/nimbus/core/chain/persist_blocks.nim @@ -29,17 +29,21 @@ when not defined(release): export results type - PersistBlockFlag = enum + PersistBlockFlag* = enum + NoFullValidation # Validate the batch instead of validating each block in it NoPersistHeader - NoSaveTxs - NoSaveReceipts - NoSaveWithdrawals + NoPersistTransactions + NoPersistUncles + NoPersistWithdrawals + NoPersistReceipts - PersistBlockFlags = set[PersistBlockFlag] + PersistBlockFlags* = set[PersistBlockFlag] PersistStats = tuple[blocks: int, txs: int, gas: GasInt] const + NoPersistBodies* = {NoPersistTransactions, NoPersistUncles, NoPersistWithdrawals} + CleanUpEpoch = 30_000.BlockNumber ## Regular checks for history clean up (applies to single state DB). This ## is mainly a debugging/testing feature so that the database can be held @@ -95,10 +99,30 @@ proc persistBlocksImpl( template header(): BlockHeader = blk.header + # Full validation means validating the state root at every block and + # performing the more expensive hash computations on the block itself, ie + # verifying that the transaction and receipts roots are valid - when not + # doing full validation, we skip these expensive checks relying instead + # on the source of the data to have performed them previously or because + # the cost of failure is low. + # TODO Figure out the right balance for header fields - in particular, if + # we receive instruction from the CL while syncing that a block is + # CL-valid, do we skip validation while "far from head"? probably yes. + # This requires performing a header-chain validation from that CL-valid + # block which the current code doesn't express. + # Also, the potential avenues for corruption should be described with + # more rigor, ie if the txroot doesn't match but everything else does, + # can the state root of the last block still be correct? Dubious, but + # what would be the consequences? We would roll back the full set of + # blocks which is fairly low-cost. + let skipValidation = NoFullValidation in flags and header.number != toBlock + c.com.hardForkTransition(header) if blks > 0: - template parent(): BlockHeader = blocks[blks - 1].header + template parent(): BlockHeader = + blocks[blks - 1].header + let updated = if header.number == parent.number + 1 and header.parentHash == parentHash: vmState.reinit(parent = parent, header = header, linear = true) @@ -115,32 +139,43 @@ proc persistBlocksImpl( # in theory it's a fresh instance that should not need it (?) doAssert vmState.reinit(header = header) - if c.extraValidation and c.verifyFrom <= header.number: + # TODO even if we're skipping validation, we should perform basic sanity + # checks on the block and header - that fields are sanely set for the + # given hard fork and similar path-independent checks - these same + # sanity checks should be performed early in the processing pipeline no + # matter their provenance. + if not skipValidation and c.extraValidation and c.verifyFrom <= header.number: # TODO: how to checkseal from here - ?c.com.validateHeaderAndKinship(blk, checkSealOK = false) + ?c.com.validateHeaderAndKinship(blk, vmState.parent, checkSealOK = false) - ?vmState.processBlock(blk) + # Generate receipts for storage or validation but skip them otherwise + ?vmState.processBlock( + blk, + skipValidation, + skipReceipts = skipValidation and NoPersistReceipts in flags, + skipUncles = NoPersistUncles in flags, + ) # when defined(nimbusDumpDebuggingMetaData): # if validationResult == ValidationResult.Error and # body.transactions.calcTxRoot == header.txRoot: # vmState.dumpDebuggingMetaData(header, body) # warn "Validation error. Debugging metadata dumped." + let blockHash = header.blockHash() if NoPersistHeader notin flags: if not c.db.persistHeader( - blockHash, header, c.com.consensus == ConsensusType.POS, - c.com.startOfHistory + blockHash, header, c.com.consensus == ConsensusType.POS, c.com.startOfHistory ): return err("Could not persist header") - if NoSaveTxs notin flags: + if NoPersistTransactions notin flags: c.db.persistTransactions(header.number, blk.transactions) - if NoSaveReceipts notin flags: + if NoPersistReceipts notin flags: c.db.persistReceipts(vmState.receipts) - if NoSaveWithdrawals notin flags and blk.withdrawals.isSome: + if NoPersistWithdrawals notin flags and blk.withdrawals.isSome: c.db.persistWithdrawals(blk.withdrawals.get) # update currentBlock *after* we persist it @@ -176,7 +211,7 @@ proc persistBlocksImpl( # ------------------------------------------------------------------------------ proc insertBlockWithoutSetHead*(c: ChainRef, blk: EthBlock): Result[void, string] = - discard ?c.persistBlocksImpl([blk], {NoPersistHeader, NoSaveReceipts}) + discard ?c.persistBlocksImpl([blk], {NoPersistHeader, NoPersistReceipts}) if not c.db.persistHeader(blk.header.blockHash, blk.header, c.com.startOfHistory): return err("Could not persist header") @@ -203,7 +238,7 @@ proc setCanonical*(c: ChainRef, header: BlockHeader): Result[void, string] = discard ?c.persistBlocksImpl( - [EthBlock.init(header, move(body))], {NoPersistHeader, NoSaveTxs} + [EthBlock.init(header, move(body))], {NoPersistHeader, NoPersistTransactions} ) try: @@ -221,14 +256,14 @@ proc setCanonical*(c: ChainRef, blockHash: Hash256): Result[void, string] = setCanonical(c, header) proc persistBlocks*( - c: ChainRef, blocks: openArray[EthBlock] + c: ChainRef, blocks: openArray[EthBlock], flags: PersistBlockFlags = {} ): Result[PersistStats, string] = # Run the VM here if blocks.len == 0: debug "Nothing to do" return ok(default(PersistStats)) # TODO not nice to return nil - c.persistBlocksImpl(blocks) + c.persistBlocksImpl(blocks, flags) # ------------------------------------------------------------------------------ # End diff --git a/nimbus/core/executor/process_block.nim b/nimbus/core/executor/process_block.nim index 46e991b13..14cd25b1b 100644 --- a/nimbus/core/executor/process_block.nim +++ b/nimbus/core/executor/process_block.nim @@ -28,9 +28,12 @@ import # Factored this out of procBlkPreamble so that it can be used directly for # stateless execution of specific transactions. proc processTransactions*( - vmState: BaseVMState, header: BlockHeader, transactions: seq[Transaction] + vmState: BaseVMState, + header: BlockHeader, + transactions: seq[Transaction], + skipReceipts = false, ): Result[void, string] = - vmState.receipts = newSeq[Receipt](transactions.len) + vmState.receipts.setLen(if skipReceipts: 0 else: transactions.len) vmState.cumulativeGasUsed = 0 for txIndex, tx in transactions: @@ -40,10 +43,17 @@ proc processTransactions*( let rc = vmState.processTransaction(tx, sender, header) if rc.isErr: return err("Error processing tx with index " & $(txIndex) & ":" & rc.error) - vmState.receipts[txIndex] = vmState.makeReceipt(tx.txType) + if skipReceipts: + # TODO don't generate logs at all if we're not going to put them in + # receipts + discard vmState.getAndClearLogEntries() + else: + vmState.receipts[txIndex] = vmState.makeReceipt(tx.txType) ok() -proc procBlkPreamble(vmState: BaseVMState, blk: EthBlock): Result[void, string] = +proc procBlkPreamble( + vmState: BaseVMState, blk: EthBlock, skipValidation, skipReceipts, skipUncles: bool +): Result[void, string] = template header(): BlockHeader = blk.header @@ -51,8 +61,9 @@ proc procBlkPreamble(vmState: BaseVMState, blk: EthBlock): Result[void, string] vmState.mutateStateDB: db.applyDAOHardFork() - if blk.transactions.calcTxRoot != header.txRoot: - return err("Mismatched txRoot") + if not skipValidation: # Expensive! + if blk.transactions.calcTxRoot != header.txRoot: + return err("Mismatched txRoot") if vmState.determineFork >= FkCancun: if header.parentBeaconBlockRoot.isNone: @@ -67,7 +78,7 @@ proc procBlkPreamble(vmState: BaseVMState, blk: EthBlock): Result[void, string] if blk.transactions.len == 0: return err("Transactions missing from body") - ?processTransactions(vmState, header, blk.transactions) + ?processTransactions(vmState, header, blk.transactions, skipReceipts) elif blk.transactions.len > 0: return err("Transactions in block with empty txRoot") @@ -92,15 +103,22 @@ proc procBlkPreamble(vmState: BaseVMState, blk: EthBlock): Result[void, string] return err("gasUsed mismatch") if header.ommersHash != EMPTY_UNCLE_HASH: - let h = vmState.com.db.persistUncles(blk.uncles) - if h != header.ommersHash: + # TODO It's strange that we persist uncles before processing block but the + # rest after... + if not skipUncles: + let h = vmState.com.db.persistUncles(blk.uncles) + if h != header.ommersHash: + return err("ommersHash mismatch") + elif not skipValidation and rlpHash(blk.uncles) != header.ommersHash: return err("ommersHash mismatch") elif blk.uncles.len > 0: return err("Uncles in block with empty uncle hash") ok() -proc procBlkEpilogue(vmState: BaseVMState, header: BlockHeader): Result[void, string] = +proc procBlkEpilogue( + vmState: BaseVMState, header: BlockHeader, skipValidation: bool +): Result[void, string] = # Reward beneficiary vmState.mutateStateDB: if vmState.collectWitnessData: @@ -108,28 +126,30 @@ proc procBlkEpilogue(vmState: BaseVMState, header: BlockHeader): Result[void, st db.persist(clearEmptyAccount = vmState.determineFork >= FkSpurious) - let stateDB = vmState.stateDB - if header.stateRoot != stateDB.rootHash: - # TODO replace logging with better error - debug "wrong state root in block", - blockNumber = header.number, - expected = header.stateRoot, - actual = stateDB.rootHash, - arrivedFrom = vmState.com.db.getCanonicalHead().stateRoot - return err("stateRoot mismatch") + if not skipValidation: + let stateDB = vmState.stateDB + if header.stateRoot != stateDB.rootHash: + # TODO replace logging with better error + debug "wrong state root in block", + blockNumber = header.number, + expected = header.stateRoot, + actual = stateDB.rootHash, + arrivedFrom = vmState.com.db.getCanonicalHead().stateRoot + return err("stateRoot mismatch") - let bloom = createBloom(vmState.receipts) - if header.logsBloom != bloom: - return err("bloom mismatch") + let bloom = createBloom(vmState.receipts) - let receiptsRoot = calcReceiptsRoot(vmState.receipts) - if header.receiptsRoot != receiptsRoot: - # TODO replace logging with better error - debug "wrong receiptRoot in block", - blockNumber = header.number, - actual = receiptsRoot, - expected = header.receiptsRoot - return err("receiptRoot mismatch") + if header.logsBloom != bloom: + return err("bloom mismatch") + + let receiptsRoot = calcReceiptsRoot(vmState.receipts) + if header.receiptsRoot != receiptsRoot: + # TODO replace logging with better error + debug "wrong receiptRoot in block", + blockNumber = header.number, + actual = receiptsRoot, + expected = header.receiptsRoot + return err("receiptRoot mismatch") ok() @@ -140,19 +160,22 @@ proc procBlkEpilogue(vmState: BaseVMState, header: BlockHeader): Result[void, st proc processBlock*( vmState: BaseVMState, ## Parent environment of header/body block blk: EthBlock, ## Header/body block to add to the blockchain + skipValidation: bool = false, + skipReceipts: bool = false, + skipUncles: bool = false, ): Result[void, string] = ## Generalised function to processes `blk` for any network. var dbTx = vmState.com.db.newTransaction() defer: dbTx.dispose() - ?vmState.procBlkPreamble(blk) + ?vmState.procBlkPreamble(blk, skipValidation, skipReceipts, skipUncles) # EIP-3675: no reward for miner in POA/POS if vmState.com.consensus == ConsensusType.POW: vmState.calculateReward(blk.header, blk.uncles) - ?vmState.procBlkEpilogue(blk.header) + ?vmState.procBlkEpilogue(blk.header, skipValidation) dbTx.commit() diff --git a/nimbus/core/validate.nim b/nimbus/core/validate.nim index 73d4c5058..38efca27f 100644 --- a/nimbus/core/validate.nim +++ b/nimbus/core/validate.nim @@ -373,6 +373,7 @@ proc validateTransaction*( proc validateHeaderAndKinship*( com: CommonRef; blk: EthBlock; + parent: BlockHeader; checkSealOK: bool; ): Result[void, string] {.gcsafe, raises: [].} = @@ -383,12 +384,6 @@ proc validateHeaderAndKinship*( return err("BlockHeader.extraData larger than 32 bytes") return ok() - let chainDB = com.db - let parent = try: - chainDB.getBlockHeader(header.parentHash) - except CatchableError as err: - return err("Failed to load block header from DB") - ? com.validateHeader(blk, parent, checkSealOK) if blk.uncles.len > MAX_UNCLES: diff --git a/nimbus/db/core_db/core_apps.nim b/nimbus/db/core_db/core_apps.nim index 75b29fcc6..92958968f 100644 --- a/nimbus/db/core_db/core_apps.nim +++ b/nimbus/db/core_db/core_apps.nim @@ -31,10 +31,6 @@ type blockNumber: BlockNumber index: uint -const - extraTraceMessages = false - ## Enabled additional logging noise - # ------------------------------------------------------------------------------ # Forward declarations # ------------------------------------------------------------------------------ diff --git a/nimbus/nimbus_import.nim b/nimbus/nimbus_import.nim index c21f03025..d91324a35 100644 --- a/nimbus/nimbus_import.nim +++ b/nimbus/nimbus_import.nim @@ -62,6 +62,12 @@ proc importBlocks*(conf: NimbusConf, com: CommonRef) = start = com.db.getSavedStateBlockNumber() + 1 chain = com.newChain() + template boolFlag(flags, b): PersistBlockFlags = + if b: + flags + else: + {} + var imported = 0'u64 gas = GasInt(0) @@ -80,6 +86,11 @@ proc importBlocks*(conf: NimbusConf, com: CommonRef) = quit(QuitFailure) else: File(nil) + flags = + boolFlag({PersistBlockFlag.NoFullValidation}, not conf.fullValidation) + + boolFlag(NoPersistBodies, not conf.storeBodies) + + boolFlag({PersistBlockFlag.NoPersistReceipts}, not conf.storeReceipts) + defer: if csv != nil: close(csv) @@ -109,7 +120,7 @@ proc importBlocks*(conf: NimbusConf, com: CommonRef) = template process() = let time1 = Moment.now() - statsRes = chain.persistBlocks(blocks) + statsRes = chain.persistBlocks(blocks, flags) if statsRes.isErr(): error "Failed to persist blocks", error = statsRes.error quit(QuitFailure)