WIP: Super compact account+storage state history

This commit is contained in:
Jamie Lokier 2022-02-08 13:59:37 +00:00
parent 78a88d9c0d
commit 8547e9c4b5
No known key found for this signature in database
GPG Key ID: CBC25C68435C30A2
16 changed files with 980 additions and 25 deletions

View File

@ -128,6 +128,7 @@ type
NimbusCmd* {.pure.} = enum
noCommand
`import`
blockExec
ProtocolFlag* {.pure.} = enum
## Protocol flags
@ -245,6 +246,13 @@ type
defaultValue: 10
name: "log-metrics-interval" .}: int
dbCompare* {.
desc: "Specify path of an archive-mode state history file and check all executed transaction states against that archive. " &
"This option is experimental, currently read-only, and the format is likely to change often"
defaultValue: ""
name: "db-compare"
includeIfEvmc }: string
bootstrapNodes {.
separator: "\pNETWORKING OPTIONS:"
desc: "Specifies one or more bootstrap nodes(as enode URL) to use when connecting to the network"
@ -418,6 +426,19 @@ type
defaultValue: ""
name: "blocks-file" }: InputFile
of blockExec:
blockNumberStart* {.
argument
desc: "Execute from local database starting with this block number",
defaultValueDesc: "0"
name: "start-block" }: Option[uint64]
blockNumberEnd* {.
argument
desc: "Execution stops at this block number",
defaultValueDesc: "no limit"
name: "end-block" }: Option[uint64]
proc parseCmdArg(T: type NetworkId, p: TaintedString): T =
  ## Turns a command-line argument into a `NetworkId` by running `parseInt`
  ## on the argument text and converting the integer to `T`.
  T(parseInt(string(p)))

View File

@ -16,8 +16,18 @@ const
# address zero by accident, unrecoverably, due to poor user interface issues.
ZERO_ADDRESS* = default(EthAddress)
# ZERO_HASH256 is the parent hash of genesis blocks.
ZERO_HASH256* = Hash256()
# Used as parent block hash of genesis blocks and other places where "no hash"
# is represented. This is all zero digits.
ZERO_HASH256* = "0000000000000000000000000000000000000000000000000000000000000000".toDigest
# Used for root of empty hexary trie: keccak256(RLP("")) == keccak256(0x80).
BLANK_ROOT_HASH* = "56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421".toDigest
# Used as code hash for contracts with no code: keccak256("").
EMPTY_SHA3* = "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470".toDigest
# Used for empty `ommersHash` in block headers: keccak256(RLP([])) == keccak256(0xc0).
EMPTY_UNCLE_HASH* = "1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347".toDigest
GAS_LIMIT_ADJUSTMENT_FACTOR* = 1_024
@ -28,8 +38,6 @@ const
MAX_UNCLE_DEPTH* = 6.u256
MAX_UNCLES* = 2
EMPTY_UNCLE_HASH* = "1dcc4de8dec75d7aab85b567b6ccd41ad312451b948a7413f0a142fd40d49347".toDigest
GENESIS_BLOCK_NUMBER* = 0.toBlockNumber
GENESIS_DIFFICULTY* = 131_072.u256
GENESIS_GAS_LIMIT* = 3_141_592
@ -42,9 +50,6 @@ const
GAS_LIMIT_MAXIMUM* = high(GasInt)
DEFAULT_GAS_LIMIT* = 8_000_000
BLANK_ROOT_HASH* = "56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421".toDigest
EMPTY_SHA3* = "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470".toDigest
GAS_MOD_EXP_QUADRATIC_DENOMINATOR* = 20.u256
MAX_PREV_HEADER_DEPTH* = 256.toBlockNumber

View File

@ -22,7 +22,8 @@ import
config, genesis, rpc/[common, p2p, debug], p2p/chain,
eth/trie/db, metrics, metrics/[chronos_httpserver, chronicles_support],
graphql/ethapi, context,
"."/[conf_utils, sealer, constants]
"."/[conf_utils, sealer, constants],
./transaction/[db_compare, db_exec_range]
when defined(evmc_enabled):
import transaction/evmc_dynamic_loader
@ -203,6 +204,8 @@ proc start(nimbus: NimbusNode, conf: NimbusConf) =
when defined(evmc_enabled):
evmcSetLibraryPath(conf.evm)
if conf.dbCompare.len > 0:
dbCompareOpen(conf.dbCompare)
createDir(string conf.dataDir)
let trieDB = trieDB newChainDb(string conf.dataDir)
@ -222,6 +225,8 @@ proc start(nimbus: NimbusNode, conf: NimbusConf) =
case conf.cmd
of NimbusCmd.`import`:
importBlocks(conf, chainDB)
of NimbusCmd.blockExec:
dbCompareExecBlocks(chainDB, conf.blockNumberStart, conf.blockNumberEnd)
else:
manageAccounts(nimbus, conf)
setupP2P(nimbus, conf, chainDB, protocols)
@ -279,4 +284,6 @@ when isMainModule:
let conf = makeConfig()
nimbus.start(conf)
nimbus.process(conf)
if conf.cmd == noCommand:
nimbus.process(conf)

View File

@ -22,7 +22,8 @@ import
./process_transaction,
chronicles,
eth/[common, trie/db],
nimcrypto
nimcrypto,
../../transaction/db_compare
{.push raises: [Defect].}
@ -33,6 +34,8 @@ import
proc procBlkPreamble(vmState: BaseVMState; dbTx: DbTransaction;
header: BlockHeader, body: BlockBody): bool
{.gcsafe, raises: [Defect,CatchableError].} =
dbCompareResetSeen()
if vmState.chainDB.config.daoForkSupport and
vmState.chainDB.config.daoForkBlock == header.blockNumber:
vmState.mutateStateDB:

View File

@ -32,7 +32,6 @@ proc eip1559TxNormalization(tx: Transaction): Transaction =
result.maxPriorityFee = tx.gasPrice
result.maxFee = tx.gasPrice
proc processTransactionImpl(tx: Transaction, sender: EthAddress,
vmState: BaseVMState, fork: Fork): GasInt
# wildcard exception, wrapped below

View File

@ -217,6 +217,8 @@ proc validateTransaction*(vmState: BaseVMState, tx: Transaction,
sender: EthAddress, fork: Fork): bool =
let balance = vmState.readOnlyStateDB.getBalance(sender)
let nonce = vmState.readOnlyStateDB.getNonce(sender)
# NOTE: Not comparing balance and nonce here against the compact db.
# They will be read again when executing the transaction.
if tx.txType == TxEip2930 and fork < FkBerlin:
debug "invalid tx: Eip2930 Tx type detected before Berlin"

View File

@ -40,6 +40,7 @@ proc toJson*(receipts: seq[Receipt]): JsonNode =
result.add receipt.toJson
proc captureAccount(n: JsonNode, db: AccountsCache, address: EthAddress, name: string) =
# TODO: It might be useful to add dbCompare here.
var jaccount = newJObject()
jaccount["name"] = %name
jaccount["address"] = %("0x" & $address)

View File

@ -11,7 +11,7 @@ import
".."/[vm_types, vm_state, vm_computation, vm_state_transactions],
".."/[vm_internals, vm_precompiles, vm_gas_costs],
".."/[db/accounts_cache, forks],
./host_types
./host_types, ./db_compare
when defined(evmc_enabled):
import ".."/[utils]
@ -140,8 +140,10 @@ proc setupHost(call: CallParams): TransactionHost =
var code: seq[byte]
if call.isCreate:
let sender = call.sender
let contractAddress =
generateAddress(sender, call.vmState.readOnlyStateDB.getNonce(sender))
let nonce = call.vmState.readOnlyStateDB.getNonce(sender)
if host.dbCompare:
host.dbCompareNonce(sender, nonce)
let contractAddress = generateAddress(sender, nonce)
host.msg.destination = contractAddress.toEvmc
host.msg.input_size = 0
host.msg.input_data = nil
@ -150,6 +152,9 @@ proc setupHost(call: CallParams): TransactionHost =
# TODO: Share the underlying data, but only after checking this does not
# cause problems with the database.
code = host.vmState.readOnlyStateDB.getCode(host.msg.destination.fromEvmc)
if host.dbCompare:
let codeHash = host.vmState.readOnlyStateDB.getCodeHash(host.msg.destination.fromEvmc)
host.dbCompareCodeHash(host.msg.destination.fromEvmc, codeHash)
if call.input.len > 0:
host.msg.input_size = call.input.len.csize_t
# Must copy the data so the `host.msg.input_data` pointer
@ -210,6 +215,8 @@ proc runComputation*(call: CallParams): CallResult =
db.subBalance(call.sender, call.gasLimit.u256 * call.gasPrice.u256)
when defined(evmc_enabled):
if dbCompareEnabled:
host.dbCompare = true
doExecEvmc(host, call)
else:
execComputation(host.computation)

View File

@ -0,0 +1,207 @@
# Nimbus - Steps towards a fast and small Ethereum data store
#
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
sets, tables,
stint, chronicles, stew/byteutils,
eth/common/eth_types,
../constants,
./host_types, ./db_query
template toHex(hash: Hash256): string =
  ## Hex string of the bytes held in `hash.data`.
  toHex(hash.data)
type
# Per-account record of which fields have already been compared against the
# state history database. Each `seen*` flag is set by the matching
# `dbCompare*` proc the first time that field is checked.
DbSeenAccount* = ref object
seenNonce*: bool
seenBalance*: bool
seenCodeHash*: bool
seenExists*: bool
# Storage slots that `dbCompareStorage` has already checked.
seenStorages*: HashSet[UInt256]
# Set by `dbCompareClearStorage`; once set, `dbCompareStorage` skips the account.
seenAllStorages*: bool
# Seen-records keyed by account address.
DbSeenAccounts* = ref Table[EthAddress, DbSeenAccount]
# Seen-records keyed by block number, then by account address.
DbSeenBlocks* = ref Table[BlockNumber, DbSeenAccounts]
# Module-level comparison state: the opened database, the running error
# count, and the seen-records (reset to nil by `dbCompareResetSeen`).
DbCompare = object
ethDb: EthDB
errorCount: int
seen: DBSeenBlocks
# The single comparison state used by every `dbCompare*` routine in this module.
var dbCompare {.threadvar.}: DbCompare
# `dbCompare` is cross-cutting; a global is fine for these tests.
# This is thread-local for Nim simplicity.
template dbCompareEnabled*: bool =
  ## True when `dbCompare.ethDb` is set, i.e. a state history database has
  ## been assigned by `dbCompareOpen`.
  not dbCompare.ethDb.isNil
template dbCompareErrorCount*: int =
# Expands to the module's running comparison error counter, which
# `dbCompareFail` increments.
dbCompare.errorCount
proc dbCompareResetSeen*() =
  ## Forgets which account fields and storage slots have already been
  ## compared, so the next block starts with a clean slate, and logs the
  ## database query statistics collected so far.
  ##
  ## This is called for every processed block, including when no comparison
  ## database has been opened. In that case `dbCompare.ethDb` is nil and must
  ## not be dereferenced, so the statistics call is skipped.
  dbCompare.seen = nil
  if dbCompareEnabled:
    dbCompare.ethDb.ethDbShowStats()
proc dbCompareOpen*(path: string) {.raises: [IOError, OSError, Defect].} =
  ## Opens the state history file at `path` with `ethDbOpen` and installs it
  ## as the database that EVM inputs are compared against.
  ##
  ## Errors raised by `ethDbOpen` propagate to the caller; that is good
  ## enough for this version.
  let opened = ethDbOpen(path)
  dbCompare.ethDb = opened
  info "DB: Verifying all EVM inputs against compressed state history database",
    file=path, size=opened.ethDbSize()
proc lookupAccount(blockNumber: BlockNumber, address: EthAddress,
                   field: string, accountResult: var DbAccount): bool =
  ## Queries the comparison database for `address` at `blockNumber`, writing
  ## the answer to `accountResult`. `field` is only used in the debug log.
  ## Returns whatever `ethDbQueryAccount` returns.
  debug "DB COMPARE: Looking up account field",
    `block`=blockNumber, account=address, field
  result = ethDbQueryAccount(dbCompare.ethDb, blockNumber, address,
                             accountResult)
proc lookupStorage(blockNumber: BlockNumber, address: EthAddress,
                   slot: UInt256, slotResult: var DbSlotResult): bool =
  ## Queries the comparison database for storage `slot` of `address` at
  ## `blockNumber`, writing the answer to `slotResult`.
  ## Returns whatever `ethDbQueryStorage` returns.
  debug "DB COMPARE: Looking up account storage slot",
    `block`=blockNumber, account=address, slot=slot.toHex
  result = ethDbQueryStorage(dbCompare.ethDb, blockNumber, address, slot,
                             slotResult)
proc getSeenAccount(host: TransactionHost, address: EthAddress): DBSeenAccount =
  ## Returns the seen-record for `address` in the block `host` is executing,
  ## creating the per-block table and the per-account record on first use.
  ##
  ## The record tracks which fields have been read already, so that later
  ## requests for the same address during the processing of a block are not
  ## checked. Later requests see intermediate values that are not based on the
  ## parent block's `stateRoot`, so they are not expected to match the DB.
  let blockNumber = host.txContext.block_number.toBlockNumber
  if dbCompare.seen.isNil:
    dbCompare.seen = newTable[BlockNumber, DbSeenAccounts]()

  var accounts = dbCompare.seen.getOrDefault(blockNumber)
  if accounts.isNil:
    accounts = newTable[EthAddress, DBSeenAccount]()
    dbCompare.seen[blockNumber] = accounts

  result = accounts.getOrDefault(address)
  if result.isNil:
    result = DBSeenAccount()
    accounts[address] = result
template dbCompareFail() =
  ## Records one comparison failure. The running total is logged for each of
  ## the first 99 failures and then for every 100th; reaching 10000 failures
  ## trips the assertion.
  dbCompare.errorCount += 1
  if dbCompare.errorCount mod 100 == 0 or dbCompare.errorCount < 100:
    error "*** DB COMPARE: Error count", errorCount=dbCompare.errorCount
  doAssert dbCompare.errorCount < 10000
proc dbCompareNonce*(host: TransactionHost, address: EthAddress,
                     nonce: AccountNonce) =
  ## Checks `nonce`, as read by the EVM for `address`, against the comparison
  ## database. Only the first read per account per block is checked.
  ## An account missing from the database is accepted when `nonce` is zero.
  let seen = getSeenAccount(host, address)
  if not seen.seenNonce:
    seen.seenNonce = true

    let blockNumber = host.txContext.block_number.toBlockNumber
    var dbAccount {.noinit.}: DbAccount
    if lookupAccount(blockNumber, address, "nonce", dbAccount):
      if nonce != dbAccount.nonce:
        error "*** DB MISMATCH: Account found, nonce does not match",
          `block`=blockNumber, account=address, expectedNonce=nonce,
          foundNonce=dbAccount.nonce
        dbCompareFail()
    elif nonce != 0:
      error "*** DB MISMATCH: Account missing, expected nonce != 0",
        `block`=blockNumber, account=address, expectedNonce=nonce
      dbCompareFail()
proc dbCompareBalance*(host: TransactionHost, address: EthAddress,
                       balance: UInt256) =
  ## Checks `balance`, as read by the EVM for `address`, against the
  ## comparison database. Only the first read per account per block is
  ## checked. An account missing from the database is accepted when `balance`
  ## is zero.
  let seenAccount = getSeenAccount(host, address)
  if seenAccount.seenBalance:
    return
  seenAccount.seenBalance = true

  let blockNumber = host.txContext.block_number.toBlockNumber
  var accountResult {.noinit.}: DbAccount
  let found = lookupAccount(blockNumber, address, "balance", accountResult)
  if not found:
    # Compare against a `UInt256` zero, as `dbCompareStorage` does, rather
    # than a bare integer literal.
    if balance != 0.u256:
      error "*** DB MISMATCH: Account missing, expected balance != 0",
        `block`=blockNumber, account=address, expectedBalance=balance.toHex
      dbCompareFail()
  else:
    if balance != accountResult.balance:
      error "*** DB MISMATCH: Account found, balance does not match",
        `block`=blockNumber, account=address, expectedBalance=balance.toHex,
        foundBalance=accountResult.balance.toHex
      dbCompareFail()
proc dbCompareCodeHash*(host: TransactionHost, address: EthAddress,
                        codeHash: Hash256) =
  ## Checks `codeHash`, as read by the EVM for `address`, against the
  ## comparison database. Only the first read per account per block is
  ## checked. An account missing from the database is accepted when
  ## `codeHash` equals `ZERO_HASH256`.
  let seen = getSeenAccount(host, address)
  if seen.seenCodeHash:
    return
  seen.seenCodeHash = true

  let blockNumber = host.txContext.block_number.toBlockNumber
  var dbAccount {.noinit.}: DbAccount
  if lookupAccount(blockNumber, address, "codeHash", dbAccount):
    if codeHash != dbAccount.codeHash:
      error "*** DB MISMATCH: Account found, codeHash does not match",
        `block`=blockNumber, account=address, expectedCodeHash=codeHash.toHex,
        foundCodeHash=dbAccount.codeHash.toHex
      dbCompareFail()
  elif codeHash != ZERO_HASH256:
    error "*** DB MISMATCH: Account missing, expected codeHash != 0",
      `block`=blockNumber, account=address, expectedCodeHash=codeHash.toHex
    dbCompareFail()
proc dbCompareExists*(host: TransactionHost, address: EthAddress,
                      exists: bool, forkSpurious: bool) =
  ## Checks that the comparison database agrees with the EVM on whether
  ## `address` exists. Only the first query per account per block is checked.
  ## `forkSpurious` is accepted but not used by this implementation.
  let seen = getSeenAccount(host, address)
  if seen.seenExists:
    return
  seen.seenExists = true

  let blockNumber = host.txContext.block_number.toBlockNumber
  var dbAccount {.noinit.}: DbAccount
  let found = lookupAccount(blockNumber, address, "exists", dbAccount)
  if found == exists:
    return

  if exists:
    error "*** DB MISMATCH: Account missing, expected exists=true",
      `block`=blockNumber, account=address
  else:
    error "*** DB MISMATCH: Account found, expected exists=false",
      `block`=blockNumber, account=address
  dbCompareFail()
proc dbCompareStorage*(host: TransactionHost, address: EthAddress,
                       slot: UInt256, value: UInt256) =
  ## Checks `value`, as read by the EVM from storage `slot` of `address`,
  ## against the comparison database.
  ##
  ## Only the first read of each slot per account per block is checked, and
  ## nothing is checked once `dbCompareClearStorage` has been called for the
  ## account. A slot missing from the database is accepted when `value` is
  ## zero.
  let seenAccount = getSeenAccount(host, address)
  if seenAccount.seenAllStorages:
    return
  if seenAccount.seenStorages.containsOrIncl(slot):
    return

  let blockNumber = host.txContext.block_number.toBlockNumber
  var slotResult {.noinit.}: DbSlotResult
  let found = lookupStorage(blockNumber, address, slot, slotResult)
  if not found:
    # On a miss the query zeroes `slotResult.value`, so the check must be
    # made against the value the EVM actually read.
    if value != 0.u256:
      error "*** DB MISMATCH: Storage slot missing, expecting value != 0",
        `block`=blockNumber, account=address, slot=slot.toHex,
        expectedValue=value.toHex
      dbCompareFail()
  else:
    if value != slotResult.value:
      error "*** DB MISMATCH: Storage slot found, value does not match",
        `block`=blockNumber, account=address, slot=slot.toHex,
        expectedValue=value.toHex, foundValue=slotResult.value.toHex
      dbCompareFail()
proc dbCompareClearStorage*(host: TransactionHost, address: EthAddress) =
  ## Marks every storage slot of `address` as seen for the current block, so
  ## `dbCompareStorage` stops checking the account, and empties the set of
  ## individually seen slots.
  let blockNumber = host.txContext.block_number.toBlockNumber
  let seen = getSeenAccount(host, address)
  seen.seenStorages.init()
  seen.seenAllStorages = true
  debug "DB COMPARE: Clearing all storage slots for self-destruct",
    `block`=blockNumber, account=address

View File

@ -0,0 +1,90 @@
# Nimbus - Steps towards a fast and small Ethereum data store
#
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
options, memfiles,
stint, chronicles, stew/byteutils,
eth/common/eth_types,
".."/[db/db_chain, db/accounts_cache, vm_state, p2p/executor/process_block],
"."/[host_types, db_compare]
template toHex(hash: Hash256): string =
  ## Hex string of the bytes held in `hash.data`.
  toHex(hash.data)
proc dbCompareExecBlock*(chainDB: BaseChainDB, blockNumber: BlockNumber): bool =
  ## Re-executes block `blockNumber` from the local chain database on top of
  ## its parent's state root and logs whether any comparison errors were
  ## counted while doing so.
  ##
  ## Returns true for block 0 (nothing to execute) and when `processBlock`
  ## reports `OK`. Returns false when the block hash, header, body or parent
  ## header is missing, when validation fails, or when an exception is caught.
  debug "DB COMPARE: Trying to execute block", `block`=blockNumber
  try:
    var
      blockHash: Hash256
      header, parentHeader: BlockHeader
      body: BlockBody

    if not chainDB.getBlockHash(blockNumber, blockHash):
      error "*** DB COMPARE: Don't have block hash for block",
        `block`=blockNumber
      return false

    if not chainDB.getBlockHeader(blockHash, header):
      error "*** DB COMPARE: Don't have block header for block",
        `block`=blockNumber
      return false

    if not chainDB.getBlockBody(blockHash, body):
      error "*** DB COMPARE: Don't have block body for block",
        `block`=blockNumber
      return false

    if blockNumber == 0:
      debug "*** DB COMPARE: No calculations to be done for genesis block"
      return true

    if not chainDB.getBlockHeader(header.parentHash, parentHeader):
      error "*** DB COMPARE: Don't have block header for parent block",
        `block`=blockNumber, parentBlock=(blockNumber-1)
      return false

    debug "DB COMPARE: Read block from local db ok",
      `block`=blockNumber, blockHash=blockHash.toHex,
      stateRoot=header.stateRoot.toHex

    # Count comparison errors for this block only.
    dbCompareErrorCount = 0

    let parentState = AccountsCache.init(chainDB.db, parentHeader.stateRoot)
    let vmState = newBaseVMState(parentState, header, chainDB)
    let outcome = vmState.processBlock(nil, header, body)
    if outcome != OK:
      error "*** DB COMPARE: Block validation failed, not even affected by new DB",
        `block`=blockNumber, blockHash=blockHash.toHex

    if dbCompareErrorCount != 0:
      error "***DB ERRORS: Block execution has comparison errors",
        errorCount=dbCompareErrorCount,
        `block`=blockNumber, blockHash=blockHash.toHex
    else:
      debug "DB COMPARE: Block execution completed ok",
        `block`=blockNumber, blockHash=blockHash.toHex

    return outcome == OK

  except Exception as e:
    # `result` keeps its default of false on this path.
    error "*** DB COMPARE: Exception while trying to execute block",
      `block`=blockNumber, error=e.msg
proc dbCompareExecBlocks*(chainDB: BaseChainDB,
                          blockNumberStart, blockNumberEnd: Option[uint64]) =
  ## Executes consecutive blocks with `dbCompareExecBlock`, starting at
  ## `blockNumberStart` (default 0) and stopping after `blockNumberEnd`
  ## (default `high(int64)`), or as soon as a block fails to execute.
  let stopAt = blockNumberEnd.get(high(int64).uint64).toBlockNumber
  var blockNumber = blockNumberStart.get(0.uint64).toBlockNumber

  while dbCompareExecBlock(chainDB, blockNumber):
    if blockNumber >= stopAt:
      return
    blockNumber = blockNumber + 1

  # Only reached when a block failed to execute.
  error "*** DB COMPARE: Stopping block execution due to errors at block",
    `block`=blockNumber

View File

@ -0,0 +1,530 @@
# Nimbus - Steps towards a fast and small Ethereum data store
#
# Copyright (c) 2021-2022 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except according to those terms.
{.push raises: [Defect].}
import
sets, tables, memfiles,
stint, chronicles, stew/byteutils,
eth/common/eth_types,
../constants,
./host_types
# The purpose of the present format and the very simple query methods here is to
# be a read-only proof of concept of very compact Eth account/storage state
# history, to show that it's _sufficiently_ compact to be interesting.
#
# The files it reads are generated by extracting the state history from an
# Erigon node in a separate ad-hoc ETL process.
#
# ----------------------------------------------------------------------------
#
# `memfiles.nim` stdlib module very helpfully provides memory-mapped files
# for POSIX and Windows. (Thanks Zahary!)
#
# IMPORTANT: `mmap` is used here for this PoC but not intended to remain.
#
# This comment is to preempt anticipated feedback about the LMDB issues
# mentioned in `nimbus/db/select_backend.nim`.
#
# It is very convenient for this read-only proof of concept, while the
# database is built up in Nim layers. Certainly it simplifies the query
# code, making it easier to explain and show the compression and traversal,
# compared with a version using high-performance async file I/O.
#
# But actually single-threaded `mmap` is a very slow method for this type of
# database. Memory-mapped files don't have appropriate performance
# characteristics, not to mention portability issues. I measured 25 times
# slower doing random-access reads via `mmap` like this compared with good
# async I/O on my Linux test system.
#
# Specifically, memory-mapped files perform badly with single-threaded code
# doing random-access reads to a very large database. Because the state file
# is much larger than RAM, most queries block in a page fault doing I/O to
# the underlying storage device. Most queries are located randomly due to
# Ethereum hashed trie keys, and are different from one Ethereum block to the
# next, so OS filesystem read-ahead just makes performance worse. Those
# reads block Chronos in a page fault, and it's not possible for other async
# tasks to progress while waiting for storage. Worse, it's not possible to
# reach I/O queue depth > 1 this way, so the underlying filesystem and the
# SSD itself operate at their very slowest.
#
# ----------------------------------------------------------------------------
#
# The format is a large, read-only file which is easy to search, but that's a
# little misleading.
#
# To those who know what makes a database work (B-trees/LSM-trees etc), it may
# be clear that the format here can't be updated fast. Really though, it's not
# far off a format that supports updates while keeping good query performance
# and similar compression. Watch this space if you're intrigued about how that
# transformation is done!
#
# This is being implemented in tested steps into a compressed (for Eth) but
# _writable_ data format, that supports "O(1)"[*] and 1 IOP state queries, fast
# bulk-range writes for super-snap sync, and fast random-writes for execution.
# Offline experiments have been done on Mainnet data to validate the concepts
# and to benchmark low-level I/O performance. Also don't pay too close
# attention to the ad-hoc byte coding in this file. It's scaffolding while
# we're importing data.
#
# [*] Because other clients' literature says "O(1)" queries. Technically all
# of them are at least O(log N) because they mean O(1) database lookups, and a
# single database lookup is O(log N) reads by itself. It may be reduced to
# O(1) under certain cache scaling assumptions and well-designed caching of
# interior pages. We count "leaf IOPS" as a reasonably proportional proxy for
# IOPS under these assumptions.
#
# On a more concrete note, our method, when cached, takes approximately 1 leaf
# IOPS to read both a small account and N `SLOAD` storage reads, due to
# locality in the layout of small accounts. 1+N for large accounts (those with
# large storage or history). Contrast 1 and 1+N with the 2+2N and 2+2N leaf
# IOPS used by TurboGeth-derived clients.
#
# So our method should be faster at random-read queries in an absolute sense
# when SSD I/O performance is the bottleneck, rather than CPU. Our method is
# more CPU intensive, due to compression not layout (they could be separated).
type
# Handle for an opened state history file: the memory mapping plus query
# counters that `ethDbShowStats` reports.
EthDB* = ref object
memFile: MemFile
queries: uint64
queryPagesL1: uint64
queryPagesL2: uint64
# Layout of the header read from the start of the mapped file
# (validated by `checkHeader`).
DbHeader = object
fileVersion: uint64
fileSize: uint64
startOffset: uint64
pageShift: uint64
# Account fields decoded from an account entry.
DbAccount* = object
nonce*: AccountNonce
incarnation*: uint64
balance*: UInt256
codeHash*: Hash256
# Storage slot and value decoded from a storage entry.
DbStorage = object
slot*: UInt256
value*: UInt256
# Result of a storage query.
DbSlotResult* = object
value*: UInt256
# Flags saying which components of a `DbGeneralKey` are meaningful.
DbGeneralKeyBits = enum
DbkHasBlockNumber
DbkHasAddress
#DbHasAddressHash
DbkHasSlot
#DbHasSlotHash
DbGeneralKeyFlags = set[DbGeneralKeyBits]
# Search key passed to `generalQuery`.
DbGeneralKey = object
flags: DbGeneralKeyFlags
blockNumber: BlockNumber
address: EthAddress
#addressHash: Hash256
slot: UInt256
#slotHash: Hash256
# Flags saying which components of a `DbGeneralValue` are meaningful.
DbGeneralValueBits = enum
DbvHasBlockNumber
DbvHasAddress
DbvHasAccount
DbvHasStorage
DbGeneralValueFlags = set[DbGeneralValueBits]
# One decoded entry from the file, as produced by `queryPage`.
DbGeneralValue = object
flags: DbGeneralValueFlags
blockNumber: BlockNumber
address: EthAddress
account: DbAccount
storage: DbStorage
DbMatch = enum
# Polarity: Whether a key is EQ, GT or LT than a looked up value.
MatchEQ # Key equals value.
MatchGT # Key strictly greater than value.
MatchLT # Key strictly less than value.
MatchNotFound # No value.
template header(db: EthDB): DbHeader =
  ## The file header, read in place from the start of the memory mapping.
  ## Asserts that the file is mapped.
  doAssert not db.memFile.mem.isNil
  cast[ptr DbHeader](db.memFile.mem)[]

template offset(db: EthDB, offset: uint64): ptr byte =
  ## Pointer to the byte located `offset` bytes after the start of the mapping.
  cast[ptr byte](db.memFile.mem) + offset

template `+`(p: ptr byte, offset: uint64): ptr byte =
  ## Pointer arithmetic: `p` advanced by `offset` bytes.
  cast[ptr byte](cast[uint](p) + offset)

template inc(p: var ptr byte) =
  ## Advances `p` by one byte.
  p = p + 1

template toHex(hash: Hash256): string =
  ## Hex string of the bytes held in `hash.data`.
  toHex(hash.data)
proc compareAddresses(a, b: EthAddress): DbMatch {.inline.} =
  ## Compares the first 20 bytes of `a` and `b` in index order and reports
  ## how `a` relates to `b` at the first differing byte, or `MatchEQ` when
  ## none differ.
  result = MatchEQ
  for i in 0 ..< 20:
    if a[i] != b[i]:
      return (if a[i] < b[i]: MatchLT else: MatchGT)
proc compareNumbers(a, b: uint64 | BlockNumber | UInt256): DbMatch {.inline.} =
  ## Three-way numeric comparison of `a` against `b`.
  if a < b:
    result = MatchLT
  elif a > b:
    result = MatchGT
  else:
    result = MatchEQ
proc compareBooleans(a, b: bool): DbMatch {.inline.} =
  ## Three-way comparison of two booleans, ordering `false` before `true`.
  if a and not b:
    result = MatchGT
  elif b and not a:
    result = MatchLT
  else:
    result = MatchEQ
proc compareGeneral(key: DbGeneralKey, value: DbGeneralValue): DbMatch =
  ## Compares a search key with a decoded entry, component by component, in
  ## the order address, slot, block number. For each component the presence
  ## flags are compared first, then (when the key has the component) the
  ## values. The first non-equal comparison decides the result.
  result = compareBooleans(DbkHasAddress in key.flags,
                           DbvHasAddress in value.flags)
  if result != MatchEQ:
    return
  if DbkHasAddress in key.flags:
    result = compareAddresses(key.address, value.address)
    if result != MatchEQ:
      return

  result = compareBooleans(DbkHasSlot in key.flags,
                           DbvHasStorage in value.flags)
  if result != MatchEQ:
    return
  if DbkHasSlot in key.flags:
    result = compareNumbers(key.slot, value.storage.slot)
    if result != MatchEQ:
      return

  result = compareBooleans(DbkHasBlockNumber in key.flags,
                           DbvHasBlockNumber in value.flags)
  if result != MatchEQ:
    return
  if DbkHasBlockNumber in key.flags:
    result = compareNumbers(key.blockNumber, value.blockNumber)
proc queryPage(db: EthDB, offsetStart, offsetEnd: uint64, key: DbGeneralKey,
               valueOut: var DbGeneralValue, all: bool): DbMatch =
  ## Decodes entries from the byte range `offsetStart .. offsetEnd`
  ## (inclusive) of the mapped file and compares them with `key`.
  ##
  ## With `all == false` only the first entry in the range is decoded and
  ## compared. With `all == true` decoding continues for as long as
  ## `compareGeneral` reports `MatchLT`; if it then reports `MatchGT` and an
  ## earlier entry was saved, that saved entry is returned with `MatchLT`.
  ##
  ## The decoded entry is written to `valueOut`; the result is how `key`
  ## compares with it.
  var
    posEnd = db.offset(offsetEnd)
    pos = db.offset(offsetStart)
    b: byte

  # NOTE(review): the `break` in `getByte` leaves whichever loop encloses the
  # expansion site, so inside the fixed-size readers it only ends that
  # reader's own loop - confirm this is the intended end-of-range handling.
  template getByte =
    if pos > posEnd:
      break
    b = pos[]
    inc pos

  # Big-endian 8-byte integer.
  template readFixed64(into: var uint64) =
    into = 0
    for i in 0 ..< 8:
      getByte
      into = (into shl 8) or b.uint64

  # Big-endian 32-byte integer.
  template readFixed256(into: var UInt256) =
    getByte
    into = b.u256
    for i in 0 ..< 31:
      getByte
      into = (into shl 8) or b.u256

  # A first byte below 224 is the value itself; otherwise `b - 224` gives
  # the number of bytes that follow the first value byte.
  template readVariable256(into: var UInt256) =
    getByte
    if b < 224:
      into = b.u256
    else:
      var remainder = (b - 224).int
      getByte
      into = b.u256
      while remainder != 0:
        getByte
        into = (into shl 8) or b.u256
        dec remainder

  # Decoder state carried from one entry to the next within the range.
  var blockNumber: uint64
  var address: EthAddress
  var readerIncarnation: uint64
  var readerSlot: UInt256

  const
    CODE_BLOCK_NUMBER = 1 # Range 1..8
    CODE_ADDRESS = 9 # Single value 9
    CODE_ACCOUNT = 10 # Range 10..73
    CODE_STORAGE = 74 # Range 74..249
    CODE_INCARNATION = 250 # Single value 250
    CODE_BLOCK_INLINE = 251 # Range 251..255

  template nextEntry(generalValue: var DbGeneralValue) =
    while true:
      getByte
      if b < CODE_BLOCK_NUMBER:
        # End of page
        break
      elif b <= CODE_BLOCK_NUMBER + 7:
        # Block number prefix: 1..8 big-endian bytes.
        let len = (b - CODE_BLOCK_NUMBER + 1).int
        blockNumber = 0
        for i in 0 ..< len:
          getByte
          blockNumber = (blockNumber shl 8) or b.uint64
      elif b == CODE_ADDRESS:
        # Address prefix: 20 bytes.
        for i in 0 ..< 20:
          getByte
          address[i] = b
      elif b <= CODE_ACCOUNT + 63:
        generalValue.flags = { DbvHasBlockNumber, DbvHasAddress, DbvHasAccount }
        generalValue.blockNumber = blockNumber.toBlockNumber
        generalValue.address = address
        template account: var DbAccount = generalValue.account
        account.nonce = 0
        account.incarnation = 0
        account.balance = 0.u256
        account.codeHash = ZERO_HASH256
        # Six flag bits: bit 0 = balance follows, bit 1 = code hash follows,
        # bits 2-3 = nonce, bits 4-5 = incarnation. For the two-bit fields the
        # value 3 means a fixed 64-bit number follows; 0..2 is the value
        # itself.
        let flags = b - CODE_ACCOUNT
        if (flags and 1) != 0:
          readVariable256(account.balance)
        if (flags and 2) != 0:
          for i in 0 ..< 32:
            getByte
            account.codeHash.data[i] = b
        if (flags and (3 shl 2)) == (3 shl 2):
          readFixed64(account.nonce)
        else:
          account.nonce = ((flags shr 2) and 3).AccountNonce
        if (flags and (3 shl 4)) == (3 shl 4):
          readFixed64(account.incarnation)
        else:
          # The inline incarnation lives in bits 4-5, the same bits tested
          # just above (bits 2-3 hold the nonce).
          account.incarnation = ((flags shr 4) and 3).uint64
        if account.incarnation != 0:
          readerIncarnation = account.incarnation
        # At this point we have an account entry.
        break
      elif b <= CODE_STORAGE + 160 + 15:
        generalValue.flags = { DbvHasBlockNumber, DbvHasAddress, DbvHasStorage }
        generalValue.blockNumber = blockNumber.toBlockNumber
        generalValue.address = address
        template storage: var DbStorage = generalValue.storage
        let flags = b - CODE_STORAGE
        # High nibble selects the slot encoding, bit 3 marks the slot as a
        # delta from the previous slot, low three bits select the value
        # encoding.
        if (flags shr 4) < 9:
          storage.slot = (flags shr 4).u256
        elif (flags shr 4) == 9:
          readVariable256(storage.slot)
        else:
          readFixed256(storage.slot)
        if (flags and (1 shl 3)) != 0:
          storage.slot += readerSlot
        readerSlot = storage.slot
        if (flags and 7) < 6:
          storage.value = (flags and 7).u256
        else:
          readVariable256(storage.value)
          if (flags and 7) == 6:
            storage.value = not storage.value
        # At this point we have a storage entry.
        break
      elif b <= CODE_INCARNATION + 4:
        if b - CODE_INCARNATION < 4:
          readerIncarnation = (b - CODE_INCARNATION + 1).uint64
        else:
          readFixed64(readerIncarnation)
      else:
        trace "DB: Syntax error in data file"
        break

  var haveSavedValue: bool
  var savedValue: DbGeneralValue
  var match = MatchNotFound

  while true:
    # Empty flags mark "no entry decoded" if `nextEntry` hits end of page.
    valueOut.flags = {}
    nextEntry(valueOut)
    match = compareGeneral(key, valueOut)
    if match != MatchLT or not all:
      break
    savedValue = valueOut
    haveSavedValue = true

  if match == MatchGT and haveSavedValue:
    valueOut = savedValue
    match = MatchLT
  return match
template checkHeader(db: EthDB) =
# Sanity-checks the mapped file before it is used: the file must be mapped
# and at least as large as the header, carry the expected version number,
# declare a size that fits in the mapping and is not below the start offset,
# and use a page shift between 8 and 24.
doAssert db.memFile.mem != nil
doAssert db.memFile.size >= sizeof(db.header)
doAssert db.header.fileVersion == 2022020701
doAssert db.header.fileSize <= db.memFile.size.uint64
doAssert db.header.fileSize >= db.header.startOffset
doAssert db.header.pageShift >= 8 and db.header.pageShift <= 24
proc generalQuery(db: EthDB, key: DbGeneralKey,
                  valueOut: var DbGeneralValue): bool =
  ## Looks `key` up in the mapped file and writes the entry found to
  ## `valueOut`. Returns true when an entry was accepted as a match.
  ##
  ## Increments the `queries`, `queryPagesL1` and `queryPagesL2` counters on
  ## `db` as it goes.
  db.checkHeader()
  let
    fileSize = db.header.fileSize
    pageShift = db.header.pageShift
    pageMask = ((1 shl pageShift) - 1).uint64
  inc db.queries

  var
    lowOffset = db.header.startOffset
    highOffset = fileSize - 1

  # The key we are looking for is between the lowest key in page
  # `lowOffset shr pageShift` and the highest key in page
  # `highOffset shr pageShift`, both inclusive.
  #
  # We hunt for it with two levels of binary search. First using "search in
  # page, first key only" as a subroutine to locate the page. Then using
  # "search in page, all keys" to locate the value.
  #
  # A subtle twist is that the query has mixed comparators. We're not looking
  # for an exact match on all key components. We need an exact match to
  # `address` and `slot`, but for `blockNumber` we search for the "max entry <=
  # search key", because this is really an interval tree with implicit
  # intervals (block ranges). To find the entry, the second level search may
  # re-visit a page that was discarded early in the first level search.

  while true:
    if lowOffset > highOffset:
      return false

    var
      # Subtraction like this biases the rounding upwards, which reduces the
      # average number of search steps and page reads.
      midOffset = highOffset - ((highOffset - lowOffset) shr 1)
      # Byte offset of the start of the page containing `midOffset`. Every
      # use below (comparison with `lowOffset`, `+= pageMask + 1`, passing it
      # to `queryPage`) treats it as a byte offset, so it is computed by
      # masking off the in-page bits rather than shifting down to a page
      # index.
      midPageStart = midOffset and not pageMask
      midPageEnd = midPageStart or pageMask

    # For "max entry <= search key" it is always better to skip the first page
    # at L1 search if there is another page after (unless the first entry
    # happens to `MatchEQ`, which is unlikely).
    if midPageStart <= lowOffset:
      if midPageEnd >= highOffset:
        break
      midPageStart += pageMask + 1
      midPageEnd = midPageStart or pageMask
    if midPageEnd > highOffset:
      midPageEnd = highOffset

    inc db.queryPagesL1
    case queryPage(db, midPageStart, midPageEnd, key, valueOut, false):
    of MatchEQ:
      return true
    of MatchLT:
      highOffset = midPageStart - 1
    of MatchGT:
      # The "max entry <= search key" might be inside the current page, but
      # might also be in a much higher page number. We can set `lowOffset`
      # to either the start of the current page, or the start of the next one
      # and be prepared to backtrack one page. Each has a subtle effect on
      # the average number of steps in L1 search. The choice below is made
      # to interact with skipping the first page in code above, because
      # skipping the first page is beneficial at the first iteration as well.
      lowOffset = midPageStart
    of MatchNotFound:
      return false

  inc db.queryPagesL2
  case queryPage(db, lowOffset, highOffset, key, valueOut, true):
  of MatchEQ:
    return true
  of MatchGT:
    # Match exact address and slot but "lowest >=" block number.
    # The entry under test is `valueOut`, the one `queryPage` just filled in.
    if compareBooleans(DbkHasAddress in key.flags,
                       DbvHasAddress in valueOut.flags) != MatchEQ:
      return false
    if DbkHasAddress in key.flags:
      if compareAddresses(key.address, valueOut.address) != MatchEQ:
        return false
    if compareBooleans(DbkHasSlot in key.flags,
                       DbvHasStorage in valueOut.flags) != MatchEQ:
      return false
    if DbkHasSlot in key.flags:
      if compareNumbers(key.slot, valueOut.storage.slot) != MatchEQ:
        return false
    return true
  else:
    return false
proc ethDbQueryAccount*(db: EthDB, blockNumber: BlockNumber,
                        address: EthAddress, accountResult: var DbAccount): bool =
  ## Queries the account `address` at `blockNumber`. On success the account
  ## is written to `accountResult`; otherwise `accountResult` is reset to a
  ## default `DbAccount`. Returns whether `generalQuery` found an entry.
  var key {.noinit.}: DbGeneralKey
  key.address = address
  key.blockNumber = blockNumber
  key.flags = { DbkHasBlockNumber, DbkHasAddress }

  var entry {.noinit.}: DbGeneralValue
  result = generalQuery(db, key, entry)
  if result:
    accountResult = entry.account
  else:
    accountResult = DbAccount()
proc ethDbQueryStorage*(db: EthDB, blockNumber: BlockNumber,
                        address: EthAddress, slot: UInt256,
                        slotResult: var DbSlotResult): bool =
  ## Runs a general query keyed on `blockNumber`, `address` and `slot`.
  ##
  ## Returns `true` when the query finds an entry, in which case
  ## `slotResult.value` receives that entry's `storage.value`. Otherwise
  ## returns `false` and `slotResult.value` is set to zero.
  var searchKey {.noinit.}: DbGeneralKey
  # Only the fields named in `flags` are filled in; the rest stay
  # uninitialised because of `noinit`.
  searchKey.flags = { DbkHasBlockNumber, DbkHasAddress, DbkHasSlot }
  searchKey.slot = slot
  searchKey.address = address
  searchKey.blockNumber = blockNumber
  var entry {.noinit.}: DbGeneralValue
  result = generalQuery(db, searchKey, entry)
  slotResult.value =
    if result: entry.storage.value
    else: 0.u256
proc ethDbOpen*(path: string): EthDB {.raises: [IOError, OSError].} =
  ## Open an EthDB file.  Note, format is subject to rapid change.
  ## See comment at the start of this file about use of `memfile`.
  ##
  ## `path` is the file to map. Returns a new `EthDB` whose `memFile` field
  ## holds the opened memory-mapped file.
  info "DB: Opening experimental compressed state history database",
    file=path
  # Build the object in `result` itself. Previously a separate local was
  # constructed and then dropped, while `memFile` was assigned through a
  # `result` that had never been initialised.
  result = EthDB()
  # Raises `OSError` on error, good enough for this version.
  result.memFile = memfiles.open(path)
proc ethDbSize*(db: EthDB): uint64 =
  ## Runs `db.checkHeader()` and then returns the `fileSize` field of the
  ## database header.
  db.checkHeader()
  result = db.header.fileSize
proc ethDbShowStats*(db: EthDB) =
  ## Logs, at debug level, the query counters accumulated on `db`: the number
  ## of queries, the total pages touched (L1 + L2), the per-level page counts
  ## and the average pages per query.
  let queryPages = db.queryPagesL1 + db.queryPagesL2
  # NOTE(review): when no queries have been made yet the average is a float
  # division by zero, so it is presumably logged as NaN rather than raising.
  debug "DB: Statistics so far", queries=db.queries,
    pagesPerQuery=(queryPages.float / db.queries.float), queryPages,
    queryPagesL1=db.queryPagesL1, queryPagesL2=db.queryPagesL2

View File

@ -85,17 +85,22 @@ when use_evmc_glue:
{.push inline.}
proc accountExists(host: TransactionHost, address: HostAddress): bool {.show.} =
if host.vmState.fork >= FkSpurious:
not host.vmState.readOnlyStateDB.isDeadAccount(address)
else:
host.vmState.readOnlyStateDB.accountExists(address)
result =
if host.vmState.fork >= FkSpurious:
not host.vmState.readOnlyStateDB.isDeadAccount(address)
else:
host.vmState.readOnlyStateDB.accountExists(address)
if host.dbCompare:
host.dbCompareExists(address, result, host.vmState.fork >= FkSpurious)
# TODO: Why is `address` an argument in `getStorage`, `setStorage` and
# `selfDestruct`, if an EVM is only allowed to do these things to its own
# contract account and the host always knows which account?
proc getStorage(host: TransactionHost, address: HostAddress, key: HostKey): HostValue {.show.} =
host.vmState.readOnlyStateDB.getStorage(address, key)
result = host.vmState.readOnlyStateDB.getStorage(address, key)
if host.dbCompare:
host.dbCompareStorage(address, key, result)
const
# EIP-1283
@ -140,6 +145,11 @@ proc setStorage(host: TransactionHost, address: HostAddress,
key: HostKey, value: HostValue): EvmcStorageStatus {.show.} =
let db = host.vmState.readOnlyStateDB
let oldValue = db.getStorage(address, key)
# NOTE: No need to `dbCompareStorage` here because the only place
# `host.setStorage` is called (in `opcodes_impl.nim`), it is preceded by
# `host.getStorage`.
#if host.dbCompare:
# host.dbCompareStorage(address, key, oldValue)
if oldValue == value:
return EVMC_STORAGE_UNCHANGED
@ -173,9 +183,14 @@ proc setStorage(host: TransactionHost, address: HostAddress,
return EVMC_STORAGE_MODIFIED
proc getBalance(host: TransactionHost, address: HostAddress): HostBalance {.show.} =
host.vmState.readOnlyStateDB.getBalance(address)
result = host.vmState.readOnlyStateDB.getBalance(address)
if host.dbCompare:
host.dbCompareBalance(address, result)
proc getCodeSize(host: TransactionHost, address: HostAddress): HostSize {.show.} =
if host.dbCompare:
let codeHash = host.vmState.readOnlyStateDB.getCodeHash(address)
host.dbCompareCodeHash(address, codeHash)
# TODO: Check this `HostSize`, it was copied as `uint` from other code.
# Note: Old `evmc_host` uses `getCode(address).len` instead.
host.vmState.readOnlyStateDB.getCodeSize(address).HostSize
@ -184,14 +199,21 @@ proc getCodeHash(host: TransactionHost, address: HostAddress): HostHash {.show.}
let db = host.vmState.readOnlyStateDB
# TODO: Copied from `Computation`, but check if that code is wrong with
# `FkSpurious`, as it has different calls from `accountExists` above.
if not db.accountExists(address) or db.isEmptyAccount(address):
default(HostHash)
else:
db.getCodeHash(address)
result =
# TODO: When `host.dbCompare` should we trace the existence check as well?
if not db.accountExists(address) or db.isEmptyAccount(address):
default(HostHash)
else:
db.getCodeHash(address)
if host.dbCompare:
host.dbCompareCodeHash(address, result)
proc copyCode(host: TransactionHost, address: HostAddress,
code_offset: HostSize, buffer_data: ptr byte,
buffer_size: HostSize): HostSize {.show.} =
if host.dbCompare:
let codeHash = host.vmState.readOnlyStateDB.getCodeHash(address)
host.dbCompareCodeHash(address, codeHash)
# We must handle edge cases carefully to prevent overflows. `len` is signed
# type `int`, but `code_offset` and `buffer_size` are _unsigned_, and may
# have large values (deliberately if attacked) that exceed the range of `int`.
@ -224,6 +246,10 @@ proc selfDestruct(host: TransactionHost, address, beneficiary: HostAddress) {.sh
let closingBalance = db.getBalance(address)
let beneficiaryBalance = db.getBalance(beneficiary)
if host.dbCompare:
host.dbCompareBalance(address, closingBalance)
host.dbCompareBalance(beneficiary, beneficiaryBalance)
# Transfer to beneficiary
db.setBalance(beneficiary, beneficiaryBalance + closingBalance)

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
sets, stint, evmc/evmc, eth/common/eth_types, ../vm_types
sets, tables, stint, evmc/evmc, eth/common/eth_types, ../vm_types
# Object `TransactionHost` represents "EVMC host" to the EVM. "Host services"
# manage account state outside EVM such as balance transfers, storage, logs and
@ -65,6 +65,8 @@ type
depth*: int
saveComputation*: seq[Computation]
hostInterface*: ptr evmc_host_interface
when defined(evmc_enabled):
dbCompare*: bool
# These versions of `toEvmc` and `fromEvmc` don't flip big/little-endian like
# the older functions in `evmc_helpers`. New code only flips with _explicit_
@ -100,3 +102,7 @@ template isStatic*(msg: EvmcMessage): bool =
export
evmc_status_code, evmc_call_kind,
evmc_flag_bit_shifts, evmc_storage_status, evmc_access_status
when defined(evmc_enabled):
import ./db_compare
export db_compare

View File

@ -20,6 +20,15 @@ when defined(chronicles_log_level):
when defined(evmc_enabled):
import evmc/evmc, evmc_helpers, evmc_api, stew/ranges/ptr_arith
# Let `dbCompare` be called from `Computation` inside the EVMC API boundary.
# TODO: The logic which calls these in `Computation` should not be there at
# all. Removing it will clear up EVMC compatibility bugs, but that requires
# a transformation of the nested call code and we're focused on the DB.
from ../transaction/host_types import
TransactionHost, dbCompareNonce, dbCompareClearStorage
template transactionHost*(c: Computation): TransactionHost =
cast[TransactionHost](c.host.context)
export dbCompareNonce
logScope:
topics = "vm computation"
@ -139,6 +148,11 @@ template getCode*(c: Computation, address: EthAddress): seq[byte] =
proc generateContractAddress(c: Computation, salt: ContractSalt): EthAddress =
if c.msg.kind == evmcCreate:
let creationNonce = c.vmState.readOnlyStateDb().getNonce(c.msg.sender)
when evmc_enabled:
if c.transactionHost.dbCompare:
# NOTE: The nonce is read so it must be added to the `dbCompare`
# read-set.
c.transactionHost.dbCompareNonce(c.msg.sender, creationNonce)
result = generateAddress(c.msg.sender, creationNonce)
else:
result = generateSafeAddress(c.msg.sender, salt, c.msg.data)
@ -264,6 +278,13 @@ proc writeContract*(c: Computation) =
let codeCost = c.gasCosts[Create].c_handler(0.u256, gasParams).gasCost
if codeCost <= c.gasMeter.gasRemaining:
c.gasMeter.consumeGas(codeCost, reason = "Write new contract code")
when evmc_enabled:
if c.transactionHost.dbCompare:
# NOTE: The execution is not a function of the old account codeHash,
# but `c.getCodeHash` is needed to tell `dbCompare` not to read
# codeHash again after it's set below, so that it won't see wrong
# values for this block.
discard c.getCodeHash(c.msg.contractAddress)
c.vmState.mutateStateDb:
db.setCode(c.msg.contractAddress, c.output)
withExtra trace, "Writing new contract code"
@ -320,11 +341,31 @@ proc beforeExecCreate(c: Computation): bool =
c.snapshot()
when evmc_enabled:
if c.transactionHost.dbCompare:
# NOTE: `hasCodeOrNonce` below reads nonce and codeHash so they must be
# added to the `dbCompare` read-set. For the nonce we need to call
# `dbCompareNonce` directly. `c.getCodeHash` does it indirectly via EVMC.
let nonce = c.vmState.readOnlyStateDB.getNonce(c.msg.contractAddress)
c.transactionHost.dbCompareNonce(c.msg.contractAddress, nonce)
let codeHash = c.getCodeHash(c.msg.contractAddress)
if c.vmState.readOnlyStateDb().hasCodeOrNonce(c.msg.contractAddress):
c.setError("Address collision when creating contract address={c.msg.contractAddress.toHex}", true)
c.rollback()
return true
when evmc_enabled:
if c.transactionHost.dbCompare:
# NOTE: The balance updates below read balances so they must be added to
# the `dbCompare` read-set. `c.getBalance` indirectly tells the
# `dbCompare` code via EVMC. `dbCompareClearStorage` is needed to tell
# `dbCompare` not to read storage slots again after they are cleared
# below, so that it won't see wrong values for this block.
discard c.getBalance(c.msg.sender)
discard c.getBalance(c.msg.contractAddress)
c.transactionHost.dbCompareClearStorage(c.msg.contractAddress)
c.vmState.mutateStateDb:
db.subBalance(c.msg.sender, c.msg.value)
db.addBalance(c.msg.contractAddress, c.msg.value)

View File

@ -494,6 +494,9 @@ when not evmc_enabled:
when evmc_enabled:
template sstoreEvmc(c: Computation, slot, newValue: Uint256) =
let
# NOTE: `dbCompareAccountStorage` relies on the `c.getStorage(slot)` here
# to add the storage to the read-set. The next line is redundant for
# other purposes but it's necessary for `dbCompare`.
currentValue {.inject.} = c.getStorage(slot)
status = c.host.setStorage(c.msg.contractAddress, slot, newValue)
gasParam = GasParams(kind: Op.Sstore, s_status: status)

View File

@ -12,6 +12,13 @@ import
./computation, ./interpreter, ./state, ./types
proc execComputation*(c: Computation) =
when defined(evmc_enabled):
if not c.msg.isCreate and c.transactionHost.dbCompare:
# NOTE: The nonce is read so it must be added to the `dbCompare`
# read-set.
let nonce = c.vmState.readOnlyStateDB.getNonce(c.msg.sender)
c.transactionHost.dbCompareNonce(c.msg.sender, nonce)
if not c.msg.isCreate:
c.vmState.mutateStateDB:
db.incNonce(c.msg.sender)