Cache code and invalid jump destination tables (fixes #2268) (#2404)

It is common for many accounts to share the same code - at the database
level, code is stored by hash meaning only one copy exists per unique
program but when loaded in memory, a copy is made for each account.

Further, every time we execute the code, it must be scanned for invalid
jump destinations, which slows down EVM execution.

Finally, the extcodesize call causes code to be loaded even if only the
size is needed.

This PR improves on all these points by introducing a shared
CodeBytesRef type whose code section is immutable and that can be shared
between accounts. Further, a dedicated `len` API call is added so that
the EXTCODESIZE opcode can operate without polluting the GC and code
cache, for cases where only the size is requested - rocksdb will in this
case cache the code itself in the row cache meaning that lookup of the
code itself remains fast when length is asked for first.

With 16k code entries, there's a 90% hit rate which goes up to 99%
during the 2.3M attack - the cache significantly lowers memory
consumption and execution time not only during this event but across the
board.
This commit is contained in:
Jacek Sieka 2024-06-21 09:44:10 +02:00 committed by GitHub
parent 83b3eeeb18
commit 768307d91d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 378 additions and 181 deletions

View File

@ -1,5 +1,5 @@
# Nimbus
# Copyright (c) 2018-2023 Status Research & Development GmbH
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
@ -8,9 +8,10 @@
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
import ../nimbus/vm/code_stream, strformat
import ../nimbus/evm/code_stream, strformat
var c = newCodeStreamFromUnescaped("0x6003600202600055")
var c =
CodeStream.init(CodeBytesRef.fromHex("0x6003600202600055").expect("valid code"))
let opcodes = c.decompile()
for op in opcodes:

View File

@ -18,8 +18,6 @@ import
../constants,
../common/common
from std/sequtils import mapIt
{.push raises: [].}
type

View File

@ -107,6 +107,19 @@ proc kvtMethods(cKvt: KvtCoreDbKvtRef): CoreDbKvtFns =
else:
rc.toRc(cKvt.base, info)
proc kvtLen(
    cKvt: KvtCoreDbKvtRef;
    k: openArray[byte];
    info: static[string];
      ): CoreDbRc[int] =
  ## Query the `len` entry of the underlying Kvt API for key `k` and
  ## translate the outcome into a `CoreDbRc[int]`. A `GetNotFound` error is
  ## reported as `KvtNotFound`; any other error is converted via `toRc()`.
  let rc = cKvt.base.api.len(cKvt.kvt, k)
  if rc.isErr and rc.error == GetNotFound:
    err(rc.error.toError(cKvt.base, info, KvtNotFound))
  elif rc.isErr:
    rc.toRc(cKvt.base, info)
  else:
    ok(rc.value)
proc kvtPut(
cKvt: KvtCoreDbKvtRef;
k: openArray[byte];
@ -148,6 +161,9 @@ proc kvtMethods(cKvt: KvtCoreDbKvtRef): CoreDbKvtFns =
getFn: proc(k: openArray[byte]): CoreDbRc[Blob] =
cKvt.kvtGet(k, "getFn()"),
lenFn: proc(k: openArray[byte]): CoreDbRc[int] =
cKvt.kvtLen(k, "lenFn()"),
delFn: proc(k: openArray[byte]): CoreDbRc[void] =
cKvt.kvtDel(k, "delFn()"),

View File

@ -255,6 +255,12 @@ proc get*(kvt: CoreDbKvtRef; key: openArray[byte]): CoreDbRc[Blob] =
result = kvt.methods.getFn key
kvt.ifTrackNewApi: debug newApiTxt, api, elapsed, key=key.toStr, result
proc len*(kvt: CoreDbKvtRef; key: openArray[byte]): CoreDbRc[int] =
## Forwards `key` to the `lenFn` method of the table and returns its result,
## i.e. an `int` on success or an error code otherwise. Presumably the `int`
## is the byte length of the value stored under `key` -- TODO confirm
## against the `lenFn` implementation of the backend in use.
kvt.setTrackNewApi KvtLenFn
result = kvt.methods.lenFn key
kvt.ifTrackNewApi: debug newApiTxt, api, elapsed, key=key.toStr, result
proc getOrEmpty*(kvt: CoreDbKvtRef; key: openArray[byte]): CoreDbRc[Blob] =
## This function sort of mimics the behaviour of the legacy database
## returning an empty `Blob` if the argument `key` is not found on the

View File

@ -70,6 +70,7 @@ type
KvtDelFn = "kvt/del"
KvtForgetFn = "kvt/forget"
KvtGetFn = "kvt/get"
KvtLenFn = "kvt/len"
KvtGetOrEmptyFn = "kvt/getOrEmpty"
KvtHasKeyFn = "kvt/hasKey"
KvtPairsIt = "kvt/pairs"

View File

@ -140,6 +140,7 @@ type
# --------------------------------------------------
CoreDbKvtBackendFn* = proc(): CoreDbKvtBackendRef {.noRaise.}
CoreDbKvtGetFn* = proc(k: openArray[byte]): CoreDbRc[Blob] {.noRaise.}
CoreDbKvtLenFn* = proc(k: openArray[byte]): CoreDbRc[int] {.noRaise.}
CoreDbKvtDelFn* = proc(k: openArray[byte]): CoreDbRc[void] {.noRaise.}
CoreDbKvtPutFn* =
proc(k: openArray[byte]; v: openArray[byte]): CoreDbRc[void] {.noRaise.}
@ -150,6 +151,7 @@ type
## Methods for key-value table
backendFn*: CoreDbKvtBackendFn
getFn*: CoreDbKvtGetFn
lenFn*: CoreDbKvtLenFn
delFn*: CoreDbKvtDelFn
putFn*: CoreDbKvtPutFn
hasKeyFn*: CoreDbKvtHasKeyFn

View File

@ -44,6 +44,7 @@ proc validateMethodsDesc(base: CoreDbBaseFns) =
proc validateMethodsDesc(kvt: CoreDbKvtFns) =
doAssert not kvt.backendFn.isNil
doAssert not kvt.getFn.isNil
doAssert not kvt.lenFn.isNil
doAssert not kvt.delFn.isNil
doAssert not kvt.putFn.isNil
doAssert not kvt.hasKeyFn.isNil

View File

@ -50,6 +50,8 @@ type
backLevel: int): Result[KvtDbRef,KvtError] {.noRaise.}
KvtApiGetFn* = proc(db: KvtDbRef,
key: openArray[byte]): Result[Blob,KvtError] {.noRaise.}
KvtApiLenFn* = proc(db: KvtDbRef,
key: openArray[byte]): Result[int,KvtError] {.noRaise.}
KvtApiHasKeyFn* = proc(db: KvtDbRef,
key: openArray[byte]): Result[bool,KvtError] {.noRaise.}
KvtApiIsCentreFn* = proc(db: KvtDbRef): bool {.noRaise.}
@ -76,6 +78,7 @@ type
forget*: KvtApiForgetFn
forkTx*: KvtApiForkTxFn
get*: KvtApiGetFn
len*: KvtApiLenFn
hasKey*: KvtApiHasKeyFn
isCentre*: KvtApiIsCentreFn
isTop*: KvtApiIsTopFn
@ -100,6 +103,7 @@ type
KvtApiProfForgetFn = "forget"
KvtApiProfForkTxFn = "forkTx"
KvtApiProfGetFn = "get"
KvtApiProfLenFn = "len"
KvtApiProfHasKeyFn = "hasKey"
KvtApiProfIsCentreFn = "isCentre"
KvtApiProfIsTopFn = "isTop"
@ -114,6 +118,7 @@ type
KvtApiProfTxTopFn = "txTop"
KvtApiProfBeGetKvpFn = "be/getKvp"
KvtApiProfBeLenKvpFn = "be/lenKvp"
KvtApiProfBePutKvpFn = "be/putKvp"
KvtApiProfBePutEndFn = "be/putEnd"
@ -176,6 +181,7 @@ func init*(api: var KvtApiObj) =
api.forget = forget
api.forkTx = forkTx
api.get = get
api.len = len
api.hasKey = hasKey
api.isCentre = isCentre
api.isTop = isTop
@ -203,6 +209,7 @@ func dup*(api: KvtApiRef): KvtApiRef =
forget: api.forget,
forkTx: api.forkTx,
get: api.get,
len: api.len,
hasKey: api.hasKey,
isCentre: api.isCentre,
isTop: api.isTop,
@ -275,6 +282,11 @@ func init*(
KvtApiProfGetFn.profileRunner:
result = api.get(a, b)
profApi.len =
proc(a: KvtDbRef, b: openArray[byte]): auto =
KvtApiProfLenFn.profileRunner:
result = api.len(a, b)
profApi.hasKey =
proc(a: KvtDbRef, b: openArray[byte]): auto =
KvtApiProfHasKeyFn.profileRunner:
@ -346,6 +358,12 @@ func init*(
result = be.getKvpFn(a)
data.list[KvtApiProfBeGetKvpFn.ord].masked = true
beDup.lenKvpFn =
proc(a: openArray[byte]): auto =
KvtApiProfBeLenKvpFn.profileRunner:
result = be.lenKvpFn(a)
data.list[KvtApiProfBeLenKvpFn.ord].masked = true
beDup.putKvpFn =
proc(a: PutHdlRef; b: openArray[(Blob,Blob)]) =
be.putKvpFn(a,b)

View File

@ -23,6 +23,9 @@ type
GetKvpFn* =
proc(key: openArray[byte]): Result[Blob,KvtError] {.gcsafe, raises: [].}
## Generic backend database retrieval function
LenKvpFn* =
proc(key: openArray[byte]): Result[int,KvtError] {.gcsafe, raises: [].}
## Generic backend database length retrieval function
# -------------
@ -76,6 +79,7 @@ type
## Backend interface.
getKvpFn*: GetKvpFn ## Read key-value pair
lenKvpFn*: LenKvpFn ## Read key-value pair length
putBegFn*: PutBegFn ## Start bulk store session
putKvpFn*: PutKvpFn ## Bulk store key-value pairs
@ -88,6 +92,7 @@ type
proc init*(trg: var BackendObj; src: BackendObj) =
trg.getKvpFn = src.getKvpFn
trg.lenKvpFn = src.lenKvpFn
trg.putBegFn = src.putBegFn
trg.putKvpFn = src.putKvpFn
trg.putEndFn = src.putEndFn

View File

@ -82,6 +82,16 @@ proc getKvpFn(db: MemBackendRef): GetKvpFn =
return ok(move(data))
err(GetNotFound)
proc lenKvpFn(db: MemBackendRef): LenKvpFn =
  ## Build the length lookup closure for the in-memory backend. The closure
  ## rejects an empty key with `KeyInvalid`, returns the length of the table
  ## entry when that entry is valid, and `GetNotFound` otherwise.
  result =
    proc(key: openArray[byte]): Result[int,KvtError] =
      if 0 < key.len:
        let blob = db.mdb.tab.getOrVoid @key
        if blob.isValid:
          ok(blob.len)
        else:
          err(GetNotFound)
      else:
        err(KeyInvalid)
# -------------
proc putBegFn(db: MemBackendRef): PutBegFn =
@ -140,6 +150,7 @@ proc memoryBackend*: BackendRef =
mdb: MemDbRef())
db.getKvpFn = getKvpFn db
db.lenKvpFn = lenKvpFn db
db.putBegFn = putBegFn db
db.putKvpFn = putKvpFn db

View File

@ -92,6 +92,22 @@ proc getKvpFn(db: RdbBackendRef): GetKvpFn =
err(GetNotFound)
proc lenKvpFn(db: RdbBackendRef): LenKvpFn =
  ## Build the length lookup closure for the `rdb` backed database. Driver
  ## errors are passed on (error code only); a length that is not positive
  ## is reported as `GetNotFound`.
  result =
    proc(key: openArray[byte]): Result[int,KvtError] =
      # Ask the driver for the record length
      let nBytes = db.rdb.len(key).valueOr:
        when extraTraceMessages:
          debug logTxt "lenKvpFn() failed", key, error=error[0], info=error[1]
        return err(error[0])

      # Only a non-empty record counts as found
      if nBytes <= 0:
        err(GetNotFound)
      else:
        ok(nBytes)
# -------------
proc putBegFn(db: RdbBackendRef): PutBegFn =
@ -268,6 +284,7 @@ proc rocksDbKvtBackend*(
return err(error)
db.getKvpFn = getKvpFn db
db.lenKvpFn = lenKvpFn db
db.putBegFn = putBegFn db
db.putKvpFn = putKvpFn db
@ -297,6 +314,7 @@ proc rocksDbKvtTriggeredBackend*(
return err((RdbBeHostError,$error))
db.getKvpFn = getKvpFn db
db.lenKvpFn = lenKvpFn db
db.putBegFn = putBegTriggeredFn db
db.putKvpFn = putKvpFn db

View File

@ -53,6 +53,24 @@ proc get*(
res = EmptyBlob
ok move(res)
proc len*(
    rdb: RdbInst;
    key: openArray[byte],
      ): Result[int,(KvtError,string)] =
  ## Look up `key` in the `KvtGeneric` store and return the length of the
  ## data handed to the callback, or `0` if the store reports that no data
  ## was found. A failing `get()` is returned as `RdbBeDriverGetError`
  ## paired with the driver's error text.
  var nBytes = 0
  let onData: DataProc = proc(data: openArray[byte]) =
    # Only the size is recorded, the data itself is not copied
    nBytes = data.len

  let found = rdb.store[KvtGeneric].get(key, onData).valueOr:
    const errSym = RdbBeDriverGetError
    when extraTraceMessages:
      trace logTxt "len", error=errSym, info=error
    return err((errSym,error))

  if found: ok(nBytes) else: ok(0)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -31,18 +31,25 @@ func nLayersKeys*(db: KvtDbRef): int =
# Public functions: get function
# ------------------------------------------------------------------------------
func layersHasKey*(db: KvtDbRef; key: openArray[byte]|seq[byte]): bool =
func layersLen*(db: KvtDbRef; key: openArray[byte]|seq[byte]): Opt[int] =
## Return the length of the value cached for the argument key, if any.
##
when key isnot seq[byte]:
let key = @key
if db.top.delta.sTab.hasKey key:
return true
db.top.delta.sTab.withValue(key, item):
return Opt.some(item[].len())
for w in db.rstack:
if w.delta.sTab.hasKey key:
return true
w.delta.sTab.withValue(key, item):
return Opt.some(item[].len())
Opt.none(int)
func layersHasKey*(db: KvtDbRef; key: openArray[byte]|seq[byte]): bool =
## Return `true` if the argument key is cached, i.e. if `layersLen()` finds
## an entry for it.
##
db.layersLen(key).isSome()
func layersGet*(db: KvtDbRef; key: openArray[byte]|seq[byte]): Opt[Blob] =
## Find an item on the cache layers. An `ok()` result might contain an

View File

@ -36,6 +36,18 @@ proc getUbe*(
return be.getKvpFn key
err(GetNotFound)
proc getUbeLen*(
    db: KvtDbRef;                     # Database
    key: openArray[byte];             # Key of database record
      ): Result[int,KvtError] =
  ## For the argument `key` return the result of the backend's `lenKvpFn`
  ## if a backend is attached. Without a backend, `GetNotFound` is returned.
  ##
  if db.backend.isNil:
    return err(GetNotFound)
  db.backend.lenKvpFn key
proc getBe*(
db: KvtDbRef; # Database
key: openArray[byte]; # Key of database record
@ -48,6 +60,18 @@ proc getBe*(
return ok(w[])
db.getUbe key
proc getBeLen*(
    db: KvtDbRef;                     # Database
    key: openArray[byte];             # Key of database record
      ): Result[int,KvtError] =
  ## Length lookup for `key` below the cache layers. An entry in the
  ## balancer table (if there is a balancer) takes precedence: its length is
  ## returned, with a zero length reported as `GetNotFound`. Otherwise the
  ## request is passed on to `getUbeLen()`.
  if not db.balancer.isNil:
    db.balancer.sTab.withValue(@key, w):
      let nBytes = w[].len
      if 0 < nBytes:
        return ok(nBytes)
      return err(GetNotFound)
  db.getUbeLen key
# ------------
proc put*(
@ -95,6 +119,19 @@ proc get*(
return ok(move(data))
proc len*(
    db: KvtDbRef;                     # Database
    key: openArray[byte];             # Key of database record
      ): Result[int,KvtError] =
  ## For the argument `key` return the length found on the cache layers if
  ## there is an entry, and the result of `getBeLen()` otherwise. An empty
  ## `key` is rejected with `KeyInvalid`.
  ##
  if key.len == 0:
    return err(KeyInvalid)

  let cached = db.layersLen(key)
  if cached.isSome():
    ok(cached.get())
  else:
    db.getBeLen(key)
proc hasKey*(
db: KvtDbRef; # Database

View File

@ -25,6 +25,7 @@
##
import
stew/keyed_queue,
std/[tables, hashes, sets],
chronicles,
eth/[common, rlp],
@ -33,10 +34,20 @@ import
"../.."/[constants, utils/utils],
../access_list as ac_access_list,
".."/[core_db, storage_types, transient_storage],
../../evm/code_bytes,
./distinct_ledgers
export code_bytes
const
debugAccountsLedgerRef = false
codeLruSize = 16*1024
# An LRU cache of 16K items gives roughly 90% hit rate anecdotally on a
# small range of test blocks - this number could be studied in more detail.
# Per EIP-170, the code of a contract can be up to `MAX_CODE_SIZE` = 24kb,
# which would cause a worst case of 384MB memory usage though in reality
# code sizes are much smaller - it would make sense to study these numbers
# in greater detail.
type
AccountFlag = enum
@ -44,7 +55,6 @@ type
IsNew
Dirty
Touched
CodeLoaded
CodeChanged
StorageChanged
NewlyCreated # EIP-6780: self destruct only in same transaction
@ -54,7 +64,7 @@ type
AccountRef = ref object
statement: CoreDbAccount
flags: AccountFlags
code: seq[byte]
code: CodeBytesRef
originalStorage: TableRef[UInt256, UInt256]
overlayStorage: Table[UInt256, UInt256]
@ -69,6 +79,16 @@ type
witnessCache: Table[EthAddress, WitnessData]
isDirty: bool
ripemdSpecial: bool
code: KeyedQueue[Hash256, CodeBytesRef]
## The code cache provides two main benefits:
##
## * duplicate code is shared in memory between accounts
## * the jump destination table does not have to be recomputed for every
## execution, for commonly called contracts
##
## The former feature is especially important in the 2.3-2.7M block range
## when underpriced code opcodes are being run en masse - both advantages
## help performance broadly as well.
ReadOnlyStateDB* = distinct AccountsLedgerRef
@ -330,7 +350,7 @@ proc persistMode(acc: AccountRef): PersistMode =
proc persistCode(acc: AccountRef, ac: AccountsLedgerRef) =
if acc.code.len != 0:
let rc = ac.kvt.put(
contractHashKey(acc.statement.codeHash).toOpenArray, acc.code)
contractHashKey(acc.statement.codeHash).toOpenArray, acc.code.bytes())
if rc.isErr:
warn logTxt "persistCode()",
codeHash=acc.statement.codeHash, error=($$rc.error)
@ -412,33 +432,49 @@ proc getNonce*(ac: AccountsLedgerRef, address: EthAddress): AccountNonce =
if acc.isNil: emptyEthAccount.nonce
else: acc.statement.nonce
proc getCode(acc: AccountRef, kvt: CoreDbKvtRef): lent seq[byte] =
if CodeLoaded notin acc.flags and CodeChanged notin acc.flags:
if acc.statement.codeHash != EMPTY_CODE_HASH:
var rc = kvt.get(contractHashKey(acc.statement.codeHash).toOpenArray)
if rc.isErr:
warn logTxt "getCode()", codeHash=acc.statement.codeHash, error=($$rc.error)
else:
acc.code = move(rc.value)
acc.flags.incl CodeLoaded
else:
acc.flags.incl CodeLoaded # avoid hash comparisons
acc.code
proc getCode*(ac: AccountsLedgerRef, address: EthAddress): seq[byte] =
proc getCode*(ac: AccountsLedgerRef, address: EthAddress): CodeBytesRef =
# Always returns non-nil!
let acc = ac.getAccount(address, false)
if acc.isNil:
return
return CodeBytesRef()
acc.getCode(ac.kvt)
if acc.code == nil:
acc.code =
if acc.statement.codeHash != EMPTY_CODE_HASH:
ac.code.lruFetch(acc.statement.codeHash).valueOr:
var rc = ac.kvt.get(contractHashKey(acc.statement.codeHash).toOpenArray)
if rc.isErr:
warn logTxt "getCode()", codeHash=acc.statement.codeHash, error=($$rc.error)
CodeBytesRef()
else:
let newCode = CodeBytesRef.init(move(rc.value))
ac.code.lruAppend(acc.statement.codeHash, newCode, codeLruSize)
else:
CodeBytesRef()
acc.code
proc getCodeSize*(ac: AccountsLedgerRef, address: EthAddress): int =
let acc = ac.getAccount(address, false)
if acc.isNil:
return
return 0
acc.getCode(ac.kvt).len
if acc.code == nil:
if acc.statement.codeHash == EMPTY_CODE_HASH:
return 0
acc.code = ac.code.lruFetch(acc.statement.codeHash).valueOr:
# On a cache miss, we don't fetch the code - instead, we fetch just the
# length - should the code itself be needed, it will typically remain
# cached and easily accessible in the database layer - this is to prevent
# EXTCODESIZE calls from messing up the code cache and thus causing
# recomputation of the jump destination table
var rc = ac.kvt.len(contractHashKey(acc.statement.codeHash).toOpenArray)
return rc.valueOr:
warn logTxt "getCodeSize()", codeHash=acc.statement.codeHash, error=($$rc.error)
0
acc.code.len()
proc getCommittedStorage*(ac: AccountsLedgerRef, address: EthAddress, slot: UInt256): UInt256 =
let acc = ac.getAccount(address, false)
@ -521,7 +557,9 @@ proc setCode*(ac: AccountsLedgerRef, address: EthAddress, code: seq[byte]) =
if acc.statement.codeHash != codeHash:
var acc = ac.makeDirty(address)
acc.statement.codeHash = codeHash
acc.code = code
# Try to reuse cache entry if it exists, but don't save the code - it's not
# a given that it will be executed within LRU range
acc.code = ac.code.lruFetch(codeHash).valueOr(CodeBytesRef.init(code))
acc.flags.incl CodeChanged
proc setStorage*(ac: AccountsLedgerRef, address: EthAddress, slot, value: UInt256) =
@ -701,7 +739,7 @@ proc getStorageRoot*(ac: AccountsLedgerRef, address: EthAddress): Hash256 =
proc update(wd: var WitnessData, acc: AccountRef) =
# once the code is touched make sure it doesn't get reset back to false in another update
if not wd.codeTouched:
wd.codeTouched = CodeChanged in acc.flags or CodeLoaded in acc.flags
wd.codeTouched = CodeChanged in acc.flags or acc.code != nil
if not acc.originalStorage.isNil:
for k, v in acc.originalStorage:
@ -801,7 +839,7 @@ proc getStorageRoot*(db: ReadOnlyStateDB, address: EthAddress): Hash256 {.borrow
proc getBalance*(db: ReadOnlyStateDB, address: EthAddress): UInt256 {.borrow.}
proc getStorage*(db: ReadOnlyStateDB, address: EthAddress, slot: UInt256): UInt256 {.borrow.}
proc getNonce*(db: ReadOnlyStateDB, address: EthAddress): AccountNonce {.borrow.}
proc getCode*(db: ReadOnlyStateDB, address: EthAddress): seq[byte] {.borrow.}
proc getCode*(db: ReadOnlyStateDB, address: EthAddress): CodeBytesRef {.borrow.}
proc getCodeSize*(db: ReadOnlyStateDB, address: EthAddress): int {.borrow.}
proc contractCollision*(db: ReadOnlyStateDB, address: EthAddress): bool {.borrow.}
proc accountExists*(db: ReadOnlyStateDB, address: EthAddress): bool {.borrow.}

View File

@ -14,6 +14,7 @@
import
eth/common,
../../evm/code_bytes,
../../stateless/multi_keys,
../core_db,
./base/[api_tracking, base_desc]
@ -33,6 +34,7 @@ type
ReadOnlyStateDB* = distinct LedgerRef
export
code_bytes,
LedgerFnInx,
LedgerProfListRef,
LedgerRef,
@ -175,7 +177,7 @@ proc getBalance*(ldg: LedgerRef, eAddr: EthAddress): UInt256 =
result = ldg.ac.getBalance(eAddr)
ldg.ifTrackApi: debug apiTxt, api, elapsed, eAddr, result
proc getCode*(ldg: LedgerRef, eAddr: EthAddress): Blob =
proc getCode*(ldg: LedgerRef, eAddr: EthAddress): CodeBytesRef =
ldg.beginTrackApi LdgGetCodeFn
result = ldg.ac.getCode(eAddr)
ldg.ifTrackApi: debug apiTxt, api, elapsed, eAddr, result=result.toStr
@ -371,7 +373,7 @@ proc getStorageRoot*(db: ReadOnlyStateDB, eAddr: EthAddress): Hash256 {.borrow.}
proc getBalance*(db: ReadOnlyStateDB, eAddr: EthAddress): UInt256 {.borrow.}
proc getStorage*(db: ReadOnlyStateDB, eAddr: EthAddress, slot: UInt256): UInt256 {.borrow.}
proc getNonce*(db: ReadOnlyStateDB, eAddr: EthAddress): AccountNonce {.borrow.}
proc getCode*(db: ReadOnlyStateDB, eAddr: EthAddress): seq[byte] {.borrow.}
proc getCode*(db: ReadOnlyStateDB, eAddr: EthAddress): CodeBytesRef {.borrow.}
proc getCodeSize*(db: ReadOnlyStateDB, eAddr: EthAddress): int {.borrow.}
proc contractCollision*(db: ReadOnlyStateDB, eAddr: EthAddress): bool {.borrow.}
proc accountExists*(db: ReadOnlyStateDB, eAddr: EthAddress): bool {.borrow.}

74
nimbus/evm/code_bytes.nim Normal file
View File

@ -0,0 +1,74 @@
# Nimbus
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import stew/byteutils, results, ./interpreter/op_codes
export results
type CodeBytesRef* = ref object
  ## Code buffer that caches invalid jump positions used for verifying jump
  ## destinations - `bytes` is immutable once an instance is created while
  ## `invalidPositions` is built up on demand by `isValidOpcode`. Instances
  ## are meant to be shared, so the scan state kept here must stay valid
  ## across any sequence of queries.
  bytes: seq[byte]
  invalidPositions: seq[byte] # bit seq of invalid jump positions
  # Number of leading code bytes that have been classified so far, i.e. the
  # index at which decoding resumes. It always sits on an opcode boundary
  # (never inside push data) or at the end of the code.
  processed: int

# Byte index and bit mask addressing position `pos` in the bit seq.
template bitpos(pos: int): (int, byte) =
  (pos shr 3, 1'u8 shl (pos and 0x07))

func init*(T: type CodeBytesRef, bytes: sink seq[byte]): CodeBytesRef =
  ## Take ownership of `bytes` and allocate an all-zero jump table holding
  ## one bit per code byte.
  let ip = newSeq[byte]((bytes.len + 7) div 8)
  CodeBytesRef(bytes: move(bytes), invalidPositions: ip)

func init*(T: type CodeBytesRef, bytes: openArray[byte]): CodeBytesRef =
  ## Create an instance from a copy of `bytes`.
  CodeBytesRef.init(@bytes)

func init*(T: type CodeBytesRef, bytes: openArray[char]): CodeBytesRef =
  ## Create an instance from the raw bytes of a character buffer.
  CodeBytesRef.init(bytes.toOpenArrayByte(0, bytes.high()))

func fromHex*(T: type CodeBytesRef, hex: string): Opt[CodeBytesRef] =
  ## Decode `hex` via `hexToSeqByte`; returns `none` if that raises a
  ## `ValueError`.
  try:
    Opt.some(CodeBytesRef.init(hexToSeqByte(hex)))
  except ValueError:
    Opt.none(CodeBytesRef)

func invalidPosition(c: CodeBytesRef, pos: int): bool =
  ## `true` if `pos` has been marked as push data. The caller must make sure
  ## that `pos` is within the code.
  let (bpos, bbit) = bitpos(pos)
  (c.invalidPositions[bpos] and bbit) > 0

func bytes*(c: CodeBytesRef): lent seq[byte] =
  ## Read-only view of the code.
  c[].bytes

func len*(c: CodeBytesRef): int =
  ## Number of code bytes.
  len(c.bytes)

func isValidOpcode*(c: CodeBytesRef, position: int): bool =
  ## Return `true` if `position` lies within the code and is not part of the
  ## immediate data of a `Push1`..`Push32` instruction. The code is decoded
  ## lazily from the point where the previous query stopped and the result
  ## is recorded in `invalidPositions`.
  if position < 0 or position >= len(c):
    # Outside the code (a negative index would also fall outside the table)
    false
  elif c.invalidPosition(position):
    false
  elif position < c.processed:
    # Already classified and not marked as push data
    true
  else:
    var i = c.processed
    while i <= position:
      let opcode = Op(c.bytes[i])
      if opcode >= Op.Push1 and opcode <= Op.Push32:
        let
          leftBound = i + 1
          # A push truncated by the end of the code must not mark positions
          # beyond the code - the table only has bits for existing bytes
          rightBound = min(leftBound + (opcode.int - 95), c.bytes.len)
        for z in leftBound ..< rightBound:
          let (bpos, bbit) = bitpos(z)
          c.invalidPositions[bpos] = c.invalidPositions[bpos] or bbit
        i = rightBound
      else:
        i += 1
    # Resume at the next undecoded opcode. Resuming one byte earlier could
    # land on the last data byte of a push and decode that data as an opcode
    # in a later query, corrupting the table.
    c.processed = i

    not c.invalidPosition(position)

func `==`*(a: CodeBytesRef, b: openArray[byte]): bool =
  ## Compare the code held by `a` against `b`.
  a.bytes == b

View File

@ -6,76 +6,52 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
chronicles, strformat, strutils, sequtils, parseutils,
eth/common,
./interpreter/op_codes
std/[sequtils, strutils], chronicles, eth/common, ./interpreter/op_codes, ./code_bytes
logScope:
topics = "vm code_stream"
export code_bytes
type
CodeStream* = ref object
bytes*: seq[byte]
depthProcessed: int
invalidPositions: seq[byte] # bit seq of invalid jump positions
pc*: int
type CodeStream* = object
code: CodeBytesRef
pc*: int
proc `$`*(b: byte): string =
$(b.int)
func init*(T: type CodeStream, code: CodeBytesRef): T =
T(code: code)
template bitpos(pos: int): (int, byte) =
(pos shr 3, 1'u8 shl (pos and 0x07))
func init*(T: type CodeStream, code: sink seq[byte]): T =
T(code: CodeBytesRef.init(move(code)))
proc newCodeStream*(codeBytes: sink seq[byte]): CodeStream =
new(result)
result.bytes = system.move(codeBytes)
result.pc = 0
result.invalidPositions = newSeq[byte]((result.bytes.len + 7) div 8)
result.depthProcessed = 0
func init*(T: type CodeStream, code: openArray[byte]): T =
T(code: CodeBytesRef.init(code))
proc invalidPosition(c: CodeStream, pos: int): bool =
let (bpos, bbit) = bitpos(pos)
(c.invalidPositions[bpos] and bbit) > 0
func init*(T: type CodeStream, code: openArray[char]): T =
T(code: CodeBytesRef.init(code))
proc newCodeStream*(codeBytes: string): CodeStream =
newCodeStream(codeBytes.mapIt(it.byte))
proc newCodeStreamFromUnescaped*(code: string): CodeStream =
# from 0xunescaped
var codeBytes: seq[byte] = @[]
for z, c in code[2..^1]:
if z mod 2 == 1:
var value: int
discard parseHex(&"0x{code[z+1..z+2]}", value)
codeBytes.add(value.byte)
newCodeStream(codeBytes)
template read*(c: CodeStream, size: int): openArray[byte] =
# TODO: use openArray[bytes]
template read*(c: var CodeStream, size: int): openArray[byte] =
if c.pc + size - 1 < c.bytes.len:
let pos = c.pc
c.pc += size
c.bytes.toOpenArray(pos, pos + size - 1)
c.code.bytes.toOpenArray(pos, pos + size - 1)
else:
c.pc = c.bytes.len
c.bytes.toOpenArray(0, -1)
c.code.bytes.toOpenArray(0, -1)
proc readVmWord*(c: var CodeStream, n: static int): UInt256 =
func readVmWord*(c: var CodeStream, n: static int): UInt256 =
## Reads `n` bytes from the code stream and pads
## the remaining bytes with zeros.
let result_bytes = cast[ptr array[32, byte]](addr result)
let last = min(c.pc + n, c.bytes.len)
let last = min(c.pc + n, c.code.bytes.len)
let toWrite = last - c.pc
for i in 0 ..< toWrite : result_bytes[i] = c.bytes[last - i - 1]
for i in 0 ..< toWrite:
result_bytes[i] = c.code.bytes[last - i - 1]
c.pc = last
proc len*(c: CodeStream): int =
len(c.bytes)
func len*(c: CodeStream): int =
len(c.code)
proc next*(c: var CodeStream): Op =
if c.pc != c.bytes.len:
result = Op(c.bytes[c.pc])
func next*(c: var CodeStream): Op =
if c.pc != c.code.len:
result = Op(c.code.bytes[c.pc])
inc c.pc
else:
result = Op.Stop
@ -86,55 +62,42 @@ iterator items*(c: var CodeStream): Op =
yield nextOpcode
nextOpcode = c.next()
proc `[]`*(c: CodeStream, offset: int): Op =
Op(c.bytes[offset])
func `[]`*(c: CodeStream, offset: int): Op =
Op(c.code.bytes[offset])
proc peek*(c: var CodeStream): Op =
if c.pc < c.bytes.len:
Op(c.bytes[c.pc])
func peek*(c: var CodeStream): Op =
if c.pc < c.code.bytes.len:
Op(c.code.bytes[c.pc])
else:
Op.Stop
proc updatePc*(c: var CodeStream, value: int) =
func updatePc*(c: var CodeStream, value: int) =
c.pc = min(value, len(c))
proc isValidOpcode*(c: CodeStream, position: int): bool =
if position >= len(c):
false
elif c.invalidPosition(position):
false
elif position <= c.depthProcessed:
true
else:
var i = c.depthProcessed
while i <= position:
var opcode = Op(c[i])
if opcode >= Op.Push1 and opcode <= Op.Push32:
var leftBound = (i + 1)
var rightBound = leftBound + (opcode.int - 95)
for z in leftBound ..< rightBound:
let (bpos, bbit) = bitpos(z)
c.invalidPositions[bpos] = c.invalidPositions[bpos] or bbit
i = rightBound
else:
i += 1
c.depthProcessed = i - 1
func isValidOpcode*(c: CodeStream, position: int): bool =
c.code.isValidOpcode(position)
not c.invalidPosition(position)
func bytes*(c: CodeStream): lent seq[byte] =
c.code.bytes()
proc decompile*(original: CodeStream): seq[(int, Op, string)] =
# behave as https://etherscan.io/opcode-tool
var c = newCodeStream(original.bytes)
var c = CodeStream.init(original.bytes)
while true:
var op = c.next
if op >= Push1 and op <= Push32:
result.add(
(c.pc - 1, op, "0x" & c.read(op.int - 95).mapIt($(it.BiggestInt.toHex(2))).join("")))
(
c.pc - 1,
op,
"0x" & c.read(op.int - 95).mapIt($(it.BiggestInt.toHex(2))).join(""),
)
)
elif op != Op.Stop:
result.add((c.pc - 1, op, ""))
else:
result.add((-1, Op.Stop, ""))
break
proc atEnd*(c: CodeStream): bool =
c.pc >= c.bytes.len
func atEnd*(c: CodeStream): bool =
c.pc >= c.code.bytes.len

View File

@ -16,6 +16,7 @@ import
"."/[types],
./interpreter/[gas_meter, gas_costs, op_codes],
./evm_errors,
./code_bytes,
../common/[common, evmforks],
../utils/utils,
stew/byteutils,
@ -203,9 +204,9 @@ template selfDestruct*(c: Computation, address: EthAddress) =
else:
c.execSelfDestruct(address)
template getCode*(c: Computation, address: EthAddress): seq[byte] =
template getCode*(c: Computation, address: EthAddress): CodeBytesRef =
when evmc_enabled:
c.host.copyCode(address)
CodeBytesRef.init(c.host.copyCode(address))
else:
c.vmState.readOnlyStateDB.getCode(address)
@ -236,14 +237,14 @@ proc newComputation*(vmState: BaseVMState, sysCall: bool, message: Message,
if result.msg.isCreate():
result.msg.contractAddress = result.generateContractAddress(salt)
result.code = newCodeStream(message.data)
result.code = CodeStream.init(message.data)
message.data = @[]
else:
result.code = newCodeStream(
result.code = CodeStream.init(
vmState.readOnlyStateDB.getCode(message.codeAddress))
func newComputation*(vmState: BaseVMState, sysCall: bool,
message: Message, code: seq[byte]): Computation =
message: Message, code: CodeBytesRef): Computation =
new result
result.vmState = vmState
result.msg = message
@ -251,7 +252,7 @@ func newComputation*(vmState: BaseVMState, sysCall: bool,
result.stack = EvmStackRef.new()
result.returnStack = @[]
result.gasMeter.init(message.gas)
result.code = newCodeStream(code)
result.code = CodeStream.init(code)
result.sysCall = sysCall
template gasCosts*(c: Computation): untyped =

View File

@ -176,8 +176,8 @@ proc extCodeCopyOp (k: var VmCtx): EvmResultVoid =
cpt.gasCosts[ExtCodeCopy].m_handler(cpt.memory.len, memPos, len),
reason = "ExtCodeCopy fee")
let codeBytes = cpt.getCode(address)
cpt.memory.writePadded(codeBytes, memPos, codePos, len)
let code = cpt.getCode(address)
cpt.memory.writePadded(code.bytes, memPos, codePos, len)
ok()
@ -194,8 +194,8 @@ proc extCodeCopyEIP2929Op (k: var VmCtx): EvmResultVoid =
cpt.gasEip2929AccountCheck(address)
? cpt.opcodeGastCost(ExtCodeCopy, gasCost, reason = "ExtCodeCopy EIP2929")
let codeBytes = cpt.getCode(address)
cpt.memory.writePadded(codeBytes, memPos, codePos, len)
let code = cpt.getCode(address)
cpt.memory.writePadded(code.bytes(), memPos, codePos, len)
ok()
# -----------

View File

@ -543,7 +543,7 @@ proc accountCode(ud: RootRef, params: Args, parent: Node): RespResult {.apiPragm
let acc = AccountNode(parent)
try:
let code = acc.db.getCode(acc.address)
resp(code)
resp(code.bytes())
except RlpError as ex:
err(ex.msg)

View File

@ -246,7 +246,7 @@ proc setupEthRpc*(
let
accDB = stateDBFromTag(quantityTag)
address = data.ethAddr
result = accDB.getCode(address)
result = accDB.getCode(address).bytes()
template sign(privateKey: PrivateKey, message: string): seq[byte] =
# message length encoded as ASCII representation of decimal

View File

@ -166,7 +166,7 @@ proc setupHost(call: CallParams): TransactionHost =
# with the contract address. This differs from the previous Nimbus EVM API.
# Guarded under `evmc_enabled` for now so it doesn't break vm2.
when defined(evmc_enabled):
var code: seq[byte]
var code: CodeBytesRef
if call.isCreate:
let sender = call.sender
let contractAddress =
@ -174,7 +174,7 @@ proc setupHost(call: CallParams): TransactionHost =
host.msg.recipient = contractAddress.toEvmc
host.msg.input_size = 0
host.msg.input_data = nil
code = call.input
code = CodeBytesRef.init(call.input)
else:
# TODO: Share the underlying data, but only after checking this does not
# cause problems with the database.
@ -189,7 +189,7 @@ proc setupHost(call: CallParams): TransactionHost =
let cMsg = hostToComputationMessage(host.msg)
host.computation = newComputation(vmState, call.sysCall, cMsg, code)
host.code = system.move(code)
host.code = code
else:
if call.input.len > 0:

View File

@ -14,7 +14,7 @@ import
evmc/evmc, ../config
# The built-in Nimbus EVM, via imported C function.
proc evmc_create_nimbus_evm(): ptr evmc_vm {.cdecl, importc, raises: [].}
proc evmc_create_nimbus_evm(): ptr evmc_vm {.cdecl, importc, raises: [], gcsafe.}
# Import this module to link in the definition of `evmc_create_nimbus_evm`.
# Nim thinks the module is unused because the function is only called via

View File

@ -123,28 +123,11 @@ proc evmcExecComputation*(host: TransactionHost): EvmcResult =
let hostContext = cast[evmc_host_context](host)
host.hostInterface = hostInterface.unsafeAddr
# Without `{.gcsafe.}:` here, the call via `vm.execute` results in a Nim
# compile-time error in a far away function. Starting here, a cascade of
# warnings takes place: "Warning: '...' is not GC-safe as it performs an
# indirect call here [GCUnsafe2]", then a list of "Warning: '...' is not
# GC-safe as it calls '...'" at each function up the call stack, to a high
# level function `persistBlocks` where it terminates compilation as an error
# instead of a warning.
#
# It is tempting to annotate all EVMC API functions with `{.cdecl, gcsafe.}`,
# overriding the function signatures from the Nim EVMC module. Perhaps we
# will do that, though it's conceptually dubious, as the two sides of the
# EVMC ABI live in different GC worlds (when loaded as a shared library with
# its own Nim runtime), very similar to calling between threads.
#
# TODO: But wait: Why does the Nim EVMC test program compile fine without
# any `gcsafe`, even with `--threads:on`?
{.gcsafe.}:
result = vm.execute(vm, hostInterface.unsafeAddr, hostContext,
evmc_revision(host.vmState.fork.ord), host.msg,
if host.code.len > 0: host.code[0].unsafeAddr
else: nil,
host.code.len.csize_t)
result = vm.execute(vm, hostInterface.unsafeAddr, hostContext,
evmc_revision(host.vmState.fork.ord), host.msg,
if host.code.len > 0: host.code.bytes[0].unsafeAddr
else: nil,
host.code.len.csize_t)
host.showCallReturn(result)

View File

@ -218,7 +218,7 @@ proc copyCode(host: TransactionHost, address: HostAddress,
#
# Note, when there is no code, `getCode` result is empty `seq`. It was `nil`
# when the DB was first implemented; this changed due to Nim language changes since then.
var code: seq[byte] = host.vmState.readOnlyStateDB.getCode(address)
let code = host.vmState.readOnlyStateDB.getCode(address)
var safe_len: int = code.len # It's safe to assume >= 0.
if code_offset >= safe_len.HostSize:
@ -230,7 +230,7 @@ proc copyCode(host: TransactionHost, address: HostAddress,
safe_len = buffer_size.int
if safe_len > 0:
copyMem(buffer_data, code[safe_offset].addr, safe_len)
copyMem(buffer_data, code.bytes()[safe_offset].addr, safe_len)
return safe_len.HostSize
proc selfDestruct(host: TransactionHost, address, beneficiary: HostAddress) {.show.} =

View File

@ -10,7 +10,7 @@ import
std/sets,
stint, evmc/evmc,
eth/common/eth_types,
../evm/types
../evm/[code_bytes, types]
# Object `TransactionHost` represents "EVMC host" to the EVM. "Host services"
# manage account state outside EVM such as balance transfers, storage, logs and
@ -59,7 +59,7 @@ type
computation*: Computation
msg*: EvmcMessage
input*: seq[byte]
code*: seq[byte]
code*: CodeBytesRef
cachedTxContext*: bool
txContext*: EvmcTxContext
depth*: int

View File

@ -1,5 +1,5 @@
# Nimbus
# Copyright (c) 2023 Status Research & Development GmbH
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
@ -75,7 +75,7 @@ proc dumpAccount*(db: LedgerRef, acc: EthAddress): DumpAccount =
nonce : db.getNonce(acc),
root : db.getStorageRoot(acc),
codeHash: db.getCodeHash(acc),
code : db.getCode(acc),
code : db.getCode(acc).bytes(),
key : keccakHash(acc)
)
for k, v in db.cachedStorage(acc):

View File

@ -11,7 +11,7 @@
import
std/[json, strutils],
json_rpc/[rpcclient], httputils,
eth/[common, rlp], chronicles,
eth/common, chronicles,
../nimbus/utils/utils,
./parser

View File

@ -6,15 +6,12 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[times, macros, strutils, os, osproc, threadpool],
std/[times, macros, strutils, os, osproc],
unittest2,
../nimbus/compile_info,
../nimbus/utils/utils
export strutils, os, unittest2, osproc, threadpool
# AppVeyor may go out of memory with the default of 4
setMinPoolSize(2)
export strutils, os, unittest2, osproc
proc executeMyself(numModules: int, names: openArray[string]): int =
let appName = getAppFilename()

View File

@ -11,7 +11,7 @@ import unittest2, sequtils,
proc codeStreamMain*() =
suite "parse bytecode":
test "accepts bytes":
let codeStream = newCodeStream("\x01")
let codeStream = CodeStream.init("\x01")
check(codeStream.len == 1)
@ -22,14 +22,14 @@ proc codeStreamMain*() =
# CodeStream(code_bytes)
test "next returns the correct opcode":
var codeStream = newCodeStream("\x01\x02\x30")
var codeStream = CodeStream.init("\x01\x02\x30")
check(codeStream.next == Op.ADD)
check(codeStream.next == Op.MUL)
check(codeStream.next == Op.ADDRESS)
test "peek returns next opcode without changing location":
var codeStream = newCodeStream("\x01\x02\x30")
var codeStream = CodeStream.init("\x01\x02\x30")
check(codeStream.pc == 0)
check(codeStream.peek == Op.ADD)
check(codeStream.pc == 0)
@ -40,14 +40,14 @@ proc codeStreamMain*() =
test "stop opcode is returned when end reached":
var codeStream = newCodeStream("\x01\x02")
var codeStream = CodeStream.init("\x01\x02")
discard codeStream.next
discard codeStream.next
check(codeStream.next == Op.STOP)
# Seek has been commented out for future deletion
# test "seek reverts to original position on exit":
# var codeStream = newCodeStream("\x01\x02\x30")
# var codeStream = CodeStream.init("\x01\x02\x30")
# check(codeStream.pc == 0)
# codeStream.seek(1):
# check(codeStream.pc == 1)
@ -56,13 +56,13 @@ proc codeStreamMain*() =
# check(codeStream.peek == Op.ADD)
test "[] returns opcode":
let codeStream = newCodeStream("\x01\x02\x30")
let codeStream = CodeStream.init("\x01\x02\x30")
check(codeStream[0] == Op.ADD)
check(codeStream[1] == Op.MUL)
check(codeStream[2] == Op.ADDRESS)
test "isValidOpcode invalidates after PUSHXX":
var codeStream = newCodeStream("\x02\x60\x02\x04")
var codeStream = CodeStream.init("\x02\x60\x02\x04")
check(codeStream.isValidOpcode(0))
check(codeStream.isValidOpcode(1))
check(not codeStream.isValidOpcode(2))
@ -71,7 +71,7 @@ proc codeStreamMain*() =
test "isValidOpcode 0":
var codeStream = newCodeStream(@[2.byte, 3.byte, 0x72.byte].concat(repeat(4.byte, 32)).concat(@[5.byte]))
var codeStream = CodeStream.init(@[2.byte, 3.byte, 0x72.byte].concat(repeat(4.byte, 32)).concat(@[5.byte]))
# valid: 0 - 2 :: 22 - 35
# invalid: 3-21 (PUSH19) :: 36+ (too long)
check(codeStream.isValidOpcode(0))
@ -86,7 +86,7 @@ proc codeStreamMain*() =
test "isValidOpcode 1":
let test = @[2.byte, 3.byte, 0x7d.byte].concat(repeat(4.byte, 32)).concat(@[5.byte, 0x7e.byte]).concat(repeat(4.byte, 35)).concat(@[1.byte, 0x61.byte, 1.byte, 1.byte, 1.byte])
var codeStream = newCodeStream(test)
var codeStream = CodeStream.init(test)
# valid: 0 - 2 :: 33 - 36 :: 68 - 73 :: 76
# invalid: 3 - 32 (PUSH30) :: 37 - 67 (PUSH31) :: 74, 75 (PUSH2) :: 77+ (too long)
check(codeStream.isValidOpcode(0))
@ -109,7 +109,7 @@ proc codeStreamMain*() =
test "right number of bytes invalidates":
var codeStream = newCodeStream("\x02\x03\x60\x02\x02")
var codeStream = CodeStream.init("\x02\x03\x60\x02\x02")
check(codeStream.isValidOpcode(0))
check(codeStream.isValidOpcode(1))
check(codeStream.isValidOpcode(2))

View File

@ -128,7 +128,7 @@ proc verifyStateDB*(wantedState: JsonNode, stateDB: ReadOnlyStateDB) =
wantedBalance = UInt256.fromHex accountData{"balance"}.getStr
wantedNonce = accountData{"nonce"}.getHexadecimalInt.AccountNonce
actualCode = stateDB.getCode(account)
actualCode = stateDB.getCode(account).bytes()
actualBalance = stateDB.getBalance(account)
actualNonce = stateDB.getNonce(account)

View File

@ -106,7 +106,7 @@ proc envToHeader(env: EnvStruct): BlockHeader =
proc postState(db: LedgerRef, alloc: var GenesisAlloc) =
for accAddr in db.addresses():
var acc = GenesisAccount(
code: db.getCode(accAddr),
code: db.getCode(accAddr).bytes(),
balance: db.getBalance(accAddr),
nonce: db.getNonce(accAddr)
)

2
vendor/nim-evmc vendored

@ -1 +1 @@
Subproject commit 86d22a026b0aa07c07b3afd7d91ca475e0eae12a
Subproject commit 6e261148565a311536b1a29f1568e8c4470baf9d