create per-fork opcode dispatcher (#2579)

In the current VM opcode dispatcher, a two-level case statement is
generated that first matches the opcode and then uses another nested
case statement to select the actual implementation based on which fork
it is, causing the dispatcher to grow by `O(opcodes) * O(forks)`.

The fork does not change between instructions, causing significant
inefficiency for this approach - not only because it repeats the fork
lookup but also because of code size bloat and missed optimizations.

A second source of inefficiency in dispatching is the tracer code which
in the vast majority of cases is disabled but nevertheless sees multiple
conditionals being evaluated for each instruction only to remain
disabled throughout execution.

This PR rewrites the opcode dispatcher macro to generate a separate
dispatcher for each fork and tracer setting and goes on to pick the
right one at the start of the computation.

This has many advantages:

* much smaller dispatcher
* easier to compile
* better inlining
* fewer pointlessly repeated instructions
* simplified macro (!)
* slow "low-compiler-memory" dispatcher code can be removed

Net block import improvement at about 4-6% depending on the contract -
synthetic EVM benchmarks would show an even better result most likely.
This commit is contained in:
Jacek Sieka 2024-08-28 10:20:36 +02:00 committed by GitHub
parent fa59898388
commit 8857fccb44
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 145 additions and 200 deletions

View File

@ -53,8 +53,10 @@ func readVmWord*(c: var CodeStream, n: static int): UInt256 =
func len*(c: CodeStream): int =
len(c.code)
func next*(c: var CodeStream): Op =
if c.pc != c.code.len:
func next*(c: var CodeStream): Op {.inline.} =
# The extra >= 0 check helps eliminate `IndexDefect` from the optimized code
# which keeps this hotspot in the EVM small, code-size-wise
if c.pc >= 0 and c.pc < c.code.len:
result = Op(c.code.bytes[c.pc])
inc c.pc
else:

View File

@ -451,6 +451,20 @@ func traceError*(c: Computation) =
func prepareTracer*(c: Computation) =
c.vmState.capturePrepare(c, c.msg.depth)
func opcodeGasCost*(
c: Computation, op: Op, gasCost: static GasInt, tracingEnabled: static bool,
reason: static string): EvmResultVoid {.inline.} =
# Special case of the opcodeGasCost function used for fixed-gas opcodes - since
# the parameters are known at compile time, we inline and specialize it
when tracingEnabled:
c.vmState.captureGasCost(
c,
op,
gasCost,
c.gasMeter.gasRemaining,
c.msg.depth + 1)
c.gasMeter.consumeGas(gasCost, reason)
func opcodeGasCost*(
c: Computation, op: Op, gasCost: GasInt, reason: static string): EvmResultVoid =
if c.vmState.tracingEnabled:

View File

@ -801,32 +801,6 @@ gasCosts(FkBerlin, berlin, BerlinGasCosts)
gasCosts(FkLondon, london, LondonGasCosts)
gasCosts(FkShanghai, shanghai, ShanghaiGasCosts)
type
OpGck* = array[Op, GasCostKind]
func opGck(gc: GasCosts): OpGck {.compileTime.} =
for op, x in gc:
result[op] = x.kind
# Map fork to GasCostKind
# used in op_dispatcher.nim
const forkToGck*: array[EVMFork, OpGck] = [
opGck BaseGasCosts , # FkFrontier
opGck HomesteadGasCosts , # FkHomestead
opGck TangerineGasCosts , # kTangerine
opGck SpuriousGasCosts , # FkSpurious
opGck SpuriousGasCosts , # FkByzantium
opGck ConstantinopleGasCosts, # FkConstantinople
opGck SpuriousGasCosts , # FkPetersburg
opGck IstanbulGasCosts , # FkIstanbul
opGck BerlinGasCosts , # FkBerlin
opGck LondonGasCosts , # FkLondon
opGck LondonGasCosts , # FkParis
opGck ShanghaiGasCosts , # FkShanghai
opGck ShanghaiGasCosts , # FkCancun
opGck ShanghaiGasCosts , # FkPrague
]
proc forkToSchedule*(fork: EVMFork): GasCosts =
if fork < FkHomestead:
BaseGasCosts

View File

@ -20,7 +20,9 @@ func init*(m: var GasMeter, startGas: GasInt) =
m.gasRefunded = 0
func consumeGas*(
gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid =
gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid {.inline.} =
# consumeGas is a hotspot in the vm due to it being called for every
# instruction
# TODO report reason - consumeGas is a hotspot in EVM execution so it has to
# be done carefully
if amount > gasMeter.gasRemaining:

View File

@ -15,7 +15,6 @@ const
isChatty {.used.} = noisy > 1
import
../code_stream,
../computation,
../evm_errors,
../../common/evmforks,
@ -24,98 +23,90 @@ import
./op_codes,
./op_handlers,
./op_handlers/oph_defs,
chronicles,
macros
export
EVMFork, Op,
oph_defs,
gas_meter
export EVMFork, Op, oph_defs, gas_meter
# ------------------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------------------
template handleStopDirective(cpt: VmCpt) =
template handleStopDirective(cpt: VmCpt, tracingEnabled: bool) =
#trace "op: Stop"
if not cpt.code.atEnd() and cpt.tracingEnabled:
# we only trace `REAL STOP` and ignore `FAKE STOP`
cpt.opIndex = cpt.traceOpCodeStarted(Stop)
cpt.traceOpCodeEnded(Stop, cpt.opIndex)
when tracingEnabled:
if not cpt.code.atEnd():
# we only trace `REAL STOP` and ignore `FAKE STOP`
cpt.opIndex = cpt.traceOpCodeStarted(Stop)
cpt.traceOpCodeEnded(Stop, cpt.opIndex)
template handleFixedGasCostsDirective(fork: EVMFork; op: Op; cpt: VmCpt) =
if cpt.tracingEnabled:
template handleFixedGasCostsDirective(
fork: EVMFork, op: Op, cost: GasInt, cpt: VmCpt, tracingEnabled: bool
) =
when tracingEnabled:
cpt.opIndex = cpt.traceOpCodeStarted(op)
? cpt.opcodeGasCost(op, cpt.gasCosts[op].cost, reason = $op)
? vmOpHandlers[fork][op].run(cpt)
?cpt.opcodeGasCost(op, cost, tracingEnabled, reason = $op)
?vmOpHandlers[fork][op].run(cpt)
# If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes.
if cpt.tracingEnabled and cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex)
when tracingEnabled:
if cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex)
template handleOtherDirective(fork: EVMFork; op: Op; cpt: VmCpt) =
if cpt.tracingEnabled:
template handleOtherDirective(fork: EVMFork, op: Op, cpt: VmCpt, tracingEnabled: bool) =
when tracingEnabled:
cpt.opIndex = cpt.traceOpCodeStarted(op)
? vmOpHandlers[fork][op].run(cpt)
?vmOpHandlers[fork][op].run(cpt)
# If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes.
if cpt.tracingEnabled and cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex)
when tracingEnabled:
if cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex)
# ------------------------------------------------------------------------------
# Private, big nasty doubly nested case matrix generator
# ------------------------------------------------------------------------------
proc makeCaseDispatcher(forkArg: EVMFork, tracingEnabled: bool, opArg, cpt: NimNode): NimNode =
# Create a case statement for dispatching opcode to handler for the given
# fork, taking care to record the gas cost
# TODO there are several forks for which neither opcodes nor gas costs
# changed - these could use the same dispatcher thus saving some space
# and compile time
let gasCosts = forkToSchedule(forkArg)
# reminiscent of Mamy's opTableToCaseStmt() from original VM
proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode =
# Outer case/switch => Op
let branchOnOp = quote do: `opArg`
result = nnkCaseStmt.newTree(branchOnOp)
result = nnkCaseStmt.newTree(opArg)
for op in Op:
let asOp = quote do: Op(`op`)
# Inner case/switch => Fork
let branchOnFork = quote do: `forkArg`
var forkCaseSubExpr = nnkCaseStmt.newTree(branchOnFork)
for fork in EVMFork:
let asFork = quote do: EVMFork(`fork`)
let gcTable = forkToGck[fork]
let branchStmt = block:
let
asOp = quote: `op`
handler =
if op == Stop:
quote do:
handleStopDirective(`cpt`)
elif gcTable[op] == GckFixed:
quote do:
handleFixedGasCostsDirective(`asFork`,`asOp`,`cpt`)
quote:
handleStopDirective(`cpt`, `tracingEnabled`)
elif gasCosts[op].kind == GckFixed:
let cost = gasCosts[op].cost
quote:
handleFixedGasCostsDirective(
`forkArg`, `op`, `cost`, `cpt`, `tracingEnabled`
)
else:
quote do:
handleOtherDirective(`asFork`,`asOp`,`cpt`)
quote:
handleOtherDirective(`forkArg`, `op`, `cpt`, `tracingEnabled`)
branch =
case op
of Create, Create2, Call, CallCode, DelegateCall, StaticCall:
# These opcodes use `chainTo` to create a continuation call which must
# be handled separately
quote:
`handler`
if not `cpt`.continuation.isNil:
break
forkCaseSubExpr.add nnkOfBranch.newTree(asFork, branchStmt)
# Wrap innner case/switch into outer case/switch
let branchStmt = block:
case op
of Stop, Return, Revert, SelfDestruct:
quote do:
`forkCaseSubExpr`
break
else:
# Anyway, the point is that now we might as well just do this check
# for *every* opcode (other than Return/Revert/etc, which need to
# break no matter what).
quote do:
`forkCaseSubExpr`
if not `cpt`.continuation.isNil:
of Stop, Return, Revert, SelfDestruct:
quote:
`handler`
break
else:
handler
result.add nnkOfBranch.newTree(asOp, branchStmt)
result.add nnkOfBranch.newTree(asOp, branch)
when isChatty:
echo ">>> ", result.repr
@ -124,39 +115,21 @@ proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode =
# Public macros/functions
# ------------------------------------------------------------------------------
macro genOptimisedDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt): untyped =
result = fork.toCaseStmt(op, cpt)
template genLowMemDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt) =
if op == Stop:
handleStopDirective(cpt)
break
if BaseGasCosts[op].kind == GckFixed:
handleFixedGasCostsDirective(fork, op, cpt)
else:
handleOtherDirective(fork, op, cpt)
case cpt.instr
of Return, Revert, SelfDestruct:
break
else:
# FIXME-manyOpcodesNowRequireContinuations
if not cpt.continuation.isNil:
break
macro dispatchInstr*(
fork: static EVMFork, tracingEnabled: static bool, op: Op, cpt: VmCpt
): untyped =
makeCaseDispatcher(fork, tracingEnabled, op, cpt)
# ------------------------------------------------------------------------------
# Debugging ...
# ------------------------------------------------------------------------------
when isMainModule and isChatty:
import ../types
proc optimised(cpt: VmCpt, fork: EVMFork): EvmResultVoid {.compileTime.} =
proc optimised(cpt: VmCpt): EvmResultVoid {.compileTime.} =
while true:
genOptimisedDispatcher(fork, cpt.instr, desc)
dispatchInstr(FkFrontier, false, cpt.instr, cpt)
# ------------------------------------------------------------------------------
# End

View File

@ -27,7 +27,7 @@ import
oph_create, oph_call, oph_sysops]
const
allHandlersList = @[
allHandlersList = [
(VmOpExecArithmetic, "Arithmetic"),
(VmOpExecHash, "Hash"),
(VmOpExecEnvInfo, "EnvInfo"),

View File

@ -38,8 +38,15 @@ when optimizationCondition:
# this is a top level pragma since nim 1.6.16
{.optimization: speed.}
proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid =
## Op code execution handler main loop.
proc runVM(
c: VmCpt,
shouldPrepareTracer: bool,
fork: static EVMFork,
tracingEnabled: static bool,
): EvmResultVoid =
## VM instruction handler main loop - for each fork, a distinc version of
## this function is instantiated so that selection of fork-specific
## versions of functions happens only once
# It's important not to re-prepare the tracer after
# an async operation, only after a call/create.
@ -49,63 +56,38 @@ proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid
# enabled?", whereas shouldPrepareTracer is more like,
# "Are we at a spot right now where we want to re-initialize
# the tracer?"
if c.tracingEnabled and shouldPrepareTracer:
c.prepareTracer()
when tracingEnabled:
if shouldPrepareTracer:
c.prepareTracer()
while true:
{.computedGoto.}
c.instr = c.code.next()
# Note Mamy's observation in opTableToCaseStmt() from original VM
# regarding computed goto
#
# ackn:
# #{.computedGoto.}
# # computed goto causing stack overflow, it consumes a lot of space
# # we could use manual jump table instead
# # TODO lots of macro magic here to unravel, with chronicles...
# # `c`.logger.log($`c`.stack & "\n\n", fgGreen)
when not lowMemoryCompileTime:
when defined(release):
#
# FIXME: OS case list below needs to be adjusted
#
when defined(windows):
when defined(cpu64):
{.warning: "*** Win64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
# computedGoto not compiling on github/ci (out of memory) -- jordan
{.warning: "*** Win32/VM2 handler switch => optimisation disabled".}
# {.computedGoto.}
elif defined(linux):
when defined(cpu64):
{.warning: "*** Linux64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** Linux32/VM2 handler switch => computedGoto".}
{.computedGoto.}
elif defined(macosx):
when defined(cpu64):
{.warning: "*** MacOs64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** MacOs32/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** Unsupported OS => no handler switch optimisation".}
genOptimisedDispatcher(fork, c.instr, c)
else:
{.warning: "*** low memory compiler mode => program will be slow".}
genLowMemDispatcher(fork, c.instr, c)
dispatchInstr(fork, tracingEnabled, c.instr, c)
ok()
macro selectVM(v: VmCpt, shouldPrepareTracer: bool, fork: EVMFork, tracingEnabled: bool): EvmResultVoid =
# Generate opcode dispatcher that calls selectVM with a literal for each fork:
#
# case fork
# of A: runVM(v, A, ...)
# ...
let caseStmt = nnkCaseStmt.newTree(fork)
for fork in EVMFork:
let
forkVal = quote:
`fork`
call = quote:
case `tracingEnabled`
of false: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `false`)
of true: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `true`)
caseStmt.add nnkOfBranch.newTree(forkVal, call)
caseStmt
proc beforeExecCall(c: Computation) =
c.snapshot()
if c.msg.kind == EVMC_CALL:
@ -131,11 +113,13 @@ proc afterExecCall(c: Computation) =
proc beforeExecCreate(c: Computation): bool =
c.vmState.mutateStateDB:
let nonce = db.getNonce(c.msg.sender)
if nonce+1 < nonce:
if nonce + 1 < nonce:
let sender = c.msg.sender.toHex
c.setError("Nonce overflow when sender=" & sender & " wants to create contract", false)
c.setError(
"Nonce overflow when sender=" & sender & " wants to create contract", false
)
return true
db.setNonce(c.msg.sender, nonce+1)
db.setNonce(c.msg.sender, nonce + 1)
# We add this to the access list _before_ taking a snapshot.
# Even if the creation fails, the access-list change should not be rolled
@ -176,16 +160,8 @@ proc afterExecCreate(c: Computation) =
else:
c.rollback()
const
MsgKindToOp: array[CallKind, Op] = [
Call,
DelegateCall,
CallCode,
Create,
Create2,
EofCreate
]
const MsgKindToOp: array[CallKind, Op] =
[Call, DelegateCall, CallCode, Create, Create2, EofCreate]
func msgToOp(msg: Message): Op =
if EVMC_STATIC in msg.flags:
@ -194,11 +170,15 @@ func msgToOp(msg: Message): Op =
proc beforeExec(c: Computation): bool =
if c.msg.depth > 0:
c.vmState.captureEnter(c,
msgToOp(c.msg),
c.msg.sender, c.msg.contractAddress,
c.msg.data, c.msg.gas,
c.msg.value)
c.vmState.captureEnter(
c,
msgToOp(c.msg),
c.msg.sender,
c.msg.contractAddress,
c.msg.data,
c.msg.gas,
c.msg.value,
)
if not c.msg.isCreate:
c.beforeExecCall()
@ -253,20 +233,19 @@ proc executeOpcodes*(c: Computation, shouldPrepareTracer: bool = true) =
# if an exception (e.g. out of gas) is thrown during a continuation.
# So this code says, "If we've just run a continuation, but there's
# no *subsequent* continuation, then the opcode is done."
if c.tracingEnabled and not(cont.isNil) and nextCont.isNil:
if c.tracingEnabled and not (cont.isNil) and nextCont.isNil:
c.traceOpCodeEnded(c.instr, c.opIndex)
if c.instr == Return or
c.instr == Revert or
c.instr == SelfDestruct:
if c.instr == Return or c.instr == Revert or c.instr == SelfDestruct:
break blockOne
c.selectVM(fork, shouldPrepareTracer).isOkOr:
c.selectVM(shouldPrepareTracer, fork, c.tracingEnabled).isOkOr:
handleEvmError(error)
break blockOne # this break is not needed but make the flow clear
if c.isError() and c.continuation.isNil:
if c.tracingEnabled: c.traceError()
if c.tracingEnabled:
c.traceError()
when vm_use_recursion:
# Recursion with tiny stack frame per level.
@ -298,17 +277,18 @@ else:
if c.continuation.isNil:
c.afterExec()
break
(before, shouldPrepareTracer, c.child, c, c.parent) = (true, true, nil.Computation, c.child, c)
(before, shouldPrepareTracer, c.child, c, c.parent) =
(true, true, nil.Computation, c.child, c)
if c.parent.isNil:
break
c.dispose()
(before, shouldPrepareTracer, c.parent, c) = (false, true, nil.Computation, c.parent)
(before, shouldPrepareTracer, c.parent, c) =
(false, true, nil.Computation, c.parent)
while not c.isNil:
c.dispose()
c = c.parent
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------