create per-fork opcode dispatcher (#2579)

In the current VM opcode dispatcher, a two-level case statement is
generated that first matches the opcode and then uses another nested
case statement to select the actual implementation based on which fork
it is, causing the dispatcher to grow by `O(opcodes) * O(forks)`.

The fork does not change between instructions causing significant
inefficiency for this approach - not only because it repeats the fork
lookup but also because of code size bloat and missed optimizations.

A second source of inefficiency in dispatching is the tracer code which
in the vast majority of cases is disabled but nevertheless sees multiple
conditionals being evaluated for each instruction only to remain
disabled throughout execution.

This PR rewrites the opcode dispatcher macro to generate a separate
dispatcher for each fork and tracer setting and goes on to pick the
right one at the start of the computation.

This has many advantages:

* much smaller dispatcher
* easier to compile
* better inlining
* fewer pointlessly repeated instructions
* simplified macro (!)
* slow "low-compiler-memory" dispatcher code can be removed

Net block import improvement at about 4-6% depending on the contract -
synthetic EVM benchmarks would show an even better result most likely.
This commit is contained in:
Jacek Sieka 2024-08-28 10:20:36 +02:00 committed by GitHub
parent fa59898388
commit 8857fccb44
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 145 additions and 200 deletions

View File

@ -53,8 +53,10 @@ func readVmWord*(c: var CodeStream, n: static int): UInt256 =
func len*(c: CodeStream): int = func len*(c: CodeStream): int =
len(c.code) len(c.code)
func next*(c: var CodeStream): Op = func next*(c: var CodeStream): Op {.inline.} =
if c.pc != c.code.len: # The extra >= 0 check helps eliminate `IndexDefect` from the optimized code
# which keeps this hotspot in the EVM small, code-size-wise
if c.pc >= 0 and c.pc < c.code.len:
result = Op(c.code.bytes[c.pc]) result = Op(c.code.bytes[c.pc])
inc c.pc inc c.pc
else: else:

View File

@ -451,6 +451,20 @@ func traceError*(c: Computation) =
func prepareTracer*(c: Computation) = func prepareTracer*(c: Computation) =
c.vmState.capturePrepare(c, c.msg.depth) c.vmState.capturePrepare(c, c.msg.depth)
func opcodeGasCost*(
c: Computation, op: Op, gasCost: static GasInt, tracingEnabled: static bool,
reason: static string): EvmResultVoid {.inline.} =
# Special case of the opcodeGasCost function used for fixed-gas opcodes - since
# the parameters are known at compile time, we inline and specialize it
when tracingEnabled:
c.vmState.captureGasCost(
c,
op,
gasCost,
c.gasMeter.gasRemaining,
c.msg.depth + 1)
c.gasMeter.consumeGas(gasCost, reason)
func opcodeGasCost*( func opcodeGasCost*(
c: Computation, op: Op, gasCost: GasInt, reason: static string): EvmResultVoid = c: Computation, op: Op, gasCost: GasInt, reason: static string): EvmResultVoid =
if c.vmState.tracingEnabled: if c.vmState.tracingEnabled:

View File

@ -801,32 +801,6 @@ gasCosts(FkBerlin, berlin, BerlinGasCosts)
gasCosts(FkLondon, london, LondonGasCosts) gasCosts(FkLondon, london, LondonGasCosts)
gasCosts(FkShanghai, shanghai, ShanghaiGasCosts) gasCosts(FkShanghai, shanghai, ShanghaiGasCosts)
type
OpGck* = array[Op, GasCostKind]
func opGck(gc: GasCosts): OpGck {.compileTime.} =
for op, x in gc:
result[op] = x.kind
# Map fork to GasCostKind
# used in op_dispatcher.nim
const forkToGck*: array[EVMFork, OpGck] = [
opGck BaseGasCosts , # FkFrontier
opGck HomesteadGasCosts , # FkHomestead
opGck TangerineGasCosts , # kTangerine
opGck SpuriousGasCosts , # FkSpurious
opGck SpuriousGasCosts , # FkByzantium
opGck ConstantinopleGasCosts, # FkConstantinople
opGck SpuriousGasCosts , # FkPetersburg
opGck IstanbulGasCosts , # FkIstanbul
opGck BerlinGasCosts , # FkBerlin
opGck LondonGasCosts , # FkLondon
opGck LondonGasCosts , # FkParis
opGck ShanghaiGasCosts , # FkShanghai
opGck ShanghaiGasCosts , # FkCancun
opGck ShanghaiGasCosts , # FkPrague
]
proc forkToSchedule*(fork: EVMFork): GasCosts = proc forkToSchedule*(fork: EVMFork): GasCosts =
if fork < FkHomestead: if fork < FkHomestead:
BaseGasCosts BaseGasCosts

View File

@ -20,7 +20,9 @@ func init*(m: var GasMeter, startGas: GasInt) =
m.gasRefunded = 0 m.gasRefunded = 0
func consumeGas*( func consumeGas*(
gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid = gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid {.inline.} =
# consumeGas is a hotspot in the vm due to it being called for every
# instruction
# TODO report reason - consumeGas is a hotspot in EVM execution so it has to # TODO report reason - consumeGas is a hotspot in EVM execution so it has to
# be done carefully # be done carefully
if amount > gasMeter.gasRemaining: if amount > gasMeter.gasRemaining:

View File

@ -15,7 +15,6 @@ const
isChatty {.used.} = noisy > 1 isChatty {.used.} = noisy > 1
import import
../code_stream,
../computation, ../computation,
../evm_errors, ../evm_errors,
../../common/evmforks, ../../common/evmforks,
@ -24,98 +23,90 @@ import
./op_codes, ./op_codes,
./op_handlers, ./op_handlers,
./op_handlers/oph_defs, ./op_handlers/oph_defs,
chronicles,
macros macros
export export EVMFork, Op, oph_defs, gas_meter
EVMFork, Op,
oph_defs,
gas_meter
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Helpers # Helpers
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
template handleStopDirective(cpt: VmCpt) = template handleStopDirective(cpt: VmCpt, tracingEnabled: bool) =
#trace "op: Stop" #trace "op: Stop"
if not cpt.code.atEnd() and cpt.tracingEnabled: when tracingEnabled:
if not cpt.code.atEnd():
# we only trace `REAL STOP` and ignore `FAKE STOP` # we only trace `REAL STOP` and ignore `FAKE STOP`
cpt.opIndex = cpt.traceOpCodeStarted(Stop) cpt.opIndex = cpt.traceOpCodeStarted(Stop)
cpt.traceOpCodeEnded(Stop, cpt.opIndex) cpt.traceOpCodeEnded(Stop, cpt.opIndex)
template handleFixedGasCostsDirective(
template handleFixedGasCostsDirective(fork: EVMFork; op: Op; cpt: VmCpt) = fork: EVMFork, op: Op, cost: GasInt, cpt: VmCpt, tracingEnabled: bool
if cpt.tracingEnabled: ) =
when tracingEnabled:
cpt.opIndex = cpt.traceOpCodeStarted(op) cpt.opIndex = cpt.traceOpCodeStarted(op)
? cpt.opcodeGasCost(op, cpt.gasCosts[op].cost, reason = $op) ?cpt.opcodeGasCost(op, cost, tracingEnabled, reason = $op)
?vmOpHandlers[fork][op].run(cpt) ?vmOpHandlers[fork][op].run(cpt)
# If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes. # If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes.
if cpt.tracingEnabled and cpt.continuation.isNil: when tracingEnabled:
if cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex) cpt.traceOpCodeEnded(op, cpt.opIndex)
template handleOtherDirective(fork: EVMFork, op: Op, cpt: VmCpt, tracingEnabled: bool) =
template handleOtherDirective(fork: EVMFork; op: Op; cpt: VmCpt) = when tracingEnabled:
if cpt.tracingEnabled:
cpt.opIndex = cpt.traceOpCodeStarted(op) cpt.opIndex = cpt.traceOpCodeStarted(op)
?vmOpHandlers[fork][op].run(cpt) ?vmOpHandlers[fork][op].run(cpt)
# If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes. # If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes.
if cpt.tracingEnabled and cpt.continuation.isNil: when tracingEnabled:
if cpt.continuation.isNil:
cpt.traceOpCodeEnded(op, cpt.opIndex) cpt.traceOpCodeEnded(op, cpt.opIndex)
# ------------------------------------------------------------------------------ proc makeCaseDispatcher(forkArg: EVMFork, tracingEnabled: bool, opArg, cpt: NimNode): NimNode =
# Private, big nasty doubly nested case matrix generator # Create a case statement for dispatching opcode to handler for the given
# ------------------------------------------------------------------------------ # fork, taking care to record the gas cost
# TODO there are several forks for which neither opcodes nor gas costs
# changed - these could use the same dispatcher thus saving some space
# and compile time
let gasCosts = forkToSchedule(forkArg)
# reminiscent of Mamy's opTableToCaseStmt() from original VM result = nnkCaseStmt.newTree(opArg)
proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode =
# Outer case/switch => Op
let branchOnOp = quote do: `opArg`
result = nnkCaseStmt.newTree(branchOnOp)
for op in Op: for op in Op:
let asOp = quote do: Op(`op`) let
asOp = quote: `op`
# Inner case/switch => Fork handler =
let branchOnFork = quote do: `forkArg`
var forkCaseSubExpr = nnkCaseStmt.newTree(branchOnFork)
for fork in EVMFork:
let asFork = quote do: EVMFork(`fork`)
let gcTable = forkToGck[fork]
let branchStmt = block:
if op == Stop: if op == Stop:
quote do: quote:
handleStopDirective(`cpt`) handleStopDirective(`cpt`, `tracingEnabled`)
elif gcTable[op] == GckFixed: elif gasCosts[op].kind == GckFixed:
quote do: let cost = gasCosts[op].cost
handleFixedGasCostsDirective(`asFork`,`asOp`,`cpt`) quote:
handleFixedGasCostsDirective(
`forkArg`, `op`, `cost`, `cpt`, `tracingEnabled`
)
else: else:
quote do: quote:
handleOtherDirective(`asFork`,`asOp`,`cpt`) handleOtherDirective(`forkArg`, `op`, `cpt`, `tracingEnabled`)
branch =
forkCaseSubExpr.add nnkOfBranch.newTree(asFork, branchStmt)
# Wrap innner case/switch into outer case/switch
let branchStmt = block:
case op case op
of Stop, Return, Revert, SelfDestruct: of Create, Create2, Call, CallCode, DelegateCall, StaticCall:
quote do: # These opcodes use `chainTo` to create a continuation call which must
`forkCaseSubExpr` # be handled separately
break quote:
else: `handler`
# Anyway, the point is that now we might as well just do this check
# for *every* opcode (other than Return/Revert/etc, which need to
# break no matter what).
quote do:
`forkCaseSubExpr`
if not `cpt`.continuation.isNil: if not `cpt`.continuation.isNil:
break break
result.add nnkOfBranch.newTree(asOp, branchStmt) of Stop, Return, Revert, SelfDestruct:
quote:
`handler`
break
else:
handler
result.add nnkOfBranch.newTree(asOp, branch)
when isChatty: when isChatty:
echo ">>> ", result.repr echo ">>> ", result.repr
@ -124,39 +115,21 @@ proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode =
# Public macros/functions # Public macros/functions
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
macro genOptimisedDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt): untyped = macro dispatchInstr*(
result = fork.toCaseStmt(op, cpt) fork: static EVMFork, tracingEnabled: static bool, op: Op, cpt: VmCpt
): untyped =
makeCaseDispatcher(fork, tracingEnabled, op, cpt)
template genLowMemDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt) =
if op == Stop:
handleStopDirective(cpt)
break
if BaseGasCosts[op].kind == GckFixed:
handleFixedGasCostsDirective(fork, op, cpt)
else:
handleOtherDirective(fork, op, cpt)
case cpt.instr
of Return, Revert, SelfDestruct:
break
else:
# FIXME-manyOpcodesNowRequireContinuations
if not cpt.continuation.isNil:
break
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# Debugging ... # Debugging ...
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
when isMainModule and isChatty: when isMainModule and isChatty:
import ../types import ../types
proc optimised(cpt: VmCpt, fork: EVMFork): EvmResultVoid {.compileTime.} = proc optimised(cpt: VmCpt): EvmResultVoid {.compileTime.} =
while true: while true:
genOptimisedDispatcher(fork, cpt.instr, desc) dispatchInstr(FkFrontier, false, cpt.instr, cpt)
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End

View File

@ -27,7 +27,7 @@ import
oph_create, oph_call, oph_sysops] oph_create, oph_call, oph_sysops]
const const
allHandlersList = @[ allHandlersList = [
(VmOpExecArithmetic, "Arithmetic"), (VmOpExecArithmetic, "Arithmetic"),
(VmOpExecHash, "Hash"), (VmOpExecHash, "Hash"),
(VmOpExecEnvInfo, "EnvInfo"), (VmOpExecEnvInfo, "EnvInfo"),

View File

@ -38,8 +38,15 @@ when optimizationCondition:
# this is a top level pragma since nim 1.6.16 # this is a top level pragma since nim 1.6.16
{.optimization: speed.} {.optimization: speed.}
proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid = proc runVM(
## Op code execution handler main loop. c: VmCpt,
shouldPrepareTracer: bool,
fork: static EVMFork,
tracingEnabled: static bool,
): EvmResultVoid =
## VM instruction handler main loop - for each fork, a distinct version of
## this function is instantiated so that selection of fork-specific
## versions of functions happens only once
# It's important not to re-prepare the tracer after # It's important not to re-prepare the tracer after
# an async operation, only after a call/create. # an async operation, only after a call/create.
@ -49,63 +56,38 @@ proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid
# enabled?", whereas shouldPrepareTracer is more like, # enabled?", whereas shouldPrepareTracer is more like,
# "Are we at a spot right now where we want to re-initialize # "Are we at a spot right now where we want to re-initialize
# the tracer?" # the tracer?"
if c.tracingEnabled and shouldPrepareTracer: when tracingEnabled:
if shouldPrepareTracer:
c.prepareTracer() c.prepareTracer()
while true: while true:
{.computedGoto.}
c.instr = c.code.next() c.instr = c.code.next()
# Note Mamy's observation in opTableToCaseStmt() from original VM dispatchInstr(fork, tracingEnabled, c.instr, c)
# regarding computed goto
#
# ackn:
# #{.computedGoto.}
# # computed goto causing stack overflow, it consumes a lot of space
# # we could use manual jump table instead
# # TODO lots of macro magic here to unravel, with chronicles...
# # `c`.logger.log($`c`.stack & "\n\n", fgGreen)
when not lowMemoryCompileTime:
when defined(release):
#
# FIXME: OS case list below needs to be adjusted
#
when defined(windows):
when defined(cpu64):
{.warning: "*** Win64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
# computedGoto not compiling on github/ci (out of memory) -- jordan
{.warning: "*** Win32/VM2 handler switch => optimisation disabled".}
# {.computedGoto.}
elif defined(linux):
when defined(cpu64):
{.warning: "*** Linux64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** Linux32/VM2 handler switch => computedGoto".}
{.computedGoto.}
elif defined(macosx):
when defined(cpu64):
{.warning: "*** MacOs64/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** MacOs32/VM2 handler switch => computedGoto".}
{.computedGoto.}
else:
{.warning: "*** Unsupported OS => no handler switch optimisation".}
genOptimisedDispatcher(fork, c.instr, c)
else:
{.warning: "*** low memory compiler mode => program will be slow".}
genLowMemDispatcher(fork, c.instr, c)
ok() ok()
macro selectVM(v: VmCpt, shouldPrepareTracer: bool, fork: EVMFork, tracingEnabled: bool): EvmResultVoid =
# Generate opcode dispatcher that calls runVM with a literal for each fork:
#
# case fork
# of A: runVM(v, A, ...)
# ...
let caseStmt = nnkCaseStmt.newTree(fork)
for fork in EVMFork:
let
forkVal = quote:
`fork`
call = quote:
case `tracingEnabled`
of false: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `false`)
of true: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `true`)
caseStmt.add nnkOfBranch.newTree(forkVal, call)
caseStmt
proc beforeExecCall(c: Computation) = proc beforeExecCall(c: Computation) =
c.snapshot() c.snapshot()
if c.msg.kind == EVMC_CALL: if c.msg.kind == EVMC_CALL:
@ -133,7 +115,9 @@ proc beforeExecCreate(c: Computation): bool =
let nonce = db.getNonce(c.msg.sender) let nonce = db.getNonce(c.msg.sender)
if nonce + 1 < nonce: if nonce + 1 < nonce:
let sender = c.msg.sender.toHex let sender = c.msg.sender.toHex
c.setError("Nonce overflow when sender=" & sender & " wants to create contract", false) c.setError(
"Nonce overflow when sender=" & sender & " wants to create contract", false
)
return true return true
db.setNonce(c.msg.sender, nonce + 1) db.setNonce(c.msg.sender, nonce + 1)
@ -176,16 +160,8 @@ proc afterExecCreate(c: Computation) =
else: else:
c.rollback() c.rollback()
const MsgKindToOp: array[CallKind, Op] =
const [Call, DelegateCall, CallCode, Create, Create2, EofCreate]
MsgKindToOp: array[CallKind, Op] = [
Call,
DelegateCall,
CallCode,
Create,
Create2,
EofCreate
]
func msgToOp(msg: Message): Op = func msgToOp(msg: Message): Op =
if EVMC_STATIC in msg.flags: if EVMC_STATIC in msg.flags:
@ -194,11 +170,15 @@ func msgToOp(msg: Message): Op =
proc beforeExec(c: Computation): bool = proc beforeExec(c: Computation): bool =
if c.msg.depth > 0: if c.msg.depth > 0:
c.vmState.captureEnter(c, c.vmState.captureEnter(
c,
msgToOp(c.msg), msgToOp(c.msg),
c.msg.sender, c.msg.contractAddress, c.msg.sender,
c.msg.data, c.msg.gas, c.msg.contractAddress,
c.msg.value) c.msg.data,
c.msg.gas,
c.msg.value,
)
if not c.msg.isCreate: if not c.msg.isCreate:
c.beforeExecCall() c.beforeExecCall()
@ -256,17 +236,16 @@ proc executeOpcodes*(c: Computation, shouldPrepareTracer: bool = true) =
if c.tracingEnabled and not (cont.isNil) and nextCont.isNil: if c.tracingEnabled and not (cont.isNil) and nextCont.isNil:
c.traceOpCodeEnded(c.instr, c.opIndex) c.traceOpCodeEnded(c.instr, c.opIndex)
if c.instr == Return or if c.instr == Return or c.instr == Revert or c.instr == SelfDestruct:
c.instr == Revert or
c.instr == SelfDestruct:
break blockOne break blockOne
c.selectVM(fork, shouldPrepareTracer).isOkOr: c.selectVM(shouldPrepareTracer, fork, c.tracingEnabled).isOkOr:
handleEvmError(error) handleEvmError(error)
break blockOne # this break is not needed but make the flow clear break blockOne # this break is not needed but make the flow clear
if c.isError() and c.continuation.isNil: if c.isError() and c.continuation.isNil:
if c.tracingEnabled: c.traceError() if c.tracingEnabled:
c.traceError()
when vm_use_recursion: when vm_use_recursion:
# Recursion with tiny stack frame per level. # Recursion with tiny stack frame per level.
@ -298,17 +277,18 @@ else:
if c.continuation.isNil: if c.continuation.isNil:
c.afterExec() c.afterExec()
break break
(before, shouldPrepareTracer, c.child, c, c.parent) = (true, true, nil.Computation, c.child, c) (before, shouldPrepareTracer, c.child, c, c.parent) =
(true, true, nil.Computation, c.child, c)
if c.parent.isNil: if c.parent.isNil:
break break
c.dispose() c.dispose()
(before, shouldPrepareTracer, c.parent, c) = (false, true, nil.Computation, c.parent) (before, shouldPrepareTracer, c.parent, c) =
(false, true, nil.Computation, c.parent)
while not c.isNil: while not c.isNil:
c.dispose() c.dispose()
c = c.parent c = c.parent
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# End # End
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------