From 8857fccb44c7ac834e65a4d821f267bb2b55bf0e Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Wed, 28 Aug 2024 10:20:36 +0200 Subject: [PATCH] create per-fork opcode dispatcher (#2579) In the current VM opcode dispatcher, a two-level case statement is generated that first matches the opcode and then uses another nested case statement to select the actual implementation based on which fork it is, causing the dispatcher to grow by `O(opcodes) * O(forks)`. The fork does not change between instructions causing significant inefficiency for this approach - not only because it repeats the fork lookup but also because of code size bloat and missed optimizations. A second source of inefficiency in dispatching is the tracer code which in the vast majority of cases is disabled but nevertheless sees multiple conditionals being evaluated for each instruction only to remain disabled throughout execution. This PR rewrites the opcode dispatcher macro to generate a separate dispatcher for each fork and tracer setting and goes on to pick the right one at the start of the computation. This has many advantages: * much smaller dispatcher * easier to compile * better inlining * fewer pointlessly repeated instructions * simplified macro (!) * slow "low-compiler-memory" dispatcher code can be removed Net block import improvement at about 4-6% depending on the contract - synthetic EVM benchmarks would show an even better result most likely. 
--- nimbus/evm/code_stream.nim | 6 +- nimbus/evm/computation.nim | 14 ++ nimbus/evm/interpreter/gas_costs.nim | 26 ---- nimbus/evm/interpreter/gas_meter.nim | 4 +- nimbus/evm/interpreter/op_dispatcher.nim | 155 ++++++++++------------- nimbus/evm/interpreter/op_handlers.nim | 2 +- nimbus/evm/interpreter_dispatch.nim | 138 +++++++++----------- 7 files changed, 145 insertions(+), 200 deletions(-) diff --git a/nimbus/evm/code_stream.nim b/nimbus/evm/code_stream.nim index f0553af20..ec8349a17 100644 --- a/nimbus/evm/code_stream.nim +++ b/nimbus/evm/code_stream.nim @@ -53,8 +53,10 @@ func readVmWord*(c: var CodeStream, n: static int): UInt256 = func len*(c: CodeStream): int = len(c.code) -func next*(c: var CodeStream): Op = - if c.pc != c.code.len: +func next*(c: var CodeStream): Op {.inline.} = + # The extra >= 0 check helps eliminate `IndexDefect` from the optimized code + # which keeps this hotspot in the EVM small, code-size-wise + if c.pc >= 0 and c.pc < c.code.len: result = Op(c.code.bytes[c.pc]) inc c.pc else: diff --git a/nimbus/evm/computation.nim b/nimbus/evm/computation.nim index a0efed2fa..36778bbce 100644 --- a/nimbus/evm/computation.nim +++ b/nimbus/evm/computation.nim @@ -451,6 +451,20 @@ func traceError*(c: Computation) = func prepareTracer*(c: Computation) = c.vmState.capturePrepare(c, c.msg.depth) +func opcodeGasCost*( + c: Computation, op: Op, gasCost: static GasInt, tracingEnabled: static bool, + reason: static string): EvmResultVoid {.inline.} = + # Special case of the opcodeGasCost function used for fixed-gas opcodes - since + # the parameters are known at compile time, we inline and specialize it + when tracingEnabled: + c.vmState.captureGasCost( + c, + op, + gasCost, + c.gasMeter.gasRemaining, + c.msg.depth + 1) + c.gasMeter.consumeGas(gasCost, reason) + func opcodeGasCost*( c: Computation, op: Op, gasCost: GasInt, reason: static string): EvmResultVoid = if c.vmState.tracingEnabled: diff --git a/nimbus/evm/interpreter/gas_costs.nim 
b/nimbus/evm/interpreter/gas_costs.nim index d05994450..a27fbe27e 100644 --- a/nimbus/evm/interpreter/gas_costs.nim +++ b/nimbus/evm/interpreter/gas_costs.nim @@ -801,32 +801,6 @@ gasCosts(FkBerlin, berlin, BerlinGasCosts) gasCosts(FkLondon, london, LondonGasCosts) gasCosts(FkShanghai, shanghai, ShanghaiGasCosts) -type - OpGck* = array[Op, GasCostKind] - -func opGck(gc: GasCosts): OpGck {.compileTime.} = - for op, x in gc: - result[op] = x.kind - -# Map fork to GasCostKind -# used in op_dispatcher.nim -const forkToGck*: array[EVMFork, OpGck] = [ - opGck BaseGasCosts , # FkFrontier - opGck HomesteadGasCosts , # FkHomestead - opGck TangerineGasCosts , # kTangerine - opGck SpuriousGasCosts , # FkSpurious - opGck SpuriousGasCosts , # FkByzantium - opGck ConstantinopleGasCosts, # FkConstantinople - opGck SpuriousGasCosts , # FkPetersburg - opGck IstanbulGasCosts , # FkIstanbul - opGck BerlinGasCosts , # FkBerlin - opGck LondonGasCosts , # FkLondon - opGck LondonGasCosts , # FkParis - opGck ShanghaiGasCosts , # FkShanghai - opGck ShanghaiGasCosts , # FkCancun - opGck ShanghaiGasCosts , # FkPrague - ] - proc forkToSchedule*(fork: EVMFork): GasCosts = if fork < FkHomestead: BaseGasCosts diff --git a/nimbus/evm/interpreter/gas_meter.nim b/nimbus/evm/interpreter/gas_meter.nim index 51b93c954..9f6f9586c 100644 --- a/nimbus/evm/interpreter/gas_meter.nim +++ b/nimbus/evm/interpreter/gas_meter.nim @@ -20,7 +20,9 @@ func init*(m: var GasMeter, startGas: GasInt) = m.gasRefunded = 0 func consumeGas*( - gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid = + gasMeter: var GasMeter; amount: GasInt; reason: static string): EvmResultVoid {.inline.} = + # consumeGas is a hotspot in the vm due to it being called for every + # instruction # TODO report reason - consumeGas is a hotspot in EVM execution so it has to # be done carefully if amount > gasMeter.gasRemaining: diff --git a/nimbus/evm/interpreter/op_dispatcher.nim b/nimbus/evm/interpreter/op_dispatcher.nim 
index ffec01c0f..6aa07da79 100644 --- a/nimbus/evm/interpreter/op_dispatcher.nim +++ b/nimbus/evm/interpreter/op_dispatcher.nim @@ -15,7 +15,6 @@ const isChatty {.used.} = noisy > 1 import - ../code_stream, ../computation, ../evm_errors, ../../common/evmforks, @@ -24,98 +23,90 @@ import ./op_codes, ./op_handlers, ./op_handlers/oph_defs, - chronicles, macros -export - EVMFork, Op, - oph_defs, - gas_meter +export EVMFork, Op, oph_defs, gas_meter # ------------------------------------------------------------------------------ # Helpers # ------------------------------------------------------------------------------ -template handleStopDirective(cpt: VmCpt) = +template handleStopDirective(cpt: VmCpt, tracingEnabled: bool) = #trace "op: Stop" - if not cpt.code.atEnd() and cpt.tracingEnabled: - # we only trace `REAL STOP` and ignore `FAKE STOP` - cpt.opIndex = cpt.traceOpCodeStarted(Stop) - cpt.traceOpCodeEnded(Stop, cpt.opIndex) + when tracingEnabled: + if not cpt.code.atEnd(): + # we only trace `REAL STOP` and ignore `FAKE STOP` + cpt.opIndex = cpt.traceOpCodeStarted(Stop) + cpt.traceOpCodeEnded(Stop, cpt.opIndex) - -template handleFixedGasCostsDirective(fork: EVMFork; op: Op; cpt: VmCpt) = - if cpt.tracingEnabled: +template handleFixedGasCostsDirective( + fork: EVMFork, op: Op, cost: GasInt, cpt: VmCpt, tracingEnabled: bool +) = + when tracingEnabled: cpt.opIndex = cpt.traceOpCodeStarted(op) - ? cpt.opcodeGasCost(op, cpt.gasCosts[op].cost, reason = $op) - ? vmOpHandlers[fork][op].run(cpt) + ?cpt.opcodeGasCost(op, cost, tracingEnabled, reason = $op) + ?vmOpHandlers[fork][op].run(cpt) # If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes. 
- if cpt.tracingEnabled and cpt.continuation.isNil: - cpt.traceOpCodeEnded(op, cpt.opIndex) + when tracingEnabled: + if cpt.continuation.isNil: + cpt.traceOpCodeEnded(op, cpt.opIndex) - -template handleOtherDirective(fork: EVMFork; op: Op; cpt: VmCpt) = - if cpt.tracingEnabled: +template handleOtherDirective(fork: EVMFork, op: Op, cpt: VmCpt, tracingEnabled: bool) = + when tracingEnabled: cpt.opIndex = cpt.traceOpCodeStarted(op) - ? vmOpHandlers[fork][op].run(cpt) + ?vmOpHandlers[fork][op].run(cpt) # If continuation is not nil, traceOpCodeEnded will be called in executeOpcodes. - if cpt.tracingEnabled and cpt.continuation.isNil: - cpt.traceOpCodeEnded(op, cpt.opIndex) + when tracingEnabled: + if cpt.continuation.isNil: + cpt.traceOpCodeEnded(op, cpt.opIndex) -# ------------------------------------------------------------------------------ -# Private, big nasty doubly nested case matrix generator -# ------------------------------------------------------------------------------ +proc makeCaseDispatcher(forkArg: EVMFork, tracingEnabled: bool, opArg, cpt: NimNode): NimNode = + # Create a case statement for dispatching opcode to handler for the given + # fork, taking care to record the gas cost + # TODO there are several forks for which neither opcodes nor gas costs + # changed - these could use the same dispatcher thus saving some space + # and compile time + let gasCosts = forkToSchedule(forkArg) -# reminiscent of Mamy's opTableToCaseStmt() from original VM -proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode = - - # Outer case/switch => Op - let branchOnOp = quote do: `opArg` - result = nnkCaseStmt.newTree(branchOnOp) + result = nnkCaseStmt.newTree(opArg) for op in Op: - let asOp = quote do: Op(`op`) - - # Inner case/switch => Fork - let branchOnFork = quote do: `forkArg` - var forkCaseSubExpr = nnkCaseStmt.newTree(branchOnFork) - for fork in EVMFork: - let asFork = quote do: EVMFork(`fork`) - let gcTable = forkToGck[fork] - - let branchStmt = block: + let + asOp 
= quote: `op` + handler = if op == Stop: - quote do: - handleStopDirective(`cpt`) - elif gcTable[op] == GckFixed: - quote do: - handleFixedGasCostsDirective(`asFork`,`asOp`,`cpt`) + quote: + handleStopDirective(`cpt`, `tracingEnabled`) + elif gasCosts[op].kind == GckFixed: + let cost = gasCosts[op].cost + quote: + handleFixedGasCostsDirective( + `forkArg`, `op`, `cost`, `cpt`, `tracingEnabled` + ) else: - quote do: - handleOtherDirective(`asFork`,`asOp`,`cpt`) + quote: + handleOtherDirective(`forkArg`, `op`, `cpt`, `tracingEnabled`) + branch = + case op + of Create, Create2, Call, CallCode, DelegateCall, StaticCall: + # These opcodes use `chainTo` to create a continuation call which must + # be handled separately + quote: + `handler` + if not `cpt`.continuation.isNil: + break - forkCaseSubExpr.add nnkOfBranch.newTree(asFork, branchStmt) - - # Wrap innner case/switch into outer case/switch - let branchStmt = block: - case op - of Stop, Return, Revert, SelfDestruct: - quote do: - `forkCaseSubExpr` - break - else: - # Anyway, the point is that now we might as well just do this check - # for *every* opcode (other than Return/Revert/etc, which need to - # break no matter what). 
- quote do: - `forkCaseSubExpr` - if not `cpt`.continuation.isNil: + of Stop, Return, Revert, SelfDestruct: + quote: + `handler` break + else: + handler - result.add nnkOfBranch.newTree(asOp, branchStmt) + result.add nnkOfBranch.newTree(asOp, branch) when isChatty: echo ">>> ", result.repr @@ -124,39 +115,21 @@ proc toCaseStmt(forkArg, opArg, cpt: NimNode): NimNode = # Public macros/functions # ------------------------------------------------------------------------------ -macro genOptimisedDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt): untyped = - result = fork.toCaseStmt(op, cpt) - - -template genLowMemDispatcher*(fork: EVMFork; op: Op; cpt: VmCpt) = - if op == Stop: - handleStopDirective(cpt) - break - - if BaseGasCosts[op].kind == GckFixed: - handleFixedGasCostsDirective(fork, op, cpt) - else: - handleOtherDirective(fork, op, cpt) - - case cpt.instr - of Return, Revert, SelfDestruct: - break - else: - # FIXME-manyOpcodesNowRequireContinuations - if not cpt.continuation.isNil: - break +macro dispatchInstr*( + fork: static EVMFork, tracingEnabled: static bool, op: Op, cpt: VmCpt +): untyped = + makeCaseDispatcher(fork, tracingEnabled, op, cpt) # ------------------------------------------------------------------------------ # Debugging ... 
# ------------------------------------------------------------------------------ when isMainModule and isChatty: - import ../types - proc optimised(cpt: VmCpt, fork: EVMFork): EvmResultVoid {.compileTime.} = + proc optimised(cpt: VmCpt): EvmResultVoid {.compileTime.} = while true: - genOptimisedDispatcher(fork, cpt.instr, desc) + dispatchInstr(FkFrontier, false, cpt.instr, cpt) # ------------------------------------------------------------------------------ # End diff --git a/nimbus/evm/interpreter/op_handlers.nim b/nimbus/evm/interpreter/op_handlers.nim index 7940c171a..a5cbeb302 100644 --- a/nimbus/evm/interpreter/op_handlers.nim +++ b/nimbus/evm/interpreter/op_handlers.nim @@ -27,7 +27,7 @@ import oph_create, oph_call, oph_sysops] const - allHandlersList = @[ + allHandlersList = [ (VmOpExecArithmetic, "Arithmetic"), (VmOpExecHash, "Hash"), (VmOpExecEnvInfo, "EnvInfo"), diff --git a/nimbus/evm/interpreter_dispatch.nim b/nimbus/evm/interpreter_dispatch.nim index 4d1b3a80c..066245d31 100644 --- a/nimbus/evm/interpreter_dispatch.nim +++ b/nimbus/evm/interpreter_dispatch.nim @@ -38,8 +38,15 @@ when optimizationCondition: # this is a top level pragma since nim 1.6.16 {.optimization: speed.} -proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid = - ## Op code execution handler main loop. +proc runVM( + c: VmCpt, + shouldPrepareTracer: bool, + fork: static EVMFork, + tracingEnabled: static bool, +): EvmResultVoid = + ## VM instruction handler main loop - for each fork, a distinct version of + ## this function is instantiated so that selection of fork-specific + ## versions of functions happens only once # It's important not to re-prepare the tracer after # an async operation, only after a call/create. @@ -49,63 +56,38 @@ proc selectVM(c: VmCpt, fork: EVMFork, shouldPrepareTracer: bool): EvmResultVoid # enabled?", whereas shouldPrepareTracer is more like, # "Are we at a spot right now where we want to re-initialize # the tracer?" 
- if c.tracingEnabled and shouldPrepareTracer: - c.prepareTracer() + when tracingEnabled: + if shouldPrepareTracer: + c.prepareTracer() while true: + {.computedGoto.} c.instr = c.code.next() - # Note Mamy's observation in opTableToCaseStmt() from original VM - # regarding computed goto - # - # ackn: - # #{.computedGoto.} - # # computed goto causing stack overflow, it consumes a lot of space - # # we could use manual jump table instead - # # TODO lots of macro magic here to unravel, with chronicles... - # # `c`.logger.log($`c`.stack & "\n\n", fgGreen) - when not lowMemoryCompileTime: - when defined(release): - # - # FIXME: OS case list below needs to be adjusted - # - when defined(windows): - when defined(cpu64): - {.warning: "*** Win64/VM2 handler switch => computedGoto".} - {.computedGoto.} - else: - # computedGoto not compiling on github/ci (out of memory) -- jordan - {.warning: "*** Win32/VM2 handler switch => optimisation disabled".} - # {.computedGoto.} - - elif defined(linux): - when defined(cpu64): - {.warning: "*** Linux64/VM2 handler switch => computedGoto".} - {.computedGoto.} - else: - {.warning: "*** Linux32/VM2 handler switch => computedGoto".} - {.computedGoto.} - - elif defined(macosx): - when defined(cpu64): - {.warning: "*** MacOs64/VM2 handler switch => computedGoto".} - {.computedGoto.} - else: - {.warning: "*** MacOs32/VM2 handler switch => computedGoto".} - {.computedGoto.} - - else: - {.warning: "*** Unsupported OS => no handler switch optimisation".} - - genOptimisedDispatcher(fork, c.instr, c) - - else: - {.warning: "*** low memory compiler mode => program will be slow".} - - genLowMemDispatcher(fork, c.instr, c) + dispatchInstr(fork, tracingEnabled, c.instr, c) ok() +macro selectVM(v: VmCpt, shouldPrepareTracer: bool, fork: EVMFork, tracingEnabled: bool): EvmResultVoid = + # Generate opcode dispatcher that calls runVM with a literal for each fork: + # + # case fork + # of A: runVM(v, shouldPrepareTracer, A, ...) + # ... 
+ + let caseStmt = nnkCaseStmt.newTree(fork) + for fork in EVMFork: + let + forkVal = quote: + `fork` + call = quote: + case `tracingEnabled` + of false: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `false`) + of true: runVM(`v`, `shouldPrepareTracer`, `forkVal`, `true`) + + caseStmt.add nnkOfBranch.newTree(forkVal, call) + caseStmt + proc beforeExecCall(c: Computation) = c.snapshot() if c.msg.kind == EVMC_CALL: @@ -131,11 +113,13 @@ proc afterExecCall(c: Computation) = proc beforeExecCreate(c: Computation): bool = c.vmState.mutateStateDB: let nonce = db.getNonce(c.msg.sender) - if nonce+1 < nonce: + if nonce + 1 < nonce: let sender = c.msg.sender.toHex - c.setError("Nonce overflow when sender=" & sender & " wants to create contract", false) + c.setError( + "Nonce overflow when sender=" & sender & " wants to create contract", false + ) return true - db.setNonce(c.msg.sender, nonce+1) + db.setNonce(c.msg.sender, nonce + 1) # We add this to the access list _before_ taking a snapshot. # Even if the creation fails, the access-list change should not be rolled @@ -176,16 +160,8 @@ proc afterExecCreate(c: Computation) = else: c.rollback() - -const - MsgKindToOp: array[CallKind, Op] = [ - Call, - DelegateCall, - CallCode, - Create, - Create2, - EofCreate - ] +const MsgKindToOp: array[CallKind, Op] = + [Call, DelegateCall, CallCode, Create, Create2, EofCreate] func msgToOp(msg: Message): Op = if EVMC_STATIC in msg.flags: @@ -194,11 +170,15 @@ func msgToOp(msg: Message): Op = proc beforeExec(c: Computation): bool = if c.msg.depth > 0: - c.vmState.captureEnter(c, - msgToOp(c.msg), - c.msg.sender, c.msg.contractAddress, - c.msg.data, c.msg.gas, - c.msg.value) + c.vmState.captureEnter( + c, + msgToOp(c.msg), + c.msg.sender, + c.msg.contractAddress, + c.msg.data, + c.msg.gas, + c.msg.value, + ) if not c.msg.isCreate: c.beforeExecCall() @@ -253,20 +233,19 @@ proc executeOpcodes*(c: Computation, shouldPrepareTracer: bool = true) = # if an exception (e.g. 
out of gas) is thrown during a continuation. # So this code says, "If we've just run a continuation, but there's # no *subsequent* continuation, then the opcode is done." - if c.tracingEnabled and not(cont.isNil) and nextCont.isNil: + if c.tracingEnabled and not (cont.isNil) and nextCont.isNil: c.traceOpCodeEnded(c.instr, c.opIndex) - if c.instr == Return or - c.instr == Revert or - c.instr == SelfDestruct: + if c.instr == Return or c.instr == Revert or c.instr == SelfDestruct: break blockOne - c.selectVM(fork, shouldPrepareTracer).isOkOr: + c.selectVM(shouldPrepareTracer, fork, c.tracingEnabled).isOkOr: handleEvmError(error) break blockOne # this break is not needed but make the flow clear if c.isError() and c.continuation.isNil: - if c.tracingEnabled: c.traceError() + if c.tracingEnabled: + c.traceError() when vm_use_recursion: # Recursion with tiny stack frame per level. @@ -298,17 +277,18 @@ else: if c.continuation.isNil: c.afterExec() break - (before, shouldPrepareTracer, c.child, c, c.parent) = (true, true, nil.Computation, c.child, c) + (before, shouldPrepareTracer, c.child, c, c.parent) = + (true, true, nil.Computation, c.child, c) if c.parent.isNil: break c.dispose() - (before, shouldPrepareTracer, c.parent, c) = (false, true, nil.Computation, c.parent) + (before, shouldPrepareTracer, c.parent, c) = + (false, true, nil.Computation, c.parent) while not c.isNil: c.dispose() c = c.parent - # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------