nimbus-eth1/nimbus/evm/code_stream.nim
Jacek Sieka 768307d91d
Cache code and invalid jump destination tables (fixes #2268) (#2404)
It is common for many accounts to share the same code - at the database
level, code is stored by hash meaning only one copy exists per unique
program but when loaded in memory, a copy is made for each account.

Further, every time we execute the code, it must be scanned for invalid
jump destinations which slows down EVM execution.

Finally, the extcodesize call causes code to be loaded even if only the
size is needed.

This PR improves on all these points by introducing a shared
CodeBytesRef type whose code section is immutable and that can be shared
between accounts. Further, a dedicated `len` API call is added so that
the EXTCODESIZE opcode can operate without polluting the GC and code
cache, for cases where only the size is requested - rocksdb will in this
case cache the code itself in the row cache meaning that lookup of the
code itself remains fast when length is asked for first.

With 16k code entries, there's a 90% hit rate which goes up to 99%
during the 2.3M attack - the cache significantly lowers memory
consumption and execution time not only during this event but across the
board.
2024-06-21 09:44:10 +02:00

104 lines
2.8 KiB
Nim

# Nimbus
# Copyright (c) 2018-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[sequtils, strutils], chronicles, eth/common, ./interpreter/op_codes, ./code_bytes
export code_bytes
type CodeStream* = object
  ## A read cursor over a piece of code: the code object plus the current
  ## read position within it.
  code: CodeBytesRef ## code being read; not exported from this module
  pc*: int ## current read position; exported, so importers may assign it directly
func init*(T: type CodeStream, code: CodeBytesRef): T =
  ## Creates a stream over an existing `CodeBytesRef`.
  ## `pc` is left at its default value of 0.
  result = T(code: code)
func init*(T: type CodeStream, code: sink seq[byte]): T =
  ## Creates a stream from an owned byte sequence. The sequence is moved
  ## into `CodeBytesRef.init`; `pc` is left at its default value of 0.
  let wrapped = CodeBytesRef.init(move(code))
  T(code: wrapped)
func init*(T: type CodeStream, code: openArray[byte]): T =
  ## Creates a stream from a view of bytes by handing the view to
  ## `CodeBytesRef.init`; `pc` is left at its default value of 0.
  result = T(code: CodeBytesRef.init(code))
func init*(T: type CodeStream, code: openArray[char]): T =
  ## Creates a stream from character data by handing it to
  ## `CodeBytesRef.init`; `pc` is left at its default value of 0.
  ## NOTE(review): how the characters are turned into code bytes is decided
  ## by `CodeBytesRef.init` and is not visible here — confirm in code_bytes.
  result = T(code: CodeBytesRef.init(code))
template read*(c: var CodeStream, size: int): openArray[byte] =
  ## Reads `size` bytes starting at `c.pc` and advances `c.pc` past them.
  ##
  ## If all `size` bytes are available, the result is a view of exactly those
  ## bytes. Otherwise nothing is read: `c.pc` is moved to the end of the code
  ## and an empty view is produced.
  ##
  ## Being a template, `c` and `size` are substituted textually and may be
  ## evaluated more than once.
  if c.pc + size - 1 < c.bytes.len:
    # Remember the start before advancing, since the view is built afterwards.
    let pos = c.pc
    c.pc += size
    c.code.bytes.toOpenArray(pos, pos + size - 1)
  else:
    c.pc = c.bytes.len
    # `toOpenArray(0, -1)` denotes an empty range.
    c.code.bytes.toOpenArray(0, -1)
func readVmWord*(c: var CodeStream, n: static int): UInt256 =
  ## Consumes up to `n` bytes from the code stream and returns them as a
  ## `UInt256`; bytes of the result that are not written stay zero.
  ##
  ## The bytes are copied in reverse order straight into the memory of
  ## `result`, so the last byte consumed lands at raw offset 0.
  ## NOTE(review): this presumably relies on `UInt256` storing its least
  ## significant byte first in memory — confirm for the target platform.
  ##
  ## If fewer than `n` bytes remain, only the remaining bytes are consumed
  ## and `pc` ends up at the end of the code.
  let
    stop = min(c.pc + n, c.code.bytes.len)
    available = stop - c.pc
    dst = cast[ptr array[32, byte]](addr result)
  for offset in 0 ..< available:
    dst[offset] = c.code.bytes[stop - 1 - offset]
  c.pc = stop
func len*(c: CodeStream): int =
  ## Length of the underlying code, as reported by `len` on the code object.
  c.code.len
func next*(c: var CodeStream): Op =
  ## Returns the opcode at `pc` and advances `pc` by one. When `pc` is at or
  ## beyond the end of the code, returns `Op.Stop` and leaves `pc` unchanged.
  ##
  ## The bound is tested with `<` rather than `!=`: `pc` is an exported,
  ## assignable field, so it can be set past the end of the code. `peek` and
  ## `atEnd` already treat that state as end-of-code; an inequality test here
  ## would instead index out of bounds.
  if c.pc < c.code.len:
    result = Op(c.code.bytes[c.pc])
    inc c.pc
  else:
    result = Op.Stop
iterator items*(c: var CodeStream): Op =
  ## Yields opcodes by calling `next` repeatedly, advancing `c.pc` as it goes.
  ## Iteration ends the first time `next` returns `Op.Stop`; that value itself
  ## is not yielded.
  while true:
    let op = c.next()
    if op == Op.Stop:
      break
    yield op
func `[]`*(c: CodeStream, offset: int): Op =
  ## Returns the code byte at `offset`, converted to `Op`. Neither reads nor
  ## changes `pc`; `offset` is not range-checked by this function itself.
  let raw = c.code.bytes[offset]
  Op(raw)
func peek*(c: var CodeStream): Op =
  ## Returns the opcode at `pc` without advancing, or `Op.Stop` when `pc` is
  ## at or beyond the end of the code.
  if c.pc >= c.code.bytes.len:
    return Op.Stop
  Op(c.code.bytes[c.pc])
func updatePc*(c: var CodeStream, value: int) =
  ## Sets `pc` to `value`, capped from above at the code length.
  ## No lower bound is applied.
  let limit = len(c)
  c.pc =
    if value <= limit: value
    else: limit
func isValidOpcode*(c: CodeStream, position: int): bool =
  ## Forwards to `isValidOpcode` on the underlying code object for the given
  ## `position`; the actual validity rule is defined there.
  result = isValidOpcode(c.code, position)
func bytes*(c: CodeStream): lent seq[byte] =
  ## Borrowed (`lent`) access to the byte sequence of the underlying code.
  bytes(c.code)
proc decompile*(original: CodeStream): seq[(int, Op, string)] =
  ## Lists the code as `(position, opcode, immediate)` entries.
  ## Intended to behave as https://etherscan.io/opcode-tool
  ##
  ## Works on a fresh stream built from `original.bytes`, so `original` is
  ## not advanced. For `Push1..Push32` the immediate is the hex string
  ## ("0x" followed by two digits per byte) of the bytes that follow the
  ## opcode; for other opcodes it is empty. Listing ends the first time
  ## `next` returns `Op.Stop`, at which point a final `(-1, Op.Stop, "")`
  ## entry is appended.
  var stream = CodeStream.init(original.bytes)
  while true:
    let op = stream.next
    if op in Push1 .. Push32:
      # Capture the opcode position before `read` moves `pc` forward.
      let at = stream.pc - 1
      # `op.int - 95` is the immediate length.
      # NOTE(review): presumes Push1 has ordinal 96 — confirm in op_codes.
      let immediate =
        "0x" & stream.read(op.int - 95).mapIt(it.BiggestInt.toHex(2)).join("")
      result.add((at, op, immediate))
    elif op != Op.Stop:
      result.add((stream.pc - 1, op, ""))
    else:
      result.add((-1, Op.Stop, ""))
      break
func atEnd*(c: CodeStream): bool =
  ## True once `pc` has reached or passed the end of the code bytes.
  not (c.pc < c.code.bytes.len)