nim-eth/eth/rlp.nim

585 lines
16 KiB
Nim
Raw Permalink Normal View History

2019-02-05 12:01:10 +00:00
## This module implements RLP encoding and decoding as
## defined in Appendix B of the Ethereum Yellow Paper:
## https://ethereum.github.io/yellowpaper/paper.pdf
import
std/[strutils, options],
stew/[byteutils, shims/macros],
results,
./rlp/[writer, object_serialization],
./rlp/priv/defs
2019-02-05 12:01:10 +00:00
from stew/objects import checkedEnumAssign
export writer, object_serialization
2019-02-05 12:01:10 +00:00
type
  # Decoding cursor: the encoded bytes together with the offset of the item
  # that is currently being looked at.
  Rlp* = object
    bytes: seq[byte]
    position*: int

  # The two kinds of item an RLP encoding is made of.
  RlpNodeType* = enum
    rlpBlob, rlpList

  # Common ancestor of the error types declared right below it.
  RlpError* = object of CatchableError
  MalformedRlpError* = object of RlpError
  UnsupportedRlpError* = object of RlpError
  RlpTypeMismatch* = object of RlpError

  # Payload byte range of an item (both ends inclusive) plus the item kind.
  RlpItem = tuple
    payload: Slice[int]
    typ: RlpNodeType
func raiseOutOfBounds() {.noreturn, noinline.} =
  ## Raises `MalformedRlpError` reporting an out-of-bounds payload access.
  raise newException(MalformedRlpError, "out-of-bounds payload access")
2019-02-05 12:01:10 +00:00
func raiseExpectedBlob() {.noreturn, noinline.} =
  ## Raises `RlpTypeMismatch` for an item that should have been a blob.
  raise newException(RlpTypeMismatch, "expected blob")
2019-02-05 12:01:10 +00:00
func raiseExpectedList() {.noreturn, noinline.} =
  ## Raises `RlpTypeMismatch` for an item that should have been a list.
  raise newException(RlpTypeMismatch, "expected list")
2019-02-05 12:01:10 +00:00
func raiseNonCanonical() {.noreturn, noinline.} =
  ## Raises `MalformedRlpError` for an encoding that is not in canonical form.
  raise newException(MalformedRlpError, "non-canonical encoding")
2019-02-05 12:01:10 +00:00
func raiseIntOutOfBounds() {.noreturn, noinline.} =
  ## Raises `UnsupportedRlpError` for an integer that does not fit the
  ## requested type.
  raise newException(UnsupportedRlpError, "integer out of bounds")
2019-02-05 12:01:10 +00:00
# Tail of `input` starting at `position` and running to its last byte.
# Goes through `raiseOutOfBounds` when `position` is at or past the end.
template view(input: openArray[byte], position: int): openArray[byte] =
  if input.len <= position:
    raiseOutOfBounds()

  input.toOpenArray(position, input.len - 1)

# The bytes `slice.a .. slice.b` of `input` (both ends inclusive).
# Goes through `raiseOutOfBounds` when `slice.b` is at or past the end.
template view(input: openArray[byte], slice: Slice[int]): openArray[byte] =
  if input.len <= slice.b:
    raiseOutOfBounds()

  input.toOpenArray(slice.a, slice.b)
2019-02-05 12:01:10 +00:00
# Address of `x`: `addr` on compilers newer than Nim 1.6, `unsafeAddr` on
# Nim 1.6 and older.
template getPtr(x: untyped): auto =
  when (NimMajor, NimMinor) > (1, 6):
    addr(x)
  else:
    unsafeAddr(x)
2019-02-05 12:01:10 +00:00
func toString(self: Rlp, item: RlpItem): string =
  ## Copies the payload bytes of the blob `item` into a freshly allocated
  ## string; an empty payload yields `""`.
  ##
  ## Raises `RlpTypeMismatch` when `item` is not a blob.
  result = "" # TODO https://github.com/nim-lang/Nim/issues/23645
  if item.typ != rlpBlob:
    raiseExpectedBlob()

  let size = item.payload.len
  if size > 0:
    result = newString(size)
    copyMem(addr result[0], self.bytes.view(item.payload)[0].getPtr, size)
2019-02-05 12:01:10 +00:00
func decodeInteger(input: openArray[byte]): uint64 =
  ## Interprets `input` as a big-endian unsigned integer.
  ##
  ## A positive integer is encoded as the shortest byte array whose
  ## big-endian interpretation is that integer, so an empty input decodes
  ## to 0 and a leading zero byte is refused as non-canonical. Inputs
  ## longer than `sizeof(uint64)` are refused as out of bounds.
  if sizeof(uint64) < input.len:
    raiseIntOutOfBounds()

  if input.len > 0:
    if input[0] == 0:
      raiseNonCanonical()

    for octet in input:
      result = (result shl 8) or uint64(octet)
2019-02-05 12:01:10 +00:00
# https://ethereum.org/en/developers/docs/data-structures-and-encoding/rlp/
func rlpItem(input: openArray[byte], start = 0): RlpItem =
  ## Locates the payload of the RLP item whose first byte is `input[start]`
  ## and reports whether the item is a blob or a list.
  ##
  ## Only the item's own header is checked for correct encoding; the payload
  ## is not inspected further (apart from the single-byte canonicality rule).
  ## An item that does not fit in `input` or whose header is not in
  ## canonical form makes the function raise.
  if len(input) <= start:
    raiseOutOfBounds()

  let
    marker = input[start]
    avail = len(input) - start # >= 1, includes the marker byte itself

  case marker
  of 0x00'u8 .. 0x7f'u8:
    # A byte in [0x00, 0x7f] is its own encoding.
    (start .. start, rlpBlob)
  of 0x80'u8 .. 0xb7'u8:
    # String of 0-55 bytes: the marker is 0x80 plus the string length and
    # the string follows immediately.
    let size = int(marker - 0x80)
    if size >= avail:
      raiseOutOfBounds()
    # A one-byte string below 0x80 must be written as the bare byte.
    if size == 1 and input[start + 1] <= 0x7f:
      raiseNonCanonical()

    (start + 1 .. start + size, rlpBlob)
  of 0xb8'u8 .. 0xbf'u8:
    # String longer than 55 bytes: the marker is 0xb7 plus the number of
    # bytes used by the length, then the big-endian length, then the string.
    # E.g. a 1024 byte string starts with \xb9\x04\x00.
    let
      lenOfSize = int(marker - 0xb7)
      size = decodeInteger(input.view(start + 1 .. start + lenOfSize))

    # Short strings must use the compact form above.
    if size < THRESHOLD_LIST_LEN:
      raiseNonCanonical()

    if size >= uint64(avail - lenOfSize):
      raiseOutOfBounds()

    (start + 1 + lenOfSize .. start + lenOfSize + int(size), rlpBlob)
  of 0xc0'u8 .. 0xf7'u8:
    # List with a total payload of 0-55 bytes: the marker is 0xc0 plus the
    # payload length and the encoded items follow immediately.
    let size = int(marker - 0xc0)
    if size >= avail:
      raiseOutOfBounds()

    (start + 1 .. start + size, rlpList)
  else:
    # List with a payload longer than 55 bytes: the marker is 0xf7 plus the
    # number of bytes used by the length, then the big-endian payload
    # length, then the encoded items. Markers are in [0xf8, 0xff].
    let
      lenOfSize = int(marker - 0xf7)
      size = decodeInteger(input.view(start + 1 .. start + lenOfSize))

    # Short lists must use the compact form above.
    if size < THRESHOLD_LIST_LEN:
      raiseNonCanonical()

    if size >= uint64(avail - lenOfSize):
      raiseOutOfBounds()

    (start + 1 + lenOfSize .. start + lenOfSize + int(size), rlpList)
2019-02-05 12:01:10 +00:00
func item(self: Rlp, position: int): RlpItem =
  ## Coordinates of the item whose first byte is at `position` in the buffer.
  self.bytes.rlpItem(position)

func item(self: Rlp): RlpItem =
  ## Coordinates of the item at the current position.
  item(self, self.position)
2019-02-05 12:01:10 +00:00
func rlpFromBytes*(data: openArray[byte]): Rlp =
  ## Creates an `Rlp` positioned at offset 0 over a copy of `data`.
  Rlp(position: 0, bytes: @data)

func rlpFromBytes*(data: sink seq[byte]): Rlp =
  ## Creates an `Rlp` positioned at offset 0 that takes over `data`.
  Rlp(position: 0, bytes: move(data))
2019-02-05 12:01:10 +00:00
# An `Rlp` made with the default constructor, i.e. no field set explicitly.
const zeroBytesRlp* = Rlp()
2019-02-05 12:01:10 +00:00
func rlpFromHex*(input: string): Rlp =
  ## Creates an `Rlp` positioned at offset 0 over the bytes that
  ## `hexToSeqByte` produces for `input`.
  result = Rlp(position: 0, bytes: input.hexToSeqByte())
2019-02-05 12:01:10 +00:00
func hasData(self: Rlp, position: int): bool =
  ## True when `position` lies before the end of the buffer.
  self.bytes.len > position

func hasData*(self: Rlp): bool =
  ## True when the current position lies before the end of the buffer.
  hasData(self, self.position)
2019-02-05 12:01:10 +00:00
func isBlob(self: Rlp, position: int): bool =
  ## True when there is a byte at `position` and it is below
  ## `LIST_START_MARKER`.
  if not self.hasData(position):
    return false
  self.bytes[position] < LIST_START_MARKER

func isBlob*(self: Rlp): bool =
  ## True when the item at the current position is a blob.
  isBlob(self, self.position)
2019-02-05 12:01:10 +00:00
func isEmpty*(self: Rlp): bool =
  ## True when the current item is a blob or a list of zero length, i.e. the
  ## byte at the current position is exactly `BLOB_START_MARKER` or
  ## `LIST_START_MARKER`.
  if not self.hasData():
    return false

  let marker = self.bytes[self.position]
  marker == BLOB_START_MARKER or marker == LIST_START_MARKER
2019-02-05 12:01:10 +00:00
func isList(self: Rlp, position: int): bool =
  ## True when there is a byte at `position` and it is at or above
  ## `LIST_START_MARKER`.
  if not self.hasData(position):
    return false
  self.bytes[position] >= LIST_START_MARKER

func isList*(self: Rlp): bool =
  ## True when the item at the current position is a list.
  isList(self, self.position)
2019-02-05 12:01:10 +00:00
func isSingleByte(self: Rlp, position: int): bool =
  ## True when there is a byte at `position` and it is below
  ## `BLOB_START_MARKER`, i.e. the byte is its own encoding.
  if not self.hasData(position):
    return false
  self.bytes[position] < BLOB_START_MARKER

func isSingleByte*(self: Rlp): bool =
  ## True when the item at the current position is a self-encoded byte.
  isSingleByte(self, self.position)
2019-02-05 12:01:10 +00:00
func getByteValue*(self: Rlp): byte =
  ## The byte at the current position. The caller must make sure the current
  ## item is a single self-encoded byte; this is asserted.
  doAssert self.isSingleByte()
  result = self.bytes[self.position]
func readRawByte*(self: var Rlp): byte =
  ## Reads a raw byte that is not RLP encoded and steps over it.
  ## This is sometimes used to communicate union type information.
  ## Asserts that there is data left at the current position.
  doAssert self.hasData
  result = self.bytes[self.position]
  self.position += 1
func blobLen*(self: Rlp): int =
  ## Payload length of the current item when it is a blob, otherwise 0.
  if self.isBlob():
    result = len(self.item().payload)
func isInt*(self: Rlp): bool =
  ## True when the current item is a blob that is either empty or does not
  ## start with a zero byte. Returns false when no data is left.
  if self.hasData():
    let current = self.item()
    result = current.typ == rlpBlob and
      (current.payload.len() == 0 or self.bytes[current.payload.a] != 0)
2019-02-05 12:01:10 +00:00
# Largest number of payload bytes accepted when decoding into type `o`:
# the size of `o` in bytes.
template maxBytes*(o: type[Ordinal | uint64 | uint]): int =
  o.sizeof
func toInt(self: Rlp, item: RlpItem, IntType: type): IntType =
  ## Decodes the payload of the blob `item` as a big-endian `IntType`.
  ##
  ## Raises `RlpTypeMismatch` when `item` is not a blob and
  ## `UnsupportedRlpError` when the payload is longer than
  ## `maxBytes(IntType)`.
  ##
  ## NOTE(review): unlike `decodeInteger`, a leading zero byte is not
  ## rejected here - confirm that this is intended.
  mixin maxBytes, to
  if item.typ != rlpBlob:
    raiseExpectedBlob()

  if maxBytes(IntType) < item.payload.len:
    raiseIntOutOfBounds()

  for octet in self.bytes.view(item.payload):
    result = (result shl 8) or IntType(octet)

func toInt*(self: Rlp, IntType: type): IntType =
  ## Decodes the item at the current position as an `IntType`; the position
  ## is left untouched.
  let current = self.item()
  self.toInt(current, IntType)
2019-05-31 12:23:30 +00:00
func toString*(self: Rlp): string =
  ## The payload of the blob at the current position as a string; the
  ## position is left untouched.
  # TODO https://github.com/nim-lang/Nim/issues/23645
  # `result` is assigned twice so that the returned string is cleared
  # properly on exception - the extra assignment can be removed once that
  # bug is fixed
  result = ""
  let current = self.item()
  result = self.toString(current)
2019-02-05 12:01:10 +00:00
func toBytes(self: Rlp, item: RlpItem): seq[byte] =
  ## Copies the payload of the blob `item` into a new sequence.
  ## Raises `RlpTypeMismatch` when `item` is not a blob.
  if item.typ != rlpBlob:
    raiseExpectedBlob()

  result = @(self.bytes.view(item.payload))

func toBytes*(self: Rlp): seq[byte] =
  ## Copies the payload of the blob at the current position; the position
  ## is left untouched.
  let current = self.item()
  self.toBytes(current)
2019-02-05 12:01:10 +00:00
func currentElemEnd(self: Rlp, position: int): int =
  ## Offset of the first byte after the item that starts at `position`.
  self.item(position).payload.b + 1

func currentElemEnd*(self: Rlp): int =
  ## Offset of the first byte after the item at the current position.
  currentElemEnd(self, self.position)
2019-02-05 12:01:10 +00:00
func enterList*(self: var Rlp): bool =
  ## Moves the position to the start of the payload of the current list and
  ## returns true. Returns false, leaving the position alone, when the
  ## current item is not a list or cannot be parsed.
  try: # TODO Refactor to remove exception here..
    let current = self.item()
    if current.typ == rlpList:
      self.position = current.payload.a
      result = true
  except RlpError:
    result = false
2019-02-05 12:01:10 +00:00
func tryEnterList*(self: var Rlp) =
  ## Like `enterList`, but raises `RlpTypeMismatch` instead of returning
  ## false.
  let entered = self.enterList()
  if not entered:
    raiseExpectedList()
func positionAfter(rlp: var Rlp, item: RlpItem) =
  ## Moves the position to the byte right after the payload of `item`.
  rlp.position = succ(item.payload.b)
func positionAt(rlp: var Rlp, item: RlpItem) =
  ## Moves the position to the first payload byte of `item`.
  let payloadStart = item.payload.a
  rlp.position = payloadStart
func skipElem*(rlp: var Rlp) =
  ## Moves the position past the current item. Asserts that there is data
  ## left at the current position.
  doAssert rlp.hasData()
  let current = rlp.item()
  rlp.positionAfter(current)
# Runs `body` once per direct child of the list whose header is at
# `position`, with `it` holding the offset of the child. Every child is
# parsed against the bounds of the list payload. Asserts that the item at
# `position` is a list.
template iterateIt(self: Rlp, position: int, body: untyped) =
  let item = self.item(position)
  doAssert item.typ == rlpList

  let stop = item.payload.b
  var it {.inject.} = item.payload.a
  while it <= stop:
    # Coordinates are relative to `it` because the view starts there
    let child = rlpItem(self.bytes.view(it .. stop)).payload
    body
    it += child.b + 1
iterator items(self: var Rlp, item: RlpItem): var Rlp =
  ## Visits the children of the list `item` by moving `self.position` onto
  ## each of them in turn and yielding `self`. Asserts that `item` is a list.
  doAssert item.typ == rlpList

  let stop = item.payload.b
  self.position = item.payload.a
  while self.position <= stop:
    # Work out where the following child starts before handing out `self`:
    # self.position might have changed during yield
    let
      child = rlpItem(self.bytes.view(self.position .. stop)).payload
      following = self.position + child.b + 1
    yield self
    self.position = following

iterator items*(self: var Rlp): var Rlp =
  ## Visits the children of the list at the current position by moving
  ## `self.position` onto each of them in turn and yielding `self`.
  let list = self.item()
  for elem in self.items(list):
    yield elem
2019-02-05 12:01:10 +00:00
func listElem*(self: Rlp, i: int): Rlp =
  ## Returns a new `Rlp` over a copy of the bytes (header and payload) of
  ## the child reached by skipping `i` children of the current list.
  ## Asserts that the current item is a list.
  let item = self.item()
  doAssert item.typ == rlpList

  let stop = item.payload.b
  var
    start = item.payload.a
    child = rlpItem(self.bytes.view(start .. stop)).payload

  # Hop over one child per step; nothing happens for `i <= 0`
  for _ in 0 ..< i:
    start += child.b + 1
    child = rlpItem(self.bytes.view(start .. stop)).payload

  rlpFromBytes self.bytes.view(start .. start + child.b)
2019-02-05 12:01:10 +00:00
func listLen*(self: Rlp): int =
  ## Number of direct children of the list at the current position, or 0
  ## when the current item is not a list.
  if self.isList():
    self.iterateIt(self.position):
      result += 1
func readImpl(rlp: var Rlp, T: type string): string =
  ## Reads the blob at the current position as a string and moves past it.
  let current = rlp.item()
  result = rlp.toString(current)
  rlp.positionAfter(current)
2019-02-05 12:01:10 +00:00
func readImpl(rlp: var Rlp, T: type SomeUnsignedInt): T =
  ## Reads the blob at the current position as an unsigned integer of type
  ## `T` and moves past it.
  let current = rlp.item()
  result = rlp.toInt(current, T)
  rlp.positionAfter(current)
2019-02-05 12:01:10 +00:00
func readImpl(rlp: var Rlp, T: type[enum]): T =
  ## Reads the blob at the current position as an integer, converts it to
  ## the enum `T` and moves past it.
  ##
  ## Raises `RlpTypeMismatch` when `checkedEnumAssign` refuses the value.
  let
    current = rlp.item()
    raw = rlp.toInt(current, uint64)

  if not checkedEnumAssign(result, raw):
    raise newException(
      RlpTypeMismatch, "Enum value expected, but the source RLP is not in valid range."
    )

  rlp.positionAfter(current)
func readImpl(rlp: var Rlp, T: type bool): T =
  ## Reads the current item as a `uint64`; any non-zero value means true.
  let raw = rlp.readImpl(uint64)
  raw != 0
2019-02-05 12:01:10 +00:00
func readImpl[R, E](rlp: var Rlp, T: type array[R, E]): T =
  ## Reads the current item into a fixed-size array and moves past it.
  ##
  ## For `byte`/`char` elements the item must be a blob whose payload is
  ## exactly as long as the array. For any other element type the item must
  ## be a list with exactly as many children as the array has elements; each
  ## child is decoded with `read(E)`.
  ##
  ## Raises `RlpTypeMismatch` when the item has the wrong kind or length.
  mixin read

  let item = rlp.item()

  when E is (byte or char):
    if item.typ != rlpBlob:
      raiseExpectedBlob()

    if item.payload.len != result.len:
      raise newException(
        RlpTypeMismatch,
        "Fixed-size array expected, but the source RLP contains a blob of different length",
      )

    copyMem(addr result[0], unsafeAddr rlp.bytes[item.payload.a], result.len)
  else:
    # Refuse anything but a list up front, as the `seq` reader does.
    # `listLen` reports 0 for a non-list, which would otherwise give a
    # misleading length error or, for an empty array, trip the `doAssert`
    # in `items`.
    if item.typ != rlpList:
      raiseExpectedList()

    if result.len != rlp.listLen:
      raise newException(
        RlpTypeMismatch,
        "Fixed-size array expected, but the source RLP contains a list of different length",
      )

    var i = 0
    for elem in rlp.items(item):
      result[i] = rlp.read(E)
      inc i

  rlp.positionAfter(item)
2019-02-05 12:01:10 +00:00
func readImpl[E](rlp: var Rlp, T: type seq[E]): T =
  ## Reads the current item into a sequence and moves past it.
  ##
  ## A `seq[byte]` is filled from the payload of a blob. For any other
  ## element type the item must be a list and each child is decoded with
  ## `read(E)`.
  mixin read

  let current = rlp.item()

  when E isnot byte:
    if current.typ != rlpList:
      raiseExpectedList()

    result = newSeqOfCap[E](rlp.listLen)
    for elem in rlp.items():
      result.add rlp.read(E)
  else:
    result = rlp.toBytes(current)

  rlp.positionAfter(current)
func readImpl[E](rlp: var Rlp, T: type openArray[E]): seq[E] =
  ## Reading an `openArray[E]` yields a `seq[E]`; the work is done by the
  ## `seq[E]` reader.
  result = readImpl(rlp, seq[E])
2019-02-05 12:01:10 +00:00
func readImpl(
    rlp: var Rlp, T: type[object | tuple], wrappedInList = wrapObjsInList
): T =
  ## Reads the fields of an object or tuple one after another, in the order
  ## in which `enumerateRlpFields` visits them.
  ##
  ## With `wrappedInList` the current item must be a list (otherwise
  ## `RlpTypeMismatch` is raised) and the fields are read from its payload;
  ## without it the fields are read starting at the current position.
  mixin enumerateRlpFields, read

  # Offset at which the fields of this record stop
  let payloadEnd =
    if not wrappedInList:
      rlp.bytes.len()
    else:
      let outer = rlp.item()
      if outer.typ != rlpList:
        raiseExpectedList()
      rlp.positionAt(outer)
      outer.payload.b + 1

  template getUnderlyingType[T](_: Option[T]): untyped =
    T

  template getUnderlyingType[T](_: Opt[T]): untyped =
    T

  template op(RecordType, fieldName, field) {.used.} =
    type FieldType {.used.} = type field
    when hasCustomPragmaFixed(RecordType, fieldName, rlpCustomSerialization):
      field = rlp.read(result, FieldType)
    elif field is Option:
      # An optional field is present only while data is left before
      # `payloadEnd`, so this works for optional fields at the end of an
      # object/tuple. When a mandatory field follows an optional one,
      # custom serialization for the field or for the parent object is the
      # better choice.
      type UT = getUnderlyingType(field)
      if rlp.position >= payloadEnd:
        field = none(UT)
      else:
        field = some(rlp.read(UT))
    elif field is Opt:
      # Same rule as for `Option`: only trailing optional fields are
      # handled here; use custom serialization for anything else.
      type UT = getUnderlyingType(field)
      if rlp.position >= payloadEnd:
        field = Opt.none(UT)
      else:
        field = Opt.some(rlp.read(UT))
    else:
      field = rlp.read(FieldType)

  enumerateRlpFields(result, op)
proc validateItem(self: Rlp, position: int): int =
  ## Checks the item whose header starts at `position` together with
  ## everything nested inside it, and returns the offset just past the item.
  let item = self.item(position)
  if item.typ == rlpList:
    # `iterateIt` takes the offset of the list header itself (not of its
    # first child) and parses every child against the bounds of the list
    # payload.
    self.iterateIt(position):
      discard self.validateItem(it)

  item.payload.b + 1

proc validate(self: Rlp, position: int) =
  ## Checks every item from `position` up to the end of the buffer,
  ## descending into lists. Raises the error reported by the item parser
  ## for the first malformed item found.
  var pos = position
  while true:
    pos = self.validateItem(pos)
    if pos >= self.bytes.len:
      break

func validate*(self: Rlp) =
  ## Checks every item from the current position up to the end of the
  ## buffer, descending into lists; raises when an item is malformed.
  self.validate(self.position)
2019-02-05 12:01:10 +00:00
2022-11-16 16:44:00 +00:00
# A single `read` template with a deliberately low specificity score, so
# that overloading it for user types stays easy. Signed integers are
# rejected at compile time; everything else goes to `readImpl`.
template read*(rlp: var Rlp, T: type): auto =
  when T isnot SomeSignedInt:
    readImpl(rlp, T)
  else:
    {.error "Signed integer encoding is not defined for rlp".}
2019-02-05 12:01:10 +00:00
func `>>`*[T](rlp: var Rlp, location: var T) =
  ## Reads a value of type `T` from `rlp` and stores it in `location`.
  mixin read
  let value = rlp.read(T)
  location = value
# Reads a record of type `T`, letting the caller choose explicitly whether
# its fields are expected inside a list (`wrappedInList`).
template readRecordType*(rlp: var Rlp, T: type, wrappedInList: bool): auto =
  readImpl(rlp, T, wrappedInList)
# Decodes a value of type `T` from the start of `bytes`.
template decode*(bytes: openArray[byte], T: type): untyped =
  mixin read
  var reader = rlpFromBytes(bytes)
  reader.read(T)

# Decodes a value of type `T` from the start of `bytes`.
template decode*(bytes: seq[byte], T: type): untyped =
  mixin read
  var reader = rlpFromBytes(bytes)
  reader.read(T)
2019-02-05 12:01:10 +00:00
# The encoded bytes of the item at the current position, from its first
# byte up to the byte before `currentElemEnd`.
template rawData*(self: Rlp): openArray[byte] =
  toOpenArray(self.bytes, self.position, self.currentElemEnd - 1)
func append*(writer: var RlpWriter, rlp: Rlp) =
  ## Appends the encoded bytes of the current item of `rlp` to `writer`
  ## as they are.
  writer.appendRawBytes(rlp.rawData)
func isPrintable(s: string): bool =
  ## True when every character of `s` is a printable ASCII character, i.e.
  ## lies in the range `' ' .. '~'` (0x20 .. 0x7e). The empty string counts
  ## as printable.
  for c in s:
    # DEL (0x7f) is a control character and therefore excluded as well
    if c notin {' ' .. '~'}:
      return false

  true
func renderBlob(self: var Rlp, hexOutput: bool, output: var string) =
  ## Appends a rendering of the blob at the current position to `output`:
  ## printable blobs are shown as a quoted string, all others as
  ## `blob(<len>) [...]` with the bytes in hex (`hexOutput`) or as
  ## comma-separated decimals.
  # NOTE(review): the nesting of the comma below was reconstructed from a
  # source without indentation - confirm that hex bytes are meant to be
  # printed without a separator.
  let text = self.toString
  if not text.isPrintable:
    output.add "blob(" & $text.len & ") ["
    if hexOutput:
      for ch in text:
        output.add toHex(int(ch), 2)
      output.add ']'
    else:
      for ch in text:
        output.add $ord(ch)
        output.add ","
      # Turn the trailing comma into the closing bracket
      output[^1] = ']'
  else:
    output.add '"'
    output.add text
    output.add '"'
func inspectAux(self: var Rlp, depth: int, hexOutput: bool, output: var string) =
  ## Appends a rendering of the item at the current position to `output`,
  ## indented by `depth` levels; lists are rendered recursively between
  ## braces. Does nothing when no data is left.
  if self.hasData():
    template pad() =
      for level in 0 ..< depth:
        output.add " "

    pad()

    if self.isSingleByte:
      output.add "byte "
      output.add $self.bytes[self.position]
    elif self.isBlob:
      self.renderBlob(hexOutput, output)
    else:
      output.add "{\n"
      for child in self.items:
        inspectAux(child, depth + 1, hexOutput, output)
        output.add "\n"
      pad()
      output.add "}"
func inspect*(self: Rlp, indent = 0, hexOutput = true): string =
  ## Renders the item at the current position as text, starting at
  ## indentation level `indent`. Works on a copy, so `self` is not moved.
  result = newStringOfCap(self.bytes.len)
  var cursor = self
  cursor.inspectAux(indent, hexOutput, result)