## This module implements RLP encoding and decoding as ## defined in Appendix B of the Ethereum Yellow Paper: ## https://ethereum.github.io/yellowpaper/paper.pdf import std/[strutils, options], stew/[byteutils, shims/macros], results, ./rlp/[writer, object_serialization], ./rlp/priv/defs from stew/objects import checkedEnumAssign export writer, object_serialization type Rlp* = object bytes: seq[byte] position*: int RlpNodeType* = enum rlpBlob rlpList RlpError* = object of CatchableError MalformedRlpError* = object of RlpError UnsupportedRlpError* = object of RlpError RlpTypeMismatch* = object of RlpError RlpItem = tuple[payload: Slice[int], typ: RlpNodeType] func raiseOutOfBounds() {.noreturn, noinline.} = raise (ref MalformedRlpError)(msg: "out-of-bounds payload access") func raiseExpectedBlob() {.noreturn, noinline.} = raise (ref RlpTypeMismatch)(msg: "expected blob") func raiseExpectedList() {.noreturn, noinline.} = raise (ref RlpTypeMismatch)(msg: "expected list") func raiseNonCanonical() {.noreturn, noinline.} = raise (ref MalformedRlpError)(msg: "non-canonical encoding") func raiseIntOutOfBounds() {.noreturn, noinline.} = raise (ref UnsupportedRlpError)(msg: "integer out of bounds") template view(input: openArray[byte], position: int): openArray[byte] = if position >= input.len: raiseOutOfBounds() toOpenArray(input, position, input.high()) template view(input: openArray[byte], slice: Slice[int]): openArray[byte] = if slice.b >= input.len: raiseOutOfBounds() toOpenArray(input, slice.a, slice.b) template getPtr(x: untyped): auto = when (NimMajor, NimMinor) <= (1, 6): unsafeAddr(x) else: addr(x) func toString(self: Rlp, item: RlpItem): string = result = "" # TODO https://github.com/nim-lang/Nim/issues/23645 if item.typ != rlpBlob: raiseExpectedBlob() if 0 < item.payload.len: result = newString(item.payload.len) copyMem(addr result[0], self.bytes.view(item.payload)[0].getPtr, result.len) func decodeInteger(input: openArray[byte]): uint64 = # For a positive integer, it is converted to the the shortest byte array whose # big-endian interpretation is the integer, and then encoded as a string # according to the rules below. if input.len > sizeof(uint64): raiseIntOutOfBounds() if input.len == 0: 0 else: if input[0] == 0: raiseNonCanonical() var v: uint64 for b in input: v = (v shl 8) or uint64(b) v # https://ethereum.org/en/developers/docs/data-structures-and-encoding/rlp/ func rlpItem(input: openArray[byte], start = 0): RlpItem = # Extract coordinates for the RLP item starting at `start`, ensuring that # it (but not necessarily its payload) is correctly encoded if start >= len(input): raiseOutOfBounds() let length = len(input) - start # >= 1 prefix = input[start] if prefix <= 0x7f: # For a single byte whose value is in the [0x00, 0x7f] (decimal [0, 127]) # range, that byte is its own RLP encoding. (start .. start, rlpBlob) elif prefix <= 0xb7: # Otherwise, if a string is 0-55 bytes long, the RLP encoding consists of a # single byte with value 0x80 (dec. 128) plus the length of the string # followed by the string. The range of the first byte is thus [0x80, 0xb7] # (dec. [128, 183]). let strLen = int(prefix - 0x80) if strLen >= length: raiseOutOfBounds() if strLen == 1 and input[start + 1] <= 0x7f: raiseNonCanonical() (start + 1 .. start + strLen, rlpBlob) elif prefix <= 0xbf: # If a string is more than 55 bytes long, the RLP encoding consists of a # single byte with value 0xb7 (dec. 183) plus the length in bytes of the # length of the string in binary form, followed by the length of the string, # followed by the string. For example, a 1024 byte long string would be # encoded as \xb9\x04\x00 (dec. 185, 4, 0) followed by the string. # Here, 0xb9 (183 + 2 = 185) as the first byte, followed by the 2 bytes # 0x0400 (dec. 1024) that denote the length of the actual string. The range # of the first byte is thus [0xb8, 0xbf] (dec. [184, 191]). let lenOfStrLen = int(prefix - 0xb7) strLen = decodeInteger(input.view(start + 1 .. start + lenOfStrLen)) if strLen < THRESHOLD_LIST_LEN: raiseNonCanonical() if strLen >= uint64(length - lenOfStrLen): raiseOutOfBounds() (start + 1 + lenOfStrLen .. start + lenOfStrLen + int(strLen), rlpBlob) elif prefix <= 0xf7: # If the total payload of a list (i.e. the combined length of all its items # being RLP encoded) is 0-55 bytes long, the RLP encoding consists of a # single byte with value 0xc0 plus the length of the payload followed by the # concatenation of the RLP encodings of the items. The range of the first # byte is thus [0xc0, 0xf7] (dec. [192, 247]). let listLen = int(prefix - 0xc0) if listLen >= length: raiseOutOfBounds() (start + 1 .. start + listLen, rlpList) else: # If the total payload of a list is more than 55 bytes long, the RLP # encoding consists of a single byte with value 0xf7 plus the length in # bytes of the length of the payload in binary form, followed by the length # of the payload, followed by the concatenation of the RLP encodings of the # items. The range of the first byte is thus [0xf8, 0xff] (dec. [248, 255]). let lenOfListLen = int(prefix - 0xf7) listLen = decodeInteger(input.view(start + 1 .. start + lenOfListLen)) if listLen < THRESHOLD_LIST_LEN: raiseNonCanonical() if listLen >= uint64(length - lenOfListLen): raiseOutOfBounds() (start + 1 + lenOfListLen .. start + lenOfListLen + int(listLen), rlpList) func item(self: Rlp, position: int): RlpItem = rlpItem(self.bytes, position) func item(self: Rlp): RlpItem = self.item(self.position) func rlpFromBytes*(data: openArray[byte]): Rlp = Rlp(bytes: @data, position: 0) func rlpFromBytes*(data: sink seq[byte]): Rlp = Rlp(bytes: move(data), position: 0) const zeroBytesRlp* = Rlp() func rlpFromHex*(input: string): Rlp = Rlp(bytes: hexToSeqByte(input), position: 0) func hasData(self: Rlp, position: int): bool = position < self.bytes.len func hasData*(self: Rlp): bool = self.hasData(self.position) func isBlob(self: Rlp, position: int): bool = self.hasData(position) and self.bytes[position] < LIST_START_MARKER func isBlob*(self: Rlp): bool = self.isBlob(self.position) func isEmpty*(self: Rlp): bool = ### Contains a blob or a list of zero length self.hasData() and ( self.bytes[self.position] == BLOB_START_MARKER or self.bytes[self.position] == LIST_START_MARKER ) func isList(self: Rlp, position: int): bool = self.hasData(position) and self.bytes[position] >= LIST_START_MARKER func isList*(self: Rlp): bool = self.isList(self.position) func isSingleByte(self: Rlp, position: int): bool = self.hasData(position) and self.bytes[position] < BLOB_START_MARKER func isSingleByte*(self: Rlp): bool = self.isSingleByte(self.position) func getByteValue*(self: Rlp): byte = doAssert self.isSingleByte() self.bytes[self.position] func readRawByte*(self: var Rlp): byte = ### Read a raw byte that is not RLP encoded ### This is sometimes used to communicate union type information doAssert self.hasData let res = self.bytes[self.position] inc self.position res func blobLen*(self: Rlp): int = if self.isBlob(): self.item().payload.len() else: 0 func isInt*(self: Rlp): bool = if not self.hasData(): return false let item = self.item() item.typ == rlpBlob and ( item.payload.len() == 0 or self.bytes[item.payload.a] != 0) template maxBytes*(o: type[Ordinal | uint64 | uint]): int = sizeof(o) func toInt(self: Rlp, item: RlpItem, IntType: type): IntType = mixin maxBytes, to if item.typ != rlpBlob: raiseExpectedBlob() if item.payload.len > maxBytes(IntType): raiseIntOutOfBounds() for b in self.bytes.view(item.payload): result = (result shl 8) or IntType(b) func toInt*(self: Rlp, IntType: type): IntType = self.toInt(self.item(), IntType) func toString*(self: Rlp): string = # TODO https://github.com/nim-lang/Nim/issues/23645 # the returnd string is cleared properly on exception here - the double # result assignment can be removed once that bug is fixed result = "" result = self.toString(self.item()) func toBytes(self: Rlp, item: RlpItem): seq[byte] = if item.typ != rlpBlob: raiseExpectedBlob() @(self.bytes.view(item.payload)) func toBytes*(self: Rlp): seq[byte] = self.toBytes(self.item()) func currentElemEnd(self: Rlp, position: int): int = let item = self.item(position).payload item.b + 1 func currentElemEnd*(self: Rlp): int = self.currentElemEnd(self.position) func enterList*(self: var Rlp): bool = try: # TODO Refactor to remove exception here.. let item = self.item() if item.typ != rlpList: return false self.position = item.payload.a return true except RlpError: return false func tryEnterList*(self: var Rlp) = if not self.enterList(): raiseExpectedList() func positionAfter(rlp: var Rlp, item: RlpItem) = rlp.position = item.payload.b + 1 func positionAt(rlp: var Rlp, item: RlpItem) = rlp.position = item.payload.a func skipElem*(rlp: var Rlp) = doAssert rlp.hasData() rlp.positionAfter(rlp.item()) template iterateIt(self: Rlp, position: int, body: untyped) = let item = self.item(position) doAssert item.typ == rlpList var it {.inject.} = item.payload.a let last = item.payload.b while it <= last: let subItem = rlpItem(self.bytes.view(it .. last)).payload body it += subItem.b + 1 iterator items(self: var Rlp, item: RlpItem): var Rlp = # Iterate over items while updating "current" element view, mutating self doAssert item.typ == rlpList self.position = item.payload.a let last = item.payload.b while self.position <= last: let subItem = rlpItem(self.bytes.view(self.position .. last)).payload next = self.position + subItem.b + 1 yield self self.position = next # self.position might have changed during yield iterator items*(self: var Rlp): var Rlp = # Iterate over items while updating "current" element view, mutating self let item = self.item() for item in self.items(item): yield item func listElem*(self: Rlp, i: int): Rlp = let item = self.item() doAssert item.typ == rlpList var i = i start = item.payload.a payload = rlpItem(self.bytes.view(start .. item.payload.b)).payload while i > 0: start += payload.b + 1 payload = rlpItem(self.bytes.view(start .. item.payload.b)).payload dec i rlpFromBytes self.bytes.view(start .. start + payload.b) func listLen*(self: Rlp): int = if not self.isList(): return 0 self.iterateIt(self.position): inc result func readImpl(rlp: var Rlp, T: type string): string = let item = rlp.item() result = rlp.toString(item) rlp.positionAfter(item) func readImpl(rlp: var Rlp, T: type SomeUnsignedInt): T = let item = rlp.item() result = rlp.toInt(item, T) rlp.positionAfter(item) func readImpl(rlp: var Rlp, T: type[enum]): T = let item = rlp.item() value = rlp.toInt(item, uint64) var res: T if not checkedEnumAssign(res, value): raise newException( RlpTypeMismatch, "Enum value expected, but the source RLP is not in valid range." ) rlp.positionAfter(item) res func readImpl(rlp: var Rlp, T: type bool): T = rlp.readImpl(uint64) != 0 func readImpl[R, E](rlp: var Rlp, T: type array[R, E]): T = mixin read let item = rlp.item() when E is (byte or char): if item.typ != rlpBlob: raiseExpectedBlob() if item.payload.len != result.len: raise newException( RlpTypeMismatch, "Fixed-size array expected, but the source RLP contains a blob of different length", ) copyMem(addr result[0], unsafeAddr rlp.bytes[item.payload.a], result.len) else: if result.len != rlp.listLen: raise newException( RlpTypeMismatch, "Fixed-size array expected, but the source RLP contains a list of different length", ) var i = 0 for elem in rlp.items(item): result[i] = rlp.read(E) inc i rlp.positionAfter(item) func readImpl[E](rlp: var Rlp, T: type seq[E]): T = mixin read let item = rlp.item() when E is byte: result = rlp.toBytes(item) else: if item.typ != rlpList: raiseExpectedList() result = newSeqOfCap[E](rlp.listLen) for elem in rlp.items(): result.add rlp.read(E) rlp.positionAfter(item) func readImpl[E](rlp: var Rlp, T: type openArray[E]): seq[E] = readImpl(rlp, seq[E]) func readImpl( rlp: var Rlp, T: type[object | tuple], wrappedInList = wrapObjsInList ): T = mixin enumerateRlpFields, read let payloadEnd = if wrappedInList: let item = rlp.item() if item.typ != rlpList: raiseExpectedList() rlp.positionAt(item) item.payload.b + 1 else: rlp.bytes.len() template getUnderlyingType[T](_: Option[T]): untyped = T template getUnderlyingType[T](_: Opt[T]): untyped = T template op(RecordType, fieldName, field) {.used.} = type FieldType {.used.} = type field when hasCustomPragmaFixed(RecordType, fieldName, rlpCustomSerialization): field = rlp.read(result, FieldType) elif field is Option: # this works for optional fields at the end of an object/tuple # if the optional field is followed by a mandatory field, # custom serialization for a field or for the parent object # will be better type UT = getUnderlyingType(field) if rlp.position < payloadEnd: field = some(rlp.read(UT)) else: field = none(UT) elif field is Opt: # this works for optional fields at the end of an object/tuple # if the optional field is followed by a mandatory field, # custom serialization for a field or for the parent object # will be better type UT = getUnderlyingType(field) if rlp.position < payloadEnd: field = Opt.some(rlp.read(UT)) else: field = Opt.none(UT) else: field = rlp.read(FieldType) enumerateRlpFields(result, op) proc validate(self: Rlp, position: int) = var item = self.item(position) while true: if item.typ == rlpList: self.iterateIt(item.payload.a): self.validate(it) if item.payload.b >= self.bytes.high(): break item = self.item(item.payload.b + 1) func validate*(self: Rlp) = self.validate(self.position) # We define a single `read` template with a pretty low specificity # score in order to facilitate easier overloading with user types: template read*(rlp: var Rlp, T: type): auto = when T is SomeSignedInt: {.error "Signed integer encoding is not defined for rlp".} else: readImpl(rlp, T) func `>>`*[T](rlp: var Rlp, location: var T) = mixin read location = rlp.read(T) template readRecordType*(rlp: var Rlp, T: type, wrappedInList: bool): auto = readImpl(rlp, T, wrappedInList) template decode*(bytes: openArray[byte], T: type): untyped = mixin read var rlp = rlpFromBytes(bytes) rlp.read(T) template decode*(bytes: seq[byte], T: type): untyped = mixin read var rlp = rlpFromBytes(bytes) rlp.read(T) template rawData*(self: Rlp): openArray[byte] = self.bytes.toOpenArray(self.position, self.currentElemEnd - 1) func append*(writer: var RlpWriter, rlp: Rlp) = appendRawBytes(writer, rlp.rawData) func isPrintable(s: string): bool = for c in s: if ord(c) < 32 or ord(c) >= 128: return false return true func renderBlob(self: var Rlp, hexOutput: bool, output: var string) = let str = self.toString if str.isPrintable: output.add '"' output.add str output.add '"' else: output.add "blob(" & $str.len & ") [" for c in str: if hexOutput: output.add toHex(int(c), 2) else: output.add $ord(c) output.add "," if hexOutput: output.add ']' else: output[^1] = ']' func inspectAux(self: var Rlp, depth: int, hexOutput: bool, output: var string) = if not self.hasData(): return template indent() = for i in 0 ..< depth: output.add " " indent() if self.isSingleByte: output.add "byte " output.add $self.bytes[self.position] elif self.isBlob: self.renderBlob(hexOutput, output) else: output.add "{\n" for subitem in self.items: inspectAux(subitem, depth + 1, hexOutput, output) output.add "\n" indent() output.add "}" func inspect*(self: Rlp, indent = 0, hexOutput = true): string = var rlpCopy = self result = newStringOfCap(self.bytes.len) inspectAux(rlpCopy, indent, hexOutput, result)