import stew/[endians2], ./codec # These load templates assume there is enough data to read at the margin, which # the code ensures via manual range checking - the built-in range check adds 40% # execution time template load16(input: openArray[byte], offsetParam: int): uint16 = let offset = offsetParam uint16.fromBytesLE( cast[ptr UncheckedArray[byte]](input).toOpenArray(offset, offset + 1)) template load32(input: openArray[byte], offsetParam: int): uint32 = let offset = offsetParam uint32.fromBytesLE( cast[ptr UncheckedArray[byte]](input).toOpenArray(offset, offset + 3)) func decodeAllTags*( input: openArray[byte], output: var openArray[byte]): Result[int, CodecError] = ## Decode all bytes of `input` into `output` and return the number of ## of bytes written. Returns error if input does not fit in output. if input.len <= 0: # let the optimizer know len > 0 return ok(0) if output.len <= 0: # let the optimizer know len > 0 return err(CodecError.bufferTooSmall) var op = 0 ip = 0 length: int offset: uint32 # TODO https://github.com/nim-lang/Nim/issues/19653 while uint(ip) < uint(input.len): let tag = input[ip] case (tag and 0x03) of tagLiteral: ip += 1 length = int((tag shr 2) + 1) # 1 <= len32 <= 64 if length <= 16 and (output.len - op) >= 16 and (input.len - ip) >= 16: copyMem(addr output[op], unsafeAddr input[ip], 16) op += length ip += length continue if length >= 61: if (input.len - ip) < 61: # There must be at least 61 bytes, else we wouldn't be in this branch return err(CodecError.invalidInput) const mask = [0'u32, 0xff'u32, 0xffff'u32, 0xffffff'u32, 0xffffffff'u32] # Length is actually in the little-endian bytes that follow # Decode 4 bytes then mask the excess (to avoid branching) let lenlen = length - 60 # 1-4 len32 = (load32(input, ip) and mask[lenlen]) + 1 if len32 == 0: # wrap-around for 4-byte length return err(CodecError.invalidInput) when sizeof(int) == sizeof(len32): if len32 > int.high.uint32: # Can't have this many bytes.. return err(CodecError.invalidInput) length = int len32 ip += lenlen if ((output.len - op) < length) or ((input.len - ip) < length): return err(CodecError.invalidInput) copyMem(addr output[op], unsafeAddr input[ip], length) op += length ip += length continue of tagCopy1: if (input.len - ip) < 2: return err(CodecError.invalidInput) length = int(4 + ((tag shr 2) and 0x07)) offset = (uint32(tag and 0xe0) shl 3) or uint32(input[ip + 1]) ip += 2 of tagCopy2: if (input.len - ip) < 3: return err(CodecError.invalidInput) length = int(1 + (tag shr 2)) offset = uint32(load16(input, ip + 1)) ip += 3 else: # tagCopy4: if (input.len - ip) < 5: return err(CodecError.invalidInput) length = int(1 + (tag shr 2)) offset = load32(input, ip + 1) ip += 5 # offset = 0 is invalid, and we catch it by doing a wrapping -1 if op.uint32 <= (offset - 1'u32): return err(CodecError.invalidInput) var src = op - int offset # safe, because offset < op and op < int.high # Fast path: short non-overlapping copies if length <= 16 and offset >= 8 and (output.len - op) >= 16: # When offset is large enough, there is no overlap and we can use # bulk copy instructions - this is safe because we just checked that # there's enough space in the output buffer copyMem(addr output[op], addr output[src], 8) copyMem(addr output[op + 8], addr output[src + 8], 8) op += length continue if (output.len - op) < length: return err(CodecError.invalidInput) if (output.len - op) >= length + 10: var pos = op len = length while pos - src < 8: copyMem(addr output[pos], addr output[src], 8) len -= pos - src pos += pos - src while len > 0: copyMem(addr output[pos], addr output[src], 8) src += 8 pos += 8 len -= 8 else: var pos = op while pos < op + length: output[pos] = output[src] pos += 1 src += 1 op += length ok(op)