154 lines
4.3 KiB
154 lines
4.3 KiB
# These load templates assume there is enough data to read at the margin, which
# the code ensures via manual range checking - the built-in range check adds 40%
# execution time
template load16(input: openArray[byte], offsetParam: int): uint16 =
  ## Reads the two bytes of `input` at `offsetParam` and `offsetParam + 1`
  ## and combines them into a little-endian `uint16`.
  ##
  ## No index check is performed here: the caller must already have verified
  ## that both positions lie inside `input`.
  # Bind the offset once so that an argument expression such as `ip + 1` is
  # not re-evaluated for every byte that is read.
  let offset = offsetParam
  # Index through an unchecked pointer to bypass the built-in range check.
  let data = cast[ptr UncheckedArray[byte]](input)
  uint16(data[offset]) or (uint16(data[offset + 1]) shl 8)
template load32(input: openArray[byte], offsetParam: int): uint32 =
  ## Reads the four bytes of `input` at `offsetParam .. offsetParam + 3` and
  ## combines them into a little-endian `uint32`.
  ##
  ## No index check is performed here: the caller must already have verified
  ## that all four positions lie inside `input`.
  # Bind the offset once so that an argument expression such as `ip + 1` is
  # not re-evaluated for every byte that is read.
  let offset = offsetParam
  # Index through an unchecked pointer to bypass the built-in range check.
  let data = cast[ptr UncheckedArray[byte]](input)
  uint32(data[offset]) or
    (uint32(data[offset + 1]) shl 8) or
    (uint32(data[offset + 2]) shl 16) or
    (uint32(data[offset + 3]) shl 24)
func decodeAllTags*(
input: openArray[byte],
output: var openArray[byte]): Result[int, CodecError] =
## Decode all bytes of `input` into `output` and return the number of
## bytes written. Returns error if input does not fit in output.
if input.len <= 0: # let the optimizer know len > 0
return ok(0)
if output.len <= 0: # let the optimizer know len > 0
return err(CodecError.bufferTooSmall)
op = 0
ip = 0
length: int
offset: uint32
# TODO https://github.com/nim-lang/Nim/issues/19653
while uint(ip) < uint(input.len):
let tag = input[ip]
case (tag and 0x03)
of tagLiteral:
ip += 1
length = int((tag shr 2) + 1) # 1 <= len32 <= 64
if length <= 16 and (output.len - op) >= 16 and (input.len - ip) >= 16:
copyMem(addr output[op], unsafeAddr input[ip], 16)
op += length
ip += length
if length >= 61:
if (input.len - ip) < 61:
# There must be at least 61 bytes, else we wouldn't be in this branch
return err(CodecError.invalidInput)
const mask = [0'u32, 0xff'u32, 0xffff'u32, 0xffffff'u32, 0xffffffff'u32]
# Length is actually in the little-endian bytes that follow
# Decode 4 bytes then mask the excess (to avoid branching)
lenlen = length - 60 # 1-4
len32 = (load32(input, ip) and mask[lenlen]) + 1
if len32 == 0: # wrap-around for 4-byte length
return err(CodecError.invalidInput)
when sizeof(int) == sizeof(len32):
if len32 > int.high.uint32: # Can't have this many bytes..
return err(CodecError.invalidInput)
length = int len32
ip += lenlen
if ((output.len - op) < length) or
((input.len - ip) < length):
return err(CodecError.invalidInput)
copyMem(addr output[op], unsafeAddr input[ip], length)
op += length
ip += length
of tagCopy1:
if (input.len - ip) < 2:
return err(CodecError.invalidInput)
length = int(4 + ((tag shr 2) and 0x07))
offset = (uint32(tag and 0xe0) shl 3) or uint32(input[ip + 1])
ip += 2
of tagCopy2:
if (input.len - ip) < 3:
return err(CodecError.invalidInput)
length = int(1 + (tag shr 2))
offset = uint32(load16(input, ip + 1))
ip += 3
else: # tagCopy4:
if (input.len - ip) < 5:
return err(CodecError.invalidInput)
length = int(1 + (tag shr 2))
offset = load32(input, ip + 1)
ip += 5
# offset = 0 is invalid, and we catch it by doing a wrapping -1
if op.uint32 <= (offset - 1'u32):
return err(CodecError.invalidInput)
var src = op - int offset # safe, because offset < op and op < int.high
# Fast path: short non-overlapping copies
if length <= 16 and offset >= 8 and (output.len - op) >= 16:
# When offset is large enough, there is no overlap and we can use
# bulk copy instructions - this is safe because we just checked that
# there's enough space in the output buffer
copyMem(addr output[op], addr output[src], 8)
copyMem(addr output[op + 8], addr output[src + 8], 8)
op += length
if (output.len - op) < length:
return err(CodecError.invalidInput)
if (output.len - op) >= length + 10:
pos = op
len = length
while pos - src < 8:
copyMem(addr output[pos], addr output[src], 8)
len -= pos - src
pos += pos - src
while len > 0:
copyMem(addr output[pos], addr output[src], 8)
src += 8
pos += 8
len -= 8
var pos = op
while pos < op + length:
output[pos] = output[src]
pos += 1
src += 1
op += length