154 lines
4.3 KiB
Nim
154 lines
4.3 KiB
Nim
import
|
|
stew/[endians2],
|
|
./codec
|
|
|
|
# These load templates assume there is enough data to read at the margin, which
|
|
# the code ensures via manual range checking - the built-in range check adds 40%
|
|
# execution time
|
|
template load16(input: openArray[byte], offsetParam: int): uint16 =
  ## Reads the two bytes of `input` starting at `offsetParam` and converts
  ## them with `fromBytesLE` (little-endian) to a `uint16`.
  ##
  ## The bytes are reached through an unchecked pointer view of `input`, so
  ## no bounds check is performed here: the caller must already have verified
  ## that `offsetParam + 1` is a valid index.
  let first = offsetParam # evaluate the argument expression exactly once
  fromBytesLE(
    uint16,
    toOpenArray(cast[ptr UncheckedArray[byte]](input), first, first + 1))
|
|
|
|
template load32(input: openArray[byte], offsetParam: int): uint32 =
  ## Reads the four bytes of `input` starting at `offsetParam` and converts
  ## them with `fromBytesLE` (little-endian) to a `uint32`.
  ##
  ## The bytes are reached through an unchecked pointer view of `input`, so
  ## no bounds check is performed here: the caller must already have verified
  ## that `offsetParam + 3` is a valid index.
  let first = offsetParam # evaluate the argument expression exactly once
  fromBytesLE(
    uint32,
    toOpenArray(cast[ptr UncheckedArray[byte]](input), first, first + 3))
|
|
|
|
func decodeAllTags*(
    input: openArray[byte],
    output: var openArray[byte]): Result[int, CodecError] =
  ## Decode all tags found in `input`, writing the decoded bytes to `output`,
  ## and return the number of bytes written.
  ##
  ## Returns `ok(0)` for empty input, `CodecError.bufferTooSmall` when
  ## `output` is empty but `input` is not, and `CodecError.invalidInput` when
  ## a tag is truncated, a copy uses offset 0 or refers to data before the
  ## start of `output`, or the decoded data does not fit in `output`.
  ##
  ## The fast paths copy in fixed-size chunks, so bytes of `output` located
  ## past the returned length may have been overwritten.

  if input.len <= 0: # let the optimizer know len > 0
    return ok(0)

  if output.len <= 0: # let the optimizer know len > 0
    return err(CodecError.bufferTooSmall)

  var
    op = 0 # write position in `output`
    ip = 0 # read position in `input`
    length: int # number of bytes produced by the current tag
    offset: uint32 # back-reference distance of the current copy tag

  # TODO https://github.com/nim-lang/Nim/issues/19653
  while uint(ip) < uint(input.len):
    let tag = input[ip]

    # The low two bits of the tag select the element type
    case (tag and 0x03)
    of tagLiteral:
      ip += 1

      length = int((tag shr 2) + 1) # 1 <= length <= 64

      # Fast path: short literal with at least 16 bytes available on both
      # sides - always copy 16 bytes but only advance by `length`
      if length <= 16 and (output.len - op) >= 16 and (input.len - ip) >= 16:
        copyMem(addr output[op], unsafeAddr input[ip], 16)
        op += length
        ip += length
        continue

      if length >= 61:
        if (input.len - ip) < 61:
          # An extended length is expected to describe a literal of at least
          # 61 bytes, so fewer remaining bytes are treated as invalid input;
          # this also guarantees that the 4-byte load below stays in bounds
          return err(CodecError.invalidInput)

        const mask = [0'u32, 0xff'u32, 0xffff'u32, 0xffffff'u32, 0xffffffff'u32]

        # Length is actually in the little-endian bytes that follow
        # Decode 4 bytes then mask the excess (to avoid branching)
        let
          lenlen = length - 60 # 1-4
          len32 = (load32(input, ip) and mask[lenlen]) + 1

        if len32 == 0: # wrap-around for 4-byte length
          return err(CodecError.invalidInput)

        when sizeof(int) == sizeof(len32):
          if len32 > int.high.uint32: # Can't have this many bytes..
            return err(CodecError.invalidInput)

        length = int len32
        ip += lenlen

      # The literal must be fully present in `input` and fit in `output`
      if ((output.len - op) < length) or
          ((input.len - ip) < length):
        return err(CodecError.invalidInput)

      copyMem(addr output[op], unsafeAddr input[ip], length)

      op += length
      ip += length
      continue

    of tagCopy1:
      # Tag byte plus one offset byte
      if (input.len - ip) < 2:
        return err(CodecError.invalidInput)

      # Length (4..11) is in tag bits 2-4; the offset is 11 bits wide: its
      # high 3 bits are tag bits 5-7, its low 8 bits are the following byte
      length = int(4 + ((tag shr 2) and 0x07))
      offset = (uint32(tag and 0xe0) shl 3) or uint32(input[ip + 1])

      ip += 2
    of tagCopy2:
      # Tag byte plus a 16-bit offset
      if (input.len - ip) < 3:
        return err(CodecError.invalidInput)

      length = int(1 + (tag shr 2))
      offset = uint32(load16(input, ip + 1))

      ip += 3
    else: # tagCopy4:
      # Tag byte plus a 32-bit offset
      if (input.len - ip) < 5:
        return err(CodecError.invalidInput)

      length = int(1 + (tag shr 2))
      offset = load32(input, ip + 1)
      ip += 5

    # offset = 0 is invalid, and we catch it by doing a wrapping -1: for
    # offset = 0 the right-hand side becomes uint.high, which is never
    # smaller than `op`. The comparison is done at full `uint` width so that
    # `op` is not truncated once more than 4 GiB has been written.
    if uint(op) <= (uint(offset) - 1'u):
      return err(CodecError.invalidInput)

    var src = op - int offset # safe, because offset <= op and op < int.high

    # Fast path: short non-overlapping copies
    if length <= 16 and offset >= 8 and (output.len - op) >= 16:
      # When offset is large enough, there is no overlap and we can use
      # bulk copy instructions - this is safe because we just checked that
      # there's enough space in the output buffer
      copyMem(addr output[op], addr output[src], 8)
      copyMem(addr output[op + 8], addr output[src + 8], 8)
      op += length
      continue

    if (output.len - op) < length:
      return err(CodecError.invalidInput)

    if (output.len - op) >= length + 10:
      # Enough slack after the copy to work in 8-byte chunks, even though
      # that may write a few bytes past `op + length`
      var
        pos = op
        remaining = length

      # While source and destination are less than 8 bytes apart, an 8-byte
      # chunk copy overlaps itself. Each round doubles the distance by
      # replicating the pattern; `moveMem` is required here because
      # `copyMem` does not permit overlapping regions.
      while pos - src < 8:
        moveMem(addr output[pos], addr output[src], 8)
        remaining -= pos - src
        pos += pos - src

      # Source and destination are now at least 8 bytes apart, so the
      # remaining chunk copies never overlap
      while remaining > 0:
        copyMem(addr output[pos], addr output[src], 8)
        src += 8
        pos += 8
        remaining -= 8

    else:
      # Close to the end of `output`: copy byte by byte, which handles
      # overlapping ranges and never writes past `op + length`
      var pos = op
      while pos < op + length:
        output[pos] = output[src]
        pos += 1
        src += 1

    op += length

  ok(op)
|