298 lines
10 KiB
Nim
298 lines
10 KiB
Nim
{.push raises: [].}
|
|
|
|
import
|
|
stew/[arrayops, endians2, leb128],
|
|
results,
|
|
./snappy/[codec, decoder, encoder]
|
|
|
|
export codec, results
|
|
|
|
## Compression and decompression utilities for the snappy compression algorithm:
##
## * [Landing page](http://google.github.io/snappy/)
## * [Format description](https://github.com/google/snappy/blob/main/format_description.txt)
##
## This file contains the in-memory API - see
## `snappy/faststreams` and `snappy/streams` for `faststreams` and `std/streams`
## support.
##
## * `compress`/`uncompress` work with caller-allocated buffers
## * `encode`/`decode` are convenience wrappers for the above that take care of
##   memory allocation
##
## Framed encodings are also supported via functions carrying the `Framed` suffix
##
## * [Framing format](https://github.com/google/snappy/blob/main/framing_format.txt)
|
|
|
|
func compress*(
    input: openArray[byte],
    output: var openArray[byte]): Result[int, CodecError] =
  ## Encodes `input` in the plain (non-framed) snappy format into `output` and
  ## returns how many bytes of `output` were used.
  ##
  ## `CodecError.invalidInput` is returned when `checkInputLen` rejects
  ## `input.len` - the plain format handles at most 2^32-1 bytes of input.
  ##
  ## `CodecError.bufferTooSmall` is returned when `output` is shorter than
  ## `maxCompressedLen(input.len)` bytes.
  ##
  ## See `compressFramed` for the framed format that supports arbitrary inputs.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  let inputLen = checkInputLen(input.len).valueOr:
    return err(CodecError.invalidInput)

  if output.len.uint64 < maxCompressedLen(inputLen):
    return err(CodecError.bufferTooSmall)

  # A block opens with the length of the unencoded bytes, stored as a varint.
  let preamble = inputLen.toBytes(Leb128)
  output[0..<preamble.len] = preamble.toOpenArray()

  var
    consumed = 0
    produced = int(preamble.len)

  # Hand the input to the block encoder in slices of at most `maxBlockLen`
  # bytes, appending each encoded block right after the previous one.
  while consumed < input.len:
    let sliceLen = min(input.len - consumed, maxBlockLen.int)
    produced += encodeBlock(
      input.toOpenArray(consumed, consumed + sliceLen - 1),
      output.toOpenArray(produced, output.high))
    consumed += sliceLen

  ok(produced)
|
|
|
|
func encode*(input: openArray[byte]): seq[byte] =
  ## Convenience wrapper around `compress` that allocates the output buffer
  ## itself and returns the compressed bytes.
  ##
  ## An empty buffer is returned when `maxCompressedLen(input.len)` yields no
  ## usable bound: `input` may be no larger than 2^32-1 bytes and must be small
  ## enough that a buffer of `maxCompressedLen(input.len)` bytes can be
  ## constructed.
  ##
  ## See `encodeFramed` for the framed format that supports arbitrary lengths.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  let capacity = maxCompressedLen(input.len).valueOr:
    return

  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](capacity)

  # The buffer was sized from `maxCompressedLen`, so `compress` is expected to
  # succeed; afterwards the buffer is trimmed to the bytes actually used.
  let used = compress(input, result).expect("we've checked lengths already")
  result.setLen(used)
|
|
|
|
func uncompress*(input: openArray[byte], output: var openArray[byte]):
    Result[int, CodecError] =
  ## Decodes the plain (non-framed) snappy data in `input` into `output` and
  ## returns the number of bytes written.
  ##
  ## `output` must have room for the length announced by the varint header at
  ## the start of `input`, or `CodecError.bufferTooSmall` is returned.
  ##
  ## `CodecError.invalidInput` is returned when the header cannot be parsed,
  ## when a zero-length header is followed by extra bytes, or when the number
  ## of decoded bytes differs from the announced length.
  ##
  ## In case of errors, `output` may have been partially written to.
  let (announced, headerLen) = uint32.fromBytes(input, Leb128)
  if headerLen <= 0:
    return err(CodecError.invalidInput)

  if output.len.uint64 < announced.uint64:
    return err(CodecError.bufferTooSmall)

  if announced == 0:
    # An empty payload must consist of the length header and nothing else.
    if headerLen == input.len():
      return ok(0)
    return err(CodecError.invalidInput)

  let produced =
    ? decodeAllTags(input.toOpenArray(headerLen, input.high), output)

  if produced.uint64 == announced:
    ok(produced)
  else:
    err(CodecError.invalidInput) # Header does not match content
|
|
|
|
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
  ## Convenience wrapper around `uncompress` that allocates the output buffer
  ## and returns the uncompressed bytes.
  ##
  ## An empty sequence is returned on any error, including when the length
  ## announced by `input` exceeds `maxSize`.
  ##
  ## The size of the allocation is read from `input` itself, so `maxSize` must
  ## be used with untrusted inputs to limit the memory this function allocates.
  let announced = uncompressedLen(input).valueOr:
    return

  # The announced size must respect the caller's limit and fit in an `int`.
  let withinLimits = announced <= maxSize and announced <= int.high.uint64
  if not withinLimits:
    return

  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](int announced)

  let outcome = uncompress(input, result)
  if outcome.isErr():
    result = @[] # Empty return on error
|
|
|
|
func compressFramed*(input: openArray[byte], output: var openArray[byte]):
    Result[int, FrameError] =
  ## Encodes `input` in the framed snappy format into `output` and returns how
  ## many bytes of `output` were used.
  ##
  ## `output` must be at least `maxCompressedLenFramed(input.len)` bytes, or
  ## `FrameError.bufferTooSmall` is returned.
  ##
  ## See `compress` for the simple non-framed snappy format.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  if output.len.uint64 < maxCompressedLenFramed(input.len):
    return err(FrameError.bufferTooSmall)

  # The output starts with the framing header, followed by the frames.
  output[0..<framingHeader.len] = framingHeader

  var
    consumed = 0
    produced = framingHeader.len

  # Each frame carries at most `maxUncompressedFrameDataLen` bytes of input.
  while consumed < input.len:
    let sliceLen = min(input.len - consumed, int maxUncompressedFrameDataLen)
    produced += encodeFrame(
      input.toOpenArray(consumed, consumed + sliceLen - 1),
      output.toOpenArray(produced, output.high))
    consumed += sliceLen

  ok(produced)
|
|
|
|
func encodeFramed*(input: openArray[byte]): seq[byte] =
  ## Convenience wrapper around `compressFramed` that allocates the output
  ## buffer itself and returns the framed, compressed bytes.
  ##
  ## An empty buffer is returned when `maxCompressedLenFramed(input.len)` is
  ## larger than `int.high`, since a buffer of that size cannot be created.
  let capacity = maxCompressedLenFramed(input.len)
  if capacity > int.high.uint64:
    return

  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](int capacity)

  # The buffer was sized from `maxCompressedLenFramed`, so `compressFramed` is
  # expected to succeed; afterwards the buffer is trimmed to the bytes used.
  let used = compressFramed(input, result).expect("lengths checked")
  result.setLen(used)
|
|
|
|
func uncompressFramed*(
    input: openArray[byte], output: var openArray[byte], checkHeader = true,
    checkIntegrity = true):
    Result[tuple[read: int, written: int], FrameError] =
  ## Uncompress as many frames as possible from `input` and write them to
  ## `output`, returning the number of bytes read and written.
  ##
  ## When `checkHeader` is set, `input` must start with `framingHeader`.
  ## When `checkIntegrity` is set, the CRC stored in every compressed and
  ## uncompressed chunk is verified against the uncompressed data.
  ##
  ## When the `output` buffer is too small to hold the uncompressed data,
  ## the function will return the number of bytes consumed from the input and
  ## the number of correctly written bytes in the output (which may be smaller
  ## than the length of the output buffer).
  ##
  ## Decompression can be resumed by calling `uncompressFramed` again with
  ## `checkHeader = false` and the input positioned at the returned read offset
  ## and a new output buffer.
  ##
  ## Errors:
  ##
  ## * `FrameError.invalidInput` - missing or wrong framing header, truncated
  ##   chunk, chunk too short to hold its CRC, chunk data exceeding
  ##   `maxUncompressedFrameDataLen`, or compressed data that fails to decode
  ## * `FrameError.crcMismatch` - a CRC check failed
  ## * `FrameError.unknownChunk` - a reserved unskippable chunk was found
  ##
  ## In case of errors, `output` may be partially overwritten with invalid data.
  var
    read =
      if checkHeader:
        if input.len < framingHeader.len:
          return err(FrameError.invalidInput)

        if input.toOpenArray(0, framingHeader.len - 1) != framingHeader:
          return err(FrameError.invalidInput)
        framingHeader.len
      else:
        0
    written = 0

  while (let remaining = input.len - read; remaining > 0):
    # Every chunk starts with a 4-byte header holding its id and data length.
    if remaining < 4:
      return err(FrameError.invalidInput)
    let
      (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))
    read += 4

    # The announced chunk data must be fully present in `input`.
    if remaining - 4 < dataLen:
      return err(FrameError.invalidInput)

    if id == chunkCompressed:
      if dataLen < 4:
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
        # Never hand out more room than a single frame is allowed to fill.
        maxOutput = min(maxUncompressedFrameDataLen.int, output.len - written)
        uncompressed = uncompress(
          input.toOpenArray(read + 4, read + dataLen - 1),
          output.toOpenArray(written, written + maxOutput - 1)).valueOr:
            let res = case error
            of CodecError.bufferTooSmall:
              # Tell apart "caller's buffer is full" (resumable, report the
              # offset of this chunk's header) from "chunk claims more data
              # than a frame may hold" (invalid).
              let uncompressed =
                uncompressedLen(input.toOpenArray(read + 4, read + dataLen - 1))
              if uncompressed.isErr() or
                  uncompressed.get() > maxUncompressedFrameDataLen:
                err(FrameError.invalidInput)
              else:
                ok((read - 4, written))
            of CodecError.invalidInput: err(FrameError.invalidInput)
            return res

      if checkIntegrity and maskedCrc(
          output.toOpenArray(written, written + (uncompressed - 1))) != crc:
        return err(FrameError.crcMismatch)

      written += uncompressed

    elif id == chunkUncompressed:
      if dataLen < 4:
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)

      if checkIntegrity and
          maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
        return err(FrameError.crcMismatch)

      let uncompressed = dataLen - 4 # dataLen includes CRC length

      if uncompressed > maxUncompressedFrameDataLen.int:
        return err(FrameError.invalidInput)

      if uncompressed > output.len - written:
        # Out of room: report the offset of this chunk's header so that the
        # caller can resume from it.
        return ok((read - 4, written))

      # A chunk may carry nothing but its CRC. Indexing `output[written]` or
      # `input[read + 4]` would then be out of bounds whenever the output is
      # already full (or empty) or the chunk ends the input, so the copy is
      # skipped for empty payloads.
      if uncompressed > 0:
        copyMem(addr output[written], unsafeAddr input[read + 4], uncompressed)
        written += uncompressed

    elif id < 0x80:
      return err(FrameError.unknownChunk) # Reserved unskippable chunk

    else:
      discard # Reserved skippable chunk (for example framing format header)

    read += dataLen

  ok((read, written))
|
|
|
|
func decodeFramed*(
    input: openArray[byte], maxSize = int.high,
    checkIntegrity = true): seq[byte] =
  ## Convenience wrapper around `uncompressFramed` that allocates the output
  ## buffer and returns the uncompressed bytes of the framed data in `input`.
  ##
  ## The buffer is allocated up front with the size reported by
  ## `uncompressedLenFramed(input)`. When that size exceeds `maxSize`, nothing
  ## is allocated and an empty buffer is returned.
  ##
  ## `checkIntegrity` is forwarded to `uncompressFramed`.
  ##
  ## In case of errors, an empty buffer is returned.
  let announced = uncompressedLenFramed(input).valueOr:
    return

  if announced > maxSize.uint64:
    return

  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](int announced)

  let outcome = uncompressFramed(input, result, checkIntegrity = checkIntegrity)
  if outcome.isErr():
    result = @[] # Empty return on error
|
|
|
|
template compress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `encode` - compress is for user-supplied buffers".} =
  ## Deprecated alias that forwards to `encode`.
  input.encode()
|
|
template uncompress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `decode` - uncompress is for user-supplied buffers".} =
  ## Deprecated alias that forwards to `decode` with its default `maxSize`.
  input.decode()
|