nim-snappy/snappy.nim

298 lines
10 KiB
Nim

{.push raises: [].}
import
stew/[arrayops, endians2, leb128],
results,
./snappy/[codec, decoder, encoder]
export codec, results
## Compression and decompression utilities for the snappy compression algorithm:
##
## * [Landing page](http://google.github.io/snappy/)
## * [Format description](https://github.com/google/snappy/blob/main/format_description.txt)
##
## This file contains the in-memory API - see
## `snappy/faststreams` and `snappy/streams` for `faststreams` and `std/streams`
## support.
##
## * `compress`/`uncompress` work with caller-allocated buffers
## * `encode`/`decode` are convenience wrappers for the above that take care of
## memory allocation
##
## Framed encodings are also supported via functions carrying the `Framed` suffix
##
## * [Framing format](https://github.com/google/snappy/blob/main/framing_format.txt)
func compress*(
    input: openArray[byte],
    output: var openArray[byte]): Result[int, CodecError] =
  ## Snappy-compress `input` into the caller-supplied `output` buffer and
  ## return how many bytes of `output` were filled.
  ##
  ## Errors:
  ## * `CodecError.invalidInput` - `input.len` is rejected by `checkInputLen`
  ##   (inputs may be no larger than 2^32-1 bytes)
  ## * `CodecError.bufferTooSmall` - `output` holds fewer than
  ##   `maxCompressedLen(input.len)` bytes
  ##
  ## See `compressFramed` for the framed format that supports arbitrary inputs.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  let inputLen = checkInputLen(input.len).valueOr:
    return err(CodecError.invalidInput)

  if output.len.uint64 < maxCompressedLen(inputLen):
    return err(CodecError.bufferTooSmall)

  # The compressed data opens with the varint (LEB128) encoded length of the
  # uncompressed bytes.
  let preamble = inputLen.toBytes(Leb128)
  output[0..<preamble.len] = preamble.toOpenArray()

  var
    consumed = 0                  # bytes of `input` already encoded
    produced = int(preamble.len)  # bytes of `output` already filled

  # Hand the input to `encodeBlock` in pieces of at most `maxBlockLen` bytes,
  # appending each encoded piece right after the previous one.
  while consumed < input.len:
    let pieceLen = min(input.len - consumed, maxBlockLen.int)
    produced += encodeBlock(
      input.toOpenArray(consumed, consumed + pieceLen - 1),
      output.toOpenArray(produced, output.high))
    consumed += pieceLen

  ok(produced)
func encode*(input: openArray[byte]): seq[byte] =
  ## Snappy-compress `input` into a freshly allocated sequence.
  ##
  ## An empty sequence is returned when `maxCompressedLen(input.len)` yields no
  ## value, i.e. when `input` is larger than 2^32-1 bytes or too large for an
  ## output buffer of at least `maxCompressedLen(input.len)` bytes to be
  ## constructed.
  ##
  ## See `encodeFramed` for the framed format that supports arbitrary lengths.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  let capacity = maxCompressedLen(input.len).valueOr:
    return

  # Allocate for the worst case, then trim to what was actually produced.
  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](capacity)

  let used = compress(input, result).expect("we've checked lengths already")
  result.setLen(used)
func uncompress*(input: openArray[byte], output: var openArray[byte]):
    Result[int, CodecError] =
  ## Decompress `input` into the caller-supplied `output` buffer and return
  ## the number of bytes written.
  ##
  ## `output` must be at least `uncompressedLen` bytes.
  ##
  ## Errors:
  ## * `CodecError.invalidInput` - the length header cannot be read, the
  ##   decoded size differs from the header, or data follows a zero-length
  ##   header
  ## * `CodecError.bufferTooSmall` - `output` is shorter than the length
  ##   announced in the header
  ##
  ## In case of errors, `output` may have been partially written to.
  let (expectedLen, headerLen) = uint32.fromBytes(input, Leb128)

  if headerLen <= 0:
    return err(CodecError.invalidInput)
  if output.len.uint64 < expectedLen.uint64:
    return err(CodecError.bufferTooSmall)

  if expectedLen == 0:
    # A zero-length header must be the entire input.
    if headerLen == input.len():
      return ok(0)
    return err(CodecError.invalidInput)

  let decoded =
    ? decodeAllTags(input.toOpenArray(headerLen, input.high), output)

  if decoded.uint64 == expectedLen:
    ok(decoded)
  else:
    err(CodecError.invalidInput) # Header does not match content
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
  ## Decompress `input` into a freshly allocated sequence.
  ##
  ## An empty sequence is returned on any error, including when the
  ## uncompressed length read from `input` exceeds `maxSize`.
  ##
  ## `maxSize` must be used for untrusted inputs to limit the amount of memory
  ## allocated by this function, which otherwise is read from the stream.
  let expected = uncompressedLen(input).valueOr:
    return

  # Refuse to allocate more than the caller allows or than a `seq` can index.
  let fits = expected <= maxSize and expected <= int.high.uint64
  if not fits:
    return

  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](int expected)

  let outcome = uncompress(input, result)
  if outcome.isErr():
    result = @[] # Empty return on error
func compressFramed*(input: openArray[byte], output: var openArray[byte]):
    Result[int, FrameError] =
  ## Compress `input` using the snappy framing format into the caller-supplied
  ## `output` buffer and return how many bytes of `output` were filled.
  ##
  ## `output` must be at least `maxCompressedLenFramed(input.len)` bytes, or
  ## `FrameError.bufferTooSmall` is returned.
  ##
  ## See `compress` for the simple non-framed snappy format.
  ## See `snappy/faststreams` and `snappy/streams` for stream-based versions.
  if output.len.uint64 < maxCompressedLenFramed(input.len):
    return err(FrameError.bufferTooSmall)

  # The output starts with `framingHeader`, followed by the encoded frames.
  output[0..<framingHeader.len] = framingHeader

  var
    consumed = 0                   # bytes of `input` already framed
    produced = framingHeader.len   # bytes of `output` already filled

  # Each frame carries at most `maxUncompressedFrameDataLen` bytes of input.
  while consumed < input.len:
    let pieceLen = min(input.len - consumed, int maxUncompressedFrameDataLen)
    produced += encodeFrame(
      input.toOpenArray(consumed, consumed + pieceLen - 1),
      output.toOpenArray(produced, output.high))
    consumed += pieceLen

  ok(produced)
func encodeFramed*(input: openArray[byte]): seq[byte] =
  ## Compress `input` using the snappy framing format and return the result
  ## in a freshly allocated sequence.
  ##
  ## An empty sequence is returned when `maxCompressedLenFramed(input.len)`
  ## exceeds `int.high`, i.e. when the output buffer cannot be allocated.
  ##
  ## See `compressFramed` for the variant working on caller-supplied buffers.
  let capacity = maxCompressedLenFramed(input.len)

  if capacity <= int.high.uint64:
    # Allocate for the worst case, then trim to what was actually produced.
    # TODO https://github.com/nim-lang/Nim/issues/19357
    result = newSeqUninitialized[byte](int capacity)

    let used = compressFramed(input, result).expect("lengths checked")
    result.setLen(used)
func uncompressFramed*(
    input: openArray[byte], output: var openArray[byte], checkHeader = true,
    checkIntegrity = true):
    Result[tuple[read: int, written: int], FrameError] =
  ## Uncompress as many frames as possible from `input` and write them to
  ## `output`, returning the number of bytes read and written.
  ##
  ## * `checkHeader` - when `true`, `input` must begin with `framingHeader`,
  ##   which is then skipped; when `false`, parsing starts at offset 0.
  ## * `checkIntegrity` - when `true`, the CRC stored in each data chunk is
  ##   compared against `maskedCrc` of the uncompressed data.
  ##
  ## When the `output` buffer is too small to hold the uncompressed data,
  ## the function will return the number of bytes consumed from the input and
  ## the number of correctly written bytes in the output (which may be smaller
  ## than the length of the output buffer).
  ##
  ## Decompression can be resumed by calling `uncompressFramed` again with
  ## `checkHeader = false` and the input positioned at the returned read offset
  ## and a new output buffer.
  ##
  ## Errors:
  ## * `FrameError.invalidInput` - missing or wrong stream header, truncated
  ##   chunk, chunk too short to hold its CRC, oversized chunk payload or
  ##   undecodable compressed data
  ## * `FrameError.crcMismatch` - `checkIntegrity` is set and a CRC differs
  ## * `FrameError.unknownChunk` - a chunk id below `0x80` that is neither
  ##   `chunkCompressed` nor `chunkUncompressed`
  ##
  ## In case of errors, `output` may be partially overwritten with invalid data.
  var
    read =
      if checkHeader:
        if input.len < framingHeader.len:
          return err(FrameError.invalidInput)

        if input.toOpenArray(0, framingHeader.len - 1) != framingHeader:
          return err(FrameError.invalidInput)
        framingHeader.len
      else:
        0
    written = 0

  while (let remaining = input.len - read; remaining > 0):
    # Every chunk starts with a 4-byte header that is handed to
    # `decodeFrameHeader` to obtain the chunk id and the data length.
    if remaining < 4:
      return err(FrameError.invalidInput)

    let
      (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))

    read += 4

    # The announced chunk data must be fully present in `input`.
    if remaining - 4 < dataLen:
      return err(FrameError.invalidInput)

    if id == chunkCompressed:
      if dataLen < 4: # Data chunks begin with a 4-byte CRC
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
        maxOutput = min(maxUncompressedFrameDataLen.int, output.len - written)
        uncompressed = uncompress(
          input.toOpenArray(read + 4, read + dataLen - 1),
          output.toOpenArray(written, written + maxOutput - 1)).valueOr:
            let res = case error
              of CodecError.bufferTooSmall:
                # Tell apart "caller's buffer is full" (resumable, report
                # progress up to the start of this chunk) from a chunk that
                # announces more data than a frame may carry.
                let uncompressed =
                  uncompressedLen(input.toOpenArray(read + 4, read + dataLen - 1))
                if uncompressed.isErr() or
                    uncompressed.get() > maxUncompressedFrameDataLen:
                  err(FrameError.invalidInput)
                else:
                  ok((read - 4, written))
              of CodecError.invalidInput: err(FrameError.invalidInput)
            return res

      if checkIntegrity and maskedCrc(
          output.toOpenArray(written, written + (uncompressed - 1))) != crc:
        return err(FrameError.crcMismatch)

      written += uncompressed

    elif id == chunkUncompressed:
      if dataLen < 4: # Data chunks begin with a 4-byte CRC
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)

      if checkIntegrity and
          maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
        return err(FrameError.crcMismatch)

      let uncompressed = dataLen - 4 # dataLen includes CRC length

      if uncompressed > maxUncompressedFrameDataLen.int:
        return err(FrameError.invalidInput)

      if uncompressed > output.len - written:
        # Output is full - report progress up to the start of this chunk so
        # that the caller can resume from there.
        return ok((read - 4, written))

      # A chunk that carries only its CRC has no payload: `input[read + 4]`
      # and `output[written]` may then lie one past the end of their buffers,
      # so indexing them would raise `IndexDefect` - skip the copy entirely.
      if uncompressed > 0:
        copyMem(addr output[written], unsafeAddr input[read + 4], uncompressed)
      written += uncompressed

    elif id < 0x80:
      return err(FrameError.unknownChunk) # Reserved unskippable chunk
    else:
      discard # Reserved skippable chunk (for example framing format header)

    read += dataLen

  ok((read, written))
func decodeFramed*(
    input: openArray[byte], maxSize = int.high,
    checkIntegrity = true): seq[byte] =
  ## Uncompress as many frames as possible from `input` and return the
  ## uncompressed output in a freshly allocated sequence.
  ##
  ## `maxSize` caps the memory allocated for the output: when the length
  ## reported by `uncompressedLenFramed` exceeds it, nothing is allocated and
  ## an empty buffer is returned.
  ##
  ## `checkIntegrity` is forwarded to `uncompressFramed`.
  ##
  ## In case of errors, an empty buffer is returned.
  let expected = uncompressedLenFramed(input).valueOr:
    return

  if expected <= maxSize.uint64:
    # TODO https://github.com/nim-lang/Nim/issues/19357
    result = newSeqUninitialized[byte](int expected)

    let outcome =
      uncompressFramed(input, result, checkIntegrity = checkIntegrity)
    if outcome.isErr():
      result = @[] # Empty return on error
template compress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `encode` - compress is for user-supplied buffers".} =
  ## Deprecated single-argument form kept for backwards compatibility - it
  ## simply forwards to `encode`, which allocates and returns the output.
  encode(input)
template uncompress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `decode` - uncompress is for user-supplied buffers".} =
  ## Deprecated single-argument form kept for backwards compatibility - it
  ## simply forwards to `decode` with the default `maxSize`.
  decode(input)