From eabc45992a19c20d1c16e3e4bbfe8b7428b25ff7 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sat, 2 Apr 2022 17:13:29 +0200 Subject: [PATCH] cleanups --- snappy.nim | 133 ++++++++++++++--------------------------- snappy/codec.nim | 35 ++++++++++- snappy/faststreams.nim | 37 +++++------- snappy/streams.nim | 5 +- 4 files changed, 96 insertions(+), 114 deletions(-) diff --git a/snappy.nim b/snappy.nim index a6f5db3..4ab4baa 100644 --- a/snappy.nim +++ b/snappy.nim @@ -100,7 +100,13 @@ func uncompress*(input: openArray[byte], output: var openArray[byte]): return err(CodecError.invalidInput) return ok(0) - decodeAllTags(input.toOpenArray(bytesRead, input.high), output) + let written = + ? decodeAllTags(input.toOpenArray(bytesRead, input.high), output) + + if written.uint64 != lenU32: + return err(CodecError.invalidInput) # Header does not match content + + return ok(written) func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] = ## Decode input returning the uncompressed output. On error, return an empty @@ -111,19 +117,14 @@ func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] = let uncompressed = uncompressedLen(input).valueOr: return - if uncompressed > maxSize: + if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64: return - when sizeof(int) <= sizeof(uncompressed): - if uncompressed.uint64 > int.high.uint64: - return - # TODO https://github.com/nim-lang/Nim/issues/19357 result = newSeqUninitialized[byte](int uncompressed) - let written = uncompress(input, result).valueOr: - return @[] # Empty return on error - if written != result.len: - return @[] # Header does not match content + + if uncompress(input, result).isErr(): + result = @[] # Empty return on error func compressFramed*(input: openArray[byte], output: var openArray[byte]): Result[int, FrameError] = @@ -143,7 +144,7 @@ func compressFramed*(input: openArray[byte], output: var openArray[byte]): written = framingHeader.len while (let remaining = input.len - read; remaining > 0): let - frameSize = min(remaining, maxUncompressedFrameDataLen.int) + frameSize = min(remaining, int maxUncompressedFrameDataLen) written += encodeFrame( input.toOpenArray(read, read + frameSize - 1), output.toOpenArray(written, output.high)) @@ -153,12 +154,12 @@ func compressFramed*(input: openArray[byte], output: var openArray[byte]): ok(written) func encodeFramed*(input: openArray[byte]): seq[byte] = - let compressedLen = maxCompressedLenFramed(input.len) - if compressedLen > int.high.uint64: + let maxCompressed = maxCompressedLenFramed(input.len) + if maxCompressed > int.high.uint64: return # TODO https://github.com/nim-lang/Nim/issues/19357 - result = newSeqUninitialized[byte](int compressedLen.int) + result = newSeqUninitialized[byte](int maxCompressed) let written = compressFramed(input, result).expect("lengths checked") @@ -187,51 +188,51 @@ func uncompressFramed*(input: openArray[byte], output: var openArray[byte]): (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3)) read += 4 - if remaining < dataLen: + if remaining - 4 < dataLen: return err(FrameError.invalidInput) if id == chunkCompressed: + if dataLen < 4: + return err(FrameError.invalidInput) + let crc = uint32.fromBytesLE input.toOpenArray(read, read + 3) - - # `dataLen` includes length of crc - let uncompressed = uncompress( + uncompressed = uncompress( input.toOpenArray(read + 4, read + dataLen - 1), output.toOpenArray(written, output.high)).valueOr: - let res = case error - of CodecError.bufferTooSmall: ok((read - 4, written)) - of CodecError.invalidInput: err(FrameError.invalidInput) - return res + let res = case error + of CodecError.bufferTooSmall: ok((read - 4, written)) + of CodecError.invalidInput: err(FrameError.invalidInput) + return res - if maskedCrc(output.toOpenArray(written, written + uncompressed - 1)) != crc: + if maskedCrc( + output.toOpenArray(written, written + (uncompressed - 1))) != crc: return err(FrameError.crcMismatch) written += uncompressed elif id == chunkUncompressed: + if dataLen < 4: + return err(FrameError.invalidInput) + let crc = uint32.fromBytesLE input.toOpenArray(read, read + 3) - # `dataLen` includes length of crc - if maskedCrc(input.toOpenArray(read + 4, read + dataLen - 1)) != crc: + if maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc: return err(FrameError.crcMismatch) - if dataLen - 4 > output.len - written: - return ok((read, written)) + let uncompressed = dataLen - 4 # dataLen includes CRC length + if uncompressed > output.len - written: + return ok((read - 4, written)) - copyMem(addr output[written], unsafeAddr input[read + 4], dataLen - 4) - written += dataLen - 4 + copyMem(addr output[written], unsafeAddr input[read + 4], uncompressed) + written += uncompressed elif id < 0x80: - # Reserved unskippable chunks (chunk types 0x02-0x7f) - # if we encounter this type of chunk, stop decoding - # the spec says it is an error - return err(FrameError.unknownFrame) + return err(FrameError.unknownChunk) # Reserved unskippable chunk else: - # Reserved skippable chunks (chunk types 0x80-0xfe) - # including chunkStream (0xff) should be skipped - discard + discard # Reserved skippable chunk (for example framing format header) read += dataLen @@ -246,61 +247,17 @@ func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] = ## the margins in maxCompresssedLen! ## ## In case of errors, an empty buffer is returned. + let uncompressed = uncompressedLenFramed(input).valueOr: + return - # Start by computing expected length - in-depth error checking will be done - # during actual decoding! - var - read = 0 - expected = 0 + if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64: + return - while (let remaining = input.len - read; remaining > 0): - if remaining < 4: - return + # TODO https://github.com/nim-lang/Nim/issues/19357 + result = newSeqUninitialized[byte](int uncompressed) - let - (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3)) - - if remaining < dataLen + 4: - return - read += 4 - - if id == chunkCompressed: - # `dataLen` includes length of crc - let - uncompressed = uncompressedLen( - input.toOpenArray(read + 4, read + dataLen - 1)).valueOr: - return - if (type(expected).high - expected).uint64 < uncompressed: - return # length overflow - - expected += int uncompressed - - elif id == chunkUncompressed: - expected += dataLen - 4 - - elif id < 0x80: - # Reserved unskippable chunks (chunk types 0x02-0x7f) - # if we encounter this type of chunk, stop decoding - # the spec says it is an error - return - - else: - # Reserved skippable chunks (chunk types 0x80-0xfe) - # including chunkStream (0xff) should be skipped - discard - - read += dataLen - - # We have an expected length - time to allocate a seq that can hold this much - # data and a work area for the decompression algorithm - - result = newSeqUninitialized[byte](min(expected, maxSize)) - let - (_, written) = uncompressFramed(input, result).valueOr: - result = @[] # Empty result on error - return - - result.setLen(written) + if uncompressFramed(input, result).isErr(): + result = @[] # Empty return on error template compress*(input: openArray[byte]): seq[byte] {. deprecated: "use `encode` - compress is for user-supplied buffers".} = diff --git a/snappy/codec.nim b/snappy/codec.nim index 89e6cc5..3c53bc6 100644 --- a/snappy/codec.nim +++ b/snappy/codec.nim @@ -58,7 +58,7 @@ type bufferTooSmall invalidInput crcMismatch - unknownFrame + unknownChunk {.compile: "crc32c.c".} # TODO: we don't have a native implementation of CRC32C algorithm yet. @@ -165,6 +165,39 @@ func decodeFrameHeader*(input: openArray[byte]): tuple[id: byte, len: int] = dataLen = int(header shr 8) (id, dataLen) +func uncompressedLenFramed*(input: openArray[byte]): Opt[uint64] = + var + read = 0 + expected = 0'u64 + + while (let remaining = input.len - read; remaining > 0): + if remaining < 4: + return + + let + (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3)) + + if remaining < dataLen + 4: + return + + read += 4 + + let uncompressed = + if id == chunkCompressed: + uncompressedLen(input.toOpenArray(read + 4, read + dataLen - 1)).valueOr: + return + elif id == chunkUncompressed: uint32(dataLen - 4) + elif id < 0x80: return # Reserved unskippable chunk + else: 0'u32 # Reserved skippable (for example framing format header) + + if uncompressed > uint64.high - expected: + return # Overflow (unlikely, but..) + + expected += uncompressed + read += dataLen + + ok(expected) + const maxCompressedBlockLen* = maxCompressedLen(maxBlockLen).uint32 maxCompressedFrameDataLen* = diff --git a/snappy/faststreams.nim b/snappy/faststreams.nim index cc0dd0f..ed51fff 100644 --- a/snappy/faststreams.nim +++ b/snappy/faststreams.nim @@ -1,4 +1,5 @@ import + std/strutils, pkg/faststreams/[inputs, multisync, outputs], "."/[codec, encoder, exceptions], ../snappy @@ -22,24 +23,20 @@ proc compress*(input: InputStream, output: OutputStream) {. ## Input length must not exceed `maxUncompressedLen == 2^32-1` or ## `InputTooLarge` will be raised. Other errors are raised as they happen on ## the given streams. - let inputLen = input.len - if inputLen.isSome: - let - lenU32 = checkInputLen(inputLen.get).valueOr: - raiseInputTooLarge() - maxCompressed = maxCompressedLen(inputLen.get).valueOr: - raiseInputTooLarge() + doAssert input.len.isSome, "TODO: support actual .. streams" + let + lenU32 = checkInputLen(input.len.get).valueOr: + raiseInputTooLarge() + maxCompressed = maxCompressedLen(input.len.get).valueOr: + raiseInputTooLarge() - output.ensureRunway maxCompressed - output.write lenU32.toBytes(Leb128).toOpenArray() - else: - # TODO: This is a temporary limitation - doAssert false, "snappy requires an input stream with a known length" + output.ensureRunway maxCompressed + output.write lenU32.toBytes(Leb128).toOpenArray() var # TODO instead of a temporary buffer, use `getWriteableBytes` once it # works - tmp = newSeqUninitialized[byte](int(maxCompressedLen(maxBlockLen))) + tmp = newSeqUninitialized[byte](int maxCompressedBlockLen) while input.readable(maxBlockLen.int): let written = encodeBlock(input.read(maxBlockLen.int), tmp) @@ -66,9 +63,7 @@ proc compressFramed*(input: InputStream, output: OutputStream) {. output.write(framingHeader) var - read = 0 - tmp = newSeqUninitialized[byte]( - maxCompressedLen(maxUncompressedFrameDataLen)) + tmp = newSeqUninitialized[byte](int maxCompressedFrameDataLen) while input.readable(maxUncompressedFrameDataLen.int): let written = encodeFrame(input.read(maxUncompressedFrameDataLen.int), tmp) @@ -95,9 +90,7 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {. if input.read(framingHeader.len) != framingHeader: raise newException(MalformedSnappyData, "Invalid header value") - var uncompressedData = - newSeqUninitialized[byte](maxUncompressedFrameDataLen) - + var tmp = newSeqUninitialized[byte](maxUncompressedFrameDataLen) while input.readable(4): let (id, dataLen) = decodeFrameHeader(input.read(4)) @@ -113,10 +106,10 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {. let crc = uint32.fromBytesLE input.read(4) - uncompressedLen = snappy.uncompress(input.read(dataLen - 4), uncompressedData).valueOr: + uncompressed = uncompress(input.read(dataLen - 4), tmp).valueOr: raise newException(MalformedSnappyData, "Failed to decompress content") - if not checkCrcAndAppend(Sync output, uncompressedData.toOpenArray(0, uncompressedLen-1), crc): + if not checkCrcAndAppend(Sync output, tmp.toOpenArray(0, uncompressed-1), crc): raise newException(MalformedSnappyData, "Content CRC checksum failed") elif id == chunkUncompressed: @@ -131,7 +124,7 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {. # Reserved unskippable chunks (chunk types 0x02-0x7f) # if we encounter this type of chunk, stop decoding # the spec says it is an error - raise newException(MalformedSnappyData, "Invalid chunk type") + raise newException(MalformedSnappyData, "Invalid chunk type " & toHex(id)) else: # Reserved skippable chunks (chunk types 0x80-0xfe) diff --git a/snappy/streams.nim b/snappy/streams.nim index 0be5ef8..bb1cbcc 100644 --- a/snappy/streams.nim +++ b/snappy/streams.nim @@ -24,8 +24,8 @@ proc compress*(input: Stream, inputLen: int, output: Stream) {. output.writeData(unsafeAddr header.data[0], header.len) var - tmpIn = newSeqUninitialized[byte](int(maxBlockLen)) - tmpOut = newSeqUninitialized[byte](int(maxCompressedLen(maxBlockLen))) + tmpIn = newSeqUninitialized[byte](int maxBlockLen) + tmpOut = newSeqUninitialized[byte](int maxCompressedBlockLen) read = 0 while read < inputLen: @@ -40,6 +40,5 @@ proc compress*(input: Stream, inputLen: int, output: Stream) {. output.writeData(addr tmpOut[0], written) read += bytes -# TODO uncompress # TODO compressFramed # TODO uncompressFramed