From eabc45992a19c20d1c16e3e4bbfe8b7428b25ff7 Mon Sep 17 00:00:00 2001
From: Jacek Sieka <jacek@status.im>
Date: Sat, 2 Apr 2022 17:13:29 +0200
Subject: [PATCH] cleanups

---
 snappy.nim             | 133 ++++++++++++++---------------------------
 snappy/codec.nim       |  35 ++++++++++-
 snappy/faststreams.nim |  37 +++++-------
 snappy/streams.nim     |   5 +-
 4 files changed, 96 insertions(+), 114 deletions(-)

diff --git a/snappy.nim b/snappy.nim
index a6f5db3..4ab4baa 100644
--- a/snappy.nim
+++ b/snappy.nim
@@ -100,7 +100,13 @@ func uncompress*(input: openArray[byte], output: var openArray[byte]):
       return err(CodecError.invalidInput)
     return ok(0)
 
-  decodeAllTags(input.toOpenArray(bytesRead, input.high), output)
+  let written =
+    ? decodeAllTags(input.toOpenArray(bytesRead, input.high), output)
+
+  if written.uint64 != lenU32:
+    return err(CodecError.invalidInput) # Header does not match content
+
+  return ok(written)
 
 func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
   ## Decode input returning the uncompressed output. On error, return an empty
@@ -111,19 +117,14 @@ func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
   let uncompressed = uncompressedLen(input).valueOr:
     return
 
-  if uncompressed > maxSize:
+  if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
     return
 
-  when sizeof(int) <= sizeof(uncompressed):
-    if uncompressed.uint64 > int.high.uint64:
-      return
-
   # TODO https://github.com/nim-lang/Nim/issues/19357
   result = newSeqUninitialized[byte](int uncompressed)
-  let written = uncompress(input, result).valueOr:
-    return @[] # Empty return on error
-  if written != result.len:
-    return @[] # Header does not match content
+
+  if uncompress(input, result).isErr():
+    result = @[] # Empty return on error
 
 func compressFramed*(input: openArray[byte], output: var openArray[byte]):
     Result[int, FrameError] =
@@ -143,7 +144,7 @@ func compressFramed*(input: openArray[byte], output: var openArray[byte]):
     written = framingHeader.len
   while (let remaining = input.len - read; remaining > 0):
     let
-      frameSize = min(remaining, maxUncompressedFrameDataLen.int)
+      frameSize = min(remaining, int maxUncompressedFrameDataLen)
     written += encodeFrame(
       input.toOpenArray(read, read + frameSize - 1),
       output.toOpenArray(written, output.high))
@@ -153,12 +154,12 @@ func compressFramed*(input: openArray[byte], output: var openArray[byte]):
   ok(written)
 
 func encodeFramed*(input: openArray[byte]): seq[byte] =
-  let compressedLen = maxCompressedLenFramed(input.len)
-  if compressedLen > int.high.uint64:
+  let maxCompressed = maxCompressedLenFramed(input.len)
+  if maxCompressed > int.high.uint64:
     return
 
   # TODO https://github.com/nim-lang/Nim/issues/19357
-  result = newSeqUninitialized[byte](int compressedLen.int)
+  result = newSeqUninitialized[byte](int maxCompressed)
   let
     written = compressFramed(input, result).expect("lengths checked")
 
@@ -187,51 +188,51 @@ func uncompressFramed*(input: openArray[byte], output: var openArray[byte]):
       (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))
     read += 4
 
-    if remaining < dataLen:
+    if remaining - 4 < dataLen:
       return err(FrameError.invalidInput)
 
     if id == chunkCompressed:
+      if dataLen < 4:
+        return err(FrameError.invalidInput)
+
       let
         crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
-
-      # `dataLen` includes length of crc
-      let uncompressed = uncompress(
+        uncompressed = uncompress(
           input.toOpenArray(read + 4, read + dataLen - 1),
           output.toOpenArray(written, output.high)).valueOr:
-        let res = case error
-        of CodecError.bufferTooSmall: ok((read - 4, written))
-        of CodecError.invalidInput: err(FrameError.invalidInput)
-        return res
+            let res = case error
+            of CodecError.bufferTooSmall: ok((read - 4, written))
+            of CodecError.invalidInput: err(FrameError.invalidInput)
+            return res
 
-      if maskedCrc(output.toOpenArray(written, written + uncompressed - 1)) != crc:
+      if maskedCrc(
+          output.toOpenArray(written, written + (uncompressed - 1))) != crc:
         return err(FrameError.crcMismatch)
 
       written += uncompressed
 
     elif id == chunkUncompressed:
+      if dataLen < 4:
+        return err(FrameError.invalidInput)
+
       let
         crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
 
-      # `dataLen` includes length of crc
-      if maskedCrc(input.toOpenArray(read + 4, read + dataLen - 1)) != crc:
+      if maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
         return err(FrameError.crcMismatch)
 
-      if dataLen - 4 > output.len - written:
-        return ok((read, written))
+      let uncompressed = dataLen - 4 # dataLen includes CRC length
+      if uncompressed > output.len - written:
+        return ok((read - 4, written))
 
-      copyMem(addr output[written], unsafeAddr input[read + 4], dataLen - 4)
-      written += dataLen - 4
+      copyMem(addr output[written], unsafeAddr input[read + 4], uncompressed)
+      written += uncompressed
 
     elif id < 0x80:
-      # Reserved unskippable chunks (chunk types 0x02-0x7f)
-      # if we encounter this type of chunk, stop decoding
-      # the spec says it is an error
-      return err(FrameError.unknownFrame)
+      return err(FrameError.unknownChunk) # Reserved unskippable chunk
 
     else:
-      # Reserved skippable chunks (chunk types 0x80-0xfe)
-      # including chunkStream (0xff) should be skipped
-      discard
+      discard # Reserved skippable chunk (for example framing format header)
 
     read += dataLen
 
@@ -246,61 +247,17 @@ func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] =
   ## the margins in maxCompresssedLen!
   ##
   ## In case of errors, an empty buffer is returned.
+  let uncompressed = uncompressedLenFramed(input).valueOr:
+    return
 
-  # Start by computing expected length - in-depth error checking will be done
-  # during actual decoding!
-  var
-    read = 0
-    expected = 0
+  if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
+    return
 
-  while (let remaining = input.len - read; remaining > 0):
-    if remaining < 4:
-      return
+  # TODO https://github.com/nim-lang/Nim/issues/19357
+  result = newSeqUninitialized[byte](int uncompressed)
 
-    let
-      (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))
-
-    if remaining < dataLen + 4:
-      return
-    read += 4
-
-    if id == chunkCompressed:
-      # `dataLen` includes length of crc
-      let
-        uncompressed = uncompressedLen(
-          input.toOpenArray(read + 4, read + dataLen - 1)).valueOr:
-            return
-      if (type(expected).high - expected).uint64 < uncompressed:
-        return # length overflow
-
-      expected += int uncompressed
-
-    elif id == chunkUncompressed:
-      expected += dataLen - 4
-
-    elif id < 0x80:
-      # Reserved unskippable chunks (chunk types 0x02-0x7f)
-      # if we encounter this type of chunk, stop decoding
-      # the spec says it is an error
-      return
-
-    else:
-      # Reserved skippable chunks (chunk types 0x80-0xfe)
-      # including chunkStream (0xff) should be skipped
-      discard
-
-    read += dataLen
-
-  # We have an expected length - time to allocate a seq that can hold this much
-  # data and a work area for the decompression algorithm
-
-  result = newSeqUninitialized[byte](min(expected, maxSize))
-  let
-    (_, written) = uncompressFramed(input, result).valueOr:
-      result = @[] # Empty result on error
-      return
-
-  result.setLen(written)
+  if uncompressFramed(input, result).isErr():
+    result = @[] # Empty return on error
 
 template compress*(input: openArray[byte]): seq[byte] {.
     deprecated: "use `encode` - compress is for user-supplied buffers".} =
diff --git a/snappy/codec.nim b/snappy/codec.nim
index 89e6cc5..3c53bc6 100644
--- a/snappy/codec.nim
+++ b/snappy/codec.nim
@@ -58,7 +58,7 @@ type
     bufferTooSmall
     invalidInput
     crcMismatch
-    unknownFrame
+    unknownChunk
 
 {.compile: "crc32c.c".}
 # TODO: we don't have a native implementation of CRC32C algorithm yet.
@@ -165,6 +165,54 @@ func decodeFrameHeader*(input: openArray[byte]): tuple[id: byte, len: int] =
     dataLen = int(header shr 8)
   (id, dataLen)
 
+func uncompressedLenFramed*(input: openArray[byte]): Opt[uint64] =
+  ## Compute the total uncompressed length of the framed data in `input` by
+  ## walking its chunk headers, without decompressing any data.
+  ##
+  ## Compressed chunks contribute the length reported by `uncompressedLen` for
+  ## the payload following the CRC, uncompressed chunks contribute
+  ## `dataLen - 4` and skippable chunks contribute nothing.
+  ##
+  ## Every failure path is a bare `return` of the default-initialised result,
+  ## which callers handle through `valueOr`: truncated input, a data chunk too
+  ## short to hold its CRC, a reserved unskippable chunk or a length overflow.
+  var
+    read = 0
+    expected = 0'u64
+
+  while (let remaining = input.len - read; remaining > 0):
+    if remaining < 4:
+      return # Truncated chunk header
+
+    let
+      (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))
+
+    if remaining < dataLen + 4:
+      return # Chunk data extends past the end of the input
+
+    read += 4
+
+    # `dataLen` of a data chunk includes its 4-byte CRC: reject shorter chunks
+    # up front, or the slicing / `dataLen - 4` below would go out of range
+    if (id == chunkCompressed or id == chunkUncompressed) and dataLen < 4:
+      return
+
+    let uncompressed =
+      if id == chunkCompressed:
+        uncompressedLen(input.toOpenArray(read + 4, read + dataLen - 1)).valueOr:
+          return
+      elif id == chunkUncompressed: uint32(dataLen - 4)
+      elif id < 0x80: return # Reserved unskippable chunk
+      else: 0'u32 # Reserved skippable (for example framing format header)
+
+    if uncompressed > uint64.high - expected:
+      return # Overflow (unlikely, but..)
+
+    expected += uncompressed
+    read += dataLen
+
+  ok(expected)
+
 const
   maxCompressedBlockLen* = maxCompressedLen(maxBlockLen).uint32
   maxCompressedFrameDataLen* =
diff --git a/snappy/faststreams.nim b/snappy/faststreams.nim
index cc0dd0f..ed51fff 100644
--- a/snappy/faststreams.nim
+++ b/snappy/faststreams.nim
@@ -1,4 +1,5 @@
 import
+  std/strutils,
   pkg/faststreams/[inputs, multisync, outputs],
   "."/[codec, encoder, exceptions],
   ../snappy
@@ -22,24 +23,20 @@ proc compress*(input: InputStream, output: OutputStream) {.
   ## Input length must not exceed `maxUncompressedLen == 2^32-1` or
   ## `InputTooLarge` will be raised. Other errors are raised as they happen on
   ## the given streams.
-  let inputLen = input.len
-  if inputLen.isSome:
-    let
-      lenU32 = checkInputLen(inputLen.get).valueOr:
-        raiseInputTooLarge()
-      maxCompressed = maxCompressedLen(inputLen.get).valueOr:
-        raiseInputTooLarge()
+  doAssert input.len.isSome, "TODO: support actual .. streams"
+  let
+    lenU32 = checkInputLen(input.len.get).valueOr:
+      raiseInputTooLarge()
+    maxCompressed = maxCompressedLen(input.len.get).valueOr:
+      raiseInputTooLarge()
 
-    output.ensureRunway maxCompressed
-    output.write lenU32.toBytes(Leb128).toOpenArray()
-  else:
-    # TODO: This is a temporary limitation
-    doAssert false, "snappy requires an input stream with a known length"
+  output.ensureRunway maxCompressed
+  output.write lenU32.toBytes(Leb128).toOpenArray()
 
   var
     # TODO instead of a temporary buffer, use `getWriteableBytes` once it
     #      works
-    tmp = newSeqUninitialized[byte](int(maxCompressedLen(maxBlockLen)))
+    tmp = newSeqUninitialized[byte](int maxCompressedBlockLen)
 
   while input.readable(maxBlockLen.int):
     let written = encodeBlock(input.read(maxBlockLen.int), tmp)
@@ -66,9 +63,7 @@ proc compressFramed*(input: InputStream, output: OutputStream) {.
   output.write(framingHeader)
 
   var
-    read = 0
-    tmp = newSeqUninitialized[byte](
-      maxCompressedLen(maxUncompressedFrameDataLen))
+    tmp = newSeqUninitialized[byte](int maxCompressedFrameDataLen)
 
   while input.readable(maxUncompressedFrameDataLen.int):
     let written = encodeFrame(input.read(maxUncompressedFrameDataLen.int), tmp)
@@ -95,9 +90,7 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
   if input.read(framingHeader.len) != framingHeader:
     raise newException(MalformedSnappyData, "Invalid header value")
 
-  var uncompressedData =
-    newSeqUninitialized[byte](maxUncompressedFrameDataLen)
-
+  var tmp = newSeqUninitialized[byte](maxUncompressedFrameDataLen)
   while input.readable(4):
     let (id, dataLen) = decodeFrameHeader(input.read(4))
 
@@ -113,10 +106,10 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
 
       let
         crc = uint32.fromBytesLE input.read(4)
-        uncompressedLen = snappy.uncompress(input.read(dataLen - 4), uncompressedData).valueOr:
+        uncompressed = uncompress(input.read(dataLen - 4), tmp).valueOr:
           raise newException(MalformedSnappyData, "Failed to decompress content")
 
-      if not checkCrcAndAppend(Sync output, uncompressedData.toOpenArray(0, uncompressedLen-1), crc):
+      if not checkCrcAndAppend(Sync output, tmp.toOpenArray(0, uncompressed-1), crc):
         raise newException(MalformedSnappyData, "Content CRC checksum failed")
 
     elif id == chunkUncompressed:
@@ -131,7 +124,7 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
       # Reserved unskippable chunks (chunk types 0x02-0x7f)
       # if we encounter this type of chunk, stop decoding
       # the spec says it is an error
-      raise newException(MalformedSnappyData, "Invalid chunk type")
+      raise newException(MalformedSnappyData, "Invalid chunk type " & toHex(id))
 
     else:
       # Reserved skippable chunks (chunk types 0x80-0xfe)
diff --git a/snappy/streams.nim b/snappy/streams.nim
index 0be5ef8..bb1cbcc 100644
--- a/snappy/streams.nim
+++ b/snappy/streams.nim
@@ -24,8 +24,8 @@ proc compress*(input: Stream, inputLen: int, output: Stream) {.
   output.writeData(unsafeAddr header.data[0], header.len)
 
   var
-    tmpIn = newSeqUninitialized[byte](int(maxBlockLen))
-    tmpOut = newSeqUninitialized[byte](int(maxCompressedLen(maxBlockLen)))
+    tmpIn = newSeqUninitialized[byte](int maxBlockLen)
+    tmpOut = newSeqUninitialized[byte](int maxCompressedBlockLen)
     read = 0
 
   while read < inputLen:
@@ -40,6 +40,5 @@ proc compress*(input: Stream, inputLen: int, output: Stream) {.
     output.writeData(addr tmpOut[0], written)
     read += bytes
 
-# TODO uncompress
 # TODO compressFramed
 # TODO uncompressFramed