nim-snappy/snappy.nim

import
  stew/[leb128, ranges/ptr_arith],
  faststreams/[inputs, outputs, buffers, multisync],
  ./snappy/[codec, decoder, encoder_fs, types]

export
  types

proc appendSnappyBytes*(s: OutputStream, src: openArray[byte]) =
  ## Appends `src` to `s` as a single Snappy block-format payload: the
  ## LEB128-encoded length of `src`, followed by the encoded data.
  ##
  ## Calls `raiseInputTooLarge` when `checkInputLen` rejects `src.len`.
  var remaining = checkInputLen(src.len).valueOr:
    raiseInputTooLarge()

  # The payload opens with the varint-encoded length of the raw bytes.
  s.write remaining.toBytes(Leb128).toOpenArray()
  if remaining == 0:
    return

  # Emit full-sized blocks for as long as more than `maxBlockSize` bytes
  # are still pending.
  var offset = 0
  while remaining > maxBlockSize:
    let next = offset + maxBlockSize.int
    s.encodeBlock src.toOpenArray(offset, next - 1)
    offset = next
    remaining -= maxBlockSize

  # `remaining` is at most `maxBlockSize` at this point, so converting it
  # to `int` cannot overflow.
  let last = offset + remaining.int - 1
  if remaining < minNonLiteralBlockSize.uint32:
    # Tail is below the `minNonLiteralBlockSize` threshold: store it as a
    # literal.
    s.emitLiteral src.toOpenArray(offset, last)
  else:
    s.encodeBlock src.toOpenArray(offset, last)

proc snappyCompress*(input: InputStream, output: OutputStream) =
  ## Reads all of `input`, writes its Snappy block-format encoding to
  ## `output`, and closes `output` on the way out (also when an exception
  ## is raised).
  ##
  ## `input` must report its length; otherwise a `doAssert` fails.
  ## Calls `raiseInputTooLarge` when `checkInputLen` or `maxCompressedLen`
  ## reject the reported length.
  try:
    let knownLen = input.len
    if not knownLen.isSome:
      # TODO: This is a temporary limitation
      doAssert false, "snappy requires an input stream with a known length"
    else:
      let
        total = checkInputLen(knownLen.get).valueOr:
          raiseInputTooLarge()
        upperBound = maxCompressedLen(total).valueOr:
          raiseInputTooLarge()

      # Request runway for the largest possible output, then emit the
      # varint-encoded uncompressed length as the header.
      output.ensureRunway upperBound
      output.write total.toBytes(Leb128).toOpenArray()

    # Encode as many full-sized blocks as the input can supply.
    while input.readable(maxBlockSize.int):
      output.encodeBlock input.read(maxBlockSize.int)

    # Whatever is left is shorter than a full block.
    let tail = input.totalUnconsumedBytes
    if tail > 0:
      if tail < minNonLiteralBlockSize:
        output.emitLiteral input.read(tail)
      else:
        output.encodeBlock input.read(tail)
  finally:
    close output

func encode*(src: openarray[byte]): seq[byte] =
  ## Returns the Snappy block-format encoding of `src`.
  # In-memory streams have no side effects, so it is fine to override
  # the effect tracking here:
  {.noSideEffect.}:
    let sink = memoryOutput()
    snappyCompress(unsafeMemoryInput(src), sink)
    result = sink.getOutput

func decode*(src: openArray[byte], maxSize = 0xffffffff'u32): seq[byte] =
  ## Decodes the Snappy block-format payload in `src` and returns the
  ## uncompressed bytes.
  ##
  ## An empty seq is returned when the LEB128 length prefix cannot be
  ## read, when the announced length exceeds `maxSize`, when the announced
  ## length is zero, or when the block decoder reports an error.
  let (declaredLen, headerLen) = uint32.fromBytes(src, Leb128)
  if headerLen <= 0 or declaredLen > maxSize:
    return

  if declaredLen == 0:
    return

  when sizeof(uint) == 4:
    # On 32-bit targets lengths above `int.high` cannot be represented.
    if declaredLen > 0x7fffffff'u32:
      return

  # `declaredLen.int` cannot overflow because of the extra check above
  result = newSeq[byte](declaredLen.int)
  if decode(result, src.toOpenArray(headerLen, src.len - 1)) != 0:
    result = @[]

proc snappyUncompress*(src: openArray[byte], dst: var openArray[byte]): uint32 =
  ## Decodes the Snappy block-format payload in `src` into the
  ## caller-provided buffer `dst`.
  ##
  ## Returns the uncompressed length announced by the payload header, or 0
  ## when the header cannot be read, when `dst` is too small for the
  ## announced length, or when the block decoder reports an error.
  let (declaredLen, headerLen) = uint32.fromBytes(src, Leb128)
  if headerLen <= 0 or declaredLen.BiggestUInt > dst.len.BiggestUInt:
    return 0

  if declaredLen == 0:
    return 0

  # `declaredLen.int` cannot overflow here: it was just checked to be no
  # larger than `dst.len`, which is itself an `int`.
  let status = decode(
    dst.toOpenArray(0, declaredLen.int - 1),
    src.toOpenArray(headerLen, src.len - 1))
  if status != 0:
    return 0

  return declaredLen
Migrate to faststreams; WIP benchmark 2019-07-07 12:33:25 +00:00			`import`
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`stew/[leb128, ranges/ptr_arith],`
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`faststreams/[inputs, outputs, buffers, multisync],`
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`./snappy/[codec, decoder, encoder_fs, types]`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00
			`export`
			`types`
Migrate to faststreams; WIP benchmark 2019-07-07 12:33:25 +00:00
Use the latest faststreams OutputStream API 2020-04-09 20:02:36 +00:00			`proc appendSnappyBytes*(s: OutputStream, src: openArray[byte]) =`
initial commit 2018-11-02 05:10:58 +00:00			`var`
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`lenU32 = checkInputLen(src.len).valueOr:`
			`raiseInputTooLarge()`
initial commit 2018-11-02 05:10:58 +00:00			`p = 0`
Migrate to faststreams; WIP benchmark 2019-07-07 12:33:25 +00:00
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`# The block starts with the varint-encoded length of the decompressed bytes.`
use stew/leb128 2020-12-14 11:03:12 +00:00			`s.write lenU32.toBytes(Leb128).toOpenArray()`
fix `appendSnappyBytes` index computation The `appendSnappyBytes` implementation of `snappy` computes indices incorrectly, resulting in wrong data being produced. The implementation was fixed and the test suite extended accordingly. Note that this issue is not reachable because `appendSnappyBytes` is only used in test code. 2021-12-11 17:05:30 +00:00			`if lenU32 <= 0: return`
initial commit 2018-11-02 05:10:58 +00:00
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`while lenU32 > maxBlockSize:`
			`s.encodeBlock src.toOpenArray(p, p + maxBlockSize.int - 1)`
			`p += maxBlockSize.int`
			`lenU32 -= maxBlockSize`
initial commit 2018-11-02 05:10:58 +00:00
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			# The `lenU32.int` expressions below cannot overflow because
			# `lenU32` is already less than `maxBlockSize` here:
			`if lenU32 < minNonLiteralBlockSize.uint32:`
fix `appendSnappyBytes` index computation The `appendSnappyBytes` implementation of `snappy` computes indices incorrectly, resulting in wrong data being produced. The implementation was fixed and the test suite extended accordingly. Note that this issue is not reachable because `appendSnappyBytes` is only used in test code. 2021-12-11 17:05:30 +00:00			`s.emitLiteral src.toOpenArray(p, p + lenU32.int - 1)`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`else:`
fix `appendSnappyBytes` index computation The `appendSnappyBytes` implementation of `snappy` computes indices incorrectly, resulting in wrong data being produced. The implementation was fixed and the test suite extended accordingly. Note that this issue is not reachable because `appendSnappyBytes` is only used in test code. 2021-12-11 17:05:30 +00:00			`s.encodeBlock src.toOpenArray(p, p + lenU32.int - 1)`
initial commit 2018-11-02 05:10:58 +00:00
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`proc snappyCompress*(input: InputStream, output: OutputStream) =`
			`try:`
			`let inputLen = input.len`
			`if inputLen.isSome:`
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`let`
			`lenU32 = checkInputLen(inputLen.get).valueOr:`
			`raiseInputTooLarge()`
			`maxCompressed = maxCompressedLen(lenU32).valueOr:`
			`raiseInputTooLarge()`

			`output.ensureRunway maxCompressed`
use stew/leb128 2020-12-14 11:03:12 +00:00			`output.write lenU32.toBytes(Leb128).toOpenArray()`
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`else:`
			`# TODO: This is a temporary limitation`
			`doAssert false, "snappy requires an input stream with a known length"`
Use the latest faststreams OutputStream API 2020-04-09 20:02:36 +00:00
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`while input.readable(maxBlockSize.int):`
			`encodeBlock(output, input.read(maxBlockSize.int))`
Use the latest faststreams OutputStream API 2020-04-09 20:02:36 +00:00
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`let remainingBytes = input.totalUnconsumedBytes`
			`if remainingBytes > 0:`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`if remainingBytes < minNonLiteralBlockSize:`
			`output.emitLiteral input.read(remainingBytes)`
			`else:`
			`output.encodeBlock input.read(remainingBytes)`
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`finally:`
			`close output`
Migrate to faststreams; WIP benchmark 2019-07-07 12:33:25 +00:00
			`# Encode returns the encoded form of src.`
Use the latest faststreams OutputStream API 2020-04-09 20:02:36 +00:00			`func encode*(src: openarray[byte]): seq[byte] =`
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`# Memory streams doesn't have side effects:`
Use the latest faststreams OutputStream API 2020-04-09 20:02:36 +00:00			`{.noSideEffect.}:`
Async version of the Snappy framing format based on the latest FastStreams version 2020-05-05 21:35:55 +00:00			`let output = memoryOutput()`
			`snappyCompress(unsafeMemoryInput(src), output)`
			`output.getOutput`
initial commit 2018-11-02 05:10:58 +00:00
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`func decode*(src: openArray[byte], maxSize = 0xffffffff'u32): seq[byte] =`
use stew/leb128 2020-12-14 11:03:12 +00:00			`let (lenU32, bytesRead) = uint32.fromBytes(src, Leb128)`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`if bytesRead <= 0 or lenU32 > maxSize:`
initial commit 2018-11-02 05:10:58 +00:00			`return`

Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`if lenU32 > 0:`
			`when sizeof(uint) == 4:`
			`if lenU32 > 0x7fffffff'u32:`
			`return`
			# `lenU32.int` cannot overflow because of the extra check above
			`result = newSeq[byte](lenU32.int)`
			`let errCode = decode(result, src.toOpenArray(bytesRead, src.len - 1))`
initial commit 2018-11-02 05:10:58 +00:00			`if errCode != 0: result = @[]`
add compress/uncompress alias 2018-11-02 13:36:21 +00:00
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`proc snappyUncompress*(src: openArray[byte], dst: var openArray[byte]): uint32 =`
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`let (lenU32, bytesRead) = uint32.fromBytes(src, Leb128)`
			`if bytesRead <= 0 or lenU32.BiggestUInt > dst.len.BiggestUInt:`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`return 0`
working snappy framing compress prototype 2020-03-31 05:21:44 +00:00
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`if lenU32 > 0:`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			# `result.int` cannot overflow here, because we've already
			# checked that it's smaller than the `dst.len` which is an int.
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`let errCode = decode(dst.toOpenArray(0, lenU32.int - 1),`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`src.toOpenArray(bytesRead, src.len - 1))`
working framing uncompress prototype 2020-03-28 09:10:15 +00:00			`if errCode != 0:`
Fix various integer overflow issues found through fuzzing 2020-08-18 20:11:42 +00:00			`return 0`
working framing uncompress prototype 2020-03-28 09:10:15 +00:00
deduplicate and reorganise code (#9) The snappy codebase is a mess with competing implementations, nonsensical code duplication and no real direction due to a partially implemented faststreams migration. This PR makes it slightly less of a mess, but make no mistake, it's still a mess - the difference being that there are a few more signposts along the way in terms of module organisation, and a little less mess as the line count of the PR discloses. Performance remains poor - ~3x slower than C++ - but at least there's less code to look at :) 2022-04-01 10:57:39 +00:00			`return lenU32`