2023-07-21 12:44:16 +00:00
|
|
|
{.push raises: [].}
|
|
|
|
|
2022-04-14 14:22:41 +00:00
|
|
|
import
|
2023-07-21 12:44:16 +00:00
|
|
|
stew/byteutils,
|
2022-04-14 14:22:41 +00:00
|
|
|
pkg/faststreams/[inputs, multisync, outputs],
|
|
|
|
"."/[codec, encoder, exceptions],
|
|
|
|
../snappy
|
|
|
|
|
|
|
|
export
|
|
|
|
inputs, multisync, outputs, codec, exceptions
|
|
|
|
|
|
|
|
proc checkCrcAndAppend(
    output: OutputStream, data: openArray[byte], crc: uint32,
    checkIntegrity: bool): bool {.
    raises: [IOError].} =
  ## Append `data` to `output` if it is accepted, and report whether it was.
  ##
  ## With `checkIntegrity` set, `data` is accepted only when `maskedCrc(data)`
  ## equals `crc`. Without it, the checksum is never computed (the `or`
  ## short-circuits) and `data` is always accepted.
  ##
  ## Returns `true` after writing `data`, `false` (nothing written) on a
  ## checksum mismatch.
  result = not checkIntegrity or maskedCrc(data) == crc
  if result:
    output.write(data)
|
|
|
|
|
|
|
|
proc compress*(input: InputStream, output: OutputStream) {.
    raises: [InputTooLarge, IOError].} =
  ## Compress every byte of `input` into `output`, flushing `output` once
  ## everything has been written.
  ##
  ## The Leb128-encoded input length is written first, followed by the input
  ## encoded block by block: full blocks of `maxBlockLen` bytes, then one final
  ## block holding whatever is left.
  ##
  ## Input length must not exceed `maxUncompressedLen == 2^32-1` or
  ## `InputTooLarge` will be raised. Other errors are raised as they happen on
  ## the given streams.
  doAssert input.len.isSome, "TODO: support actual .. streams"

  let
    inputLen = input.len.get
    lenU32 = checkInputLen(inputLen).valueOr:
      raiseInputTooLarge()
    # Only needed by the disabled `ensureRunway` call below; kept because the
    # length validation may raise.
    maxCompressed = maxCompressedLen(inputLen).valueOr:
      raiseInputTooLarge()

  # TODO https://github.com/status-im/nim-faststreams/issues/28
  # output.ensureRunway maxCompressed
  output.write lenU32.toBytes(Leb128).toOpenArray()

  # TODO instead of a temporary buffer, use `getWriteableBytes` once it
  #      works
  var scratch = newSeqUninitialized[byte](int maxCompressedBlockLen)

  # Full-sized blocks
  while input.readable(maxBlockLen.int):
    let encodedLen = encodeBlock(input.read(maxBlockLen.int), scratch)
    # TODO async streams could be supported efficiently by waiting here, after
    #      each 64kb-block
    output.write(scratch.toOpenArray(0, encodedLen - 1))

  # Trailing partial block, if any
  let tailLen = input.totalUnconsumedBytes
  if tailLen > 0:
    let encodedLen = encodeBlock(input.read(tailLen), scratch)
    output.write(scratch.toOpenArray(0, encodedLen - 1))

  output.flush()
|
|
|
|
|
|
|
|
proc compress*(input: openArray[byte], output: OutputStream) {.
    raises: [InputTooLarge, IOError].} =
  ## Compress the bytes of `input` into `output` by wrapping them in a memory
  ## input stream and delegating to the stream-based `compress`.
  let stream = unsafeMemoryInput(input)
  compress(stream, output)
|
|
|
|
|
|
|
|
# `uncompress` is not implemented due to the requirement that the full output
|
|
|
|
# must remain accessible throughout uncompression
|
|
|
|
# TODO reading from a stream is still feasible
|
|
|
|
|
|
|
|
proc compressFramed*(input: InputStream, output: OutputStream) {.
    raises: [IOError].} =
  ## Write `input` to `output` as a framed stream and flush `output` at the
  ## end.
  ##
  ## `framingHeader` is written first. The input is then encoded frame by
  ## frame: full frames of `maxUncompressedFrameDataLen` bytes, then one final
  ## frame holding whatever is left.

  # The stream starts with the magic identifier
  output.write(framingHeader)

  var scratch = newSeqUninitialized[byte](int maxCompressedFrameDataLen)

  # Full-sized frames
  while input.readable(maxUncompressedFrameDataLen.int):
    let encodedLen =
      encodeFrame(input.read(maxUncompressedFrameDataLen.int), scratch)
    # TODO async streams could be supported efficiently by waiting here, after
    #      each 64kb-block
    output.write(scratch.toOpenArray(0, encodedLen - 1))

  # Trailing partial frame, if any
  let tailLen = input.totalUnconsumedBytes
  if tailLen > 0:
    let encodedLen = encodeFrame(input.read(tailLen), scratch)
    output.write(scratch.toOpenArray(0, encodedLen - 1))

  output.flush()
|
|
|
|
|
|
|
|
proc compressFramed*(input: openArray[byte], output: OutputStream) {.
    raises: [IOError].} =
  ## Write the bytes of `input` to `output` as a framed stream by wrapping
  ## them in a memory input stream and delegating to the stream-based
  ## `compressFramed`.
  let stream = unsafeMemoryInput(input)
  compressFramed(stream, output)
|
|
|
|
|
allow skipping crc32 integrity check (#22)
Some data is already protected by stronger checks - crc32 on the other
hand significantly slows down framed reading - ie 2.5x slower:
```
118.853 / 41.781, 129.115 / 0.000, 188.438 / 0.000, 90.565 / 44.371, 50, 115613038, state-6800000-488b7150-d613b584.ssz
186.600 / 97.202, 191.935 /123.325, 0.000 / 0.000, 0.000 / 0.000, 50, 115613038, state-6800000-488b7150-d613b584.ssz(framed)
```
The difference between unframed and framed decoding is the CRC32 check -
it takes ~50ms on a decent laptop for a 110mb file.
2023-07-25 15:50:36 +00:00
|
|
|
proc uncompressFramed*(
    input: InputStream, output: OutputStream, checkIntegrity = true) {.
    fsMultiSync, raises: [IOError, SnappyDecodingError].} =
  ## Decode the framed stream in `input`, appending the payload of every data
  ## chunk to `output` and flushing `output` at the end.
  ##
  ## `checkIntegrity` is forwarded to `checkCrcAndAppend`: when `false`, the
  ## CRC stored in each data chunk is read but not verified.
  ##
  ## Raises `UnexpectedEofError` when the stream header or a chunk body is
  ## shorter than announced, and `MalformedSnappyData` for a wrong header, an
  ## out-of-range chunk length, a payload that fails to decompress, a CRC
  ## mismatch, a chunk type below `0x80` that is not a data chunk, or trailing
  ## bytes that do not form a chunk header.
  if not input.readable(framingHeader.len):
    raise newException(UnexpectedEofError, "Failed to read stream header")

  if input.read(framingHeader.len) != framingHeader:
    raise newException(MalformedSnappyData, "Invalid header value")

  # Scratch buffer receiving the decompressed payload of one chunk at a time
  var decoded = newSeqUninitialized[byte](maxUncompressedFrameDataLen)

  # Every chunk starts with a 4-byte header
  while input.readable(4):
    let (id, dataLen) = decodeFrameHeader(input.read(4))

    if dataLen.uint64 > maxCompressedFrameDataLen:
      raise newException(
        MalformedSnappyData, "Invalid frame length: " & $dataLen)

    if not input.readable(dataLen):
      raise newException(
        UnexpectedEofError, "Failed to read the entire snappy frame")

    if id == chunkCompressed:
      if dataLen < 4:
        raise newException(
          MalformedSnappyData, "Frame size too low to contain CRC checksum")

      # The chunk body is a little-endian CRC followed by the compressed data
      let expectedCrc = uint32.fromBytesLE input.read(4)
      let decodedLen = uncompress(input.read(dataLen - 4), decoded).valueOr:
        raise newException(MalformedSnappyData, "Failed to decompress content")

      if not checkCrcAndAppend(
          Sync output, decoded.toOpenArray(0, decodedLen-1), expectedCrc,
          checkIntegrity):
        raise newException(MalformedSnappyData, "Content CRC checksum failed")

    elif id == chunkUncompressed:
      if dataLen < 4:
        raise newException(
          MalformedSnappyData, "Frame size too low to contain CRC checksum")

      # Without the CRC, the payload must fit in a single uncompressed frame
      if dataLen.uint64 - 4 > maxUncompressedFrameDataLen:
        raise newException(
          MalformedSnappyData, "Invalid frame length: " & $dataLen)

      let expectedCrc = uint32.fromBytesLE(input.read(4))

      if not checkCrcAndAppend(
          Sync output, input.read(dataLen - 4), expectedCrc, checkIntegrity):
        raise newException(MalformedSnappyData, "Content CRC checksum failed")

    elif id < 0x80:
      # Reserved unskippable chunks (chunk types 0x02-0x7f)
      # if we encounter this type of chunk, stop decoding
      # the spec says it is an error
      raise newException(
        MalformedSnappyData, "Invalid chunk type " & toHex([id]))

    else:
      # Reserved skippable chunks (chunk types 0x80-0xfe)
      # including STREAM_HEADER (0xff) should be skipped
      input.advance dataLen

  # Fewer than 4 bytes remain here; any leftover cannot be a chunk header
  if input.readable(1):
    raise newException(
      MalformedSnappyData, "Input contains unknown trailing bytes")

  output.flush()
|
|
|
|
|
allow skipping crc32 integrity check (#22)
Some data is already protected by stronger checks - crc32 on the other
hand significantly slows down framed reading - ie 2.5x slower:
```
118.853 / 41.781, 129.115 / 0.000, 188.438 / 0.000, 90.565 / 44.371, 50, 115613038, state-6800000-488b7150-d613b584.ssz
186.600 / 97.202, 191.935 /123.325, 0.000 / 0.000, 0.000 / 0.000, 50, 115613038, state-6800000-488b7150-d613b584.ssz(framed)
```
The difference between unframed and framed decoding is the CRC32 check -
it takes ~50ms on a decent laptop for a 110mb file.
2023-07-25 15:50:36 +00:00
|
|
|
proc uncompressFramed*(
    input: openArray[byte], output: OutputStream, checkIntegrity = true) {.
    raises: [IOError, SnappyDecodingError].} =
  ## Decode the framed stream held in `input` into `output` by wrapping the
  ## bytes in a memory input stream and delegating to the stream-based
  ## `uncompressFramed`; `checkIntegrity` is passed through unchanged.
  let stream = unsafeMemoryInput(input)
  uncompressFramed(stream, output, checkIntegrity)
|