allow skipping crc32 integrity check (#22)
Some data is already protected by stronger checks. CRC32, on the other hand, significantly slows down framed reading, i.e. it is about 2.5x slower:
```
118.853 / 41.781, 129.115 / 0.000, 188.438 / 0.000, 90.565 / 44.371, 50, 115613038, state-6800000-488b7150-d613b584.ssz
186.600 / 97.202, 191.935 /123.325, 0.000 / 0.000, 0.000 / 0.000, 50, 115613038, state-6800000-488b7150-d613b584.ssz(framed)
```
The difference between unframed and framed decoding is the CRC32 check: it takes ~50 ms on a decent laptop for a 110 MB file.
This commit is contained in:
parent
e36f19d886
commit
ecbcee1d10
|
@ -7,3 +7,4 @@ master
|
|||
# Fuzzer-generated files
|
||||
crash-*
|
||||
|
||||
build/
|
||||
|
|
20
snappy.nim
20
snappy.nim
|
@ -106,7 +106,7 @@ func uncompress*(input: openArray[byte], output: var openArray[byte]):
|
|||
if written.uint64 != lenU32:
|
||||
return err(CodecError.invalidInput) # Header does not match content
|
||||
|
||||
return ok(written)
|
||||
ok(written)
|
||||
|
||||
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
|
||||
## Decode input returning the uncompressed output. On error, return an empty
|
||||
|
@ -117,7 +117,7 @@ func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
|
|||
let uncompressed = uncompressedLen(input).valueOr:
|
||||
return
|
||||
|
||||
if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
|
||||
if uncompressed > maxSize or uncompressed > int.high.uint64:
|
||||
return
|
||||
|
||||
# TODO https://github.com/nim-lang/Nim/issues/19357
|
||||
|
@ -166,7 +166,8 @@ func encodeFramed*(input: openArray[byte]): seq[byte] =
|
|||
result.setLen(written)
|
||||
|
||||
func uncompressFramed*(
|
||||
input: openArray[byte], output: var openArray[byte], checkHeader = true):
|
||||
input: openArray[byte], output: var openArray[byte], checkHeader = true,
|
||||
checkIntegrity = true):
|
||||
Result[tuple[read: int, written: int], FrameError] =
|
||||
## Uncompress as many frames as possible from `input` and write them to
|
||||
## `output`, returning the number of bytes read and written.
|
||||
|
@ -226,7 +227,7 @@ func uncompressFramed*(
|
|||
of CodecError.invalidInput: err(FrameError.invalidInput)
|
||||
return res
|
||||
|
||||
if maskedCrc(
|
||||
if checkIntegrity and maskedCrc(
|
||||
output.toOpenArray(written, written + (uncompressed - 1))) != crc:
|
||||
return err(FrameError.crcMismatch)
|
||||
|
||||
|
@ -239,7 +240,8 @@ func uncompressFramed*(
|
|||
let
|
||||
crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
|
||||
|
||||
if maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
|
||||
if checkIntegrity and
|
||||
maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
|
||||
return err(FrameError.crcMismatch)
|
||||
|
||||
let uncompressed = dataLen - 4 # dataLen includes CRC length
|
||||
|
@ -263,7 +265,9 @@ func uncompressFramed*(
|
|||
|
||||
ok((read, written))
|
||||
|
||||
func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] =
|
||||
func decodeFramed*(
|
||||
input: openArray[byte], maxSize = int.high,
|
||||
checkIntegrity = true): seq[byte] =
|
||||
## Uncompress as many frames as possible from `input` and return the
|
||||
## uncompressed output.
|
||||
##
|
||||
|
@ -275,13 +279,13 @@ func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] =
|
|||
let uncompressed = uncompressedLenFramed(input).valueOr:
|
||||
return
|
||||
|
||||
if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
|
||||
if uncompressed > maxSize.uint64:
|
||||
return
|
||||
|
||||
# TODO https://github.com/nim-lang/Nim/issues/19357
|
||||
result = newSeqUninitialized[byte](int uncompressed)
|
||||
|
||||
if uncompressFramed(input, result).isErr():
|
||||
if uncompressFramed(input, result, checkIntegrity = checkIntegrity).isErr():
|
||||
result = @[] # Empty return on error
|
||||
|
||||
template compress*(input: openArray[byte]): seq[byte] {.
|
||||
|
|
|
@ -196,7 +196,7 @@ func uncompressedLenFramed*(input: openArray[byte]): Opt[uint64] =
|
|||
else: 0'u32 # Reserved skippable (for example framing format header)
|
||||
|
||||
if uncompressed > maxUncompressedFrameDataLen:
|
||||
return # Uncomnpressed data has limits (for the known chunk types)
|
||||
return # Uncompressed data has limits (for the known chunk types)
|
||||
|
||||
expected += uncompressed
|
||||
read += dataLen
|
||||
|
|
|
@ -10,9 +10,10 @@ export
|
|||
inputs, multisync, outputs, codec, exceptions
|
||||
|
||||
proc checkCrcAndAppend(
|
||||
output: OutputStream, data: openArray[byte], crc: uint32): bool {.
|
||||
output: OutputStream, data: openArray[byte], crc: uint32,
|
||||
checkIntegrity: bool): bool {.
|
||||
raises: [IOError].}=
|
||||
if maskedCrc(data) == crc:
|
||||
if not checkIntegrity or maskedCrc(data) == crc:
|
||||
output.write(data)
|
||||
return true
|
||||
|
||||
|
@ -85,7 +86,8 @@ proc compressFramed*(input: openArray[byte], output: OutputStream) {.
|
|||
raises: [IOError].} =
|
||||
compressFramed(unsafeMemoryInput(input), output)
|
||||
|
||||
proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
||||
proc uncompressFramed*(
|
||||
input: InputStream, output: OutputStream, checkIntegrity = true) {.
|
||||
fsMultiSync, raises: [IOError, SnappyDecodingError].} =
|
||||
if not input.readable(framingHeader.len):
|
||||
raise newException(UnexpectedEofError, "Failed to read stream header")
|
||||
|
@ -112,7 +114,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
|||
uncompressed = uncompress(input.read(dataLen - 4), tmp).valueOr:
|
||||
raise newException(MalformedSnappyData, "Failed to decompress content")
|
||||
|
||||
if not checkCrcAndAppend(Sync output, tmp.toOpenArray(0, uncompressed-1), crc):
|
||||
if not checkCrcAndAppend(
|
||||
Sync output, tmp.toOpenArray(0, uncompressed-1), crc, checkIntegrity):
|
||||
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
||||
|
||||
elif id == chunkUncompressed:
|
||||
|
@ -123,7 +126,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
|||
raise newException(MalformedSnappyData, "Invalid frame length: " & $dataLen)
|
||||
|
||||
let crc = uint32.fromBytesLE(input.read(4))
|
||||
if not checkCrcAndAppend(Sync output, input.read(dataLen - 4), crc):
|
||||
if not checkCrcAndAppend(
|
||||
Sync output, input.read(dataLen - 4), crc, checkIntegrity):
|
||||
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
||||
|
||||
elif id < 0x80:
|
||||
|
@ -142,6 +146,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
|||
|
||||
output.flush()
|
||||
|
||||
proc uncompressFramed*(input: openArray[byte], output: OutputStream) {.
|
||||
proc uncompressFramed*(
|
||||
input: openArray[byte], output: OutputStream, checkIntegrity = true) {.
|
||||
raises: [IOError, SnappyDecodingError].} =
|
||||
uncompressFramed(unsafeMemoryInput(input), output)
|
||||
uncompressFramed(
|
||||
unsafeMemoryInput(input), output, checkIntegrity = checkIntegrity)
|
||||
|
|
|
@ -49,7 +49,13 @@ proc readSource(sourceName: string): seq[byte] =
|
|||
doAssert(size == f.readBytes(result, 0, size))
|
||||
f.close()
|
||||
|
||||
proc streamsEncode(input: openArray[byte]): seq[byte] =
|
||||
proc memEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
snappy.encode(input)
|
||||
|
||||
proc memDecode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
snappy.decode(input)
|
||||
|
||||
proc streamsEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
let
|
||||
ins = newStringStream(string.fromBytes(input))
|
||||
outs = newStringStream()
|
||||
|
@ -57,21 +63,27 @@ proc streamsEncode(input: openArray[byte]): seq[byte] =
|
|||
outs.setPosition(0)
|
||||
outs.readAll().toBytes() # This line is a hotspot due to missing RVO
|
||||
|
||||
proc faststreamsEncode(input: openArray[byte]): seq[byte] =
|
||||
proc faststreamsEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
let
|
||||
ins = unsafeMemoryInput(input)
|
||||
outs = memoryOutput()
|
||||
compress(ins, outs)
|
||||
outs.getOutput() # This line is a hotspot due to missing RVO
|
||||
|
||||
proc faststreamsEncodeFramed(input: openArray[byte]): seq[byte] =
|
||||
proc memEncodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
snappy.encodeFramed(input)
|
||||
|
||||
proc memDecodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
snappy.decodeFramed(input)
|
||||
|
||||
proc faststreamsEncodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
let
|
||||
ins = unsafeMemoryInput(input)
|
||||
outs = memoryOutput()
|
||||
compressFramed(ins, outs)
|
||||
outs.getOutput() # This line is a hotspot due to missing RVO
|
||||
|
||||
proc faststreamsDecodeFramed(input: openArray[byte]): seq[byte] =
|
||||
proc faststreamsDecodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||
let
|
||||
ins = unsafeMemoryInput(input)
|
||||
outs = memoryOutput()
|
||||
|
@ -87,9 +99,9 @@ proc timedRoundTrip(msg: string, source: openArray[byte], iterations = 100) =
|
|||
|
||||
for i in 0..<iterations:
|
||||
timeit(timers.inMemory[0]):
|
||||
let encodedWithSnappy = snappy.encode(source)
|
||||
let encodedWithSnappy = memEncode(source)
|
||||
timeit(timers.inMemory[1]):
|
||||
let decodedWithSnappy = snappy.decode(encodedWithSnappy)
|
||||
let decodedWithSnappy = memDecode(encodedWithSnappy)
|
||||
|
||||
timeit(timers.fastStreams[0]):
|
||||
let encodedWithFastStreams = faststreamsEncode(source)
|
||||
|
@ -122,9 +134,9 @@ proc timedRoundTripFramed(msg: string, source: openArray[byte], iterations = 100
|
|||
|
||||
for i in 0..<iterations:
|
||||
timeit(timers.inMemory[0]):
|
||||
let encodedWithSnappy = snappy.encodeFramed(source)
|
||||
let encodedWithSnappy = memEncodeFramed(source)
|
||||
timeit(timers.inMemory[1]):
|
||||
let decodedWithSnappy = snappy.decodeFramed(encodedWithSnappy)
|
||||
let decodedWithSnappy = memDecodeFramed(encodedWithSnappy)
|
||||
|
||||
timeit(timers.fastStreams[0]):
|
||||
let encodedWithFastStreams = faststreamsEncodeFramed(source)
|
||||
|
@ -163,6 +175,6 @@ roundTrip(dataDir & "geo.protodata")
|
|||
roundTrip(dataDir & "kppkn.gtb")
|
||||
roundTrip(dataDir & "Mark.Twain-Tom.Sawyer.txt")
|
||||
|
||||
# ncli_db --db:db dumpState 0x114a593d248af2ad05580299b803657d4b78a3b6578f47425cc396c9644e800e 2560000
|
||||
if fileExists(dataDir & "state-2560000-114a593d-0d5e08e8.ssz"):
|
||||
roundTrip(dataDir & "state-2560000-114a593d-0d5e08e8.ssz", 50)
|
||||
# ncli_db --db:db rewindState 0x488b7150f092949f1dfc3137c4e2909a20fe9739d67a5185d75dbd0440c51edd 6800000
|
||||
if fileExists(dataDir & "state-6800000-488b7150-d613b584.ssz"):
|
||||
roundTrip(dataDir & "state-6800000-488b7150-d613b584.ssz", 50)
|
||||
|
|
|
@ -94,15 +94,15 @@ proc checkInvalidFramed(payload: openArray[byte], uncompressedLen: int) =
|
|||
|
||||
check uncompressedLenFramed(payload).isNone
|
||||
|
||||
proc checkValidFramed(payload: openArray[byte], expected: openArray[byte]) =
|
||||
proc checkValidFramed(payload: openArray[byte], expected: openArray[byte], checkIntegrity = true) =
|
||||
var tmp = newSeqUninitialized[byte](expected.len)
|
||||
check:
|
||||
decodeFramed(payload) == expected
|
||||
uncompressFramed(payload, tmp).get() == (payload.len, expected.len)
|
||||
decodeFramed(payload, checkIntegrity = checkIntegrity) == expected
|
||||
uncompressFramed(payload, tmp, checkIntegrity = checkIntegrity).get() == (payload.len, expected.len)
|
||||
tmp == expected
|
||||
|
||||
var output = memoryOutput()
|
||||
uncompressFramed(unsafeMemoryInput(payload), output)
|
||||
uncompressFramed(unsafeMemoryInput(payload), output, checkIntegrity = checkIntegrity)
|
||||
|
||||
check:
|
||||
output.getOutput() == expected
|
||||
|
@ -176,6 +176,25 @@ suite "framing":
|
|||
checkValidFramed(framed, data)
|
||||
checkValidFramed(framedCompressed, data)
|
||||
|
||||
test "checkIntegrity false":
|
||||
let
|
||||
data = newSeq[byte](maxUncompressedFrameDataLen)
|
||||
compressed = snappy.encode(data)
|
||||
framed =
|
||||
@framingHeader & @[byte chunkUncompressed] &
|
||||
@((data.len + 4).uint32.toBytesLE().toOpenArray(0, 2)) &
|
||||
@([byte 0, 0, 0, 0]) &
|
||||
data
|
||||
|
||||
framedCompressed =
|
||||
@framingHeader & @[byte chunkCompressed] &
|
||||
@((compressed.len + 4).uint32.toBytesLE().toOpenArray(0, 2)) &
|
||||
@([byte 0, 0, 0, 0]) &
|
||||
compressed
|
||||
|
||||
checkValidFramed(framed, data, checkIntegrity = false)
|
||||
checkValidFramed(framedCompressed, data, checkIntegrity = false)
|
||||
|
||||
test "invalid header":
|
||||
checkInvalidFramed([byte 3, 2, 1, 0], 0)
|
||||
|
||||
|
|
Loading…
Reference in New Issue