allow skipping crc32 integrity check (#22)
Some data is already protected by stronger checks. CRC32, on the other hand, significantly slows down framed reading - i.e. it is 2.5x slower:

```
118.853 / 41.781, 129.115 / 0.000, 188.438 / 0.000, 90.565 / 44.371, 50, 115613038, state-6800000-488b7150-d613b584.ssz
186.600 / 97.202, 191.935 / 123.325, 0.000 / 0.000, 0.000 / 0.000, 50, 115613038, state-6800000-488b7150-d613b584.ssz(framed)
```

The difference between unframed and framed decoding is the CRC32 check - it takes ~50 ms on a decent laptop for a 110 MB file.
This commit is contained in:
parent
e36f19d886
commit
ecbcee1d10
|
@ -7,3 +7,4 @@ master
|
||||||
# Fuzzer-generated files
|
# Fuzzer-generated files
|
||||||
crash-*
|
crash-*
|
||||||
|
|
||||||
|
build/
|
||||||
|
|
20
snappy.nim
20
snappy.nim
|
@ -106,7 +106,7 @@ func uncompress*(input: openArray[byte], output: var openArray[byte]):
|
||||||
if written.uint64 != lenU32:
|
if written.uint64 != lenU32:
|
||||||
return err(CodecError.invalidInput) # Header does not match content
|
return err(CodecError.invalidInput) # Header does not match content
|
||||||
|
|
||||||
return ok(written)
|
ok(written)
|
||||||
|
|
||||||
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
|
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
|
||||||
## Decode input returning the uncompressed output. On error, return an empty
|
## Decode input returning the uncompressed output. On error, return an empty
|
||||||
|
@ -117,7 +117,7 @@ func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
|
||||||
let uncompressed = uncompressedLen(input).valueOr:
|
let uncompressed = uncompressedLen(input).valueOr:
|
||||||
return
|
return
|
||||||
|
|
||||||
if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
|
if uncompressed > maxSize or uncompressed > int.high.uint64:
|
||||||
return
|
return
|
||||||
|
|
||||||
# TODO https://github.com/nim-lang/Nim/issues/19357
|
# TODO https://github.com/nim-lang/Nim/issues/19357
|
||||||
|
@ -166,7 +166,8 @@ func encodeFramed*(input: openArray[byte]): seq[byte] =
|
||||||
result.setLen(written)
|
result.setLen(written)
|
||||||
|
|
||||||
func uncompressFramed*(
|
func uncompressFramed*(
|
||||||
input: openArray[byte], output: var openArray[byte], checkHeader = true):
|
input: openArray[byte], output: var openArray[byte], checkHeader = true,
|
||||||
|
checkIntegrity = true):
|
||||||
Result[tuple[read: int, written: int], FrameError] =
|
Result[tuple[read: int, written: int], FrameError] =
|
||||||
## Uncompress as many frames as possible from `input` and write them to
|
## Uncompress as many frames as possible from `input` and write them to
|
||||||
## `output`, returning the number of bytes read and written.
|
## `output`, returning the number of bytes read and written.
|
||||||
|
@ -226,7 +227,7 @@ func uncompressFramed*(
|
||||||
of CodecError.invalidInput: err(FrameError.invalidInput)
|
of CodecError.invalidInput: err(FrameError.invalidInput)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
if maskedCrc(
|
if checkIntegrity and maskedCrc(
|
||||||
output.toOpenArray(written, written + (uncompressed - 1))) != crc:
|
output.toOpenArray(written, written + (uncompressed - 1))) != crc:
|
||||||
return err(FrameError.crcMismatch)
|
return err(FrameError.crcMismatch)
|
||||||
|
|
||||||
|
@ -239,7 +240,8 @@ func uncompressFramed*(
|
||||||
let
|
let
|
||||||
crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
|
crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
|
||||||
|
|
||||||
if maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
|
if checkIntegrity and
|
||||||
|
maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
|
||||||
return err(FrameError.crcMismatch)
|
return err(FrameError.crcMismatch)
|
||||||
|
|
||||||
let uncompressed = dataLen - 4 # dataLen includes CRC length
|
let uncompressed = dataLen - 4 # dataLen includes CRC length
|
||||||
|
@ -263,7 +265,9 @@ func uncompressFramed*(
|
||||||
|
|
||||||
ok((read, written))
|
ok((read, written))
|
||||||
|
|
||||||
func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] =
|
func decodeFramed*(
|
||||||
|
input: openArray[byte], maxSize = int.high,
|
||||||
|
checkIntegrity = true): seq[byte] =
|
||||||
## Uncompress as many frames as possible from `input` and return the
|
## Uncompress as many frames as possible from `input` and return the
|
||||||
## uncompressed output.
|
## uncompressed output.
|
||||||
##
|
##
|
||||||
|
@ -275,13 +279,13 @@ func decodeFramed*(input: openArray[byte], maxSize = int.high): seq[byte] =
|
||||||
let uncompressed = uncompressedLenFramed(input).valueOr:
|
let uncompressed = uncompressedLenFramed(input).valueOr:
|
||||||
return
|
return
|
||||||
|
|
||||||
if uncompressed > maxSize.uint64 or uncompressed > int.high.uint64:
|
if uncompressed > maxSize.uint64:
|
||||||
return
|
return
|
||||||
|
|
||||||
# TODO https://github.com/nim-lang/Nim/issues/19357
|
# TODO https://github.com/nim-lang/Nim/issues/19357
|
||||||
result = newSeqUninitialized[byte](int uncompressed)
|
result = newSeqUninitialized[byte](int uncompressed)
|
||||||
|
|
||||||
if uncompressFramed(input, result).isErr():
|
if uncompressFramed(input, result, checkIntegrity = checkIntegrity).isErr():
|
||||||
result = @[] # Empty return on error
|
result = @[] # Empty return on error
|
||||||
|
|
||||||
template compress*(input: openArray[byte]): seq[byte] {.
|
template compress*(input: openArray[byte]): seq[byte] {.
|
||||||
|
|
|
@ -196,7 +196,7 @@ func uncompressedLenFramed*(input: openArray[byte]): Opt[uint64] =
|
||||||
else: 0'u32 # Reserved skippable (for example framing format header)
|
else: 0'u32 # Reserved skippable (for example framing format header)
|
||||||
|
|
||||||
if uncompressed > maxUncompressedFrameDataLen:
|
if uncompressed > maxUncompressedFrameDataLen:
|
||||||
return # Uncomnpressed data has limits (for the known chunk types)
|
return # Uncompressed data has limits (for the known chunk types)
|
||||||
|
|
||||||
expected += uncompressed
|
expected += uncompressed
|
||||||
read += dataLen
|
read += dataLen
|
||||||
|
|
|
@ -10,9 +10,10 @@ export
|
||||||
inputs, multisync, outputs, codec, exceptions
|
inputs, multisync, outputs, codec, exceptions
|
||||||
|
|
||||||
proc checkCrcAndAppend(
|
proc checkCrcAndAppend(
|
||||||
output: OutputStream, data: openArray[byte], crc: uint32): bool {.
|
output: OutputStream, data: openArray[byte], crc: uint32,
|
||||||
|
checkIntegrity: bool): bool {.
|
||||||
raises: [IOError].}=
|
raises: [IOError].}=
|
||||||
if maskedCrc(data) == crc:
|
if not checkIntegrity or maskedCrc(data) == crc:
|
||||||
output.write(data)
|
output.write(data)
|
||||||
return true
|
return true
|
||||||
|
|
||||||
|
@ -85,7 +86,8 @@ proc compressFramed*(input: openArray[byte], output: OutputStream) {.
|
||||||
raises: [IOError].} =
|
raises: [IOError].} =
|
||||||
compressFramed(unsafeMemoryInput(input), output)
|
compressFramed(unsafeMemoryInput(input), output)
|
||||||
|
|
||||||
proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
proc uncompressFramed*(
|
||||||
|
input: InputStream, output: OutputStream, checkIntegrity = true) {.
|
||||||
fsMultiSync, raises: [IOError, SnappyDecodingError].} =
|
fsMultiSync, raises: [IOError, SnappyDecodingError].} =
|
||||||
if not input.readable(framingHeader.len):
|
if not input.readable(framingHeader.len):
|
||||||
raise newException(UnexpectedEofError, "Failed to read stream header")
|
raise newException(UnexpectedEofError, "Failed to read stream header")
|
||||||
|
@ -112,7 +114,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
||||||
uncompressed = uncompress(input.read(dataLen - 4), tmp).valueOr:
|
uncompressed = uncompress(input.read(dataLen - 4), tmp).valueOr:
|
||||||
raise newException(MalformedSnappyData, "Failed to decompress content")
|
raise newException(MalformedSnappyData, "Failed to decompress content")
|
||||||
|
|
||||||
if not checkCrcAndAppend(Sync output, tmp.toOpenArray(0, uncompressed-1), crc):
|
if not checkCrcAndAppend(
|
||||||
|
Sync output, tmp.toOpenArray(0, uncompressed-1), crc, checkIntegrity):
|
||||||
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
||||||
|
|
||||||
elif id == chunkUncompressed:
|
elif id == chunkUncompressed:
|
||||||
|
@ -123,7 +126,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
||||||
raise newException(MalformedSnappyData, "Invalid frame length: " & $dataLen)
|
raise newException(MalformedSnappyData, "Invalid frame length: " & $dataLen)
|
||||||
|
|
||||||
let crc = uint32.fromBytesLE(input.read(4))
|
let crc = uint32.fromBytesLE(input.read(4))
|
||||||
if not checkCrcAndAppend(Sync output, input.read(dataLen - 4), crc):
|
if not checkCrcAndAppend(
|
||||||
|
Sync output, input.read(dataLen - 4), crc, checkIntegrity):
|
||||||
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
raise newException(MalformedSnappyData, "Content CRC checksum failed")
|
||||||
|
|
||||||
elif id < 0x80:
|
elif id < 0x80:
|
||||||
|
@ -142,6 +146,8 @@ proc uncompressFramed*(input: InputStream, output: OutputStream) {.
|
||||||
|
|
||||||
output.flush()
|
output.flush()
|
||||||
|
|
||||||
proc uncompressFramed*(input: openArray[byte], output: OutputStream) {.
|
proc uncompressFramed*(
|
||||||
|
input: openArray[byte], output: OutputStream, checkIntegrity = true) {.
|
||||||
raises: [IOError, SnappyDecodingError].} =
|
raises: [IOError, SnappyDecodingError].} =
|
||||||
uncompressFramed(unsafeMemoryInput(input), output)
|
uncompressFramed(
|
||||||
|
unsafeMemoryInput(input), output, checkIntegrity = checkIntegrity)
|
||||||
|
|
|
@ -49,7 +49,13 @@ proc readSource(sourceName: string): seq[byte] =
|
||||||
doAssert(size == f.readBytes(result, 0, size))
|
doAssert(size == f.readBytes(result, 0, size))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
proc streamsEncode(input: openArray[byte]): seq[byte] =
|
proc memEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
|
snappy.encode(input)
|
||||||
|
|
||||||
|
proc memDecode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
|
snappy.decode(input)
|
||||||
|
|
||||||
|
proc streamsEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
let
|
let
|
||||||
ins = newStringStream(string.fromBytes(input))
|
ins = newStringStream(string.fromBytes(input))
|
||||||
outs = newStringStream()
|
outs = newStringStream()
|
||||||
|
@ -57,21 +63,27 @@ proc streamsEncode(input: openArray[byte]): seq[byte] =
|
||||||
outs.setPosition(0)
|
outs.setPosition(0)
|
||||||
outs.readAll().toBytes() # This line is a hotspot due to missing RVO
|
outs.readAll().toBytes() # This line is a hotspot due to missing RVO
|
||||||
|
|
||||||
proc faststreamsEncode(input: openArray[byte]): seq[byte] =
|
proc faststreamsEncode(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
let
|
let
|
||||||
ins = unsafeMemoryInput(input)
|
ins = unsafeMemoryInput(input)
|
||||||
outs = memoryOutput()
|
outs = memoryOutput()
|
||||||
compress(ins, outs)
|
compress(ins, outs)
|
||||||
outs.getOutput() # This line is a hotspot due to missing RVO
|
outs.getOutput() # This line is a hotspot due to missing RVO
|
||||||
|
|
||||||
proc faststreamsEncodeFramed(input: openArray[byte]): seq[byte] =
|
proc memEncodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
|
snappy.encodeFramed(input)
|
||||||
|
|
||||||
|
proc memDecodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
|
snappy.decodeFramed(input)
|
||||||
|
|
||||||
|
proc faststreamsEncodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
let
|
let
|
||||||
ins = unsafeMemoryInput(input)
|
ins = unsafeMemoryInput(input)
|
||||||
outs = memoryOutput()
|
outs = memoryOutput()
|
||||||
compressFramed(ins, outs)
|
compressFramed(ins, outs)
|
||||||
outs.getOutput() # This line is a hotspot due to missing RVO
|
outs.getOutput() # This line is a hotspot due to missing RVO
|
||||||
|
|
||||||
proc faststreamsDecodeFramed(input: openArray[byte]): seq[byte] =
|
proc faststreamsDecodeFramed(input: openArray[byte]): seq[byte] {.noinline.} =
|
||||||
let
|
let
|
||||||
ins = unsafeMemoryInput(input)
|
ins = unsafeMemoryInput(input)
|
||||||
outs = memoryOutput()
|
outs = memoryOutput()
|
||||||
|
@ -87,9 +99,9 @@ proc timedRoundTrip(msg: string, source: openArray[byte], iterations = 100) =
|
||||||
|
|
||||||
for i in 0..<iterations:
|
for i in 0..<iterations:
|
||||||
timeit(timers.inMemory[0]):
|
timeit(timers.inMemory[0]):
|
||||||
let encodedWithSnappy = snappy.encode(source)
|
let encodedWithSnappy = memEncode(source)
|
||||||
timeit(timers.inMemory[1]):
|
timeit(timers.inMemory[1]):
|
||||||
let decodedWithSnappy = snappy.decode(encodedWithSnappy)
|
let decodedWithSnappy = memDecode(encodedWithSnappy)
|
||||||
|
|
||||||
timeit(timers.fastStreams[0]):
|
timeit(timers.fastStreams[0]):
|
||||||
let encodedWithFastStreams = faststreamsEncode(source)
|
let encodedWithFastStreams = faststreamsEncode(source)
|
||||||
|
@ -122,9 +134,9 @@ proc timedRoundTripFramed(msg: string, source: openArray[byte], iterations = 100
|
||||||
|
|
||||||
for i in 0..<iterations:
|
for i in 0..<iterations:
|
||||||
timeit(timers.inMemory[0]):
|
timeit(timers.inMemory[0]):
|
||||||
let encodedWithSnappy = snappy.encodeFramed(source)
|
let encodedWithSnappy = memEncodeFramed(source)
|
||||||
timeit(timers.inMemory[1]):
|
timeit(timers.inMemory[1]):
|
||||||
let decodedWithSnappy = snappy.decodeFramed(encodedWithSnappy)
|
let decodedWithSnappy = memDecodeFramed(encodedWithSnappy)
|
||||||
|
|
||||||
timeit(timers.fastStreams[0]):
|
timeit(timers.fastStreams[0]):
|
||||||
let encodedWithFastStreams = faststreamsEncodeFramed(source)
|
let encodedWithFastStreams = faststreamsEncodeFramed(source)
|
||||||
|
@ -163,6 +175,6 @@ roundTrip(dataDir & "geo.protodata")
|
||||||
roundTrip(dataDir & "kppkn.gtb")
|
roundTrip(dataDir & "kppkn.gtb")
|
||||||
roundTrip(dataDir & "Mark.Twain-Tom.Sawyer.txt")
|
roundTrip(dataDir & "Mark.Twain-Tom.Sawyer.txt")
|
||||||
|
|
||||||
# ncli_db --db:db dumpState 0x114a593d248af2ad05580299b803657d4b78a3b6578f47425cc396c9644e800e 2560000
|
# ncli_db --db:db rewindState 0x488b7150f092949f1dfc3137c4e2909a20fe9739d67a5185d75dbd0440c51edd 6800000
|
||||||
if fileExists(dataDir & "state-2560000-114a593d-0d5e08e8.ssz"):
|
if fileExists(dataDir & "state-6800000-488b7150-d613b584.ssz"):
|
||||||
roundTrip(dataDir & "state-2560000-114a593d-0d5e08e8.ssz", 50)
|
roundTrip(dataDir & "state-6800000-488b7150-d613b584.ssz", 50)
|
||||||
|
|
|
@ -94,15 +94,15 @@ proc checkInvalidFramed(payload: openArray[byte], uncompressedLen: int) =
|
||||||
|
|
||||||
check uncompressedLenFramed(payload).isNone
|
check uncompressedLenFramed(payload).isNone
|
||||||
|
|
||||||
proc checkValidFramed(payload: openArray[byte], expected: openArray[byte]) =
|
proc checkValidFramed(payload: openArray[byte], expected: openArray[byte], checkIntegrity = true) =
|
||||||
var tmp = newSeqUninitialized[byte](expected.len)
|
var tmp = newSeqUninitialized[byte](expected.len)
|
||||||
check:
|
check:
|
||||||
decodeFramed(payload) == expected
|
decodeFramed(payload, checkIntegrity = checkIntegrity) == expected
|
||||||
uncompressFramed(payload, tmp).get() == (payload.len, expected.len)
|
uncompressFramed(payload, tmp, checkIntegrity = checkIntegrity).get() == (payload.len, expected.len)
|
||||||
tmp == expected
|
tmp == expected
|
||||||
|
|
||||||
var output = memoryOutput()
|
var output = memoryOutput()
|
||||||
uncompressFramed(unsafeMemoryInput(payload), output)
|
uncompressFramed(unsafeMemoryInput(payload), output, checkIntegrity = checkIntegrity)
|
||||||
|
|
||||||
check:
|
check:
|
||||||
output.getOutput() == expected
|
output.getOutput() == expected
|
||||||
|
@ -176,6 +176,25 @@ suite "framing":
|
||||||
checkValidFramed(framed, data)
|
checkValidFramed(framed, data)
|
||||||
checkValidFramed(framedCompressed, data)
|
checkValidFramed(framedCompressed, data)
|
||||||
|
|
||||||
|
test "checkIntegrity false":
|
||||||
|
let
|
||||||
|
data = newSeq[byte](maxUncompressedFrameDataLen)
|
||||||
|
compressed = snappy.encode(data)
|
||||||
|
framed =
|
||||||
|
@framingHeader & @[byte chunkUncompressed] &
|
||||||
|
@((data.len + 4).uint32.toBytesLE().toOpenArray(0, 2)) &
|
||||||
|
@([byte 0, 0, 0, 0]) &
|
||||||
|
data
|
||||||
|
|
||||||
|
framedCompressed =
|
||||||
|
@framingHeader & @[byte chunkCompressed] &
|
||||||
|
@((compressed.len + 4).uint32.toBytesLE().toOpenArray(0, 2)) &
|
||||||
|
@([byte 0, 0, 0, 0]) &
|
||||||
|
compressed
|
||||||
|
|
||||||
|
checkValidFramed(framed, data, checkIntegrity = false)
|
||||||
|
checkValidFramed(framedCompressed, data, checkIntegrity = false)
|
||||||
|
|
||||||
test "invalid header":
|
test "invalid header":
|
||||||
checkInvalidFramed([byte 3, 2, 1, 0], 0)
|
checkInvalidFramed([byte 3, 2, 1, 0], 0)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue