working snappy framing compress prototype
This commit is contained in:
parent
45b8258af4
commit
f08cbf9dc5
|
@ -474,7 +474,7 @@ proc snappyUncompress*(src: openArray[byte], dst: var openArray[byte]): int =
|
|||
|
||||
if dst.len < int(len):
|
||||
return
|
||||
|
||||
|
||||
if int(len) > 0:
|
||||
let errCode = decode(dst.toOpenArray(0, len.int-1), src[bytesRead..^1])
|
||||
if errCode != 0:
|
||||
|
|
|
@ -1,20 +1,20 @@
|
|||
import
|
||||
../snappy,
|
||||
../snappy, ../snappy/utils,
|
||||
../tests/openarrays_snappy as oas,
|
||||
faststreams/[output_stream, input_stream],
|
||||
stew/endians2
|
||||
|
||||
{.compile: "crc32c.c".}
|
||||
# TODO: we don't have a native implementation of CRC32C algorithm yet.
|
||||
# we can't use nimPNG CRC32
|
||||
proc masked_crc32c(buf: ptr byte, len: cuint): cuint {.cdecl, importc.}
|
||||
proc masked_crc32c(buf: ptr byte, len: uint): cuint {.cdecl, importc.}
|
||||
|
||||
func checkCrc32(data: openArray[byte], expected: uint32): bool =
|
||||
let actual = masked_crc32c(data[0].unsafeAddr, data.len.cuint)
|
||||
let actual = masked_crc32c(data[0].unsafeAddr, data.len.uint)
|
||||
result = actual == expected
|
||||
|
||||
proc checkData(data: openArray[byte], crc: uint32, output: OutputStreamVar): bool =
|
||||
if not checkCrc32(data, crc):
|
||||
echo "BAD CRC"
|
||||
return
|
||||
|
||||
output.append(data)
|
||||
|
@ -26,9 +26,11 @@ func seekForward(_: openArray[byte]) =
|
|||
|
||||
const
|
||||
# maximum chunk data length
|
||||
MAX_DATA_LEN = 16777215
|
||||
# MAX_DATA_LEN = 16777215
|
||||
# maximum uncompressed data length excluding checksum
|
||||
MAX_UNCOMPRESSED_DATA_LEN = 65536
|
||||
# maximum uncompressed data length excluding checksum
|
||||
MAX_COMPRESSED_DATA_LEN = maxEncodedLen(MAX_UNCOMPRESSED_DATA_LEN)
|
||||
|
||||
COMPRESSED_DATA_IDENTIFIER = 0x00
|
||||
UNCOMPRESSED_DATA_IDENTIFIER = 0x01
|
||||
|
@ -37,9 +39,13 @@ const
|
|||
STREAM_HEADER = "\xff\x06\x00\x00sNaPpY"
|
||||
|
||||
proc framing_format_uncompress*(input: ByteStreamVar, output: OutputStreamVar) =
|
||||
if input[].ensureBytes(STREAM_HEADER.len):
|
||||
if input.readBytes(STREAM_HEADER.len) != STREAM_HEADER.toOpenArrayByte(0, STREAM_HEADER.len-1):
|
||||
return
|
||||
if not input[].ensureBytes(STREAM_HEADER.len):
|
||||
# debugEcho "NOT A SNAPPY STREAM"
|
||||
return
|
||||
|
||||
if input.readBytes(STREAM_HEADER.len) != STREAM_HEADER.toOpenArrayByte(0, STREAM_HEADER.len-1):
|
||||
# debugEcho "BAD HEADER"
|
||||
return
|
||||
|
||||
var uncompressedData = newSeq[byte](MAX_UNCOMPRESSED_DATA_LEN)
|
||||
|
||||
|
@ -48,10 +54,22 @@ proc framing_format_uncompress*(input: ByteStreamVar, output: OutputStreamVar) =
|
|||
break
|
||||
|
||||
# ensure bytes
|
||||
if not input[].ensureBytes(4):
|
||||
# debugEcho "CHK 1 NOT ENOUGH BYTES"
|
||||
return
|
||||
|
||||
let x = uint32.fromBytesLE(input.readBytes(4))
|
||||
let id = x and 0xFF
|
||||
let dataLen = (x shr 8).int
|
||||
|
||||
if not input[].ensureBytes(dataLen):
|
||||
# debugEcho "CHK 2 NOT ENOUGH BYTES"
|
||||
# debugEcho "request: ", dataLen
|
||||
# debugEcho "pos: ", input[].pos
|
||||
# debugEcho "endPos: ", input.endPos
|
||||
# debugEcho "distance: ", input.endPos - input[].pos
|
||||
return
|
||||
|
||||
if id == COMPRESSED_DATA_IDENTIFIER:
|
||||
let crc = uint32.fromBytesLE(input.readBytes(4))
|
||||
|
||||
|
@ -60,21 +78,66 @@ proc framing_format_uncompress*(input: ByteStreamVar, output: OutputStreamVar) =
|
|||
uncompressedData
|
||||
)
|
||||
|
||||
if uncompressedLen < 0:
|
||||
if uncompressedLen <= 0:
|
||||
# debugEcho "BAD LEN"
|
||||
return
|
||||
|
||||
if not checkData(uncompressedData.toOpenArray(0, uncompressedLen-1), crc, output):
|
||||
# debugEcho "BAD CRC"
|
||||
return
|
||||
|
||||
elif id == UNCOMPRESSED_DATA_IDENTIFIER:
|
||||
let crc = uint32.fromBytesLE(input.readBytes(4))
|
||||
if not checkData(input.readBytes(dataLen - 4), crc, output):
|
||||
# debugEcho "BAD CRC UNCOMP"
|
||||
return
|
||||
elif id < 0x80:
|
||||
# Reserved unskippable chunks (chunk types 0x02-0x7f)
|
||||
# if we encounter this type of chunk, stop decoding
|
||||
# the spec says it is an error
|
||||
# debugEcho "BAD CHUNK"
|
||||
return
|
||||
else:
|
||||
# Reserved skippable chunks (chunk types 0x80-0xfe)
|
||||
# including STREAM_HEADER (0xff) should be skipped
|
||||
seekForward(input.readBytes(dataLen))
|
||||
|
||||
output.flush()
|
||||
|
||||
proc processFrame*(output: OutputStreamVar, dst: var openArray[byte], src: openArray[byte]) =
|
||||
let
|
||||
crc = masked_crc32c(src[0].unsafeAddr, src.len.uint)
|
||||
varintLen = oas.putUvarint(dst, src.len.uint64)
|
||||
encodedLen = oas.encodeBlock(dst.toOpenArray(varintLen, dst.len-1), src) + varintLen
|
||||
|
||||
if encodedLen >= (src.len - (src.len div 8)):
|
||||
let frameLen = src.len + 4 # include 4 bytes crc
|
||||
let header = (uint32(frameLen) shl 8) or UNCOMPRESSED_DATA_IDENTIFIER.uint32
|
||||
output.append toBytesLE(header)
|
||||
output.append toBytesLE(crc)
|
||||
output.append src
|
||||
else:
|
||||
let frameLen = encodedLen + 4 # include 4 bytes crc
|
||||
let header = (uint32(frameLen) shl 8) or COMPRESSED_DATA_IDENTIFIER.uint32
|
||||
output.append toBytesLE(header)
|
||||
output.append toBytesLE(crc)
|
||||
output.append dst.toOpenArray(0, encodedLen-1)
|
||||
|
||||
proc framing_format_compress*(output: OutputStreamVar, src: openArray[byte]) =
|
||||
const maxFrameSize = MAX_UNCOMPRESSED_DATA_LEN
|
||||
var compressedData = newSeq[byte](MAX_COMPRESSED_DATA_LEN)
|
||||
|
||||
# write the magic identifier
|
||||
output.append(STREAM_HEADER)
|
||||
|
||||
var
|
||||
p = 0
|
||||
len = src.len
|
||||
|
||||
while len > 0:
|
||||
let frameSize = min(len, maxFrameSize)
|
||||
processFrame(output, compressedData, src[p..<p+frameSize])
|
||||
inc(p, frameSize)
|
||||
dec(len, frameSize)
|
||||
|
||||
output.flush()
|
||||
|
|
|
@ -7,7 +7,7 @@ const
|
|||
inputMargin = 16 - 1
|
||||
|
||||
# PutUvarint encodes a uint64 into buf and returns the number of bytes written.
|
||||
func putUvarint(buf: var openArray[byte], x: uint64): int =
|
||||
func putUvarint*(buf: var openArray[byte], x: uint64): int =
|
||||
var
|
||||
i = 0
|
||||
x = x
|
||||
|
@ -169,7 +169,7 @@ func hash(u, shift: uint32): uint32 =
|
|||
# It also assumes that:
|
||||
# len(dst) >= MaxEncodedLen(len(src)) and
|
||||
# minNonLiteralBlockSize <= len(src) and len(src) <= maxBlockSize
|
||||
func encodeBlock(dst, src: var openArray[byte]): int =
|
||||
func encodeBlock*(dst: var openArray[byte], src: openArray[byte]): int =
|
||||
# Initialize the hash table. Its size ranges from 1shl8 to 1shl14 inclusive.
|
||||
# The table element type is uint16, as s < sLimit and sLimit < len(src)
|
||||
# and len(src) <= maxBlockSize and maxBlockSize == 65536.
|
||||
|
|
|
@ -18,6 +18,26 @@ template check_uncompress(source, target: string) =
|
|||
else:
|
||||
check true
|
||||
|
||||
template check_roundtrip(source) =
|
||||
test "roundtrip " & source:
|
||||
let expected = readFile(uncompDir & source)
|
||||
var ost = OutputStream.init
|
||||
|
||||
framing_format_compress(ost, expected.toOpenArrayByte(0, expected.len-1))
|
||||
let compressed = ost.getOutput(string)
|
||||
debugEcho "compressed len: ", compressed.len
|
||||
|
||||
var inst = memoryStream(compressed)
|
||||
var outst = OutputStream.init
|
||||
framing_format_uncompress(inst, outst)
|
||||
let actual = outst.getOutput(string)
|
||||
check actual.len == expected.len
|
||||
|
||||
if actual != expected:
|
||||
check false
|
||||
else:
|
||||
check true
|
||||
|
||||
proc main() =
|
||||
suite "framing":
|
||||
setup:
|
||||
|
@ -29,4 +49,19 @@ proc main() =
|
|||
check_uncompress("alice29.txt.sz-64k", "alice29.txt")
|
||||
check_uncompress("house.jpg.sz", "house.jpg")
|
||||
|
||||
check_roundtrip("alice29.txt")
|
||||
check_roundtrip("house.jpg")
|
||||
check_roundtrip("html")
|
||||
check_roundtrip("urls.10K")
|
||||
check_roundtrip("fireworks.jpeg")
|
||||
|
||||
check_roundtrip("paper-100k.pdf")
|
||||
|
||||
check_roundtrip("html_x_4")
|
||||
check_roundtrip("asyoulik.txt")
|
||||
check_roundtrip("lcet10.txt")
|
||||
check_roundtrip("plrabn12.txt")
|
||||
check_roundtrip("geo.protodata")
|
||||
check_roundtrip("kppkn.gtb")
|
||||
check_roundtrip("Mark.Twain-Tom.Sawyer.txt")
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue