checkpoint

This commit is contained in:
Ryan Oldenburg 2020-11-01 16:16:36 -06:00
parent f69226a677
commit 0ad33208b4
6 changed files with 202 additions and 124 deletions

View File

@ -65,6 +65,9 @@ func readBits*(b: var BitStream, bits: int): uint16 =
result = result or (b.read(bits - 8).uint16 shl 8)
func skipBits*(b: var BitStream, bits: int) =
if b.bitPos == 8 and bits > 0:
b.incBytePos()
var bitsLeftToSkip = bits
while bitsLeftToSkip > 0:
let bitsLeftInByte = 8 - b.bitPos
@ -99,20 +102,25 @@ func readBytes*(b: var BitStream, dst: pointer, len: int) =
b.skipBits(len * 8)
func addBit*(b: var BitStream, bit: uint8) =
# debugEcho "addBit ", bit
if b.bitPos == 8:
b.incBytePos()
b.data.setLen(b.data.len + 1)
b.data[b.bytePos] = b.data[b.bytePos] or (bit shl b.bitPos)
inc b.bitPos
func addBits*(b: var BitStream, value: uint16, bits: uint8) =
# debugEcho "addBits ", value, " ", bits
for i in 0.uint8 ..< bits:
b.addBit(((value shr i) and 1).uint8)
func addBits*(b: var BitStream, value: uint16, bits: int) =
assert bits <= 16
func addBitsReverse*(b: var BitStream, value: uint16, bits: uint8)=
for i in 0.uint8 ..< bits:
# debugEcho "addBitReverse ", ((value shr (bits - 1 - i)) and 1).uint8
b.addBit(((value shr (bits - 1 - i)) and 1).uint8)
var bitsRemaining = bits
for i in 0 ..< 3: # 16 bits cannot spread out across more than 3 bytes
if bitsRemaining == 0:
break
if b.bitPos == 8:
b.incBytePos()
let
bitsLeftInByte = 8 - b.bitPos
bitsAdded = min(bitsLeftInByte, bitsRemaining)
bitsToAdd = ((value shr (bits - bitsRemaining)) shl b.bitPos).uint8
b.data[b.bytePos] = b.data[b.bytePos] or bitsToAdd
inc(b.bitPos, bitsAdded)
dec(bitsRemaining, bitsAdded)

View File

@ -1,8 +1,27 @@
import zippyerror, common, deques, bitstreams, strutils
const
minLitLenCodes = 286
minDistCodes = 30
# 256-entry lookup table: bitReverseTable[i] is the byte i with its 8 bits in
# reverse order (e.g. 0x01 -> 0x80, 0x02 -> 0x40, 0x10 -> 0x08).
bitReverseTable = [
0x00.uint8, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
]
# minLitLenCodes = 286
# minDistCodes = 30
# blockSize = 65535
# windowSize = 32768
@ -115,9 +134,9 @@ func newHuffmanTree(
walk(n.kids[1], d + 1, (code shl 1) or 1)
walk(root, 0, 0)
for i, code in codes:
if depths[i] != 0:
debugEcho toBin(code.int, 16), " ", depths[i], " ", i
# for i, code in codes:
# if depths[i] != 0:
# debugEcho toBin(code.int, 16), " ", depths[i], " ", i
var depthCounts: array[16, uint8]
for d in depths:
@ -125,27 +144,33 @@ func newHuffmanTree(
depthCounts[0] = 0
debugEcho "c depthCounts: ", depthCounts
# debugEcho "c depthCounts: ", depthCounts
var nextCode = newSeq[uint16](maxBitLen + 1)
var nextCode: array[16, uint16]
for i in 1.uint8 .. maxBitLen:
nextCode[i] = (nextCode[i - 1] + depthCounts[i - 1]) shl 1
debugEcho "c nextCode: ", nextCode
# debugEcho "c nextCode: ", nextCode
var canonicalCodes = newSeq[uint16](codes.len)
# Yields `code` with its low `depth` bits in reversed order.
# Each byte is bit-reversed through bitReverseTable and the two bytes trade
# places; assuming `code.uint8` truncates to the low byte (TODO confirm), that
# is a full 16-bit bit reversal. The result is then shifted right by
# (16 - depth) so the reversed bits end up in the low `depth` bits.
template reverseCode(code: uint16, depth: uint8): uint16 =
(
(bitReverseTable[code.uint8].uint16 shl 8) or
(bitReverseTable[(code shr 8).uint8].uint16)
) shr (16 - depth)
# Convert to canonical codes (+ reversed)
for i in 0 ..< codes.len:
if depths[i] != 0:
canonicalCodes[i] = nextCode[depths[i]]
debugEcho toBin(canonicalCodes[i].int, 16), " ", i
codes[i] = reverseCode(nextCode[depths[i]], depths[i])
inc nextCode[depths[i]]
(numCodes, depths, canonicalCodes)
(numCodes, depths, codes)
func compress*(src: seq[uint8]): seq[uint8] =
## Compresses src and returns the compressed data seq.
var b = initBitStream()
b.data.setLen(5)
const
cm = 8.uint8
@ -182,11 +207,11 @@ func compress*(src: seq[uint8]): seq[uint8] =
for i in 0 ..< storedCodesDist:
bitLens[i + storedCodesLitLen] = depthsDist[i]
debugEcho "c bitLens: ", bitLens
# debugEcho "c bitLens: ", bitLens
var
bitLensRle: seq[uint8]
i: int
i, bitCount: int
while i < bitLens.len:
var repeatCount: int
while i + repeatCount + 1 < bitLens.len and
@ -201,6 +226,7 @@ func compress*(src: seq[uint8]): seq[uint8] =
repeatCount = min(repeatCount, 138) # Max of 138 zeros for code 18
bitLensRle.add([18.uint8, repeatCount.uint8 - 11])
inc(i, repeatCount - 1)
inc(bitCount, 7)
elif repeatCount >= 3: # Repeat code for non-zero, must be >= 3 times
var
a = repeatCount div 6
@ -213,11 +239,16 @@ func compress*(src: seq[uint8]): seq[uint8] =
else:
dec(repeatCount, b)
inc(i, repeatCount)
inc(bitCount, (a + b) * 2)
else:
bitLensRle.add(bitLens[i])
inc i
inc(bitCount, 7)
debugEcho "c bitLensRle: ", bitLensRle
# debugEcho "c bitLensRle: ", bitLensRle
# debugEcho (bitCount + 7) div 8
b.data.setLen(b.data.len + (bitCount + 7) div 8)
var
freqCodeLen = newSeq[uint16](19)
@ -229,7 +260,7 @@ func compress*(src: seq[uint8]): seq[uint8] =
inc j
inc j
debugEcho "c freqCodeLen: ", freqCodeLen
# debugEcho "c freqCodeLen: ", freqCodeLen
let (_, depthsCodeLen, codesCodeLen) = newHuffmanTree(freqCodeLen, freqCodeLen.len, 7)
@ -242,7 +273,7 @@ func compress*(src: seq[uint8]): seq[uint8] =
while bitLensCodeLen[bitLensCodeLen.high] == 0 and bitLensCodeLen.len > 4:
bitLensCodeLen.setLen(bitLensCodeLen.len - 1)
debugEcho "c bitLensCodeLen: ", bitLensCodeLen
# debugEcho "c bitLensCodeLen: ", bitLensCodeLen
b.addBit(1)
b.addBits(2, 2)
@ -258,18 +289,20 @@ func compress*(src: seq[uint8]): seq[uint8] =
b.addBits(hdist, 5)
b.addBits(hclen, 4)
debugEcho "c depthsCodeLen: ", depthsCodeLen
# debugEcho "c depthsCodeLen: ", depthsCodeLen
b.data.setLen(b.data.len + (((hclen.int + 4) * 3 + 7) div 8))
for i in 0.uint8 ..< hclen + 4:
b.addBits(bitLensCodeLen[i], 3)
debugEcho b.bytePos, " ", b.bitPos
# debugEcho b.bytePos, " ", b.bitPos
var k: int
while k < bitLensRle.len:
let symbol = bitLensRle[k]
debugEcho "c s: ", symbol, " ", codesCodeLen[symbol], " ", depthsCodeLen[symbol], " ", toBin(codesCodeLen[symbol].int, 8)
b.addBitsReverse(codesCodeLen[symbol], depthsCodeLen[symbol])
# debugEcho "c s: ", symbol, " ", codesCodeLen[symbol], " ", depthsCodeLen[symbol], " ", toBin(codesCodeLen[symbol].int, 8)
b.addBits(codesCodeLen[symbol], depthsCodeLen[symbol].int)
if symbol == 16:
inc k
b.addBits(bitLensRle[k], 2)
@ -281,14 +314,16 @@ func compress*(src: seq[uint8]): seq[uint8] =
b.addBits(bitLensRle[k], 7)
inc k
b.data.setLen(b.data.len + ((encoded.len * 15) + 7) div 8)
for i in 0 ..< encoded.len:
let symbol = encoded[i]
b.addBitsReverse(codesLitLen[symbol], depthsLitLen[symbol])
b.addBits(codesLitLen[symbol], depthsLitLen[symbol].int)
if depthsLitLen[256] == 0:
failCompress()
b.addBitsReverse(codesLitLen[256], depthsLitLen[256]) # End of block
b.addBits(codesLitLen[256], depthsLitLen[256].int) # End of block
b.skipRemainingBitsInCurrentByte()
b.data.setLen(b.data.len + 1)
@ -299,6 +334,7 @@ func compress*(src: seq[uint8]): seq[uint8] =
b.addBits(checkSum[1], 8)
b.addBits(checkSum[0], 8)
b.data.setLen(b.bytePos + 1)
b.data
template compress*(src: string): string =

View File

@ -117,8 +117,6 @@ func initHuffman(lengths: seq[uint8], maxCodes: int): Huffman =
result.symbols[offset] = symbol.uint16
inc offsets[lengths[symbol]]
import strutils
func decodeSymbol(b: var BitStream, h: Huffman): uint16 {.inline.} =
b.checkBytePos()
@ -138,8 +136,6 @@ func decodeSymbol(b: var BitStream, h: Huffman): uint16 {.inline.} =
code = code or (bits and 1).int
bits = bits shr 1
count = h.counts[len].int
# debugEcho code, " ", toBin(code.int, 8), " ", len, " ", count, " ", first, " ", index
# debugEcho code - count
if code - count < first:
fastSkip(i)
return h.symbols[index + (code - first)]
@ -183,24 +179,15 @@ func inflateBlock(b: var BitStream, dst: var seq[uint8], fixedCodes: bool) =
hdist = b.readBits(5).int + 1
hclen = b.readBits(4).int + 4
# debugEcho hlit, " ", hdist, " ", hclen
var codeLengths = newSeq[uint8](19)
for i in 0 ..< hclen.int:
codeLengths[codeLengthOrder[i]] = b.readBits(3).uint8
debugEcho "u codeLengths: ", codeLengths
debugEcho b.bytePos, " ", b.bitPos
let h = initHuffman(codeLengths, 19)
debugEcho "u counts: ", h.counts
debugEcho "u symbols: ", h.symbols
var unpacked: seq[uint8]
while unpacked.len < hlit + hdist:
let symbol = decodeSymbol(b, h)
debugEcho "u s: ", symbol
if symbol <= 15:
unpacked.add(symbol.uint8)
elif symbol == 16:
@ -219,8 +206,6 @@ func inflateBlock(b: var BitStream, dst: var seq[uint8], fixedCodes: bool) =
literalHuffman = initHuffman(unpacked[0 ..< hlit], maxLitLenCodes)
distanceHuffman = initHuffman(unpacked[hlit ..< unpacked.len], maxDistCodes)
debugEcho "u unpacked: ", unpacked
var pos = dst.len
while true:
let symbol = decodeSymbol(b, literalHuffman)
@ -284,9 +269,9 @@ func inflate(b: var BitStream, dst: var seq[uint8]) =
else:
raise newException(ZippyError, "Invalid block header")
func uncompress*(src: seq[uint8], dst: var seq[uint8]) =
## Uncompresses src into dst. This resizes dst as needed and starts writing
## at dst index 0.
func uncompress*(src: seq[uint8]): seq[uint8] =
## Uncompresses src and returns the uncompressed data seq.
result = newSeqOfCap[uint8](src.len * 3)
if src.len < 6:
failUncompress()
@ -314,16 +299,11 @@ func uncompress*(src: seq[uint8], dst: var seq[uint8]) =
if (flg and 0b00100000) != 0: # FDICT
raise newException(ZippyError, "Preset dictionary is not yet supported")
inflate(b, dst)
inflate(b, result)
if checksum != adler32(dst):
if checksum != adler32(result):
raise newException(ZippyError, "Checksum verification failed")
func uncompress*(src: seq[uint8]): seq[uint8] {.inline.} =
## Uncompresses src and returns the uncompressed data seq.
result = newSeqOfCap[uint8](src.len * 3)
uncompress(src, result)
template uncompress*(src: string): string =
## Helper for when preferring to work with strings.
cast[string](uncompress(cast[seq[uint8]](src)))

View File

@ -1,63 +1,105 @@
import miniz, nimPNG/nimz, std/monotimes, strformat, zip/zlib, zippy
const
files = [
zs = [
"randtest3.z",
"rfctest3.z",
"alice29.txt.z",
"urls.10K.z",
"fixed.z"
]
iterations = 1000
golds = [
"rfctest1.gold",
]
iterations = 10000
block guzba_zippy:
echo "https://github.com/guzba/zippy"
for file in files:
# block guzba_zippy_uncompress:
# echo "https://github.com/guzba/zippy uncompress"
# for z in zs:
# let
# compressed = readFile(&"tests/data/{z}")
# start = getMonoTime().ticks
# var c: int
# for i in 0 ..< iterations:
# let uncompressed = zippy.uncompress(compressed)
# inc(c, uncompressed.len)
# let delta = float64(getMonoTime().ticks - start) / 1000000000.0
# echo &" {z}: {delta:.4f}s [{c}]"
block guzba_zippy_compress:
echo "https://github.com/guzba/zippy compress"
for gold in golds:
let
compressed = readFile(&"tests/data/{file}")
uncompressed = readFile(&"tests/data/{gold}")
start = getMonoTime().ticks
var c: int
for i in 0 ..< iterations:
let uncompressed = zippy.uncompress(compressed)
inc(c, uncompressed.len)
let compressed = zippy.compress(uncompressed)
inc(c, compressed.len)
let delta = float64(getMonoTime().ticks - start) / 1000000000.0
echo &" {file}: {delta:.4f}s [{c}]"
echo &" {gold}: {delta:.4f}s [{c}]"
block treeform_miniz:
echo "https://github.com/treeform/miniz"
for file in files:
# block treeform_miniz_uncompress:
# echo "https://github.com/treeform/miniz uncompress"
# for z in zs:
# let
# compressed = readFile(&"tests/data/{z}")
# start = getMonoTime().ticks
# var c: int
# for i in 0 ..< iterations:
# let uncompressed = miniz.uncompress(compressed)
# inc(c, uncompressed.len)
# let delta = float64(getMonoTime().ticks - start) / 1000000000.0
# echo &" {z}: {delta:.4f}s [{c}]"
block treeform_miniz_compress:
echo "https://github.com/treeform/miniz compress"
for gold in golds:
let
compressed = readFile(&"tests/data/{file}")
uncompressed = readFile(&"tests/data/{gold}")
start = getMonoTime().ticks
var c: int
for i in 0 ..< iterations:
let uncompressed = miniz.uncompress(compressed)
inc(c, uncompressed.len)
let compressed = miniz.compress(uncompressed, 1)
inc(c, compressed.len)
let delta = float64(getMonoTime().ticks - start) / 1000000000.0
echo &" {file}: {delta:.4f}s [{c}]"
echo &" {gold}: {delta:.4f}s [{c}]"
block nimlang_zip: # Requires zlib1.dll
echo "https://github.com/nim-lang/zip"
for file in files:
# block nimlang_zip_uncompress: # Requires zlib1.dll
# echo "https://github.com/nim-lang/zip uncompress"
# for z in zs:
# let
# compressed = readFile(&"tests/data/{z}")
# start = getMonoTime().ticks
# var c: int
# for i in 0 ..< iterations:
# let uncompressed = zlib.uncompress(compressed, stream = ZLIB_STREAM)
# inc(c, uncompressed.len)
# let delta = float64(getMonoTime().ticks - start) / 1000000000.0
# echo &" {z}: {delta:.4f}s [{c}]"
# block jangko_nimPNG_uncompress:
# echo "https://github.com/jangko/nimPNG uncompress"
# for z in zs:
# let
# compressed = readFile(&"tests/data/{z}")
# start = getMonoTime().ticks
# var c: int
# for i in 0 ..< iterations:
# let uncompressed = zlib_decompress(nzInflateInit(compressed))
# inc(c, uncompressed.len)
# let delta = float64(getMonoTime().ticks - start) / 1000000000.0
# echo &" {z}: {delta:.4f}s [{c}]"
block jangko_nimPNG_compress:
echo "https://github.com/jangko/nimPNG compress"
for gold in golds:
let
compressed = readFile(&"tests/data/{file}")
uncompressed = readFile(&"tests/data/{gold}")
start = getMonoTime().ticks
var c: int
for i in 0 ..< iterations:
let uncompressed = zlib.uncompress(compressed, stream = ZLIB_STREAM)
inc(c, uncompressed.len)
let compressed = zlib_compress(nzDeflateInit(uncompressed))
inc(c, compressed.len)
let delta = float64(getMonoTime().ticks - start) / 1000000000.0
echo &" {file}: {delta:.4f}s [{c}]"
block jangko_nimPNG:
echo "https://github.com/jangko/nimPNG"
for file in files:
let
compressed = readFile(&"tests/data/{file}")
start = getMonoTime().ticks
var c: int
for i in 0 ..< iterations:
let uncompressed = zlib_decompress(nzInflateInit(compressed))
inc(c, uncompressed.len)
let delta = float64(getMonoTime().ticks - start) / 1000000000.0
echo &" {file}: {delta:.4f}s [{c}]"
echo &" {gold}: {delta:.4f}s [{c}]"

View File

@ -1,30 +1,30 @@
import strformat, zippy
const zs = [
"randtest1.z",
"randtest2.z",
"randtest3.z",
"rfctest1.z",
"rfctest2.z",
"rfctest3.z",
"tor-list.z",
"zerotest1.z",
"zerotest2.z",
"zerotest3.z",
]
const golds = [
"randtest1.gold",
"randtest2.gold",
"randtest3.gold",
"rfctest1.gold",
"rfctest2.gold",
"rfctest3.gold",
"tor-list.gold",
"zerotest1.gold",
"zerotest2.gold",
"zerotest3.gold",
]
const
zs = [
"randtest1.z",
"randtest2.z",
"randtest3.z",
"rfctest1.z",
"rfctest2.z",
"rfctest3.z",
"tor-list.z",
"zerotest1.z",
"zerotest2.z",
"zerotest3.z",
]
golds = [
# "randtest1.gold",
# "randtest2.gold",
# "randtest3.gold",
"rfctest1.gold",
# "rfctest2.gold",
# "rfctest3.gold",
# "tor-list.gold",
# "zerotest1.gold",
# "zerotest2.gold",
# "zerotest3.gold",
]
# for i, file in zs:
# echo file
@ -33,13 +33,25 @@ const golds = [
# gold = readFile(&"tests/data/{golds[i]}")
# assert uncompress(z) == gold
# let c = cast[seq[uint8]](compress(readFile("tests/data/randtest1.gold")))
# let uncompressed = uncompress(compressed)
# let gold = readFile("tests/data/rfctest1.gold")
# let c = compress(gold)
# let uncompressed = uncompress(c)
# assert uncompressed == gold
let c = cast[seq[uint8]](compress("A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"))
# debugEcho "GOLD LEN: ", gold.len, " c len: ", c.len
for gold in golds:
let
uncompressed = readFile(&"tests/data/{gold}")
compressed = compress(uncompressed)
assert uncompressed == uncompress(compressed)
# let c = cast[seq[uint8]](compress("A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"))
# let c = cast[seq[uint8]](compress("aaaaaaaaaabcccccccccccccccddddddd"))
echo c
echo cast[string](uncompress(c))
# echo c
# echo cast[string](uncompress(c))
# import random, fidget/opengl/perf, algorithm
# include zippy/compress

BIN
tests/zlib1.dll Normal file

Binary file not shown.