nimPNG/nimz.nim

1330 lines
48 KiB
Nim
Raw Normal View History

import streams, sequtils, algorithm, strutils
2015-08-28 15:56:55 +00:00
const
  # Index range, inside the literal/length alphabet, of the length codes.
  FIRST_LENGTH_CODE_INDEX = 257
  LAST_LENGTH_CODE_INDEX = 285
  #256 literals, the end code, some length codes, and 2 unused codes
  NUM_DEFLATE_CODE_SYMBOLS = 288
  #the distance codes have their own symbols, 30 used, 2 unused
  NUM_DISTANCE_SYMBOLS = 32
  #the code length codes.
  #0-15: code lengths,
  #16: copy previous 3-6 times,
  #17: 3-10 zeros,
  #18: 11-138 zeros
  NUM_CODE_LENGTH_CODES = 19
  #the base lengths represented by codes 257-285
  #(indexed by code - FIRST_LENGTH_CODE_INDEX)
  LENGTHBASE = [3, 4, 5, 6, 7, 8, 9, 10,
    11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51,
    59, 67, 83, 99, 115, 131, 163, 195, 227, 258]
  #the number of extra bits used by codes 257-285
  #(the value of those bits is added to the base length)
  LENGTHEXTRA = [0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
    4, 4, 4, 4, 5, 5, 5, 5, 0]
  #the base backwards distances, indexed by distance code
  #(the bits of distance codes appear after
  #length codes and use their own huffman tree)
  DISTANCEBASE = [1, 2, 3, 4, 5, 7, 9,
    13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
    769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577]
  #the number of extra bits of backwards distances
  #(the value of those bits is added to the base distance)
  DISTANCEEXTRA = [0, 0, 0, 0, 1, 1, 2,
    2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
    8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13]
  #the order in which "code length alphabet code lengths" are stored,
  #out of this the huffman tree of the dynamic huffman tree lengths is generated
  CLCL_ORDER = [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]
  #3 bytes of data get encoded into two bytes. The hash cannot use more than 3
  #bytes as input because 3 is the minimum match length for deflate
  HASH_NUM_VALUES = 65536
  #mask applied to a computed hash so it stays below HASH_NUM_VALUES
  HASH_BIT_MASK = HASH_NUM_VALUES - 1
  #longest match length a single deflate length code can express
  MAX_SUPPORTED_DEFLATE_LENGTH = 258
type
  # Canonical Huffman code table, usable for encoding (tree1d/lengths) and,
  # once HuffmanTree_make2DTree has run, for decoding (tree2d).
  HuffmanTree = object
    tree2d: seq[int]   # decoder table: two entries (bit 0 / bit 1) per node
    tree1d: seq[int]   # the code of every symbol
    lengths: seq[int]  # the lengths of the codes of the 1d-tree
    maxbitlen: int     # maximum number of bits a single code can get
    numcodes: int      # number of symbols in the alphabet = number of codes

  # A byte buffer addressed bit by bit, least significant bit first.
  BitStream = object
    bitpointer: int    # current position, counted in bits
    data: string       # the underlying bytes
    databitlen: int    # bound used by the range-checked bit reader

  # Exception raised for every error detected by this module.
  NZError = ref object of Exception

  # Hash chains used by the LZ77 encoder to find earlier occurrences.
  NZHash = object
    head: seq[int]     # hash value to head circular pos
                       # can be outdated if went around window
    chain: seq[int]    # circular pos to prev circular pos
    val: seq[int]      # circular pos to hash value
    # TODO: do this not only for zeros but for any repeated byte. However
    # for PNG it's always going to be the zeros that dominate, so not
    # important for PNG
    headz: seq[int]    # similar to head, but for chainz
    chainz: seq[int]   # those with same amount of zeros
    zeros: seq[int]    # length of zeros streak, used as a second hash chain

  # A coin, this is the terminology used for the package-merge algorithm and
  # the coin collector's problem. This is used to generate the huffman tree.
  # A coin can be multiple coins (when they're merged)
  Coin = ref object
    symbols: seq[int]
    weight: float      # the sum of all weights in this coin

  Coins = seq[Coin]

  # Possible inflate modes between inflate() calls (not implemented; kept as
  # a sketch of a zlib-like incremental state machine)
  #inflateMode = enum
  #  HEAD,       # i: waiting for magic header
  #  FLAGS,      # i: waiting for method and flags (gzip)
  #  TIME,       # i: waiting for modification time (gzip)
  #  OS,         # i: waiting for extra flags and operating system (gzip)
  #  EXLEN,      # i: waiting for extra length (gzip)
  #  EXTRA,      # i: waiting for extra bytes (gzip)
  #  NAME,       # i: waiting for end of file name (gzip)
  #  COMMENT,    # i: waiting for end of comment (gzip)
  #  HCRC,       # i: waiting for header crc (gzip)
  #  DICTID,     # i: waiting for dictionary check value
  #  DICT,       # waiting for inflateSetDictionary() call
  #  TYPE,       # i: waiting for type bits, including last-flag bit
  #  TYPEDO,     # i: same, but skip check to exit inflate on new block
  #  STORED,     # i: waiting for stored size (length and complement)
  #  COPY_FIRST, # i/o: same as COPY below, but only first time in
  #  COPY,       # i/o: waiting for input or output to copy stored block
  #  TABLE,      # i: waiting for dynamic block table lengths
  #  LENLENS,    # i: waiting for code length code lengths
  #  CODELENS,   # i: waiting for length/lit and distance code lengths
  #  LEN_FIRST,  # i: same as LEN below, but only first time in
  #  LEN,        # i: waiting for length/lit/eob code
  #  LENEXT,     # i: waiting for length extra bits
  #  DIST,       # i: waiting for distance code
  #  DISTEXT,    # i: waiting for distance extra bits
  #  MATCH,      # o: waiting for output space to copy string
  #  LIT,        # o: waiting for output space to write literal
  #  CHECK,      # i: waiting for 32-bit check value
  #  LENGTH,     # i: waiting for 32-bit length (gzip)
  #  DONE,       # finished check, done -- remain here until reset
  #  BAD,        # got a data error -- remain here until reset
  #  MEM,        # got an inflate() memory error -- remain here until reset
  #  SYNC        # looking for synchronization bytes to restart inflate()

  nzStreamMode = enum
    nzsDeflate, nzsInflate

  # State shared by the compressor and the decompressor.
  nzStream* = ref object
    btype: range[0..3]          # presumably the deflate block type - confirm
    use_lz77: bool              # presumably toggles LZ77 matching - confirm
    windowsize: range[2..32768] # LZ77 sliding window size
    minmatch: range[3..258]     # shorter matches are not emitted as pairs
    nicematch: range[3..358]    # match length at which the search stops
    lazymatching: bool          # enables lazy matching in the LZ77 encoder
    bits: BitStream             # the compressed (bit-level) side
    data: string                # the uncompressed bytes
    mode: nzStreamMode
proc newNZError(msg: string): NZError =
  ## Creates a new `NZError` whose message is `msg`.
  result = NZError(msg: msg)
proc readBit(s: BitStream): int {.inline.} =
  ## Returns the bit (0 or 1) found at `s.bitpointer`; the position is not
  ## advanced. Bits are numbered from the least significant bit of each byte.
  let
    byteIndex = s.bitpointer shr 3
    bitIndex = s.bitpointer and 0x07
  result = (ord(s.data[byteIndex]) shr bitIndex) and 0x01
proc readBitFromStream(s: var BitStream): int {.inline.} =
  ## Reads one bit and moves the bit pointer past it. No bounds check.
  result = readBit(s)
  inc(s.bitpointer)
proc readBitsFromStream(s: var BitStream, nbits: int): int =
  ## Reads `nbits` bits, the first bit read being the least significant bit
  ## of the result, and advances the bit pointer. No bounds check is done.
  for shift in 0 ..< nbits:
    result = result + (readBit(s) shl shift)
    inc(s.bitpointer)
proc readBitsSafe(s: var BitStream, nbits: int): int =
  ## Same as `readBitsFromStream`, but raises `NZError` beforehand when the
  ## read would go past `s.databitlen`.
  if s.bitpointer + nbits > s.databitlen:
    raise newNZError("bit pointer jumps past memory")
  var shift = 0
  while shift < nbits:
    result = result + (readBit(s) shl shift)
    inc(s.bitpointer)
    inc(shift)
proc HuffmanTree_make2DTree(tree: var HuffmanTree) =
  ## Builds `tree.tree2d`, the tree representation used by the decoder, from
  ## `tree.tree1d` (codes) and `tree.lengths` (code lengths).
  ##
  ## `tree2d` holds two entries per node, one for bit 0 and one for bit 1.
  ## An entry < numcodes is a decoded symbol; an entry >= numcodes is the
  ## index of the next node with numcodes added to it.
  ## A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal
  ## nodes; only those internal nodes are stored. There is only room for
  ## such a tree, so codes that would need more nodes raise
  ## `NZError("oversubscribed")`.
  const unfilled = 32767 #marks a tree2d entry that was not written yet
  let total = tree.numcodes
  var
    lastNode = 0 #highest node index handed out so far
    node = 0     #node currently being walked (0 is the root)
  tree.tree2d = newSeqWith(total * 2, unfilled)
  for sym in 0 ..< total: #the codes
    let codeLen = tree.lengths[sym]
    for bitIdx in 0 ..< codeLen: #the bits of this code, most significant first
      let bit = (tree.tree1d[sym] shr (codeLen - bitIdx - 1)) and 1
      if node > 2147483647 or node + 2 > total:
        raise newNZError("oversubscribed")
      let slot = 2 * node + bit
      if tree.tree2d[slot] != unfilled:
        #already filled in by an earlier code: follow the stored address
        node = tree.tree2d[slot] - total
      elif bitIdx + 1 < codeLen:
        #not the last bit: allocate the next node and store its address
        #(addresses are encoded with numcodes added to them)
        inc(lastNode)
        tree.tree2d[slot] = lastNode + total
        node = lastNode
      else:
        #last bit: store the symbol and start from the root again
        tree.tree2d[slot] = sym
        node = 0
  #remove possible remaining unfilled markers
  for slot in 0 ..< tree.tree2d.len:
    if tree.tree2d[slot] == unfilled: tree.tree2d[slot] = 0
2015-08-28 15:56:55 +00:00
proc HuffmanTree_makeFromLengths2(tree: var HuffmanTree) =
  ## Second step for the ...makeFromLengths and ...makeFromFrequencies
  ## procs: assigns a code to every symbol and stores it in `tree.tree1d`.
  ## `numcodes`, `lengths` and `maxbitlen` must already be filled in.
  let maxLen = tree.maxbitlen
  var
    perLength = newSeq[int](maxLen + 1) #how many symbols use each length
    firstCode = newSeq[int](maxLen + 1) #next code to hand out per length
  tree.tree1d = newSeq[int](tree.numcodes)
  #step 1: count number of instances of each code length
  for codeLen in tree.lengths:
    perLength[codeLen] += 1
  #step 2: generate the starting code of every length
  for bits in 1 .. maxLen:
    firstCode[bits] = (firstCode[bits - 1] + perLength[bits - 1]) shl 1
  #step 3: generate all the codes (symbols of length 0 get no code)
  for sym in 0 ..< tree.numcodes:
    let codeLen = tree.lengths[sym]
    if codeLen == 0: continue
    tree.tree1d[sym] = firstCode[codeLen]
    inc(firstCode[codeLen])
proc HuffmanTree_makeFromLengths(tree: var HuffmanTree, bitlen: openarray[int], maxbitlen: int) =
  ## Given the code lengths (as stored in the compressed data), generates
  ## the tree as defined by Deflate, including the decoder table.
  ## `maxbitlen` is the maximum number of bits a code in the tree can have.
  tree.maxbitlen = maxbitlen
  tree.numcodes = len(bitlen) #number of symbols
  tree.lengths = @bitlen
  tree.HuffmanTree_makeFromLengths2()
  tree.HuffmanTree_make2DTree()
proc make_coin(): Coin =
  ## Creates a coin with no symbols and a weight of zero.
  result = Coin(symbols: @[])
proc coin_copy(c1, c2: Coin) =
  ## Overwrites the symbols and weight of `c1` with those of `c2`.
  c1.symbols = c2.symbols
  c1.weight = c2.weight
proc add_coins(c1, c2: Coin) =
  ## Merges `c2` into `c1`: appends its symbols and adds its weight.
  c1.symbols.add(c2.symbols)
  c1.weight = c1.weight + c2.weight
proc init_coins(c: var Coins, num: int) =
  ## Puts a freshly made coin in each of the first `num` slots of `c`.
  var slot = 0
  while slot < num:
    c[slot] = make_coin()
    inc(slot)
proc cleanup_coins(c: var Coins, num: int) =
  ## Empties the symbol list of each of the first `num` coins of `c`.
  var slot = 0
  while slot < num:
    c[slot].symbols = newSeq[int]()
    inc(slot)
2015-12-10 05:07:49 +00:00
proc cmpx(a, b: Coin): int =
  ## Orders coins by weight: 1 when `a` is heavier, -1 when it is lighter
  ## and 0 otherwise.
  if a.weight > b.weight: 1
  elif a.weight < b.weight: -1
  else: 0
proc append_symbol_coins(coins: Coins, start: int, frequencies: openarray[int], numcodes, sum: int) =
  ## For every symbol below `numcodes` with a non-zero frequency, fills the
  ## next coin (beginning at index `start`) with that symbol and with the
  ## weight `frequency / sum`.
  var slot = start #index of the coin for the next present symbol
  for sym in 0 ..< numcodes:
    let freq = frequencies[sym]
    if freq == 0: continue #only include symbols that are present
    coins[slot].weight = freq / sum
    coins[slot].symbols.add(sym)
    inc(slot)
2015-12-10 05:07:49 +00:00
proc placePivot[T](a: var openArray[T], lo, hi: int): int =
  ## Partitions `a[lo..hi]` around the element initially at `lo`, comparing
  ## with `cmpx`, and returns the index at which that pivot ends up.
  result = lo            #current location of the pivot
  var boundary = lo + 1  #slot right after the pivot
  for idx in countup(lo + 1, hi):
    if cmpx(a[idx], a[result]) <= 0:
      #move a[idx] next to the pivot, then swap it in front of the pivot
      swap(a[idx], a[boundary])
      swap(a[result], a[boundary])
      inc(result)
      inc(boundary)
2015-12-10 05:07:49 +00:00
proc quickSort[T](a: var openArray[T], lo, hi: int) =
  ## Recursively sorts `a[lo..hi]` in place, ordering elements with `cmpx`.
  if lo < hi:
    let mid = placePivot(a, lo, hi)
    quickSort(a, lo, mid - 1) #sort bottom half
    quickSort(a, mid + 1, hi) #sort top half
2016-01-25 22:01:04 +00:00
2015-12-10 05:07:49 +00:00
proc quickSort[T](a: var openArray[T], length = -1) =
  ## Sorts the first `length` elements of `a` in place; a negative `length`
  ## (the default) sorts the whole array.
  let last = if length < 0: a.high else: length - 1
  quickSort(a, 0, last)
2015-08-28 15:56:55 +00:00
proc huffman_code_lengths(frequencies: openarray[int], numcodes, maxbitlen: int): seq[int] =
  ## Computes, for the first `numcodes` symbols of `frequencies`, Huffman
  ## code lengths that never exceed `maxbitlen`, using the Package-Merge
  ## algorithm (coin collector's problem).
  ##
  ## Returns a seq of `numcodes` lengths; symbols with frequency 0 get
  ## length 0, except for the filler symbol(s) described below.
  ## Raises `NZError` when `numcodes` is 0.
  if numcodes == 0:
    raise newNZError("a tree of 0 symbols is not supposed to be made")
  result = newSeq[int](numcodes)
  var
    sum = 0
    numpresent = 0
  for i in 0 ..< numcodes:
    if frequencies[i] > 0:
      inc numpresent
      inc(sum, frequencies[i])

  #ensure at least two present symbols. There should be at least one symbol
  #according to RFC 1951 section 3.2.7. Some decoders incorrectly require
  #two. To make these work as well ensure there are at least two symbols. The
  #Package-Merge code below also doesn't work correctly if there's only one
  #symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit
  if numpresent == 0:
    result[0] = 1
    #note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed;
    #the second entry can only be set when the alphabet has room for it
    if numcodes > 1: result[1] = 1
  elif numpresent == 1:
    for i in 0 ..< numcodes:
      if frequencies[i] != 0:
        result[i] = 1
        let filler = if i == 0: 1 else: 0
        if filler < numcodes: result[filler] = 1
        break
  else:
    #Package-Merge algorithm represented by coin collector's problem
    #For every symbol, maxbitlen coins will be created
    let coinmem = numpresent * 2 #max amount of coins needed with the current algo
    var
      coins = newSeq[Coin](coinmem)    #the coins of the currently calculated row
      prev_row = newSeq[Coin](coinmem) #the previous row of coins
    coins.init_coins(coinmem)
    prev_row.init_coins(coinmem)

    #first row, lowest denominator
    append_symbol_coins(coins, 0, frequencies, numcodes, sum)
    var
      numcoins = numpresent
      numprev = 0
    coins.quickSort(numcoins)

    for j in 1 .. maxbitlen: #each of the remaining rows
      swap(prev_row, coins)
      swap(numprev, numcoins)
      coins.cleanup_coins(numcoins)
      coins.init_coins(numcoins)
      numcoins = 0

      #fill in the merged coins of the previous row
      var i = 0
      while i + 1 < numprev:
        #merge prev_row[i] and prev_row[i + 1] into new coin
        let coin = coins[numcoins]
        coin_copy(coin, prev_row[i])
        add_coins(coin, prev_row[i + 1])
        inc numcoins
        inc(i, 2)

      #fill in all the original symbols again
      if j < maxbitlen:
        append_symbol_coins(coins, numcoins, frequencies, numcodes, sum)
        inc(numcoins, numpresent)

      coins.quickSort(numcoins)

    #calculate the lengths of each symbol, as the amount of times a coin of
    #each symbol is used
    for i in 0 ..< numpresent - 1:
      for sym in coins[i].symbols: inc result[sym]
proc HuffmanTree_makeFromFrequencies(
    tree: var HuffmanTree, frequencies: openarray[int], mincodes, maxbitlen: int) =
  ## Creates the Huffman tree (code lengths and codes, no decoder table)
  ## given the symbol frequencies.
  ##
  ## Trailing symbols with frequency 0 are trimmed from the alphabet, but
  ## never below `mincodes` symbols. `maxbitlen` bounds the code lengths.
  ## Raises `NZError` (from `huffman_code_lengths`) if no symbol remains.
  var numcodes = frequencies.len
  #trim zeroes; the bounds are tested first so that an empty or all-zero
  #`frequencies` never gets indexed at -1
  while numcodes > mincodes and numcodes > 0 and frequencies[numcodes - 1] == 0:
    dec numcodes
  tree.maxbitlen = maxbitlen
  tree.numcodes = numcodes #number of symbols
  tree.lengths = huffman_code_lengths(frequencies, numcodes, maxbitlen)
  HuffmanTree_makeFromLengths2(tree)
proc generateFixedLitLenTree(tree: var HuffmanTree) =
  ## Gets the literal and length code tree of a deflated block with fixed
  ## tree, as per the deflate specification.
  ## 288 possible codes:
  ## 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused
  var bitlen: array[0..NUM_DEFLATE_CODE_SYMBOLS-1, int]
  for sym in 0 .. high(bitlen):
    bitlen[sym] =
      if sym <= 143: 8
      elif sym <= 255: 9
      elif sym <= 279: 7
      else: 8
  HuffmanTree_makeFromLengths(tree, bitlen, 15)
proc generateFixedDistanceTree(tree: var HuffmanTree) =
  ## Gets the distance code tree of a deflated block with fixed tree: every
  ## one of the 32 distance codes (30-31 are unused) is 5 bits long.
  var bitlen: array[0..NUM_DISTANCE_SYMBOLS-1, int]
  for sym in low(bitlen) .. high(bitlen):
    bitlen[sym] = 5
  HuffmanTree_makeFromLengths(tree, bitlen, 15)
proc readInt16(s: var BitStream): int =
  ## Skips to the next byte boundary, then reads a 16-bit little-endian
  ## value and advances the bit pointer past it.
  ## Raises `NZError` when fewer than two bytes remain in `s.data`.
  #go to first boundary of byte
  while (s.bitpointer and 0x7) != 0: inc s.bitpointer
  let p = s.bitpointer div 8 #byte position
  #bytes p and p + 1 are read, so p + 2 == len (value in the last two
  #bytes of the buffer) is still valid
  if p + 2 > s.data.len: raise newNZError("bit pointer will jump past memory")
  result = ord(s.data[p]) + 256 * ord(s.data[p + 1])
  inc(s.bitpointer, 16)
proc getBytePosition(s: var BitStream): int =
  ## Returns the index of the byte that contains the current bit position.
  s.bitpointer div 8
proc readByte(s: var BitStream): int =
  ## Skips to the next byte boundary, then reads one byte and advances the
  ## bit pointer past it.
  ## Raises `NZError` when no byte remains in `s.data`.
  while (s.bitpointer and 0x7) != 0: inc s.bitpointer
  let p = s.bitpointer div 8 #byte position
  #only byte p is read, so the last byte of the buffer is still valid
  if p >= s.data.len: raise newNZError("bit pointer will jump past memory")
  result = ord(s.data[p])
  inc(s.bitpointer, 8)
proc inflateNoCompression(nz: nzStream) =
  ## Decodes a stored (uncompressed) deflate block: reads LEN and NLEN,
  ## checks them, then appends LEN raw bytes from `nz.bits` to `nz.data`.
  ## Raises `NZError` on a LEN/NLEN mismatch or when the block extends past
  ## the input buffer.
  let srcLen = nz.bits.data.len
  #read LEN (2 bytes) and NLEN (2 bytes)
  let blockLen = nz.bits.readInt16
  let complement = nz.bits.readInt16
  #check if 16-bit NLEN is really the one's complement of LEN
  if blockLen + complement != 65535:
    raise newNZError("NLEN is not one's complement of LEN")
  #read the literal data: LEN bytes are now stored in the out buffer
  let src = nz.bits.getBytePosition
  if src + blockLen > srcLen:
    raise newNZError("reading outside of input buffer")
  let dst = nz.data.len
  nz.data.setLen(dst + blockLen)
  for k in 0 ..< blockLen:
    nz.data[dst + k] = nz.bits.data[src + k]
  #continue reading right after the copied bytes
  nz.bits.bitpointer = (src + blockLen) * 8
proc getTreeInflateFixed(tree_ll, tree_d: var HuffmanTree) =
  ## Gets the trees of a deflated block with fixed tree, as specified in the
  ## deflate specification: `tree_ll` for literal/length codes and `tree_d`
  ## for distance codes.
  tree_ll.generateFixedLitLenTree()
  tree_d.generateFixedDistanceTree()
proc huffmanDecodeSymbol(s: var BitStream, codetree: HuffmanTree, inbitlength: int): int =
  ## Decodes one symbol from `s` by walking `codetree.tree2d` bit by bit.
  ## Returns the symbol, or -1 if an error happened: the end of the input
  ## was reached, or the walk left the tree.
  ## `inbitlength` is the length of the complete buffer, in bits (so its
  ## byte length times 8).
  let limit = codetree.numcodes
  var node = 0
  while s.bitpointer < inbitlength:
    #the bit read is done in place because this is the biggest bottleneck
    #while decoding
    let entry = codetree.tree2d[2 * node + readBit(s)]
    inc(s.bitpointer)
    if entry < limit: return entry #the symbol is decoded, return it
    node = entry - limit #symbol not yet decoded, instead move tree position
    if node >= limit: return -1 #it appeared outside the codetree
  result = -1 #end of input memory reached without endcode
proc getTreeInflateDynamic(s: var BitStream, tree_ll, tree_d: var HuffmanTree) =
  ## Reads the header of a dynamic-Huffman deflate block from `s` and builds
  ## `tree_ll` (literal/length codes) and `tree_d` (distance codes) from it.
  ##
  ## Raises `NZError` when the header is truncated, a symbol cannot be
  ## decoded, a repeat runs past the announced number of codes, or the end
  ## code (256) has no code assigned.
  let inbitlength = s.data.len * 8
  #length values that aren't filled in must be 0, or a wrong tree will be
  #generated; newSeq zero-initialises them
  var bitlen_ll = newSeq[int](NUM_DEFLATE_CODE_SYMBOLS) #lit,len code lengths
  var bitlen_d = newSeq[int](NUM_DISTANCE_SYMBOLS) #dist code lengths
  #code length code lengths ("clcl"), the bit lengths of the huffman tree
  #used to compress bitlen_ll and bitlen_d
  var bitlen_cl = newSeq[int](NUM_CODE_LENGTH_CODES)
  #the code tree for code length codes
  #(the huffman tree for compressed huffman trees)
  var tree_cl: HuffmanTree

  if s.bitpointer + 14 > inbitlength:
    raise newNZError("the bit pointer is or will go past the memory")
  #number of literal/length codes + 257.
  #Unlike the spec, the value 257 is added to it here already
  let HLIT = s.readBitsFromStream(5) + 257
  #number of distance codes.
  #Unlike the spec, the value 1 is added to it here already
  let HDIST = s.readBitsFromStream(5) + 1
  #number of code length codes.
  #Unlike the spec, the value 4 is added to it here already
  let HCLEN = s.readBitsFromStream(4) + 4
  if s.bitpointer + HCLEN * 3 > inbitlength:
    raise newNZError("the bit pointer is or will go past the memory")

  #read the code length codes out of 3 * (amount of code length codes) bits;
  #the ones that are not transmitted must stay 0
  for k in 0 ..< NUM_CODE_LENGTH_CODES:
    bitlen_cl[CLCL_ORDER[k]] = if k < HCLEN: s.readBitsFromStream(3) else: 0
  HuffmanTree_makeFromLengths(tree_cl, bitlen_cl, 7)

  #now we can use this tree to read the lengths for the trees that this
  #proc will return. i is the current symbol we're reading in the part
  #that contains the code lengths of lit/len and dist codes
  var i = 0

  template store(value: int) =
    #writes one code length at position i and moves to the next position
    if i < HLIT: bitlen_ll[i] = value
    else: bitlen_d[i - HLIT] = value
    inc(i)

  template repeatLength(count, value: int) =
    #writes `value` as the next `count` code lengths
    for n in 0 ..< count:
      if i >= HLIT + HDIST:
        raise newNZError("i is larger than the amount of codes")
      store(value)

  while i < HLIT + HDIST:
    let code = s.huffmanDecodeSymbol(tree_cl, inbitlength)
    #huffmanDecodeSymbol returns -1 in case of error. This must be tested
    #before the range checks below, because -1 also satisfies `code <= 15`
    if code < 0:
      #NOTE(review): huffmanDecodeSymbol stops as soon as the bit pointer
      #reaches inbitlength, so "no endcode" looks hard to reach - confirm
      if s.bitpointer > inbitlength: raise newNZError("no endcode")
      else: raise newNZError("wrong jump outside of tree")
    case code
    of 0 .. 15: #a length code
      store(code)
    of 16: #repeat previous
      if i == 0: raise newNZError("can't repeat previous if i is 0")
      #read in the 2 bits that indicate repeat length (3-6)
      let replength = 3 + s.readBitsSafe(2)
      let previous =
        if i <= HLIT: bitlen_ll[i - 1]
        else: bitlen_d[i - HLIT - 1]
      repeatLength(replength, previous)
    of 17: #repeat "0" 3-10 times
      let replength = 3 + s.readBitsSafe(3)
      repeatLength(replength, 0)
    of 18: #repeat "0" 11-138 times
      let replength = 11 + s.readBitsSafe(7)
      repeatLength(replength, 0)
    else:
      raise newNZError("unexisting code, this can never happen")

  if bitlen_ll[256] == 0:
    raise newNZError("the length of the end code 256 must be larger than 0")
  #now we've finally got HLIT and HDIST,
  #so generate the code trees, and the proc is done
  HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, 15)
  HuffmanTree_makeFromLengths(tree_d, bitlen_d, 15)
proc inflateHuffmanBlock(nz: nzStream, blockType: int) =
  ## Inflates a block with fixed (`blockType` 1) or dynamic (`blockType` 2)
  ## Huffman trees, appending the decoded bytes to `nz.data`.
  ##
  ## Raises `NZError` when a symbol cannot be decoded, a distance code is
  ## invalid, or a match points before the start of the output.
  var tree_ll: HuffmanTree #the huffman tree for literal and length codes
  var tree_d: HuffmanTree #the huffman tree for distance codes
  let inbitlength = nz.bits.data.len * 8

  template failDecode() =
    #huffmanDecodeSymbol returned -1 (or a symbol that has no meaning here)
    #NOTE(review): huffmanDecodeSymbol stops as soon as the bit pointer
    #reaches inbitlength, so "no endcode" looks hard to reach - confirm
    if nz.bits.bitpointer > inbitlength: raise newNZError("no endcode")
    else: raise newNZError("wrong jump outside of tree")

  if blockType == 1: getTreeInflateFixed(tree_ll, tree_d)
  elif blockType == 2: nz.bits.getTreeInflateDynamic(tree_ll, tree_d)

  #decode all symbols until end reached, breaks at end code
  while true:
    #code_ll is literal, length or end code
    let code_ll = nz.bits.huffmanDecodeSymbol(tree_ll, inbitlength)
    #the error value -1 has to be rejected before the `<= 255` test, which
    #it would otherwise satisfy; 286 and 287 are never valid either
    if code_ll < 0 or code_ll > LAST_LENGTH_CODE_INDEX:
      failDecode()
    elif code_ll <= 255: #literal symbol
      nz.data.add chr(code_ll)
    elif code_ll == 256:
      break #end code, break the loop
    else: #length code
      #part 1: get length base
      var length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]
      #part 2: get extra bits and add the value of that to length
      let numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]
      length += nz.bits.readBitsSafe(numextrabits_l)

      #part 3: get distance code
      let code_d = nz.bits.huffmanDecodeSymbol(tree_d, inbitlength)
      if code_d < 0: #decoding the distance symbol itself failed
        failDecode()
      if code_d > 29:
        raise newNZError("invalid distance code (30-31 are never used)")
      var distance = DISTANCEBASE[code_d]
      #part 4: get extra bits from distance
      let numextrabits_d = DISTANCEEXTRA[code_d]
      distance += nz.bits.readBitsSafe(numextrabits_d)

      #part 5: fill in all the out[n] values based on the length and dist
      let start = nz.data.len
      if distance > start:
        raise newNZError("too long backward distance")
      var backward = start - distance
      nz.data.setLen(start + length)
      for pos in 0 ..< length:
        nz.data[pos + start] = nz.data[backward]
        inc backward
        #a match longer than its distance repeats the same `distance` bytes
        if backward >= start: backward = start - distance
proc nzInflate(nz: nzStream) =
  ## Inflates deflate blocks from `nz.bits` into `nz.data` until the block
  ## flagged as final has been processed.
  ## Decoding stops without an error when fewer than 3 header bits remain.
  ## Raises `NZError` for block type 3 and for errors inside a block.
  let totalBits = nz.bits.databitlen
  var done = false
  while not done:
    #bit pointer would jump past memory while reading the block header
    if nz.bits.bitpointer + 2 >= totalBits: break
    done = nz.bits.readBitFromStream != 0
    #the block type is 2 bits, least significant bit first
    let lowBit = nz.bits.readBitFromStream
    let highBit = nz.bits.readBitFromStream
    let blockType = lowBit + 2 * highBit
    case blockType
    of 3: raise newNZError("invalid blockType")
    of 0: inflateNoCompression(nz) #no compression
    else: inflateHuffmanBlock(nz, blockType) #compression, blockType 01 or 10
proc nimzHashInit(hash: var NZHash, windowsize: int) =
  ## Allocates the tables of `hash` for a window of `windowsize` positions.
  ## `head`, `headz` and `val` start at -1 (nothing recorded), `zeros` at 0,
  ## and every `chain`/`chainz` entry starts out pointing at itself.
  hash.head = newSeqWith(HASH_NUM_VALUES, -1)
  hash.headz = newSeqWith(MAX_SUPPORTED_DEFLATE_LENGTH + 1, -1)
  hash.val = newSeqWith(windowsize, -1)
  hash.zeros = newSeq[int](windowsize)
  hash.chain = newSeq[int](windowsize)
  hash.chainz = newSeq[int](windowsize)
  for wpos in countup(0, windowsize - 1):
    hash.chain[wpos] = wpos
    hash.chainz[wpos] = wpos
proc deflateNoCompression(nz: nzStream) =
  ## Appends `nz.data` to `nz.bits.data` as stored (non compressed) deflate
  ## blocks of at most 65535 bytes each. Layout of one block:
  ## 1 bit BFINAL, 2 bits BTYPE, (5 bits): it jumps to start of next byte,
  ## 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA.
  ## Empty `nz.data` produces no block at all.
  const maxBlock = 65535
  let total = nz.data.len
  let blocks = (total + maxBlock - 1) div maxBlock
  var offset = 0
  for blk in 0 ..< blocks:
    let isLast = blk == blocks - 1
    let LEN = min(maxBlock, total - offset)
    let NLEN = maxBlock - LEN
    #header byte: BFINAL in bit 0, BTYPE 00 and the padding are all zero
    nz.bits.data.add chr(ord(isLast))
    #LEN and NLEN, least significant byte first
    nz.bits.data.add chr(LEN and 0xFF)
    nz.bits.data.add chr(LEN shr 8)
    nz.bits.data.add chr(NLEN and 0xFF)
    nz.bits.data.add chr(NLEN shr 8)
    #Decompressed data
    for k in 0 ..< LEN:
      nz.bits.data.add nz.data[offset + k]
    inc(offset, LEN)
proc `|=`(a: var char, b: char) {.inline.} =
  ## In-place bitwise OR of two characters.
  a = chr(a.ord or b.ord)
proc addBitToStream(s: var BitStream, bit: int) =
  ## Appends one bit to the stream, starting a new zero byte whenever the
  ## bit pointer sits on a byte boundary.
  let offset = s.bitpointer and 0x07
  #add a new byte at the end
  if offset == 0: s.data.add chr(0)
  #earlier bit of huffman code is in a lesser significant bit of an earlier byte
  let last = s.data.len - 1
  s.data[last] |= chr(bit shl offset)
  inc(s.bitpointer)
proc addBitsToStream(s: var BitStream, value: int, nbits: int) =
  ## Appends the `nbits` lowest bits of `value`, least significant first.
  for shift in countup(0, nbits - 1):
    addBitToStream(s, (value shr shift) and 1)
proc addBitsToStreamReversed(s: var BitStream, value: int, nbits: int) =
  ## Appends the `nbits` lowest bits of `value`, most significant first.
  for shift in countdown(nbits - 1, 0):
    addBitToStream(s, (value shr shift) and 1)
proc HuffmanTree_getCode(tree: HuffmanTree, index: int): int =
  ## Returns the code assigned to symbol `index`.
  tree.tree1d[index]
proc HuffmanTree_getLength(tree: HuffmanTree, index: int): int =
  ## Returns the length, in bits, of the code of symbol `index`.
  tree.lengths[index]
proc addHuffmanSymbol(s: var BitStream, tree: HuffmanTree, val: int) {.inline.} =
  ## Appends the code of symbol `val` from `tree`, most significant code
  ## bit first.
  let
    code = tree.tree1d[val]
    nbits = tree.lengths[val]
  s.addBitsToStreamReversed(code, nbits)
proc writeLZ77data(s: var BitStream, input: seq[int], tree_ll, tree_d: HuffmanTree) =
  ## Writes the lz77-encoded data, which has lit, len and dist codes, to the
  ## compressed stream using huffman trees.
  ## tree_ll: the tree for lit and len codes.
  ## tree_d: the tree for distance codes.
  ## In `input`, a value above 256 is a length code followed by three more
  ## entries: length extra bits, distance code, distance extra bits.
  var cursor = 0
  while cursor < input.len:
    let sym = input[cursor]
    inc(cursor)
    s.addHuffmanSymbol(tree_ll, sym)
    if sym <= 256: continue #literal or end code: nothing more to write
    #for a length code, 3 more things have to be added
    let
      lenExtraBits = LENGTHEXTRA[sym - FIRST_LENGTH_CODE_INDEX]
      lenExtra = input[cursor]
      distCode = input[cursor + 1]
      distExtraBits = DISTANCEEXTRA[distCode]
      distExtra = input[cursor + 2]
    inc(cursor, 3)
    s.addBitsToStream(lenExtra, lenExtraBits)
    s.addHuffmanSymbol(tree_d, distCode)
    s.addBitsToStream(distExtra, distExtraBits)
proc `^=`(a: var int, b: int) =
  ## In-place bitwise XOR.
  a = b xor a
proc getHash(nz: nzStream, size, pos: int): int =
  ## Hashes up to 3 bytes of `nz.data` starting at `pos` into a value below
  ## HASH_NUM_VALUES. `size` is the end of the usable data; positions at or
  ## past it hash to 0.
  if pos >= size: return 0
  let avail = size - pos
  if avail >= 3:
    #simple shift and xor hash is used. Since the data of PNGs is dominated
    #by zeroes due to the filters, a better hash does not have a significant
    #effect on speed in traversing the chain, and causes more time spend on
    #calculating the hash.
    result = ord(nz.data[pos]) xor
             (ord(nz.data[pos + 1]) shl 4) xor
             (ord(nz.data[pos + 2]) shl 8)
  else:
    #fewer than 3 bytes left: combine what is there
    for k in 0 ..< avail:
      result = result xor (ord(nz.data[pos + k]) shl (k * 8))
  result = result and HASH_BIT_MASK
proc countZeros(nz: nzStream, size, pos: int): int =
  ## Counts the consecutive zero bytes of `nz.data` starting at `pos`,
  ## stopping at `size` and at MAX_SUPPORTED_DEFLATE_LENGTH bytes.
  let stop = min(pos + MAX_SUPPORTED_DEFLATE_LENGTH, size)
  var cursor = pos
  while cursor < stop and nz.data[cursor] == '\0':
    inc(cursor)
  #max value is MAX_SUPPORTED_DEFLATE_LENGTH
  result = cursor - pos
proc updateHashChain(hash: var NZHash, wpos, hashval, numzeros: int) =
  ## Records window position `wpos` (wpos = pos & (windowsize - 1)) as the
  ## newest entry of the chain for `hashval` and of the chain for
  ## `numzeros`, linking it to the previous head of each chain if any.
  hash.val[wpos] = hashval
  let prevHead = hash.head[hashval]
  if prevHead != -1: hash.chain[wpos] = prevHead
  hash.head[hashval] = wpos

  hash.zeros[wpos] = numzeros
  let prevHeadz = hash.headz[numzeros]
  if prevHeadz != -1: hash.chainz[wpos] = prevHeadz
  hash.headz[numzeros] = wpos
proc getMaxChainLen(nz: nzStream): int =
  ## Upper bound on the hash chain entries visited per position: the whole
  ## window for windows of 8192 or more, an eighth of it otherwise.
  let window = nz.windowsize
  result = if window >= 8192: window else: window div 8
proc getMaxLazyMatch(nz:nzStream): int =
  ## Longest match length still considered for lazy matching: 64 for
  ## windows below 8192, MAX_SUPPORTED_DEFLATE_LENGTH otherwise.
  if nz.windowsize < 8192: 64
  else: MAX_SUPPORTED_DEFLATE_LENGTH
proc searchCodeIndex(input: openarray[int], value: int): int =
  ## Searches the index in the array that has the largest value smaller
  ## than or equal to `value`. The array must be sorted.
  ## When the search finds no such position (for instance when every
  ## element is larger than `value`), the last index is returned.
  #binary search implementation (precondition: array_size > 0)
  var
    lo = 1
    hi = input.high
  result = input.len - 1
  while lo <= hi:
    let mid = (lo + hi) div 2
    if input[mid] <= value:
      lo = mid + 1 #the value to find is more to the right
    elif input[mid - 1] > value:
      hi = mid - 1 #the value to find is more to the left
    else:
      return mid - 1
proc addLengthDistance(values: var seq[int], length, distance: int) =
  ## Appends a length/distance pair to `values` as four entries: length
  ## code, extra length bits, distance code, extra distance bits.
  ## values in encoded vector are those used by deflate:
  ## 0-255: literal bytes
  ## 256: end
  ## 257-285: length/distance pair
  ## 286-287: invalid
  let
    lenIdx = searchCodeIndex(LENGTHBASE, length)
    lenExtra = length - LENGTHBASE[lenIdx]
    distIdx = searchCodeIndex(DISTANCEBASE, distance)
    distExtra = distance - DISTANCEBASE[distIdx]
  values.add([lenIdx + FIRST_LENGTH_CODE_INDEX, lenExtra, distIdx, distExtra])
#LZ77-encode the data. Return value is error code. The input are raw bytes, the output
#is in the form of unsigned integers with codes representing for example literal bytes, or
#length/distance pairs.
#It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
#sliding window (of windowsize) is used, and all past bytes in that window can be used as
#the "dictionary". A brute force search through all possible distances would be slow, and
#this hash technique is one out of several ways to speed this up.
proc encodeLZ77(nz: nzStream, hash: var NZHash, inpos, insize: int): seq[int] =
  ## LZ77-encodes `nz.data` from `inpos` up to (not including) `insize` and
  ## returns the produced symbol sequence: literals are appended as their byte
  ## value, matches are appended through `addLengthDistance`.
  ##
  ## `hash` is updated (via `updateHashChain`) for every consumed position and
  ## is walked to find earlier candidate positions inside the sliding window.
  ##
  ## Raises an NZError when `nz.windowsize` is 0, larger than 32768 or not a
  ## power of two, when a lazy match is pending at position 0, or when a found
  ## match has an offset larger than the window.
  result = @[]
  #for large window lengths, assume the user wants no compression loss.
  #Otherwise, max hash chain length speedup.
  let
    chainLimit = nz.getMaxChainLen
    lazyLimit = nz.getMaxLazyMatch
  #not sure if setting it to false for windowsize < 8192 is better or worse
  const trackZeros = true

  if (nz.windowsize == 0) or (nz.windowsize > 32768):
    raise newNZError("windowsize smaller/larger than allowed")
  if (nz.windowsize and (nz.windowsize - 1)) != 0:
    raise newNZError("must be power of two")

  let niceLen = min(nz.nicematch, MAX_SUPPORTED_DEFLATE_LENGTH)
  var
    zeroRun = 0          #zero-run counter maintained alongside the hash
    lazyPending = false  #a match from the previous byte is waiting
    lazyLen = 0
    lazyOff = 0
    hashval: int
    wpos: int            #position in the 'circular' hash buffers
    pos = inpos

  template registerPos() =
    #hash the bytes at `pos`, maintain the zero-run counter and insert the
    #position into the hash chains
    wpos = pos and (nz.windowsize - 1)
    hashval = getHash(nz, insize, pos)
    if trackZeros and hashval == 0:
      if zeroRun == 0:
        zeroRun = countZeros(nz, insize, pos)
      elif (pos + zeroRun > insize) or (nz.data[pos + zeroRun - 1] != chr(0)):
        dec zeroRun
    else:
      zeroRun = 0
    updateHashChain(hash, wpos, hashval, zeroRun)

  while pos < insize:
    registerPos()

    #the length and offset found for the current position
    var
      length = 0
      offset = 0
      hashpos: int = hash.chain[wpos]
      chainlength = 0
      prevOffset = 0
    let lastptr = min(insize, pos + MAX_SUPPORTED_DEFLATE_LENGTH)

    #search for the longest string
    while chainlength < chainLimit:
      inc chainlength
      let curOffset =
        if hashpos <= wpos: wpos - hashpos
        else: wpos - hashpos + nz.windowsize
      #stop when went completely around the circular buffer
      if curOffset < prevOffset: break
      prevOffset = curOffset

      if curOffset > 0:
        #test the next characters
        var
          foreptr = pos
          backptr = pos - curOffset
        #common case in PNGs is lots of zeros. Quickly skip over them as a speedup
        if zeroRun >= 3:
          let skip = min(zeroRun, hash.zeros[hashpos])
          inc(backptr, skip)
          inc(foreptr, skip)
        #maximum supported length by deflate is max length
        while (foreptr < lastptr) and (nz.data[backptr] == nz.data[foreptr]):
          inc backptr
          inc foreptr
        let curLength = foreptr - pos
        if curLength > length:
          length = curLength #the longest length
          offset = curOffset #the offset that is related to this longest length
          #jump out once a length of max length is found (speed gain). This also
          #jumps out if length is MAX_SUPPORTED_DEFLATE_LENGTH
          if curLength >= niceLen: break

      if hashpos == hash.chain[hashpos]: break

      if (zeroRun >= 3) and (length > zeroRun):
        hashpos = hash.chainz[hashpos]
        if hash.zeros[hashpos] != zeroRun: break
      else:
        hashpos = hash.chain[hashpos]
        #outdated hash value, happens if particular
        #value was not encountered in whole last window
        if hash.val[hashpos] != hashval: break

    if nz.lazymatching:
      if (not lazyPending) and (length >= 3) and (length <= lazyLimit) and
          (length < MAX_SUPPORTED_DEFLATE_LENGTH):
        #remember this match and first look at what the next byte offers
        lazyPending = true
        lazyLen = length
        lazyOff = offset
        inc pos
        continue #try the next byte
      if lazyPending:
        lazyPending = false
        if pos == 0: raise newNZError("lazy matching at pos 0 is impossible")
        if length > lazyLen + 1:
          #push the previous character as literal
          result.add ord(nz.data[pos - 1])
        else:
          #fall back to the remembered match, which starts one byte earlier
          length = lazyLen
          offset = lazyOff
          hash.head[hashval] = -1 #the same hashchain update will be done, this ensures no wrong alteration
          hash.headz[zeroRun] = -1 #idem
          dec pos

    if (length >= 3) and (offset > nz.windowsize):
      raise newNZError("too big (or overflown negative) offset")

    #encode it as length/distance pair or literal value.
    #only lengths of 3 or higher are supported as length/distance pair; longer
    #offsets have more extra bits, so a length of only 3 may not be worth it
    let asLiteral = (length < 3) or (length < nz.minmatch) or
      ((length == 3) and (offset > 4096))
    if asLiteral:
      result.add ord(nz.data[pos])
    else:
      result.addLengthDistance(length, offset)
      #the bytes covered by the match still have to enter the hash chains
      for _ in 1 ..< length:
        inc pos
        registerPos()
    inc pos
proc deflateFixed(nz: nzStream, hash: var NZHash, datapos, dataend: int, final: bool) =
  ## Writes one deflate block that uses the fixed Huffman trees to `nz.bits`,
  ## covering `nz.data` from `datapos` up to (not including) `dataend`.
  ## `final` sets the BFINAL bit of the block header.
  var
    litLenTree: HuffmanTree #tree for literal values and length codes
    distTree: HuffmanTree   #tree for distance codes
  generateFixedLitLenTree(litLenTree)
  generateFixedDistanceTree(distTree)

  #block header: BFINAL followed by the two BTYPE bits
  let bfinal = if final: 1 else: 0
  nz.bits.addBitToStream(bfinal)
  nz.bits.addBitToStream(1) #first bit of BTYPE
  nz.bits.addBitToStream(0) #second bit of BTYPE

  if not nz.use_lz77:
    #no LZ77, but still will be Huffman compressed
    for idx in datapos ..< dataend:
      nz.bits.addHuffmanSymbol(litLenTree, ord(nz.data[idx]))
  else:
    #LZ77 encoded
    var symbols = nz.encodeLZ77(hash, datapos, dataend)
    nz.bits.writeLZ77data(symbols, litLenTree, distTree)

  nz.bits.addHuffmanSymbol(litLenTree, 256) #add END code
proc deflateDynamic(nz: nzStream, hash: var NZHash, datapos, dataend: int, final: bool) =
  ## Writes one deflate block with dynamic Huffman trees to `nz.bits`, covering
  ## `nz.data` from `datapos` up to (not including) `dataend`. `final` sets the
  ## BFINAL bit of the block header.
  ##
  ## A block is compressed as follows: the data is lz77 encoded, resulting in
  ## literal bytes and length/distance pairs. This is then huffman compressed
  ## with two huffman trees. One huffman tree is used for the lit and len
  ## values ("ll"), another huffman tree is used for the dist values ("d").
  ## These two trees are stored using their code lengths, and to compress even
  ## more these code lengths are also run-length encoded and huffman
  ## compressed. This gives a huffman tree of code lengths "cl". The code
  ## lengths used to describe this third tree are the code length code lengths
  ## ("clcl").
  ##
  ## Raises an NZError if the generated lit/len tree assigns length 0 to the
  ## end code 256.
  var
    tree_ll: HuffmanTree #tree for lit,len values
    tree_d: HuffmanTree #tree for distance codes
    tree_cl: HuffmanTree #tree for encoding the code lengths representing tree_ll and tree_d
    frequencies_cl: seq[int] #frequency of code length codes
    bitlen_lld: seq[int] #lit,len,dist code lengths (int bits), literally (without repeat codes).
    bitlen_lld_e: seq[int] #bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)
    #bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
    #(these are written as is in the file, it would be crazy to compress these using yet another huffman
    #tree that needs to be represented by yet another set of code lengths)
    bitlen_cl: seq[int]
  let datasize = dataend - datapos

  #Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
  #bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
  #bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
  #bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.

  #The lz77 encoded data, represented with integers
  #since there will also be length and distance codes in it
  var lz77: seq[int]
  if nz.use_lz77:
    lz77 = nz.encodeLZ77(hash, datapos, dataend)
  else:
    #no LZ77, but still will be Huffman compressed
    lz77 = newSeq[int](datasize)
    #lz77 holds only this block, so it is indexed relative to datapos
    for i in datapos..dataend-1: lz77[i - datapos] = ord(nz.data[i])

  var frequencies_ll = newSeq[int](286) #frequency of lit,len codes
  var frequencies_d = newSeq[int](30) #frequency of dist codes

  #Count the frequencies of lit, len and dist codes
  var i = 0
  while i < lz77.len:
    let symbol = lz77[i]
    inc frequencies_ll[symbol]
    if symbol > 256:
      #a length symbol is followed by three more entries; the one at i + 2
      #is the distance code
      let dist = lz77[i + 2]
      inc frequencies_d[dist]
      inc(i, 3)
    inc i
  frequencies_ll[256] = 1 #there will be exactly 1 end code, at the end of the block

  #Make both huffman trees, one for the lit and len codes, one for the dist codes
  HuffmanTree_makeFromFrequencies(tree_ll, frequencies_ll, 257, 15)
  #2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree
  HuffmanTree_makeFromFrequencies(tree_d, frequencies_d, 2, 15)

  var numcodes_ll = min(tree_ll.numcodes, 286)
  var numcodes_d = min(tree_d.numcodes, 30)

  #store the code lengths of both generated trees in bitlen_lld
  bitlen_lld = newSeq[int](numcodes_ll + numcodes_d)
  for i in 0..numcodes_ll-1: bitlen_lld[i] = HuffmanTree_getLength(tree_ll, i)
  for i in 0..numcodes_d-1: bitlen_lld[i+numcodes_ll] = HuffmanTree_getLength(tree_d, i)

  #run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
  #17 (3-10 zeroes), 18 (11-138 zeroes)
  i = 0
  bitlen_lld_e = @[]
  while i < bitlen_lld.len:
    var j = 0 #amount of repetitions
    while (i + j + 1 < bitlen_lld.len) and (bitlen_lld[i + j + 1] == bitlen_lld[i]): inc j

    if (bitlen_lld[i] == 0) and (j >= 2): #repeat code for zeroes
      inc j #include the first zero
      if j <= 10: #repeat code 17 supports max 10 zeroes
        bitlen_lld_e.add 17
        bitlen_lld_e.add(j - 3)
      else: #repeat code 18 supports max 138 zeroes
        if j > 138: j = 138
        bitlen_lld_e.add 18
        bitlen_lld_e.add(j - 11)
      i += (j - 1)
    elif j >= 3: #repeat code for value other than zero
      var num = j div 6
      var rest = j mod 6
      bitlen_lld_e.add bitlen_lld[i]
      for k in 0..num-1:
        bitlen_lld_e.add 16
        bitlen_lld_e.add(6 - 3)
      if rest >= 3:
        bitlen_lld_e.add 16
        bitlen_lld_e.add(rest - 3)
      else: j -= rest
      i += j
    else: #too short to benefit from repeat code
      bitlen_lld_e.add bitlen_lld[i]
    inc i

  #generate tree_cl, the huffmantree of huffmantrees
  frequencies_cl = newSeq[int](NUM_CODE_LENGTH_CODES)
  i = 0
  while i < bitlen_lld_e.len:
    inc frequencies_cl[bitlen_lld_e[i]]
    #after a repeat code come the bits that specify the number of repetitions,
    #those don't need to be in the frequencies_cl calculation
    if bitlen_lld_e[i] >= 16: inc i
    inc i

  HuffmanTree_makeFromFrequencies(tree_cl, frequencies_cl, frequencies_cl.len, 7)

  bitlen_cl = newSeq[int](tree_cl.numcodes)
  for i in 0..tree_cl.numcodes-1:
    #lengths of code length tree is in the order as specified by deflate
    bitlen_cl[i] = HuffmanTree_getLength(tree_cl, CLCL_ORDER[i])

  while (bitlen_cl[bitlen_cl.high] == 0) and (bitlen_cl.len > 4):
    #remove zeros at the end, but minimum size must be 4
    bitlen_cl.setLen(bitlen_cl.high)

  #Write everything into the output
  #After the BFINAL and BTYPE, the dynamic block consists out of the following:
  #- 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
  #- (HCLEN+4)*3 bits code lengths of code length alphabet
  #- HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
  #  alphabet, + possible repetition codes 16, 17, 18)
  #- HDIST + 1 code lengths of distance alphabet (encoded using the code length
  #  alphabet, + possible repetition codes 16, 17, 18)
  #- compressed data
  #- 256 (end code)

  #Write block type
  nz.bits.addBitToStream(if final: 1 else: 0)
  nz.bits.addBitToStream(0) #first bit of BTYPE "dynamic"
  nz.bits.addBitToStream(1) #second bit of BTYPE "dynamic"

  #write the HLIT, HDIST and HCLEN values
  var HLIT = (numcodes_ll - 257)
  var HDIST = (numcodes_d - 1)
  var HCLEN = bitlen_cl.len - 4
  #trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation
  while (bitlen_cl[HCLEN + 4 - 1] == 0) and (HCLEN > 0): dec HCLEN
  nz.bits.addBitsToStream(HLIT, 5)
  nz.bits.addBitsToStream(HDIST, 5)
  nz.bits.addBitsToStream(HCLEN, 4)

  #write the code lengths of the code length alphabet
  for i in 0..HCLEN + 4 - 1: nz.bits.addBitsToStream(bitlen_cl[i], 3)

  #write the lengths of the lit/len AND the dist alphabet
  i = 0
  while i < bitlen_lld_e.len:
    nz.bits.addHuffmanSymbol(tree_cl, bitlen_lld_e[i])
    #extra bits of repeat codes
    if bitlen_lld_e[i] == 16:
      inc i
      nz.bits.addBitsToStream(bitlen_lld_e[i], 2)
    elif bitlen_lld_e[i] == 17:
      inc i
      nz.bits.addBitsToStream(bitlen_lld_e[i], 3)
    elif bitlen_lld_e[i] == 18:
      inc i
      nz.bits.addBitsToStream(bitlen_lld_e[i], 7)
    inc i

  #write the compressed data symbols
  nz.bits.writeLZ77data(lz77, tree_ll, tree_d)
  if HuffmanTree_getLength(tree_ll, 256) == 0:
    raise newNZError("the length of the end code 256 must be larger than 0")
  #write the end code
  nz.bits.addHuffmanSymbol(tree_ll, 256)
proc nzDeflate(nz: nzStream) =
  ## Deflates all of `nz.data`, dispatching on `nz.btype`:
  ## 0 delegates to `deflateNoCompression`, 1 writes blocks with
  ## `deflateFixed`, 2 writes blocks with `deflateDynamic`.
  ##
  ## Raises an NZError when `nz.btype` is larger than 2.
  var hash: NZHash
  var blocksize = 0
  let insize = nz.data.len

  if nz.btype > 2: raise newNZError("invalid block type")
  elif nz.btype == 0:
    nz.deflateNoCompression
    return
  elif nz.btype == 1:
    #a single block for the whole input; clamp to 1 so that empty input does
    #not make the block-count division below divide by zero
    blocksize = max(insize, 1)
  else: #if(nz.btype == 2)
    blocksize = max(insize div 8 + 8, 65535)

  var numdeflateblocks = (insize + blocksize - 1) div blocksize
  #empty input still needs one (empty) block to form a valid stream
  if numdeflateblocks == 0: numdeflateblocks = 1
  nimzHashInit(hash, nz.windowsize)

  for i in 0..numdeflateblocks-1:
    #only the last block carries the BFINAL flag
    let final = (i == numdeflateblocks - 1)
    let datapos = i * blocksize
    let dataend = min(datapos + blocksize, insize)

    if nz.btype == 1: nz.deflateFixed(hash, datapos, dataend, final)
    elif nz.btype == 2: nz.deflateDynamic(hash, datapos, dataend, final)
proc nzInit(): nzStream =
  ## Creates a stream carrying the default compression settings.
  const windowSizeDefault = 2048
  nzStream(
    lazymatching: true,
    nicematch: 128,
    minmatch: 3,
    windowsize: windowSizeDefault,
    use_lz77: true,
    #compress with dynamic huffman tree
    #(not in the mathematical sense, just not the predefined one)
    btype: 2)
proc nzDeflateInit*(input: string): nzStream =
  ## Creates a stream in deflate mode: `input` becomes the data to compress
  ## and the output bit stream starts out empty.
  result = nzInit()
  result.mode = nzsDeflate
  result.data = input
  result.bits.bitpointer = 0
  result.bits.data = ""
proc nzInflateInit*(input: string): nzStream =
  ## Creates a stream in inflate mode: `input` becomes the bit stream to read
  ## and an output string with reserved capacity is prepared.
  result = nzInit()
  result.mode = nzsInflate
  # Allocate 5MB in advance
  result.data = newStringOfCap(5 * 1024 * 1024)
  shallowCopy(result.bits.data, input)
  result.bits.databitlen = input.len * 8
  result.bits.bitpointer = 0
proc nzGetResult(nz: nzStream): string =
  ## Returns `nz.data` when the stream is in inflate mode, and the bytes of
  ## the bit stream (`nz.bits.data`) otherwise.
  result =
    if nz.mode == nzsInflate: nz.data
    else: nz.bits.data
proc nzAdler32(adler: uint32, data: string): uint32 =
  ## Continues the Adler-32 checksum `adler` over the bytes of `data` and
  ## returns the updated checksum (high sum in the upper 16 bits, low sum in
  ## the lower 16 bits). Pass 1 as `adler` to start a new checksum.
  const
    modulus = 65521'u32
    #at least 5550 sums can be done before the sums overflow,
    #saving a lot of modulo divisions
    chunk = 5550
  var
    lo = adler and 0xffff'u32
    hi = (adler shr 16) and 0xffff'u32
    idx = 0
  while idx < data.len:
    let stop = min(idx + chunk, data.len)
    while idx < stop:
      lo += uint32(ord(data[idx]))
      hi += lo
      inc idx
    lo = lo mod modulus
    hi = hi mod modulus
  result = (hi shl 16) or lo
proc add32bitInt(s: var BitStream, val: uint32) =
  ## Appends `val` to `s.data` as four bytes, most significant byte first.
  for shift in [24, 16, 8, 0]:
    let octet = int((val shr uint32(shift)) and 0xff'u32)
    s.data.add chr(octet)
proc zlib_compress*(nz: nzStream): string =
  ## Compresses `nz.data` into a zlib stream and returns it.
  ##
  ## zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data,
  ## 4 byte ADLER32 checksum of the decompressed data.
  const
    cmf = 120 #0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.
    flevel = 0
    fdict = 0
    unchecked = 256 * cmf + fdict * 32 + flevel * 64
    #FCHECK tops the 16-bit header up to a multiple of 31
    header = unchecked + (31 - unchecked mod 31)

  #the two header bytes are appended directly, so move the bit position along
  nz.bits.data.add chr(header div 256)
  nz.bits.data.add chr(header mod 256)
  nz.bits.bitpointer += 16

  nz.nzDeflate
  nz.bits.add32bitInt nzAdler32(1, nz.data)
  result = nz.nzGetResult
proc readInt32(input: string): uint32 =
  ## Interprets the four bytes of `input` as a big-endian unsigned 32-bit
  ## integer. `input` must be exactly four bytes long (checked with `assert`).
  assert input.len == 4
  for ch in input:
    #shift the bytes read so far up and put the next one in the low byte
    result = (result shl 8) or uint32(ord(ch))
proc zlib_decompress*(nz: nzStream): string =
  ## Decompresses the zlib stream held in `nz.bits.data` and returns the
  ## inflated data.
  ##
  ## Raises an NZError when the input is too short to hold the 2-byte header
  ## and the 4-byte checksum, when the header is not a multiple of 31, or when
  ## the Adler-32 checksum of the inflated data does not match the stored one.
  let insize = nz.bits.data.len
  if insize < 2: raise newNZError("size of zlib data too small")

  #read information from zlib header
  let CMF = nz.bits.readByte
  let FLG = nz.bits.readByte
  #the FCHECK value is supposed to be made that way
  if ((CMF * 256 + FLG) mod 31) != 0:
    raise newNZError(" zlib header must be a multiple of 31")
  #NOTE: CM, CINFO, FDICT and FLEVEL are not validated here. The PNG spec only
  #allows compression method 8 with a window of at most 32k (CINFO <= 7) and
  #no preset dictionary (FDICT == 0).

  #the last four bytes are the checksum; without this guard a truncated
  #stream would trip the assertion in readInt32 or a negative setLen
  if insize < 6: raise newNZError("size of zlib data too small")

  let checksum = nz.bits.data.substr(insize-4, insize-1).readInt32
  #drop the checksum bytes before inflating
  nz.bits.data.setLen(insize-4)
  nz.nzInflate

  let adler32 = nzAdler32(1, nz.data)
  if checksum != adler32:
    raise newNZError("adler checksum not correct, data must be corrupted")
  result = nz.nzGetResult