Another 12% performance improvement for compress

This commit is contained in:
Ryan Oldenburg 2020-11-08 22:04:36 -06:00
parent ebe678443f
commit b13c1900ca
4 changed files with 22 additions and 13 deletions

View File

@ -31,9 +31,9 @@ Each file is compressed 1000 times.
**https://github.com/guzba/zippy** compress results:
File | Time | % Size Reduction
--- | --- | ---:
alice29.txt | 6.3391s | 62.33%
urls.10K | 19.9189s | 67.01%
rfctest3.gold | 1.1809s | 70.91%
alice29.txt | 4.4424s | 62.33%
urls.10K | 16.9259s | 67.01%
rfctest3.gold | 1.0140s | 70.91%
randtest3.gold | 0.1285s | 0%
https://github.com/nim-lang/zip compress results: (Requires zlib1.dll)

View File

@ -104,6 +104,9 @@ when defined(release):
template read32*(p: pointer): uint32 =
  ## Reinterprets `p` as a `ptr uint32` and dereferences it, yielding the
  ## four bytes stored at that address as a single value in the host's
  ## native byte order. Nothing is checked here: the caller must make sure
  ## four readable bytes exist at `p`.
  (cast[ptr uint32](p))[]
template read64*(p: pointer): uint64 =
  ## Reinterprets `p` as a `ptr uint64` and dereferences it, yielding the
  ## eight bytes stored at that address as a single value in the host's
  ## native byte order. Nothing is checked here: the caller must make sure
  ## eight readable bytes exist at `p`.
  (cast[ptr uint64](p))[]
func adler32*(data: seq[uint8]): uint32 =
## See https://github.com/madler/zlib/blob/master/adler32.c

View File

@ -283,16 +283,22 @@ func lz77Encode(src: seq[uint8]): (seq[uint16], seq[int], seq[int], int) =
prevOffset = offset
var matchLen: int
if (
(read32(src[pos - offset].unsafeAddr) xor read32(src[pos].unsafeAddr)
) shl 8) == 0:
# The first 3 bytes match (the hash of them got us here so usually do)
inc(matchLen, 3)
for i in 3 ..< stop - pos:
if src[pos - offset + i] != src[pos + i]:
var i, matchLen: int
while i < stop - pos:
let
bytesToCheck = min(8, stop - pos - i)
v = read64(src[pos - offset + i].unsafeAddr) xor
read64(src[pos + i].unsafeAddr)
if v == 0:
inc(matchLen, bytesToCheck)
else:
let
zeroBits = countTrailingZeroBits(v)
matchingBytes = min(zeroBits div 8, bytesToCheck)
inc(matchLen, matchingBytes)
if matchingBytes < bytesToCheck:
break
inc matchLen
inc(i, bytesToCheck)
if matchLen > longestMatchLen:
longestMatchLen = matchLen

View File

@ -1,5 +1,5 @@
import std/monotimes, strformat, zip/zlib, zippy
# import miniz, nimPNG/nimz,
# import miniz, nimPNG/nimz
const
zs = [