From c2bb9a1d1393eb3bd4d92b71e2430826da7c989f Mon Sep 17 00:00:00 2001 From: munna0908 Date: Fri, 27 Jun 2025 19:19:25 +0530 Subject: [PATCH 1/2] Revert "feat: enhance encode/decode procs for multithreading support (#20)" This reverts commit 03f71498fc413e650f8f1a6010be741972c2d3d6. --- leopard/leopard.nim | 35 +++++----- tests/helpers.nim | 80 ++++++++--------------- tests/testleopard.nim | 149 +++++++++++++++++------------------------- 3 files changed, 104 insertions(+), 160 deletions(-) diff --git a/leopard/leopard.nim b/leopard/leopard.nim index 71d23ee..615f7ad 100644 --- a/leopard/leopard.nim +++ b/leopard/leopard.nim @@ -48,8 +48,8 @@ type func encode*( self: var LeoEncoder, - data,parity: ptr UncheckedArray[ptr UncheckedArray[byte]], - dataLen,parityLen: int ): Result[void, cstring] = + data, + parity: var openArray[seq[byte]]): Result[void, cstring] = ## Encode a list of buffers in `data` into a number of `bufSize` sized ## `parity` buffers ## @@ -57,10 +57,10 @@ func encode*( ## `parity` - list of parity `buffers` of size `bufSize` ## - if dataLen != self.buffers: + if data.len != self.buffers: return err("Number of data buffers should match!") - if parityLen != self.parity: + if parity.len != self.parity: return err("Number of parity buffers should match!") # zero encode work buffer to avoid corrupting with previous run @@ -68,7 +68,7 @@ func encode*( zeroMem(self.workBufferPtr[i], self.bufSize) # copy data into aligned buffer - for i in 0.. 0: + copyMem(self.dataBufferPtr[i], addr data[i][0], self.bufSize) dataPtr[i] = self.dataBufferPtr[i] else: dataPtr[i] = nil # copy parity into aligned buffer for i in 0.. 0: copyMem(self.workBufferPtr[i], addr parity[i][0], self.bufSize) parityPtr[i] = self.workBufferPtr[i] else: diff --git a/tests/helpers.nim b/tests/helpers.nim index 3b43899..847e901 100644 --- a/tests/helpers.nim +++ b/tests/helpers.nim @@ -24,22 +24,22 @@ proc randomCRCPacket*(data: var openArray[byte]) = copyMem(addr data[4], unsafeAddr crc, sizeof(crc)) -proc checkCRCPacket*(data: ptr UncheckedArray[byte], len: int): bool = - if len < 16: - for i in 1.. 0: - dropRandomIdx(dataBuf,buffers, dataLosses) + dropRandomIdx(dataBuf, dataLosses) if parityLosses > 0: - dropRandomIdx(parityBuf,parity,parityLosses) + dropRandomIdx(parityBuf, parityLosses) - decoder.decode(dataBuf, parityBuf, recoveredBuf,buffers,parity,buffers).tryGet() + decoder.decode(dataBuf, parityBuf, recoveredBuf).tryGet() - for i in 0.. Date: Fri, 27 Jun 2025 19:24:29 +0530 Subject: [PATCH 2/2] refactor: improve code formatting and consistency across multiple files --- config.nims | 17 +++-- leopard/leopard.nim | 123 ++++++++++++++-------------------- leopard/utils/allocs.nim | 36 +++++----- leopard/utils/cpuinfo_x86.nim | 94 +++++++++++++++++++++----- leopard/wrapper.nim | 85 ++++++++++++----------- tests/helpers.nim | 36 +++++----- tests/testleopard.nim | 77 +++++++++++---------- 7 files changed, 263 insertions(+), 205 deletions(-) diff --git a/config.nims b/config.nims index 063e9b5..c40b055 100644 --- a/config.nims +++ b/config.nims @@ -1,8 +1,13 @@ ---styleCheck:usages ---styleCheck:error ---threads:on ---tlsEmulation:off -# begin Nimble config (version 1) -when fileExists("nimble.paths"): +--styleCheck: + usages +--styleCheck: + error +--threads: + on +--tlsEmulation: + off +# begin Nimble config (version 2) +--noNimblePath +when withDir(thisDir(), system.fileExists("nimble.paths")): include "nimble.paths" # end Nimble config diff --git a/leopard/leopard.nim b/leopard/leopard.nim index 615f7ad..5cc7390 100644 --- a/leopard/leopard.nim +++ b/leopard/leopard.nim @@ -19,27 +19,27 @@ import ./utils export wrapper, results -const - BuffMultiples* = 64 +const BuffMultiples* = 64 type LeoBufferPtr* = ptr UncheckedArray[byte] LeoCoderKind* {.pure.} = enum - Encoder, + Encoder Decoder Leo* = object of RootObj - bufSize*: int # size of the buffer in multiples of 64 - buffers*: int # total number of data buffers (K) - parity*: int # total number of parity buffers (M) - dataBufferPtr: seq[LeoBufferPtr] # buffer where data is copied before encoding - workBufferCount: int # number of parity work buffers - workBufferPtr: seq[LeoBufferPtr] # buffer where parity data is written during encoding or before decoding + bufSize*: int # size of the buffer in multiples of 64 + buffers*: int # total number of data buffers (K) + parity*: int # total number of parity buffers (M) + dataBufferPtr: seq[LeoBufferPtr] # buffer where data is copied before encoding + workBufferCount: int # number of parity work buffers + workBufferPtr: seq[LeoBufferPtr] + # buffer where parity data is written during encoding or before decoding case kind: LeoCoderKind of LeoCoderKind.Decoder: - decodeBufferCount: int # number of decoding work buffers - decodeBufferPtr: seq[LeoBufferPtr] # work buffer used for decoding + decodeBufferCount: int # number of decoding work buffers + decodeBufferPtr: seq[LeoBufferPtr] # work buffer used for decoding of LeoCoderKind.Encoder: discard @@ -47,9 +47,8 @@ type LeoDecoder* = object of Leo func encode*( - self: var LeoEncoder, - data, - parity: var openArray[seq[byte]]): Result[void, cstring] = + self: var LeoEncoder, data, parity: var openArray[seq[byte]] +): Result[void, cstring] = ## Encode a list of buffers in `data` into a number of `bufSize` sized ## `parity` buffers ## @@ -64,35 +63,33 @@ func encode*( return err("Number of parity buffers should match!") # zero encode work buffer to avoid corrupting with previous run - for i in 0.. 0: copyMem(self.dataBufferPtr[i], addr data[i][0], self.bufSize) dataPtr[i] = self.dataBufferPtr[i] @@ -135,22 +132,22 @@ func decode*( dataPtr[i] = nil # copy parity into aligned buffer - for i in 0.. 0: copyMem(self.workBufferPtr[i], addr parity[i][0], self.bufSize) parityPtr[i] = self.workBufferPtr[i] else: parityPtr[i] = nil - let - res = leoDecode( - self.bufSize.culonglong, - self.buffers.cuint, - self.parity.cuint, - self.decodeBufferCount.cuint, - cast[LeoDataPtr](addr dataPtr[0]), - cast[LeoDataPtr](addr parityPtr[0]), - cast[ptr pointer](addr self.decodeBufferPtr[0])) + let res = leoDecode( + self.bufSize.culonglong, + self.buffers.cuint, + self.parity.cuint, + self.decodeBufferCount.cuint, + cast[LeoDataPtr](addr dataPtr[0]), + cast[LeoDataPtr](addr parityPtr[0]), + cast[ptr pointer](addr self.decodeBufferPtr[0]), + ) if ord(res) != ord(LeopardSuccess): return err(leoResultString(res.LeopardResult)) @@ -195,11 +192,8 @@ func free*(self: var Leo) = # self.free() proc init[TT: Leo]( - T: type TT, - bufSize, - buffers, - parity: int, - kind: LeoCoderKind): Result[T, cstring] = + T: type TT, bufSize, buffers, parity: int, kind: LeoCoderKind +): Result[T, cstring] = if bufSize mod BuffMultiples != 0: return err("bufSize should be multiples of 64 bytes!") @@ -221,46 +215,33 @@ proc init[TT: Leo]( if (let res = leoInit(); res.ord != LeopardSuccess.ord): return err(leoResultString(res.LeopardResult)) - var - self = T( - kind: kind, - bufSize: bufSize, - buffers: buffers, - parity: parity) + var self = T(kind: kind, bufSize: bufSize, buffers: buffers, parity: parity) - self.workBufferCount = leoEncodeWorkCount( - buffers.cuint, - parity.cuint).int + self.workBufferCount = leoEncodeWorkCount(buffers.cuint, parity.cuint).int # initialize encode work buffers - for _ in 0..".} + proc alignedAllocWindows( + size, alignment: csize_t + ): pointer {.importc: "_aligned_malloc", header: "".} # Beware of the arg order! proc alignedAlloc(alignment, size: csize_t): pointer = alignedAllocWindows(size, alignment) - proc alignedFree*[T](p: ptr T) - {.importc: "_aligned_free", header: "".} + proc alignedFree*[T](p: ptr T) {.importc: "_aligned_free", header: "".} elif defined(osx): - proc posix_memalign(mem: var pointer, alignment, size: csize_t) - {.importc, header:"".} + proc posix_memalign( + mem: var pointer, alignment, size: csize_t + ) {.importc, header: "".} proc alignedAlloc(alignment, size: csize_t): pointer {.inline.} = posix_memalign(result, alignment, size) proc alignedFree*[T](p: ptr T) {.inline.} = c_free(p) + elif defined(unix): - proc alignedAlloc(alignment, size: csize_t): pointer - {.importc: "aligned_alloc", header: "".} + proc alignedAlloc( + alignment, size: csize_t + ): pointer {.importc: "aligned_alloc", header: "".} proc alignedFree*[T](p: ptr T) {.inline.} = c_free(p) + else: {.warning: "Falling back to manual pointer alignment, this is highly inefficient!".} - proc alignedAlloc*(size, align: Positive): pointer {.inline.} = - var - data = c_malloc(align + size) + proc alignedAlloc*(size, align: Positive): pointer {.inline.} = + var data = c_malloc(align + size) if not isNil(data): - var - doffset = cast[uint](data) mod align + var doffset = cast[uint](data) mod align data = data.offset((align + doffset).int) - var - offsetPtr = cast[pointer](cast[uint](data) - 1'u) + var offsetPtr = cast[pointer](cast[uint](data) - 1'u) moveMem(offsetPtr, addr doffset, sizeof(doffset)) return data @@ -71,7 +73,7 @@ else: if not isNil(data): let offset = cast[uint](data) - 1'u if offset >= align: - return + return data = cast[pointer](cast[uint](data) - (align - offset)) c_free(data) diff --git a/leopard/utils/cpuinfo_x86.nim b/leopard/utils/cpuinfo_x86.nim index ce31069..dc0add7 100644 --- a/leopard/utils/cpuinfo_x86.nim +++ b/leopard/utils/cpuinfo_x86.nim @@ -14,11 +14,13 @@ push: {.upraises: [].} # From awr1: https://github.com/nim-lang/Nim/pull/11816/files -proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] {.used.}= +proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] {.used.} = when defined(vcc): # limited inline asm support in vcc, so intrinsics, here we go: - proc cpuidVcc(cpuInfo: ptr int32; functionID, subFunctionID: int32) - {.cdecl, importc: "__cpuidex", header: "intrin.h".} + proc cpuidVcc( + cpuInfo: ptr int32, functionID, subFunctionID: int32 + ) {.cdecl, importc: "__cpuidex", header: "intrin.h".} + cpuidVcc(addr result.eax, eaxi, ecxi) else: var (eaxr, ebxr, ecxr, edxr) = (0'i32, 0'i32, 0'i32, 0'i32) @@ -28,24 +30,81 @@ proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] {.used.}= :"a"(`eaxi`), "c"(`ecxi`)""" (eaxr, ebxr, ecxr, edxr) -proc cpuNameX86(): string {.used.}= +proc cpuNameX86(): string {.used.} = var leaves {.global.} = cast[array[48, char]]([ cpuidX86(eaxi = 0x80000002'i32, ecxi = 0), cpuidX86(eaxi = 0x80000003'i32, ecxi = 0), - cpuidX86(eaxi = 0x80000004'i32, ecxi = 0)]) + cpuidX86(eaxi = 0x80000004'i32, ecxi = 0), + ]) result = $cast[cstring](addr leaves[0]) -type - X86Feature {.pure.} = enum - HypervisorPresence, Hyperthreading, NoSMT, IntelVtx, Amdv, X87fpu, Mmx, - MmxExt, F3DNow, F3DNowEnhanced, Prefetch, Sse, Sse2, Sse3, Ssse3, Sse4a, - Sse41, Sse42, Avx, Avx2, Avx512f, Avx512dq, Avx512ifma, Avx512pf, - Avx512er, Avx512cd, Avx512bw, Avx512vl, Avx512vbmi, Avx512vbmi2, - Avx512vpopcntdq, Avx512vnni, Avx512vnniw4, Avx512fmaps4, Avx512bitalg, - Avx512bfloat16, Avx512vp2intersect, Rdrand, Rdseed, MovBigEndian, Popcnt, - Fma3, Fma4, Xop, Cas8B, Cas16B, Abm, Bmi1, Bmi2, TsxHle, TsxRtm, Adx, Sgx, - Gfni, Aes, Vaes, Vpclmulqdq, Pclmulqdq, NxBit, Float16c, Sha, Clflush, - ClflushOpt, Clwb, PrefetchWT1, Mpx +type X86Feature {.pure.} = enum + HypervisorPresence + Hyperthreading + NoSMT + IntelVtx + Amdv + X87fpu + Mmx + MmxExt + F3DNow + F3DNowEnhanced + Prefetch + Sse + Sse2 + Sse3 + Ssse3 + Sse4a + Sse41 + Sse42 + Avx + Avx2 + Avx512f + Avx512dq + Avx512ifma + Avx512pf + Avx512er + Avx512cd + Avx512bw + Avx512vl + Avx512vbmi + Avx512vbmi2 + Avx512vpopcntdq + Avx512vnni + Avx512vnniw4 + Avx512fmaps4 + Avx512bitalg + Avx512bfloat16 + Avx512vp2intersect + Rdrand + Rdseed + MovBigEndian + Popcnt + Fma3 + Fma4 + Xop + Cas8B + Cas16B + Abm + Bmi1 + Bmi2 + TsxHle + TsxRtm + Adx + Sgx + Gfni + Aes + Vaes + Vpclmulqdq + Pclmulqdq + NxBit + Float16c + Sha + Clflush + ClflushOpt + Clwb + PrefetchWT1 + Mpx let leaf1 = cpuidX86(eaxi = 1, ecxi = 0) @@ -62,7 +121,8 @@ proc testX86Feature(feature: X86Feature): bool = # see: https://en.wikipedia.org/wiki/CPUID#Calling_CPUID # see: IntelĀ® Architecture Instruction Set Extensions and Future Features # Programming Reference - result = case feature + result = + case feature # leaf 1, edx of X87fpu: leaf1.edx.test(0) diff --git a/leopard/wrapper.nim b/leopard/wrapper.nim index 90d8b5a..c90eeb7 100644 --- a/leopard/wrapper.nim +++ b/leopard/wrapper.nim @@ -24,7 +24,6 @@ ## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## POSSIBILITY OF SUCH DAMAGE. - ## Leopard-RS ## MDS Reed-Solomon Erasure Correction Codes for Large Data in C ## @@ -56,9 +55,9 @@ ## arithmetic using Intel SIMD instructions." In: FAST-2013: 11th Usenix ## Conference on File and Storage Technologies, San Jose, 2013 - import pkg/upraises -push: {.upraises: [].} +push: + {.upraises: [].} ## ----------------------------------------------------------------------------- ## Build configuration @@ -67,8 +66,7 @@ import std/compilesettings import std/os import std/strutils -type - LeoDataPtr* {.importc: "const void* const*", bycopy.} = pointer +type LeoDataPtr* {.importc: "const void* const*", bycopy.} = pointer const LeopardCmakeFlags {.strdefine.} = @@ -121,12 +119,15 @@ static: let buildDirUnix = buildDir.pathWin2Unix leopardDirUnix = LeopardDir.pathWin2Unix - if defined(LeopardRebuild): discard bash("rm -rf", buildDirUnix) + if defined(LeopardRebuild): + discard bash("rm -rf", buildDirUnix) if (bashEx("ls", LeopardLib.pathWin2Unix)).exitCode != 0: discard bash("mkdir -p", buildDirUnix) let cmd = - @["cd", buildDirUnix, "&& cmake", leopardDirUnix, LeopardCmakeFlags, - "&& make libleopard"] + @[ + "cd", buildDirUnix, "&& cmake", leopardDirUnix, LeopardCmakeFlags, + "&& make libleopard", + ] echo "\nBuilding Leopard-RS: " & cmd.join(" ") let (output, exitCode) = bashEx cmd echo output @@ -134,7 +135,8 @@ static: discard bash("rm -rf", buildDirUnix) raise (ref Defect)(msg: "Failed to build Leopard-RS") else: - if defined(LeopardRebuild): discard gorge "rm -rf " & buildDir + if defined(LeopardRebuild): + discard gorge "rm -rf " & buildDir if gorgeEx("ls " & LeopardLib).exitCode != 0: discard gorge "mkdir -p " & buildDir let cmd = @@ -159,21 +161,22 @@ proc leoInit*(): cint {.leo, importcpp: "leo_init".} ## Results # TODO: For some reason it's only possibly to use the enum with `ord` -type - LeopardResult* = enum - LeopardCallInitialize = -7, ## Call leo_init() first - LeopardPlatform = -6, ## Platform is unsupported - LeopardInvalidInput = -5, ## A function parameter was invalid - LeopardInvalidCounts = -4, ## Invalid counts provided - LeopardInvalidSize = -3, ## Buffer size must be a multiple of 64 bytes - LeopardTooMuchData = -2, ## Buffer counts are too high - LeopardNeedMoreData = -1, ## Not enough recovery data received - LeopardSuccess = 0 ## Operation succeeded - +type LeopardResult* = enum + LeopardCallInitialize = -7 ## Call leo_init() first + LeopardPlatform = -6 ## Platform is unsupported + LeopardInvalidInput = -5 ## A function parameter was invalid + LeopardInvalidCounts = -4 ## Invalid counts provided + LeopardInvalidSize = -3 ## Buffer size must be a multiple of 64 bytes + LeopardTooMuchData = -2 ## Buffer counts are too high + LeopardNeedMoreData = -1 ## Not enough recovery data received + LeopardSuccess = 0 ## Operation succeeded ## Convert Leopard result to string -proc leoResultString*(result: LeopardResult): cstring {.leo, importc: "leo_result_string".} +proc leoResultString*( + result: LeopardResult +): cstring {.leo, importc: "leo_result_string".} + ## ------------------------------------------------------------------------------ ## Encoder API ## @@ -187,8 +190,10 @@ proc leoResultString*(result: LeopardResult): cstring {.leo, importc: "leo_resul ## Returns 0 on invalid input. ## -proc leoEncodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint - {.leo, importc: "leo_encode_work_count".} +proc leoEncodeWorkCount*( + originalCount: cuint, recoveryCount: cuint +): cuint {.leo, importc: "leo_encode_work_count".} + ## ## leo_encode() ## @@ -224,12 +229,13 @@ proc leoEncodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint ## proc leoEncode*( - bufferBytes: uint64; - originalCount: cuint; - recoveryCount: cuint; - workCount: cuint; - originalData: LeoDataPtr; - workData: ptr pointer): LeopardResult {.leo, importc: "leo_encode".} + bufferBytes: uint64, + originalCount: cuint, + recoveryCount: cuint, + workCount: cuint, + originalData: LeoDataPtr, + workData: ptr pointer, +): LeopardResult {.leo, importc: "leo_encode".} ## Number of bytes in each data buffer ## Number of original_data[] buffer pointers ## Number of recovery_data[] buffer pointers @@ -251,8 +257,10 @@ proc leoEncode*( ## Returns 0 on invalid input. ## -proc leoDecodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint - {.leo, importc: "leo_decode_work_count".} +proc leoDecodeWorkCount*( + originalCount: cuint, recoveryCount: cuint +): cuint {.leo, importc: "leo_decode_work_count".} + ## ## leoDecode() ## @@ -276,13 +284,14 @@ proc leoDecodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint ## proc leoDecode*( - bufferBytes: uint64; - originalCount: cuint; - recoveryCount: cuint; - workCount: cuint; - originalData: LeoDataPtr; - recoveryData: LeoDataPtr; - workData: ptr pointer): LeopardResult {.leo, importc: "leo_decode".} + bufferBytes: uint64, + originalCount: cuint, + recoveryCount: cuint, + workCount: cuint, + originalData: LeoDataPtr, + recoveryData: LeoDataPtr, + workData: ptr pointer, +): LeopardResult {.leo, importc: "leo_decode".} ## Number of bytes in each data buffer ## Number of original_data[] buffer pointers ## Number of recovery_data[] buffer pointers diff --git a/tests/helpers.nim b/tests/helpers.nim index 847e901..a9a11e6 100644 --- a/tests/helpers.nim +++ b/tests/helpers.nim @@ -6,17 +6,15 @@ import ../leopard proc randomCRCPacket*(data: var openArray[byte]) = if data.len < 16: data[0] = rand(data.len).byte - for i in 1..