diff --git a/leopard.nim b/leopard.nim
index 560bd9c..aa6490c 100644
--- a/leopard.nim
+++ b/leopard.nim
@@ -1,10 +1,9 @@
-import pkg/stew/ptrops
 import pkg/stew/results
 import pkg/upraises
 
 import ./leopard/wrapper
 
-export results
+export LEO_ALIGN_BYTES, results
 
 push: {.upraises: [].}
 
@@ -57,7 +56,7 @@ type
 # workaround for https://github.com/nim-lang/Nim/issues/19619
 # necessary for use of nim-leopard in nimbus-build-system projects because nbs
 # ships libbacktrace by default
-proc `$`*(err: LeopardError): string {.noSideEffect.} =
+proc `$`*(err: LeopardError): string {.nosideeffect.} =
   $err
 
 # https://github.com/catid/leopard/issues/12
@@ -82,47 +81,56 @@ func isValid*(code: ReedSolomonCode): bool =
        (code.data < MinSymbols) or (code.parity < MinSymbols) or
        (code.codeword > MaxTotalSymbols))
 
-when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
-  const
-    header = "<stdlib.h>"
+# alloc/freeAligned and helpers adapted from mratsim/weave:
+# https://github.com/mratsim/weave/blob/master/weave/memory/allocs.nim
 
-  proc c_malloc(size: csize_t): pointer {.importc: "malloc", header: header.}
-  proc c_free(p: pointer) {.importc: "free", header: header.}
+func isPowerOfTwo(n: int): bool {.inline.} =
+  (n and (n - 1)) == 0
 
-proc SIMDSafeAllocate(size: int): pointer {.inline.} =
-  var
-    data =
-      when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
-        c_malloc(LEO_ALIGN_BYTES + size.uint)
-      else:
-        allocShared(LEO_ALIGN_BYTES + size.uint)
+func roundNextMultipleOf(x, n: Natural): int {.inline.} =
+  (x + n - 1) and not (n - 1)
 
-    doffset = cast[uint](data) mod LEO_ALIGN_BYTES
+when defined(windows):
+  proc aligned_alloc_windows(size, alignment: csize_t): pointer
+    {.header: "<malloc.h>", importc: "_aligned_malloc", sideeffect.}
 
-  data = offset(data, (LEO_ALIGN_BYTES + doffset).int)
+  proc aligned_free_windows(p: pointer)
+    {.header: "<malloc.h>", importc: "_aligned_free", sideeffect.}
 
-  var
-    offsetPtr = cast[pointer](cast[uint](data) - 1)
+  proc freeAligned*(p: pointer) =
+    if not p.isNil:
+      aligned_free_windows(p)
 
-  moveMem(offsetPtr, addr doffset, sizeof(doffset))
-  data
+elif defined(osx):
+  proc posix_memalign(mem: var pointer, alignment, size: csize_t)
+    {.header: "<stdlib.h>", importc, sideeffect.}
 
-proc SIMDSafeFree(data: pointer) {.inline.} =
-  var
-    data = data
+  proc aligned_alloc(alignment, size: csize_t): pointer {.inline.} =
+    posix_memalign(result, alignment, size)
 
-  if not data.isNil:
-    let
-      offset = cast[uint](data) - 1
+else:
+  proc aligned_alloc(alignment, size: csize_t): pointer
+    {.header: "<stdlib.h>", importc, sideeffect.}
 
-    if offset >= LEO_ALIGN_BYTES: return
+when not defined(windows):
+  proc c_free(p: pointer) {.header: "<stdlib.h>", importc: "free".}
 
-    data = cast[pointer](cast[uint](data) - (LEO_ALIGN_BYTES - offset))
+  proc freeAligned*(p: pointer) {.inline.} =
+    if not p.isNil:
+      c_free(p)
 
-    when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
-      c_free data
-    else:
-      deallocShared data
+proc allocAligned*(size: int, alignment: static Natural): pointer {.inline.} =
+  ## aligned_alloc requires allocating in multiple of the alignment.
+  static:
+    assert alignment.isPowerOfTwo()
+
+  let
+    requiredMem = size.roundNextMultipleOf(alignment)
+
+  when defined(windows):
+    aligned_alloc_windows(csize_t requiredMem, csize_t alignment)
+  else:
+    aligned_alloc(csize_t alignment, csize_t requiredMem)
 
 proc leoInit*() =
   if wrapper.leoInit() != 0:
@@ -156,18 +164,18 @@ proc encode*(code: ReedSolomonCode, data: Data):
   for i in 0..