Initial implementation

This commit is contained in:
Dmitriy Ryajov 2022-03-21 12:06:00 -06:00
parent 4d8964e554
commit 7060c2ef7e
No known key found for this signature in database
GPG Key ID: DA8C680CE7C657A4
8 changed files with 1231 additions and 703 deletions

View File

@ -82,48 +82,6 @@ func isValid*(code: ReedSolomonCode): bool =
(code.data < MinSymbols) or (code.parity < MinSymbols) or
(code.codeword > MaxTotalSymbols))
when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
  # Nim releases older than 1.4.0 get direct bindings to the C allocator from
  # <stdlib.h>; the allocation helpers in this file select them under the same
  # version check.
  const
    header = "<stdlib.h>"
  proc c_malloc(size: csize_t): pointer {.importc: "malloc", header: header.}
  proc c_free(p: pointer) {.importc: "free", header: header.}
proc SIMDSafeAllocate(size: int): pointer {.inline.} =
  ## Allocates `size` bytes aligned to `LEO_ALIGN_BYTES`.
  ##
  ## The raw block is over-allocated by `LEO_ALIGN_BYTES`. The returned pointer
  ## is the raw pointer advanced to the next alignment boundary (always by at
  ## least one byte), and the misalignment of the raw pointer is stored in the
  ## single byte right below the returned pointer so that `SIMDSafeFree` can
  ## recover the raw pointer.
  ##
  ## Returns `nil` if the underlying allocator fails.
  let
    total = int(LEO_ALIGN_BYTES) + size
    raw =
      when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
        c_malloc(csize_t(total))
      else:
        allocShared(total)

  if raw.isNil:
    return nil

  let
    doffset = cast[uint](raw) mod uint(LEO_ALIGN_BYTES)
    # advance by `align - offset` (1..align bytes) to land on a boundary
    aligned = cast[uint](raw) + (uint(LEO_ALIGN_BYTES) - doffset)

  # only ONE byte of bookkeeping fits below the aligned pointer
  cast[ptr byte](aligned - 1'u)[] = byte(doffset)
  cast[pointer](aligned)

proc SIMDSafeFree(data: pointer) {.inline.} =
  ## Releases a block obtained from `SIMDSafeAllocate`; `nil` is ignored.
  ##
  ## The byte right below `data` holds the misalignment recorded at allocation
  ## time; a value that is not smaller than `LEO_ALIGN_BYTES` cannot have been
  ## written by `SIMDSafeAllocate`, so such a pointer is left untouched.
  if data.isNil:
    return

  let
    doffset = uint(cast[ptr byte](cast[uint](data) - 1'u)[])

  if doffset >= uint(LEO_ALIGN_BYTES):
    return

  let
    raw = cast[pointer](cast[uint](data) - (uint(LEO_ALIGN_BYTES) - doffset))

  when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
    c_free raw
  else:
    deallocShared raw
proc leoInit*() =
  ## Initializes the Leopard-RS library through `wrapper.leoInit`; any
  ## non-zero status is raised as a `LeopardDefect`.
  let status = wrapper.leoInit()
  if status == 0:
    return

  raise (ref LeopardDefect)(msg: "Leopard-RS failed to initialize")

198
leopard/leopard.nim Normal file
View File

@ -0,0 +1,198 @@
## Nim-Leopard
## Copyright (c) 2022 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import pkg/upraises
push: {.upraises: [].}
{.deadCodeElim: on.}
import pkg/stew/results
import pkg/stew/byteutils
import ./wrapper
import ./utils
export wrapper, results
const
  BuffMultiples* = 64 # `setup` rejects a `bufSize` that is not a multiple of this

type
  LeoBufferPtr = ptr UncheckedArray[byte] # raw view of one buffer obtained from `leoAlloc`

  Leo = object of RootObj
    ## State shared by `LeoEncoder` and `LeoDecoder`.
    bufSize*: int # size of the buffer in multiples of 64
    buffers*: int # total number of data buffers (K)
    parity*: int # total number of parity buffers (M)
    dataBufferPtr: seq[LeoBufferPtr] # aligned buffers the caller's data is copied into
    parityWorkCount: int # number of parity work buffers
    parityBufferPtr: seq[LeoBufferPtr] # aligned parity buffers: encode output / decode input

  LeoEncoder* = object of Leo
    ## Encoder state; adds nothing to `Leo`.

  LeoDecoder* = object of Leo
    ## Decoder state; adds the decode work buffers to `Leo`.
    decodeWorkCount: int # number of decoding work buffers
    decodeBufferPtr: seq[LeoBufferPtr] # work buffer used for decoding
proc encode*(
  self: var LeoEncoder,
  data,
  parity: var openArray[seq[byte]]): Result[void, cstring] =
  ## Encodes the `data` buffers and writes the resulting parity into `parity`.
  ##
  ## `data` must hold `self.buffers` entries and `parity` must hold
  ## `self.parity` entries; every entry has to be at least `self.bufSize`
  ## bytes long, only the first `self.bufSize` bytes of each are used.
  ##
  ## Returns an error string if the inputs are malformed or if the Leopard
  ## encoder reports a failure.

  # The copies below use `copyMem`, which does no bounds checking, so all
  # shapes are validated up front.
  if data.len != self.buffers:
    return err("Number of data buffers should match!")

  if parity.len != self.parity:
    return err("Number of parity buffers should match!")

  for i in 0..<data.len:
    if data[i].len < self.bufSize:
      return err("Data buffers should be at least bufSize bytes long!")

  for i in 0..<parity.len:
    if parity[i].len < self.bufSize:
      return err("Parity buffers should be at least bufSize bytes long!")

  # zero encode work buffer to avoid corrupting with previous run
  for i in 0..<self.parityWorkCount:
    zeroMem(self.parityBufferPtr[i], self.bufSize)

  # copy data into aligned buffer
  for i in 0..<data.len:
    copyMem(self.dataBufferPtr[i], addr data[i][0], self.bufSize)

  let
    res = leoEncode(
      self.bufSize.cuint,
      self.buffers.cuint,
      self.parity.cuint,
      self.parityWorkCount.cuint,
      cast[ptr pointer](addr self.dataBufferPtr[0]),
      cast[ptr pointer](addr self.parityBufferPtr[0]))

  if ord(res) != ord(LeopardSuccess):
    return err(leoResultString(res.LeopardResult))

  # hand the computed parity back to the caller
  for i in 0..<parity.len:
    copyMem(addr parity[i][0], self.parityBufferPtr[i], self.bufSize)

  ok()
proc decode*(
  self: var LeoDecoder,
  data,
  parity,
  recovered: var openArray[seq[byte]]): Result[void, cstring] =
  ## Reconstructs the missing entries of `data`.
  ##
  ## `data` and `recovered` must hold `self.buffers` entries and `parity` must
  ## hold `self.parity` entries. A missing data or parity buffer is passed as
  ## an empty `seq`; every buffer that is present has to be at least
  ## `self.bufSize` bytes long. For each missing `data[i]` the reconstructed
  ## bytes are written to `recovered[i]`, which therefore has to provide room
  ## for `self.bufSize` bytes.
  ##
  ## Returns an error string if a buffer is too short or if the Leopard
  ## decoder reports a failure.
  doAssert(data.len == self.buffers, "Number of data buffers should match!")
  doAssert(parity.len == self.parity, "Number of parity buffers should match!")
  doAssert(recovered.len == self.buffers, "Number of recovered buffers should match buffers!")

  # The copies below use `copyMem`, which does no bounds checking, so buffer
  # lengths are validated up front.
  for i in 0..<data.len:
    if data[i].len > 0:
      if data[i].len < self.bufSize:
        return err("Data buffers should be at least bufSize bytes long!")
    elif recovered[i].len < self.bufSize:
      return err("Recovered buffers should be at least bufSize bytes long!")

  for i in 0..<parity.len:
    if parity[i].len > 0 and parity[i].len < self.bufSize:
      return err("Parity buffers should be at least bufSize bytes long!")

  # zero both work buffers before decoding
  for i in 0..<self.parityWorkCount:
    zeroMem(self.parityBufferPtr[i], self.bufSize)

  for i in 0..<self.decodeWorkCount:
    zeroMem(self.decodeBufferPtr[i], self.bufSize)

  var
    dataPtr = newSeq[LeoBufferPtr](data.len)
    parityPtr = newSeq[LeoBufferPtr](self.parityWorkCount)

  # copy data into aligned buffer
  for i in 0..<data.len:
    if data[i].len > 0:
      dataPtr[i] = self.dataBufferPtr[i]
      copyMem(self.dataBufferPtr[i], addr data[i][0], self.bufSize)
    else:
      dataPtr[i] = nil

  # copy parity into aligned buffer
  for i in 0..<self.parityWorkCount:
    if i < parity.len and parity[i].len > 0:
      parityPtr[i] = self.parityBufferPtr[i]
      copyMem(self.parityBufferPtr[i], addr parity[i][0], self.bufSize)
    else:
      parityPtr[i] = nil

  let
    res = leoDecode(
      self.bufSize.cuint,
      self.buffers.cuint,
      self.parity.cuint,
      self.decodeWorkCount.cuint,
      cast[ptr pointer](addr dataPtr[0]),
      # pass the nil-marked pointer list, not the raw buffer list, so that
      # missing parity buffers are reported as missing to the decoder
      cast[ptr pointer](addr parityPtr[0]),
      cast[ptr pointer](addr self.decodeBufferPtr[0]))

  if ord(res) != ord(LeopardSuccess):
    return err(leoResultString(res.LeopardResult))

  # only the buffers that were missing are taken from the decode work buffers
  for i in 0..<self.buffers:
    if data[i].len <= 0:
      copyMem(addr recovered[i][0], self.decodeBufferPtr[i], self.bufSize)

  ok()
# NOTE(review): `free` is currently a no-op, so the buffers obtained through
# `leoAlloc` in the `init` procs are not released here. The commented-out
# sketch below refers to `encodeWorkCount`/`encodeBufferPtr`, which are not
# fields of `Leo`, and `decodeWorkCount`/`decodeBufferPtr` only exist on
# `LeoDecoder`.
proc free*(self: var Leo) = discard
  # for i in 0..<self.encodeWorkCount:
  # leoFree(self.encodeBufferPtr[i])
  # self.encodeBufferPtr[i] = nil
  # for i in 0..<self.decodeWorkCount:
  # leoFree(self.decodeBufferPtr[i])
  # self.decodeBufferPtr[i] = nil
proc setup*(self: var Leo, bufSize, buffers, parity: int): Result[void, cstring] =
  ## Validates the codec parameters, initializes the Leopard library on first
  ## use and stores the parameters in `self`.
  ##
  ## `bufSize` has to be a positive multiple of `BuffMultiples`; `buffers` and
  ## `parity` have to be positive. Returns an error string otherwise, or when
  ## the library fails to initialize.

  # Non-positive values would otherwise pass the modulo check (`0 mod 64 == 0`)
  # and only fail later as a defect in the `cuint`/`Positive` conversions.
  if bufSize <= 0:
    return err("bufSize should be greater than zero!")

  if bufSize mod BuffMultiples != 0:
    return err("bufSize should be multiples of 64 bytes!")

  if buffers <= 0:
    return err("buffers should be greater than zero!")

  if parity <= 0:
    return err("parity should be greater than zero!")

  once:
    # First attempt to init the library
    # This happens only once for all threads...
    # NOTE(review): `once` marks the block as executed before running it, so a
    # failed first attempt is not retried by later calls.
    if (let res = leoInit(); res.ord != LeopardSuccess.ord):
      return err(leoResultString(res.LeopardResult))

  self.bufSize = bufSize
  self.buffers = buffers
  self.parity = parity

  ok()
proc init*(T: type LeoEncoder, bufSize, buffers, parity: int): Result[T, cstring] =
  ## Creates an encoder for `buffers` data buffers and `parity` parity buffers
  ## of `bufSize` bytes each, allocating the aligned buffers `encode` works on.
  ## Fails with the error reported by `setup`.
  var
    encoder = LeoEncoder()

  ? Leo(encoder).setup(bufSize, buffers, parity)

  encoder.parityWorkCount = leoEncodeWorkCount(buffers.cuint, parity.cuint).int

  # one aligned buffer per parity work slot
  encoder.parityBufferPtr = newSeq[LeoBufferPtr](encoder.parityWorkCount)
  for slot in encoder.parityBufferPtr.mitems:
    slot = cast[LeoBufferPtr](leoAlloc(encoder.bufSize))

  # one aligned buffer per data buffer
  encoder.dataBufferPtr = newSeq[LeoBufferPtr](encoder.buffers)
  for slot in encoder.dataBufferPtr.mitems:
    slot = cast[LeoBufferPtr](leoAlloc(encoder.bufSize))

  ok(encoder)
proc init*(T: type LeoDecoder, bufSize, buffers, parity: int): Result[T, cstring] =
  ## Creates a decoder for `buffers` data buffers and `parity` parity buffers
  ## of `bufSize` bytes each, allocating the aligned buffers `decode` works on.
  ## Fails with the error reported by `setup`.
  var
    decoder = LeoDecoder()

  ? Leo(decoder).setup(bufSize, buffers, parity)

  decoder.parityWorkCount = leoEncodeWorkCount(buffers.cuint, parity.cuint).int
  decoder.decodeWorkCount = leoDecodeWorkCount(buffers.cuint, parity.cuint).int

  # one aligned buffer per decode work slot
  decoder.decodeBufferPtr = newSeq[LeoBufferPtr](decoder.decodeWorkCount)
  for slot in decoder.decodeBufferPtr.mitems:
    slot = cast[LeoBufferPtr](leoAlloc(decoder.bufSize))

  # one aligned buffer per data buffer
  decoder.dataBufferPtr = newSeq[LeoBufferPtr](decoder.buffers)
  for slot in decoder.dataBufferPtr.mitems:
    slot = cast[LeoBufferPtr](leoAlloc(decoder.bufSize))

  # one aligned buffer per parity work slot
  decoder.parityBufferPtr = newSeq[LeoBufferPtr](decoder.parityWorkCount)
  for slot in decoder.parityBufferPtr.mitems:
    slot = cast[LeoBufferPtr](leoAlloc(decoder.bufSize))

  ok(decoder)

4
leopard/utils.nim Normal file
View File

@ -0,0 +1,4 @@
import ./utils/allocs
import ./utils/cpuinfo_x86
export cpuinfo_x86, allocs

82
leopard/utils/allocs.nim Normal file
View File

@ -0,0 +1,82 @@
## Nim-Leopard
## Copyright (c) 2022 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import pkg/upraises
push: {.upraises: [].}
{.deadCodeElim: on.}
import system/ansi_c
import pkg/stew/ptrops
import ./cpuinfo_x86
## inspired by https://github.com/mratsim/weave/blob/master/weave/memory/allocs.nim
let
  LeoAlignBytes* = if hasAvx2(): 32'u else: 16'u
    ## Alignment used for Leopard buffers: 32 bytes when AVX2 is reported,
    ## 16 bytes otherwise.

# Every branch provides the same pair:
#   alignedAlloc(alignment, size: csize_t): pointer
#   alignedFree[T](p: ptr T)
when defined(windows):
  # declared before `alignedAlloc`, which calls it
  proc alignedAllocWindows(size, alignment: csize_t): pointer {.sideeffect, importc: "_aligned_malloc", header: "<malloc.h>".}
    # Beware of the arg order!

  proc alignedAlloc(alignment, size: csize_t): pointer =
    alignedAllocWindows(size, alignment)

  proc alignedFree*[T](p: ptr T) {.sideeffect, importc: "_aligned_free", header: "<malloc.h>".}
elif defined(osx):
  proc posix_memalign(mem: var pointer, alignment, size: csize_t): cint {.sideeffect, importc, header:"<stdlib.h>".}

  proc alignedAlloc(alignment, size: csize_t): pointer {.inline.} =
    # the content of `mem` is unspecified on failure, so report `nil` explicitly
    if posix_memalign(result, alignment, size) != 0:
      result = nil

  proc alignedFree*[T](p: ptr T) {.inline.} =
    c_free(p)
elif defined(unix):
  proc alignedAlloc(alignment, size: csize_t): pointer {.sideeffect, importc: "aligned_alloc", header: "<stdlib.h>".}

  proc alignedFree*[T](p: ptr T) {.inline.} =
    # memory from `aligned_alloc` is released with plain `free`
    c_free(p)
else:
  {.warning: "Falling back to manual pointer alignment, might end-up using more memory!".}

  proc alignedAlloc(alignment, size: csize_t): pointer {.inline.} =
    ## Over-allocates with `c_malloc`, rounds up to the next multiple of
    ## `alignment` (which has to be a power of two) and remembers the raw
    ## pointer in the pointer-sized slot right below the returned address.
    let
      slot = csize_t(sizeof(pointer))
      raw = c_malloc(size + alignment + slot)

    if isNil(raw):
      return nil

    let
      mask = uint(alignment) - 1'u
      aligned = (cast[uint](raw) + uint(slot) + mask) and not mask

    cast[ptr pointer](aligned - uint(slot))[] = raw
    cast[pointer](aligned)

  proc alignedFree*[T](p: ptr T) {.inline.} =
    ## Releases a block returned by the fallback `alignedAlloc`; `nil` is
    ## ignored.
    if not isNil(p):
      c_free(cast[ptr pointer](cast[uint](p) - uint(sizeof(pointer)))[])
proc leoAlloc*(size: Positive): pointer {.inline.} =
  ## Allocates `size` bytes aligned to `LeoAlignBytes` through the platform's
  ## `alignedAlloc`.
  let nbytes = csize_t(size)
  result = alignedAlloc(LeoAlignBytes, nbytes)
proc leoFree*[T](p: ptr T) =
  ## Hands `p` to the platform's `alignedFree`.
  p.alignedFree()

View File

@ -0,0 +1,793 @@
## Nim-Leopard
## Copyright (c) 2022 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import pkg/upraises
push: {.upraises: [].}
{.deadCodeElim: on.}
# From awr1: https://github.com/nim-lang/Nim/pull/11816/files
proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] {.used.}=
  ## Executes the x86 `cpuid` instruction with `eaxi` as the EAX input and
  ## `ecxi` as the ECX input and returns the four output registers.
  when defined(vcc):
    # limited inline asm support in vcc, so intrinsics, here we go:
    proc cpuidVcc(cpuInfo: ptr int32; functionID, subFunctionID: int32)
      {.cdecl, importc: "__cpuidex", header: "intrin.h".}
    # the intrinsic is handed the address of the first tuple field;
    # presumably it fills all four consecutive int32 fields — confirm
    cpuidVcc(addr result.eax, eaxi, ecxi)
  else:
    var (eaxr, ebxr, ecxr, edxr) = (0'i32, 0'i32, 0'i32, 0'i32)
    # extended asm: outputs bound to a/b/c/d, inputs bound to a and c
    asm """
cpuid
:"=a"(`eaxr`), "=b"(`ebxr`), "=c"(`ecxr`), "=d"(`edxr`)
:"a"(`eaxi`), "c"(`ecxi`)"""
    (eaxr, ebxr, ecxr, edxr)
proc cpuNameX86(): string {.used.}=
  ## Returns the string stored in the register output of `cpuid` leaves
  ## 0x80000002, 0x80000003 and 0x80000004.
  # three tuples of four int32 each are reinterpreted as one 48 char buffer
  var leaves {.global.} = cast[array[48, char]]([
    cpuidX86(eaxi = 0x80000002'i32, ecxi = 0),
    cpuidX86(eaxi = 0x80000003'i32, ecxi = 0),
    cpuidX86(eaxi = 0x80000004'i32, ecxi = 0)])
  # NOTE(review): the buffer is read as a C string, which relies on a NUL
  # byte being present within the 48 bytes — confirm
  result = $cast[cstring](addr leaves[0])
type
  X86Feature {.pure.} = enum
    ## Features that `testX86Feature` can look up in the cached `cpuid`
    ## results.
    HypervisorPresence, Hyperthreading, NoSMT, IntelVtx, Amdv, X87fpu, Mmx,
    MmxExt, F3DNow, F3DNowEnhanced, Prefetch, Sse, Sse2, Sse3, Ssse3, Sse4a,
    Sse41, Sse42, Avx, Avx2, Avx512f, Avx512dq, Avx512ifma, Avx512pf,
    Avx512er, Avx512cd, Avx512bw, Avx512vl, Avx512vbmi, Avx512vbmi2,
    Avx512vpopcntdq, Avx512vnni, Avx512vnniw4, Avx512fmaps4, Avx512bitalg,
    Avx512bfloat16, Avx512vp2intersect, Rdrand, Rdseed, MovBigEndian, Popcnt,
    Fma3, Fma4, Xop, Cas8B, Cas16B, Abm, Bmi1, Bmi2, TsxHle, TsxRtm, Adx, Sgx,
    Gfni, Aes, Vaes, Vpclmulqdq, Pclmulqdq, NxBit, Float16c, Sha, Clflush,
    ClflushOpt, Clwb, PrefetchWT1, Mpx
# `cpuid` results queried once at module initialization and reused by
# `testX86Feature`; every query uses ECX = 0.
let
  leaf1 = cpuidX86(eaxi = 1, ecxi = 0)
  leaf7 = cpuidX86(eaxi = 7, ecxi = 0)
  leaf8 = cpuidX86(eaxi = 0x80000001'i32, ecxi = 0) # "leaf 8" is leaf 0x80000001
# The reason why we don't just evaluate these directly in the `let` variable
# list is so that we can internally organize features by their input (leaf)
# and output registers.
proc testX86Feature(feature: X86Feature): bool =
  ## Reports whether the bit that stands for `feature` is set in the cached
  ## `cpuid` results (`leaf1`, `leaf7`, `leaf8`).
  template isSet(reg: int32, pos: int): bool =
    ((1 shl pos) and int(reg)) != 0

  # see: https://en.wikipedia.org/wiki/CPUID#Calling_CPUID
  # see: Intel® Architecture Instruction Set Extensions and Future Features
  #      Programming Reference
  case feature
  # leaf 1, edx
  of X87fpu: isSet(leaf1.edx, 0)
  of Clflush: isSet(leaf1.edx, 19)
  of Mmx: isSet(leaf1.edx, 23)
  of Sse: isSet(leaf1.edx, 25)
  of Sse2: isSet(leaf1.edx, 26)
  of Hyperthreading: isSet(leaf1.edx, 28)

  # leaf 1, ecx
  of Sse3: isSet(leaf1.ecx, 0)
  of Pclmulqdq: isSet(leaf1.ecx, 1)
  of IntelVtx: isSet(leaf1.ecx, 5)
  of Ssse3: isSet(leaf1.ecx, 9)
  of Fma3: isSet(leaf1.ecx, 12)
  of Cas16B: isSet(leaf1.ecx, 13)
  of Sse41: isSet(leaf1.ecx, 19)
  of Sse42: isSet(leaf1.ecx, 20)
  of MovBigEndian: isSet(leaf1.ecx, 22)
  of Popcnt: isSet(leaf1.ecx, 23)
  of Aes: isSet(leaf1.ecx, 25)
  of Avx: isSet(leaf1.ecx, 28)
  of Float16c: isSet(leaf1.ecx, 29)
  of Rdrand: isSet(leaf1.ecx, 30)
  of HypervisorPresence: isSet(leaf1.ecx, 31)

  # leaf 7, ecx
  of PrefetchWT1: isSet(leaf7.ecx, 0)
  of Avx512vbmi: isSet(leaf7.ecx, 1)
  of Avx512vbmi2: isSet(leaf7.ecx, 6)
  of Gfni: isSet(leaf7.ecx, 8)
  of Vaes: isSet(leaf7.ecx, 9)
  of Vpclmulqdq: isSet(leaf7.ecx, 10)
  of Avx512vnni: isSet(leaf7.ecx, 11)
  of Avx512bitalg: isSet(leaf7.ecx, 12)
  of Avx512vpopcntdq: isSet(leaf7.ecx, 14)

  # leaf 7, eax
  of Avx512bfloat16: isSet(leaf7.eax, 5)

  # leaf 7, ebx
  of Sgx: isSet(leaf7.ebx, 2)
  of Bmi1: isSet(leaf7.ebx, 3)
  of TsxHle: isSet(leaf7.ebx, 4)
  of Avx2: isSet(leaf7.ebx, 5)
  of Bmi2: isSet(leaf7.ebx, 8)
  of TsxRtm: isSet(leaf7.ebx, 11)
  of Mpx: isSet(leaf7.ebx, 14)
  of Avx512f: isSet(leaf7.ebx, 16)
  of Avx512dq: isSet(leaf7.ebx, 17)
  of Rdseed: isSet(leaf7.ebx, 18)
  of Adx: isSet(leaf7.ebx, 19)
  of Avx512ifma: isSet(leaf7.ebx, 21)
  of ClflushOpt: isSet(leaf7.ebx, 23)
  of Clwb: isSet(leaf7.ebx, 24)
  of Avx512pf: isSet(leaf7.ebx, 26)
  of Avx512er: isSet(leaf7.ebx, 27)
  of Avx512cd: isSet(leaf7.ebx, 28)
  of Sha: isSet(leaf7.ebx, 29)
  of Avx512bw: isSet(leaf7.ebx, 30)
  of Avx512vl: isSet(leaf7.ebx, 31)

  # leaf 7, edx
  of Avx512vnniw4: isSet(leaf7.edx, 2)
  of Avx512fmaps4: isSet(leaf7.edx, 3)
  of Avx512vp2intersect: isSet(leaf7.edx, 8)

  # leaf 8, edx
  of NoSMT: isSet(leaf8.edx, 1)
  of Cas8B: isSet(leaf8.edx, 8)
  of NxBit: isSet(leaf8.edx, 20)
  of MmxExt: isSet(leaf8.edx, 22)
  of F3DNowEnhanced: isSet(leaf8.edx, 30)
  of F3DNow: isSet(leaf8.edx, 31)

  # leaf 8, ecx
  of Amdv: isSet(leaf8.ecx, 2)
  of Abm: isSet(leaf8.ecx, 5)
  of Sse4a: isSet(leaf8.ecx, 6)
  of Prefetch: isSet(leaf8.ecx, 8)
  of Xop: isSet(leaf8.ecx, 11)
  of Fma4: isSet(leaf8.ecx, 16)
# Feature flags evaluated once at module initialization; the exported `has*`
# procs below only return these cached values. Each flag must query the
# `X86Feature` member it is named after.
let
  isHypervisorPresentImpl = testX86Feature(HypervisorPresence)
  hasSimultaneousMultithreadingImpl =
    testX86Feature(Hyperthreading) or not testX86Feature(NoSMT)
  hasIntelVtxImpl = testX86Feature(IntelVtx)
  hasAmdvImpl = testX86Feature(Amdv)
  hasX87fpuImpl = testX86Feature(X87fpu)
  hasMmxImpl = testX86Feature(Mmx)
  hasMmxExtImpl = testX86Feature(MmxExt)
  has3DNowImpl = testX86Feature(F3DNow)
  has3DNowEnhancedImpl = testX86Feature(F3DNowEnhanced)
  hasPrefetchImpl = testX86Feature(Prefetch) or testX86Feature(F3DNow)
  hasSseImpl = testX86Feature(Sse)
  hasSse2Impl = testX86Feature(Sse2)
  hasSse3Impl = testX86Feature(Sse3)
  hasSsse3Impl = testX86Feature(Ssse3)
  hasSse4aImpl = testX86Feature(Sse4a)
  hasSse41Impl = testX86Feature(Sse41)
  hasSse42Impl = testX86Feature(Sse42)
  hasAvxImpl = testX86Feature(Avx)
  hasAvx2Impl = testX86Feature(Avx2)
  hasAvx512fImpl = testX86Feature(Avx512f)
  hasAvx512dqImpl = testX86Feature(Avx512dq)
  hasAvx512ifmaImpl = testX86Feature(Avx512ifma)
  hasAvx512pfImpl = testX86Feature(Avx512pf)
  hasAvx512erImpl = testX86Feature(Avx512er)
  hasAvx512cdImpl = testX86Feature(Avx512cd) # was querying Avx512dq
  hasAvx512bwImpl = testX86Feature(Avx512bw)
  hasAvx512vlImpl = testX86Feature(Avx512vl)
  hasAvx512vbmiImpl = testX86Feature(Avx512vbmi)
  hasAvx512vbmi2Impl = testX86Feature(Avx512vbmi2)
  hasAvx512vpopcntdqImpl = testX86Feature(Avx512vpopcntdq)
  hasAvx512vnniImpl = testX86Feature(Avx512vnni)
  hasAvx512vnniw4Impl = testX86Feature(Avx512vnniw4)
  hasAvx512fmaps4Impl = testX86Feature(Avx512fmaps4)
  hasAvx512bitalgImpl = testX86Feature(Avx512bitalg)
  hasAvx512bfloat16Impl = testX86Feature(Avx512bfloat16)
  hasAvx512vp2intersectImpl = testX86Feature(Avx512vp2intersect)
  hasRdrandImpl = testX86Feature(Rdrand)
  hasRdseedImpl = testX86Feature(Rdseed)
  hasMovBigEndianImpl = testX86Feature(MovBigEndian)
  hasPopcntImpl = testX86Feature(Popcnt)
  hasFma3Impl = testX86Feature(Fma3)
  hasFma4Impl = testX86Feature(Fma4)
  hasXopImpl = testX86Feature(Xop)
  hasCas8BImpl = testX86Feature(Cas8B)
  hasCas16BImpl = testX86Feature(Cas16B)
  hasAbmImpl = testX86Feature(Abm)
  hasBmi1Impl = testX86Feature(Bmi1)
  hasBmi2Impl = testX86Feature(Bmi2)
  hasTsxHleImpl = testX86Feature(TsxHle)
  hasTsxRtmImpl = testX86Feature(TsxRtm)
  hasAdxImpl = testX86Feature(Adx) # was querying TsxHle
  hasSgxImpl = testX86Feature(Sgx)
  hasGfniImpl = testX86Feature(Gfni)
  hasAesImpl = testX86Feature(Aes)
  hasVaesImpl = testX86Feature(Vaes)
  hasVpclmulqdqImpl = testX86Feature(Vpclmulqdq)
  hasPclmulqdqImpl = testX86Feature(Pclmulqdq)
  hasNxBitImpl = testX86Feature(NxBit)
  hasFloat16cImpl = testX86Feature(Float16c)
  hasShaImpl = testX86Feature(Sha)
  hasClflushImpl = testX86Feature(Clflush)
  hasClflushOptImpl = testX86Feature(ClflushOpt)
  hasClwbImpl = testX86Feature(Clwb)
  hasPrefetchWT1Impl = testX86Feature(PrefetchWT1)
  hasMpxImpl = testX86Feature(Mpx)
# NOTE: We use procedures here (layered over the variables) to keep the API
# consistent and usable against possible future heterogeneous systems with ISA
# differences between cores (a possibility that has historical precedents, for
# instance, the PPU/SPU relationship found on the IBM Cell). If future systems
# do end up having disparate ISA features across multiple cores, expect there to
# be a "cpuCore" argument added to the feature procs.
proc isHypervisorPresent*(): bool {.inline.} =
return isHypervisorPresentImpl
## **(x86 Only)**
##
## Reports `true` if this application is running inside of a virtual machine
## (this is by no means foolproof).
proc hasSimultaneousMultithreading*(): bool {.inline.} =
return hasSimultaneousMultithreadingImpl
## **(x86 Only)**
##
## Reports `true` if the hardware is utilizing simultaneous multithreading
## (branded as *"hyperthreads"* on Intel processors).
proc hasIntelVtx*(): bool {.inline.} =
return hasIntelVtxImpl
## **(x86 Only)**
##
## Reports `true` if the Intel virtualization extensions (VT-x) are available.
proc hasAmdv*(): bool {.inline.} =
return hasAmdvImpl
## **(x86 Only)**
##
## Reports `true` if the AMD virtualization extensions (AMD-V) are available.
proc hasX87fpu*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware can use x87 floating-point instructions
  ## (includes support for single, double, and 80-bit precision floats as per
  ## IEEE 754-1985).
  ##
  ## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be
  ## `true` on 64-bit x86 processors. It should be noted that support of these
  ## instructions is deprecated on 64-bit versions of Windows - see MSDN_.
  ##
  ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms
  hasX87fpuImpl
proc hasMmx*(): bool {.inline.} =
return hasMmxImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use MMX SIMD instructions.
##
## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be
## `true` on 64-bit x86 processors. It should be noted that support of these
## instructions is deprecated on 64-bit versions of Windows (see MSDN_ for
## more info).
##
## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms
proc hasMmxExt*(): bool {.inline.} =
return hasMmxExtImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use "Extended MMX" SIMD instructions.
##
## It should be noted that support of these instructions is deprecated on
## 64-bit versions of Windows (see MSDN_ for more info).
##
## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms
proc has3DNow*(): bool {.inline.} =
return has3DNowImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use 3DNow! SIMD instructions.
##
## It should be noted that support of these instructions is deprecated on
## 64-bit versions of Windows (see MSDN_ for more info), and that the 3DNow!
## instructions (with an exception made for the prefetch instructions, see the
## `hasPrefetch` procedure) have been phased out of AMD processors since 2010
## (see `AMD Developer Central`_ for more info).
##
## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms
## .. _`AMD Developer Central`: https://web.archive.org/web/20131109151245/http://developer.amd.com/community/blog/2010/08/18/3dnow-deprecated/
proc has3DNowEnhanced*(): bool {.inline.} =
return has3DNowEnhancedImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use "Enhanced 3DNow!" SIMD instructions.
##
## It should be noted that support of these instructions is deprecated on
## 64-bit versions of Windows (see MSDN_ for more info), and that the 3DNow!
## instructions (with an exception made for the prefetch instructions, see the
## `hasPrefetch` procedure) have been phased out of AMD processors since 2010
## (see `AMD Developer Central`_ for more info).
##
## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms
## .. _`AMD Developer Central`: https://web.archive.org/web/20131109151245/http://developer.amd.com/community/blog/2010/08/18/3dnow-deprecated/
proc hasPrefetch*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware can use the `PREFETCH` and `PREFETCHW`
  ## instructions. These instructions were originally included as part of
  ## 3DNow!, but are potentially independent from the rest of it due to changes
  ## in contemporary AMD processors (see above).
  hasPrefetchImpl
proc hasSse*(): bool {.inline.} =
return hasSseImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use the SSE (Streaming SIMD Extensions)
## 1.0 instructions, which introduced 128-bit SIMD on x86 machines.
##
## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be
## `true` on 64-bit x86 processors.
proc hasSse2*(): bool {.inline.} =
return hasSse2Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use the SSE (Streaming SIMD Extensions)
## 2.0 instructions.
##
## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be
## `true` on 64-bit x86 processors.
proc hasSse3*(): bool {.inline.} =
return hasSse3Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use SSE (Streaming SIMD Extensions) 3.0
## instructions.
proc hasSsse3*(): bool {.inline.} =
return hasSsse3Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use Supplemental SSE (Streaming SIMD
## Extensions) 3.0 instructions.
proc hasSse4a*(): bool {.inline.} =
return hasSse4aImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use Supplemental SSE (Streaming SIMD
## Extensions) 4a instructions.
proc hasSse41*(): bool {.inline.} =
return hasSse41Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use Supplemental SSE (Streaming SIMD
## Extensions) 4.1 instructions.
proc hasSse42*(): bool {.inline.} =
return hasSse42Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use Supplemental SSE (Streaming SIMD
## Extensions) 4.2 instructions.
proc hasAvx*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
  ## 1.0 instructions, which introduced 256-bit SIMD on x86 machines along with
  ## added re-encoded versions of prior 128-bit SSE instructions into the more
  ## code-dense and non-backward compatible VEX (Vector Extensions) format.
  hasAvxImpl
proc hasAvx2*(): bool {.inline.} =
return hasAvx2Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) 2.0
## instructions.
proc hasAvx512f*(): bool {.inline.} =
return hasAvx512fImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit F (Foundation) instructions.
proc hasAvx512dq*(): bool {.inline.} =
return hasAvx512dqImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit DQ (Doubleword + Quadword) instructions.
proc hasAvx512ifma*(): bool {.inline.} =
return hasAvx512ifmaImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit IFMA (Integer Fused Multiply Accumulation) instructions.
proc hasAvx512pf*(): bool {.inline.} =
return hasAvx512pfImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit PF (Prefetch) instructions.
proc hasAvx512er*(): bool {.inline.} =
return hasAvx512erImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit ER (Exponential and Reciprocal) instructions.
proc hasAvx512cd*(): bool {.inline.} =
return hasAvx512cdImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit CD (Conflict Detection) instructions.
proc hasAvx512bw*(): bool {.inline.} =
return hasAvx512bwImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit BW (Byte and Word) instructions.
proc hasAvx512vl*(): bool {.inline.} =
return hasAvx512vlImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit VL (Vector Length) instructions.
proc hasAvx512vbmi*(): bool {.inline.} =
return hasAvx512vbmiImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit VBMI (Vector Byte Manipulation) 1.0 instructions.
proc hasAvx512vbmi2*(): bool {.inline.} =
return hasAvx512vbmi2Impl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit VBMI (Vector Byte Manipulation) 2.0 instructions.
proc hasAvx512vpopcntdq*(): bool {.inline.} =
return hasAvx512vpopcntdqImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use the AVX (Advanced Vector Extensions)
## 512-bit `VPOPCNTDQ` (population count, i.e. determine number of flipped
## bits) instruction.
proc hasAvx512vnni*(): bool {.inline.} =
return hasAvx512vnniImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit VNNI (Vector Neural Network) instructions.
proc hasAvx512vnniw4*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
  ## 512-bit 4VNNIW (Vector Neural Network Word Variable Precision)
  ## instructions.
  hasAvx512vnniw4Impl
proc hasAvx512fmaps4*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
  ## 512-bit 4FMAPS (Fused-Multiply-Accumulation Single-precision) instructions.
  hasAvx512fmaps4Impl
proc hasAvx512bitalg*(): bool {.inline.} =
return hasAvx512bitalgImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit BITALG (Bit Algorithms) instructions.
proc hasAvx512bfloat16*(): bool {.inline.} =
return hasAvx512bfloat16Impl
## **(x86 Only)**
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit BFLOAT16 (8-bit exponent, 7-bit mantissa) instructions used by
## Intel DL (Deep Learning) Boost.
proc hasAvx512vp2intersect*(): bool {.inline.} =
return hasAvx512vp2intersectImpl
## **(x86 Only)**
##
## Reports `true` if the hardware can use AVX (Advanced Vector Extensions)
## 512-bit VP2INTERSECT (Compute Intersections between Dualwords + Quadwords)
## instructions.
proc hasRdrand*(): bool {.inline.} =
return hasRdrandImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `RDRAND` instruction,
## i.e. Intel on-CPU hardware random number generation.
proc hasRdseed*(): bool {.inline.} =
return hasRdseedImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `RDSEED` instruction,
## i.e. Intel on-CPU hardware random number generation (used for seeding other
## PRNGs).
proc hasMovBigEndian*(): bool {.inline.} =
return hasMovBigEndianImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `MOVBE` instruction for
## endianness/byte-order switching.
proc hasPopcnt*(): bool {.inline.} =
return hasPopcntImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `POPCNT` (population
## count, i.e. determine number of flipped bits) instruction.
proc hasFma3*(): bool {.inline.} =
return hasFma3Impl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the FMA3 (Fused Multiply
## Accumulation 3-operand) SIMD instructions.
proc hasFma4*(): bool {.inline.} =
return hasFma4Impl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the FMA4 (Fused Multiply
## Accumulation 4-operand) SIMD instructions.
proc hasXop*(): bool {.inline.} =
return hasXopImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the XOP (eXtended
## Operations) SIMD instructions. These instructions are exclusive to the
## Bulldozer AMD microarchitecture family (i.e. Bulldozer, Piledriver,
## Steamroller, and Excavator) and were phased out with the release of the Zen
## design.
proc hasCas8B*(): bool {.inline.} =
return hasCas8BImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the (`LOCK`-able)
## `CMPXCHG8B` 64-bit compare-and-swap instruction.
proc hasCas16B*(): bool {.inline.} =
return hasCas16BImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the (`LOCK`-able)
## `CMPXCHG16B` 128-bit compare-and-swap instruction.
proc hasAbm*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware has support for ABM (Advanced Bit
  ## Manipulation) instructions (i.e. `POPCNT` and `LZCNT` for counting leading
  ## zeroes).
  hasAbmImpl
proc hasBmi1*(): bool {.inline.} =
return hasBmi1Impl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for BMI (Bit Manipulation) 1.0
## instructions.
proc hasBmi2*(): bool {.inline.} =
return hasBmi2Impl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for BMI (Bit Manipulation) 2.0
## instructions.
proc hasTsxHle*(): bool {.inline.} =
return hasTsxHleImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for HLE (Hardware Lock Elision)
## as part of Intel's TSX (Transactional Synchronization Extensions).
proc hasTsxRtm*(): bool {.inline.} =
return hasTsxRtmImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for RTM (Restricted
## Transactional Memory) as part of Intel's TSX (Transactional Synchronization
## Extensions).
proc hasAdx*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware has support for ADX (Multi-precision
  ## Add-Carry Extensions) instructions.
  hasAdxImpl
proc hasSgx*(): bool {.inline.} =
return hasSgxImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for SGX (Software Guard
## eXtensions) memory encryption technology.
proc hasGfni*(): bool {.inline.} =
return hasGfniImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for GFNI (Galois Field Affine
## Transformation) instructions.
proc hasAes*(): bool {.inline.} =
return hasAesImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for AESNI (Advanced Encryption
## Standard) instructions.
proc hasVaes*(): bool {.inline.} =
return hasVaesImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for VAES (Vectorized Advanced
## Encryption Standard) instructions.
proc hasVpclmulqdq*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware has support for `VPCLMULQDQ` (512 and
  ## 256-bit Carryless Multiplication) instructions.
  hasVpclmulqdqImpl
proc hasPclmulqdq*(): bool {.inline.} =
return hasPclmulqdqImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for `PCLMULQDQ` (128-bit
## Carryless Multiplication) instructions.
proc hasNxBit*(): bool {.inline.} =
return hasNxBitImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for NX-bit (No-eXecute)
## technology for marking pages of memory as non-executable.
proc hasFloat16c*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware has support for F16C instructions, used for
  ## converting 16-bit "half-precision" floating-point values to and from
  ## single-precision floating-point values.
  hasFloat16cImpl
proc hasSha*(): bool {.inline.} =
return hasShaImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for SHA (Secure Hash Algorithm)
## instructions.
proc hasClflush*(): bool {.inline.} =
return hasClflushImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `CLFLUSH` (Cache-line
## Flush) instruction.
proc hasClflushOpt*(): bool {.inline.} =
return hasClflushOptImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `CLFLUSHOPT` (Cache-line
## Flush Optimized) instruction.
proc hasClwb*(): bool {.inline.} =
return hasClwbImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for the `CLWB` (Cache-line Write
## Back) instruction.
proc hasPrefetchWT1*(): bool {.inline.} =
  ## **(x86 Only)**
  ##
  ## Reports `true` if the hardware has support for the `PREFETCHWT1`
  ## instruction.
  hasPrefetchWT1Impl
proc hasMpx*(): bool {.inline.} =
return hasMpxImpl
## **(x86 Only)**
##
## Reports `true` if the hardware has support for MPX (Memory Protection
## eXtensions).

View File

@ -57,10 +57,9 @@
## Conference on File and Storage Technologies, San Jose, 2013
import upraises
import pkg/upraises
push: {.upraises: [].}
## -----------------------------------------------------------------------------
## Build configuration
@ -150,144 +149,141 @@ static:
{.pragma: leo, cdecl, header: LeopardHeader.}
## -----------------------------------------------------------------------------
## Library version
var LEO_VERSION* {.header: LeopardHeader, importc.}: int
## -----------------------------------------------------------------------------
## Platform/Architecture
# maybe should detect AVX2 and set to 32 if detected, 16 otherwise:
# https://github.com/catid/leopard/blob/master/LeopardCommon.h#L247-L253
# https://github.com/mratsim/Arraymancer/blob/master/src/arraymancer/laser/cpuinfo_x86.nim#L220
const LEO_ALIGN_BYTES* = 16
## -----------------------------------------------------------------------------
## Initialization API
## leoInit()
##
## Perform static initialization for the library, verifying that the platform
## is supported.
##
## Returns 0 on success and other values on failure.
proc leoInit*(): cint {.leo, importcpp: "leo_init".}
## ------------------------------------------------------------------------------
## Shared Constants / Datatypes
## Results
## -----------------------------------------------------------------------------
## Shared Constants / Datatypes
## Results
# TODO: For some reason it's only possible to use the enum with `ord`
type
LeopardResult* = enum
LeopardCallInitialize = -7.cint ## Call leoInit() first
LeopardPlatform = -6.cint ## Platform is unsupported
LeopardInvalidInput = -5.cint ## A function parameter was invalid
LeopardInvalidCounts = -4.cint ## Invalid counts provided
LeopardInvalidSize = -3.cint ## Buffer size must be multiple of 64 bytes
LeopardTooMuchData = -2.cint ## Buffer counts are too high
LeopardNeedMoreData = -1.cint ## Not enough recovery data received
LeopardSuccess = 0.cint ## Operation succeeded
## Convert Leopard result to string
func leoResultString*(res: LeopardResult): cstring
{.leo, importc: "leo_result_string".}
LeopardCallInitialize = -7, ## Call leo_init() first
LeopardPlatform = -6, ## Platform is unsupported
LeopardInvalidInput = -5, ## A function parameter was invalid
LeopardInvalidCounts = -4, ## Invalid counts provided
LeopardInvalidSize = -3, ## Buffer size must be a multiple of 64 bytes
LeopardTooMuchData = -2, ## Buffer counts are too high
LeopardNeedMoreData = -1, ## Not enough recovery data received
LeopardSuccess = 0 ## Operation succeeded
## -----------------------------------------------------------------------------
## Encoder API
## Convert Leopard result to string
## leoEncodeWorkCount()
proc leoResultString*(result: LeopardResult): cstring {.leo, importc: "leo_result_string".}
## ------------------------------------------------------------------------------
## Encoder API
##
## Calculate the number of work data buffers to provide to leoEncode().
## leo_encode_work_count()
##
## The sum of originalCount + recoveryCount must not exceed 65536.
## Calculate the number of work_data buffers to provide to leo_encode().
##
## The sum of original_count + recovery_count must not exceed 65536.
##
## Returns the work_count value to pass into leo_encode().
## Returns 0 on invalid input.
##
## Returns the workCount value to pass into leoEncode().
## Returns 0 on invalid input.
func leoEncodeWorkCount*(originalCount, recoveryCount: cuint): cuint
proc leoEncodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint
{.leo, importc: "leo_encode_work_count".}
## leoEncode()
##
## Generate recovery data.
## leo_encode()
##
## bufferBytes: Number of bytes in each data buffer.
## originalCount: Number of original data buffers provided.
## recoveryCount: Number of desired recovery data buffers.
## workCount: Number of work data buffers, from leoEncodeWorkCount().
## originalData: Array of pointers to original data buffers.
## workData: Array of pointers to work data buffers.
## Generate recovery data.
##
## The sum of originalCount + recoveryCount must not exceed 65536.
## The recoveryCount <= originalCount.
## original_count: Number of original_data[] buffers provided.
## recovery_count: Number of desired recovery data buffers.
## buffer_bytes: Number of bytes in each data buffer.
## original_data: Array of pointers to original data buffers.
## work_count: Number of work_data[] buffers, from leo_encode_work_count().
## work_data: Array of pointers to work data buffers.
##
## The value of bufferBytes must be a multiple of 64.
## Each buffer should have the same number of bytes.
## Even the last piece must be rounded up to the block size.
## The sum of original_count + recovery_count must not exceed 65536.
## The recovery_count <= original_count.
##
## The buffer_bytes must be a multiple of 64.
## Each buffer should have the same number of bytes.
## Even the last piece must be rounded up to the block size.
##
## Let buffer_bytes = The number of bytes in each buffer:
##
## original_count = static_cast<unsigned>(
## ((uint64_t)total_bytes + buffer_bytes - 1) / buffer_bytes);
##
## Or if the number of pieces is known:
##
## buffer_bytes = static_cast<unsigned>(
## ((uint64_t)total_bytes + original_count - 1) / original_count);
##
## Returns Leopard_Success on success.
## The first set of recovery_count buffers in work_data will be the result.
## Returns other values on errors.
##
## Returns LeopardSuccess on success.
## The first set of recoveryCount buffers in workData will be the result.
## Returns other values on errors.
proc leoEncode*(
bufferBytes: uint64, ## Number of bytes in each data buffer
originalCount: cuint, ## Number of originalData[] buffer pointers
recoveryCount: cuint, ## Number of recovery data buffer pointers
## (readable post-call from start of workData[])
workCount: cuint, ## Number of workData[] buffer pointers
originalData: pointer, ## Array of pointers to original data buffers
workData: pointer, ## Array of pointers to work data buffers
): LeopardResult {.leo, importc: "leo_encode".}
bufferBytes: uint64;
originalCount: cuint;
recoveryCount: cuint;
workCount: cuint;
originalData: ptr pointer;
workData: ptr pointer): LeopardResult {.leo, importc: "leo_encode".}
## Number of bytes in each data buffer
## Number of original_data[] buffer pointers
## Number of recovery_data[] buffer pointers
## Number of work_data[] buffer pointers, from leo_encode_work_count()
## Array of pointers to original data buffers
##
## -----------------------------------------------------------------------------
## Decoder API
## leoDecodeWorkCount()
## Array of work buffers
## ------------------------------------------------------------------------------
## Decoder API
##
## Calculate the number of work data buffers to provide to leoDecode().
## leo_decode_work_count()
##
## The sum of originalCount + recoveryCount must not exceed 65536.
## Calculate the number of work_data buffers to provide to leo_decode().
##
## The sum of original_count + recovery_count must not exceed 65536.
##
## Returns the work_count value to pass into leo_decode().
## Returns 0 on invalid input.
##
## Returns the workCount value to pass into leoDecode().
## Returns 0 on invalid input.
func leoDecodeWorkCount*(originalCount, recoveryCount: cuint): cuint
proc leoDecodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint
{.leo, importc: "leo_decode_work_count".}
## leoDecode()
##
## Decode original data from recovery data.
## leo_decode()
##
## bufferBytes: Number of bytes in each data buffer.
## originalCount: Number of original data buffers provided.
## recoveryCount: Number of recovery data buffers provided.
## workCount: Number of work data buffers, from leoDecodeWorkCount().
## originalData: Array of pointers to original data buffers.
## recoveryData: Array of pointers to recovery data buffers.
## workData: Array of pointers to work data buffers.
## Decode original data from recovery data.
##
## Lost original/recovery data should be set to NULL.
## buffer_bytes: Number of bytes in each data buffer.
## original_count: Number of original_data[] buffers provided.
## original_data: Array of pointers to original data buffers.
## recovery_count: Number of recovery_data[] buffers provided.
## recovery_data: Array of pointers to recovery data buffers.
## work_count: Number of work_data[] buffers, from leo_decode_work_count().
## work_data: Array of pointers to work data buffers.
##
## The sum of recoveryCount + the number of non-NULL original data must be at
## least originalCount in order to perform recovery.
## Lost original/recovery data should be set to NULL.
##
## The sum of recovery_count + the number of non-NULL original data must be at
## least original_count in order to perform recovery.
##
## Returns Leopard_Success on success.
## Returns other values on errors.
##
## Returns LeopardSuccess on success.
## Returns other values on errors.
proc leoDecode*(
bufferBytes: uint64, ## Number of bytes in each data buffer
originalCount: cuint, ## Number of originalData[] buffer pointers
recoveryCount: cuint, ## Number of recoveryData[] buffer pointers
workCount: cuint, ## Number of workData[] buffer pointers
originalData: pointer, ## Array of pointers to original data buffers
recoveryData: pointer, ## Array of pointers to recovery data buffers
workData: pointer, ## Array of pointers to work data buffers
): LeopardResult {.leo, importc: "leo_decode".}
bufferBytes: uint64;
originalCount: cuint;
recoveryCount: cuint;
workCount: cuint;
originalData: ptr pointer;
recoveryData: ptr pointer;
workData: ptr pointer): LeopardResult {.leo, importc: "leo_decode".}
## Number of bytes in each data buffer
## Number of original_data[] buffer pointers
## Number of recovery_data[] buffer pointers
## Number of buffer pointers in work_data[]
## Array of original data buffers
## Array of recovery data buffers
## Array of work data buffers

View File

@ -1,551 +0,0 @@
import std/random
import pkg/leopard
import pkg/unittest2
randomize()
proc genData(outerLen, innerLen: uint): Data =
  ## Produces `outerLen` buffers of `innerLen` bytes each, every byte drawn
  ## from `rand(255)`.
  result = newSeqOfCap[seq[byte]](outerLen)
  for _ in 0..<outerLen.int:
    # Fill the buffer first, then append it; the bytes are generated in the
    # same order as when writing through the outer sequence.
    var buf = newSeqUninitialized[byte](innerLen)
    for b in buf.mitems:
      b = rand(255).byte
    result.add buf
# Set to true by the "Initialization" suite once leoInit() has returned; the
# later suites bail out early while it is still false.
var initialized = false

suite "Helpers":
  test "isValid should return false if RS code is nonsensical or is invalid per Leopard-RS":
    # Every code assigned below is expected to be rejected by isValid.
    var rsCode = (codeword: 8.uint, data: 5.uint, parity: 1.uint)
    check not rsCode.isValid

    rsCode = RS(110,10)
    check not rsCode.isValid

    rsCode = RS(1,1)
    check not rsCode.isValid

    rsCode = (codeword: 2.uint, data: 0.uint, parity: 2.uint)
    check not rsCode.isValid

    rsCode = RS(2,2)
    check not rsCode.isValid

    rsCode = RS(65537,65409)
    check not rsCode.isValid
suite "Initialization":
  test "encode and decode should fail if Leopard-RS is not initialized":
    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      parityData = genData(rsCode.parity, symbolBytes)

    var data = genData(rsCode.data, symbolBytes)

    let encodeRes = rsCode.encode data

    # Punch a hole in the data: per the original note, decode called with
    # hole-free data while Leopard-RS is uninitialized would succeed by simply
    # returning the data, without ever calling leoDecode.
    data[0] = @[]

    let decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check:
      encodeRes.isErr
      encodeRes.error.code == LeopardCallInitialize
      decodeRes.isErr
      decodeRes.error.code == LeopardCallInitialize

  test "initialization should succeed else raise a Defect":
    leoInit()
    initialized = true
    check initialized
suite "Encoder":
  test "should fail if RS code is nonsensical or is invalid per Leopard-RS":
    check initialized
    if not initialized: return

    let symbolBytes = MinBufferSize

    var
      rsCode = RS(110,10)
      data = genData(rsCode.data, symbolBytes)
      encodeRes = rsCode.encode data

    check encodeRes.isErr
    if encodeRes.isErr:
      check encodeRes.error.code == LeopardBadCode

  test "should fail if outer length of data does not match the RS code":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      # one buffer fewer / one buffer more than the code's data count
      notEnoughData = genData(rsCode.data - 1, symbolBytes)
      tooMuchData = genData(rsCode.data + 1, symbolBytes)
      notEnoughEncodeRes = rsCode.encode notEnoughData
      tooMuchEncodeRes = rsCode.encode tooMuchData

    check:
      notEnoughEncodeRes.isErr
      tooMuchEncodeRes.isErr

    if notEnoughEncodeRes.isErr:
      check notEnoughEncodeRes.error.code == LeopardNotEnoughData

    if tooMuchEncodeRes.isErr:
      check tooMuchEncodeRes.error.code == LeopardTooMuchData

  test "should fail if length of data[0] is less than minimum buffer size":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize - 5
      data = genData(rsCode.data, symbolBytes)
      encodeRes = rsCode.encode data

    check encodeRes.isErr
    if encodeRes.isErr:
      check encodeRes.error.code == LeopardInvalidSize

  test "should fail if length of data[0] is not a multiple of minimum buffer size":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize * 2 + 1
      data = genData(rsCode.data, symbolBytes)
      encodeRes = rsCode.encode data

    check encodeRes.isErr
    if encodeRes.isErr:
      check encodeRes.error.code == LeopardInvalidSize

  test "should fail if length of data[0+N] does not equal length of data[0]":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize

    var data = genData(rsCode.data, symbolBytes)

    # Replace one buffer with a 3-byte one so the lengths disagree.
    data[3] = @[1.byte, 2.byte, 3.byte]

    let encodeRes = rsCode.encode data

    check encodeRes.isErr
    if encodeRes.isErr:
      check encodeRes.error.code == LeopardInconsistentSize

  # With the current setup in leopard.nim it seems it's not possible to call
  # encode with an RS code that would result in leoEncodeWorkCount being called
  # with invalid parameters, i.e. that would result in it returning 0, because
  # a Result error will always be returned before leoEncodeWorkCount is called.

  # test "should fail if RS code parameters yield invalid parameters for leoEncodeWorkCount":
  #   check: initialized
  #   if not initialized: return
  #
  #   let
  #     rsCode = RS(?,?)
  #     symbolBytes = MinBufferSize
  #     data = genData(rsCode.data, symbolBytes)
  #     encodeRes = rsCode.encode data
  #
  #   check: encodeRes.isErr
  #   if encodeRes.isErr:
  #     check: encodeRes.error.code == LeopardInvalidInput

  test "should succeed if RS code and data yield valid parameters for leoEncode":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      data = genData(rsCode.data, symbolBytes)
      encodeRes = rsCode.encode data

    check encodeRes.isOk
suite "Decoder":
  test "should fail if RS code is nonsensical or is invalid per Leopard-RS":
    check initialized
    if not initialized: return

    let symbolBytes = MinBufferSize

    var
      rsCode = RS(110,10)
      data = genData(rsCode.data, symbolBytes)
      parityData: ParityData
      decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isErr
    if decodeRes.isErr:
      check decodeRes.error.code == LeopardBadCode

  test "should fail if outer length of data does not match the RS code":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      # one buffer fewer / one buffer more than the code's data count
      notEnoughData = genData(rsCode.data - 1, symbolBytes)
      tooMuchData = genData(rsCode.data + 1, symbolBytes)
      parityData = genData(rsCode.parity, symbolBytes)
      notEnoughDecodeRes = rsCode.decode(notEnoughData, parityData, symbolBytes)
      tooMuchDecodeRes = rsCode.decode(tooMuchData, parityData, symbolBytes)

    check:
      notEnoughDecodeRes.isErr
      tooMuchDecodeRes.isErr

    if notEnoughDecodeRes.isErr:
      check notEnoughDecodeRes.error.code == LeopardNotEnoughData

    if tooMuchDecodeRes.isErr:
      check tooMuchDecodeRes.error.code == LeopardTooMuchData

  test "should fail if outer length of parityData does not match the RS code":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      data = genData(rsCode.data, symbolBytes)
      # one buffer fewer / one buffer more than the code's parity count
      notEnoughParityData = genData(rsCode.parity - 1, symbolBytes)
      tooMuchParityData = genData(rsCode.parity + 1, symbolBytes)
      notEnoughDecodeRes = rsCode.decode(data, notEnoughParityData, symbolBytes)
      tooMuchDecodeRes = rsCode.decode(data, tooMuchParityData, symbolBytes)

    check:
      notEnoughDecodeRes.isErr
      tooMuchDecodeRes.isErr

    if notEnoughDecodeRes.isErr:
      check notEnoughDecodeRes.error.code == LeopardNeedMoreData

    if tooMuchDecodeRes.isErr:
      check tooMuchDecodeRes.error.code == LeopardNeedLessData

  test "should fail if symbolBytes is less than minimum buffer size":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize - 5
      data = genData(rsCode.data, symbolBytes)
      parityData = genData(rsCode.parity, symbolBytes)
      decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isErr
    if decodeRes.isErr:
      check decodeRes.error.code == LeopardInvalidSize

  test "should fail if symbolBytes is not a multiple of minimum buffer size":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize * 2 + 1
      data = genData(rsCode.data, symbolBytes)
      parityData = genData(rsCode.parity, symbolBytes)
      decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isErr
    if decodeRes.isErr:
      check decodeRes.error.code == LeopardInvalidSize

  test "should fail if length of data[0+N] is not zero and does not equal symbolBytes":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      parityData = genData(rsCode.parity, symbolBytes)

    var data = genData(rsCode.data, symbolBytes)

    # A 3-byte buffer is neither empty (a hole) nor symbolBytes long.
    data[3] = @[1.byte, 2.byte, 3.byte]

    let decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isErr
    if decodeRes.isErr:
      check decodeRes.error.code == LeopardInconsistentSize

  test "should fail if there are data losses and length of parityData[0+N] is not zero and does not equal symbolBytes":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize

    var
      data = genData(rsCode.data, symbolBytes)
      parityData = genData(rsCode.parity, symbolBytes)

    # One data hole plus one parity buffer of the wrong (3-byte) length.
    data[3] = @[]
    parityData[1] = @[1.byte, 2.byte, 3.byte]

    let decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isErr
    if decodeRes.isErr:
      check decodeRes.error.code == LeopardInconsistentSize

  # With the current setup in leopard.nim it seems it's not possible to call
  # decode with an RS code that would result in leoDecodeWorkCount being called
  # with invalid parameters, i.e. that would result in it returning 0, because
  # a Result error will always be returned before leoDecodeWorkCount is called.

  # test "should fail if there are data losses and RS code parameters yield invalid parameters for leoDecodeWorkCount":
  #   check: initialized
  #   if not initialized: return
  #
  #   let
  #     rsCode = RS(?,?)
  #     symbolBytes = MinBufferSize
  #     parityData = genData(rsCode.parity, symbolBytes)
  #
  #   var
  #     data = genData(rsCode.data, symbolBytes)
  #
  #   data[0] = @[]
  #
  #   let
  #     decodeRes = rsCode.decode(data, parityData, symbolBytes)
  #
  #   check: decodeRes.isErr
  #   if decodeRes.isErr:
  #     check: decodeRes.error.code == LeopardInvalidInput

  test "should succeed if there are no data losses even when all parity data is lost":
    check initialized
    if not initialized: return

    let
      rsCode = RS(8,5)
      symbolBytes = MinBufferSize
      data = genData(rsCode.data, symbolBytes)

    # Case 1: complete parity data.
    var
      parityData = genData(rsCode.parity, symbolBytes)
      decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isOk

    # Case 2: a single parity buffer is missing.
    parityData = genData(rsCode.parity, symbolBytes)
    parityData[1] = @[]
    decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isOk

    # Case 3: every parity buffer is missing.
    parityData = genData(rsCode.parity, symbolBytes)
    for i in 0..<parityData.len:
      parityData[i] = @[]
    decodeRes = rsCode.decode(data, parityData, symbolBytes)

    check decodeRes.isOk
suite "Encode + Decode":
  proc pickHoles(count, maxIndex: int): seq[int] =
    ## Collects `count` distinct indices, each drawn with `rand(maxIndex)`;
    ## duplicates are redrawn.
    while result.len < count:
      let candidate = rand(maxIndex)
      if candidate notin result:
        result.add candidate

  test "should fail to recover data when losses exceed tolerance":
    check initialized
    if not initialized: return

    for _ in 0..<1000:
      let
        # together dataSymbols = 256+, paritySymbols = 17+, symbolBytes = 64+
        # seem to consistently trigger parallel processing with OpenMP
        dataSymbols = rand(256..320)
        paritySymbols = rand(17..dataSymbols)
        codewordSymbols = dataSymbols + paritySymbols
        symbolBytesMultip = rand(1..8)
        symbolBytes = MinBufferSize * symbolBytesMultip.uint
        rsCode = RS(codewordSymbols, dataSymbols)
        data = genData(rsCode.data, symbolBytes)
        # one loss more than the number of parity symbols
        losses = paritySymbols + 1
        parityDataHoleCount =
          if (losses - 1) == 0: 0 else: rand(1..(losses - 1))
        dataHoleCount = losses - parityDataHoleCount
        encodeRes = rsCode.encode data

      check dataHoleCount + parityDataHoleCount == losses
      check encodeRes.isOk

      if encodeRes.isOk:
        let parityData = encodeRes.get

        var
          dataWithHoles = data
          parityDataWithHoles = parityData

        # Data holes are drawn before parity holes.
        let dataHoles = pickHoles(dataHoleCount, dataSymbols - 1)
        check dataHoles.len == dataHoleCount
        for idx in dataHoles:
          dataWithHoles[idx] = @[]

        let parityDataHoles = pickHoles(parityDataHoleCount, paritySymbols - 1)
        check parityDataHoles.len == parityDataHoleCount
        for idx in parityDataHoles:
          parityDataWithHoles[idx] = @[]

        let decodeRes = rsCode.decode(dataWithHoles, parityDataWithHoles,
                                      symbolBytes)

        check decodeRes.isErr
        if decodeRes.isErr:
          check decodeRes.error.code == LeopardNeedMoreData
      else:
        echo "encode error message: " & encodeRes.error.msg

  test "should recover data otherwise":
    check initialized
    if not initialized: return

    for _ in 0..<1000:
      let
        # together dataSymbols = 256+, paritySymbols = 17+, symbolBytes = 64+
        # seem to consistently trigger parallel processing with OpenMP
        dataSymbols = rand(256..320)
        paritySymbols = rand(17..dataSymbols)
        codewordSymbols = dataSymbols + paritySymbols
        symbolBytesMultip = rand(1..8)
        symbolBytes = MinBufferSize * symbolBytesMultip.uint
        rsCode = RS(codewordSymbols, dataSymbols)
        data = genData(rsCode.data, symbolBytes)
        # at most as many losses as there are parity symbols
        losses = rand(1..paritySymbols)
        parityDataHoleCount =
          if (losses - 1) == 0: 0 else: rand(1..(losses - 1))
        dataHoleCount = losses - parityDataHoleCount
        encodeRes = rsCode.encode data

      check dataHoleCount + parityDataHoleCount == losses
      check encodeRes.isOk

      if encodeRes.isOk:
        let parityData = encodeRes.get

        var
          dataWithHoles = data
          parityDataWithHoles = parityData

        # Data holes are drawn before parity holes.
        let dataHoles = pickHoles(dataHoleCount, dataSymbols - 1)
        check dataHoles.len == dataHoleCount
        for idx in dataHoles:
          dataWithHoles[idx] = @[]

        let parityDataHoles = pickHoles(parityDataHoleCount, paritySymbols - 1)
        check parityDataHoles.len == parityDataHoleCount
        for idx in parityDataHoles:
          parityDataWithHoles[idx] = @[]

        let decodeRes = rsCode.decode(dataWithHoles, parityDataWithHoles,
                                      symbolBytes)

        check decodeRes.isOk
        if decodeRes.isOk:
          let decodedData = decodeRes.get

          check:
            decodedData != dataWithHoles
            decodedData == data
        else:
          echo "decode error message: " & decodeRes.error.msg
      else:
        echo "encode error message: " & encodeRes.error.msg

48
tests/testleopard.nim Normal file
View File

@ -0,0 +1,48 @@
import pkg/unittest2
import pkg/stew/results
import pkg/stew/byteutils
import ../leopard
suite "Leopard":
  const testString = "Hello World!"

  var
    leoEncoder: LeoEncoder
    leoDecoder: LeoDecoder
    data: seq[seq[byte]]
    parity: seq[seq[byte]]
    recovered: seq[seq[byte]]

  test "Test Encode/Decode":
    # Encoder and decoder are initialised with the same arguments (64, 16, 10);
    # below there are 16 data buffers and 10 parity buffers of 64 bytes each.
    leoEncoder = LeoEncoder.init(64, 16, 10).tryGet()
    leoDecoder = LeoDecoder.init(64, 16, 10).tryGet()

    data = newSeq[seq[byte]](16)
    parity = newSeq[seq[byte]](10)
    recovered = newSeq[seq[byte]](16)

    # Each data buffer starts with "Hello World! <index>"; the rest stays zero.
    for i in 0..<16:
      var str = testString & " " & $i
      data[i] = newSeq[byte](64)
      copyMem(addr data[i][0], addr str[0], str.len)

    for i in 0..<16:
      recovered[i] = newSeq[byte](64)

    for i in 0..<10:
      parity[i] = newSeq[byte](64)

    leoEncoder.encode(data, parity).tryGet()

    # Keep copies of the first two buffers, then empty them before decoding.
    let
      firstOriginal = data[0]
      secondOriginal = data[1]

    data[0].setLen(0)
    data[1].setLen(0)

    leoDecoder.decode(data, parity, recovered).tryGet()

    check recovered[0] == firstOriginal
    check recovered[1] == secondOriginal