diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6cc19ff --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +[*] +indent_style = space +insert_final_newline = true +indent_size = 2 +trim_trailing_whitespace = true diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6313b56 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..72952f0 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,154 @@ +name: Tests + +on: [pull_request, push] + +jobs: + tests: + env: + NPROC: 2 + strategy: + fail-fast: false + matrix: + cache_nonce: [ 1 ] + nim_version: [ 1.2.18, 1.4.8, 1.6.4 ] + platform: + - { + icon: 🐧, + label: Linux, + os: ubuntu, + shell: bash --noprofile --norc -eo pipefail + } + - { + icon: 🍎, + label: macOS, + os: macos, + shell: bash --noprofile --norc -eo pipefail + } + - { + icon: 🏁, + label: Windows, + os: windows, + shell: msys2 + } + name: ${{ matrix.platform.icon }} ${{ matrix.platform.label }} - Nim v${{ matrix.nim_version }} + runs-on: ${{ matrix.platform.os }}-latest + defaults: + run: + shell: ${{ matrix.platform.shell }} {0} + + steps: + # - name: Install tools and libraries via APT (Linux) + # if: matrix.platform.os == 'ubuntu' + # run: | + # sudo apt update + # sudo apt install -y \ + # ... 
+ + - name: Install tools and libraries via Homebrew (macOS) + if: matrix.platform.os == 'macos' + run: | + brew update + brew install \ + findutils \ + libomp + + - name: Install tools and libraries via MSYS2 (Windows) + if: matrix.platform.os == 'windows' + uses: msys2/setup-msys2@v2 + with: + msystem: UCRT64 + install: > + base-devel + git + mingw-w64-ucrt-x86_64-cmake + mingw-w64-ucrt-x86_64-toolchain + + - name: Checkout sources from GitHub + uses: actions/checkout@v2 + with: + submodules: true + + - name: Calculate cache member paths + id: calc-paths + run: | + if [[ ${{ matrix.platform.os }} = windows ]]; then + echo "::set-output name=bash_env::$(cygpath -m "${HOME}")/.bash_env" + echo "::set-output name=choosenim::$(cygpath -m "${USERPROFILE}")/.choosenim" + echo "::set-output name=nimble::$(cygpath -m "${HOME}")/.nimble" + else + echo "::set-output name=bash_env::${HOME}/.bash_env" + echo "::set-output name=choosenim::${HOME}/.choosenim" + echo "::set-output name=nimble::${HOME}/.nimble" + fi + + - name: Restore choosenim and Nim tooling from cache + id: choosenim-nim-tooling-cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.calc-paths.outputs.bash_env }} + ${{ steps.calc-paths.outputs.choosenim }} + ${{ steps.calc-paths.outputs.nimble }}/bin + key: ${{ matrix.platform.os }}-nim_version:${{ matrix.nim_version }}-cache_nonce:${{ matrix.cache_nonce }} + + - name: Install choosenim and Nim tooling + if: steps.choosenim-nim-tooling-cache.outputs.cache-hit != 'true' + run: | + mkdir -p "${HOME}/Downloads" + cd "${HOME}/Downloads" + curl https://nim-lang.org/choosenim/init.sh -sSf -O + chmod +x init.sh + if [[ ${{ matrix.platform.os }} = windows ]]; then + mkdir -p "$(cygpath "${USERPROFILE}")/.nimble/bin" + fi + CHOOSENIM_CHOOSE_VERSION=${{ matrix.nim_version }} ./init.sh -y + if [[ ${{ matrix.platform.os }} = windows ]]; then + mv "$(cygpath "${USERPROFILE}")/.nimble" "${HOME}/" + # intention is to rely only on libs provided by the OS and MSYS2 env 
+ rm -rf "${HOME}/.nimble/bin/"*.dll + rm -rf "${HOME}/.nimble/bin/"*.pem + fi + echo 'export NIMBLE_DIR="${HOME}/.nimble"' >> "${HOME}/.bash_env" + echo 'export PATH="${NIMBLE_DIR}/bin:${PATH}"' >> "${HOME}/.bash_env" + + - name: Install project dependencies + run: | + source "${HOME}/.bash_env" + cd "${NIMBLE_DIR}/bin" + # delete broken symlinks, which can arise because e.g. the cache + # restored a symlink that points to an executable within + # ../pkgs/foo-1.2.3/ but the project's .nimble file has been updated + # to install foo-#head; in the case of a broken symlink, nimble's + # auto-overwrite fails + if [[ ${{ matrix.platform.os }} = macos ]]; then + gfind . -xtype l -delete + else + find . -xtype l -delete + fi + cd - + nimble --accept install + + - name: Build and run tests + run: | + source "${HOME}/.bash_env" + if [[ ${{ matrix.platform.os }} = windows ]]; then + touch tests/testleopard.exe + else + touch tests/testleopard + fi + if [[ ${{ matrix.platform.os }} = macos ]]; then + export PATH="$(brew --prefix)/opt/llvm/bin:${PATH}" + export LDFLAGS="-L$(brew --prefix)/opt/libomp/lib -L$(brew --prefix)/opt/llvm/lib -Wl,-rpath,$(brew --prefix)/opt/llvm/lib" + nimble test -d:verbose -d:release -d:LeopardCmakeFlags="-DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=$(brew --prefix)/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix)/opt/llvm/bin/clang++" -d:LeopardExtraCompilerlags="-fopenmp" -d:LeopardExtraLinkerFlags="-fopenmp -L$(brew --prefix)/opt/libomp/lib" + else + nimble test -d:verbose -d:release + fi + if [[ ${{ matrix.platform.os }} = macos ]]; then + echo + echo otool -L tests/testleopard + otool -L tests/testleopard + else + echo + echo ldd tests/testleopard + ldd tests/testleopard + fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d06ac8c --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +* +!*/ +!*.* +*.a +*.dll +*.dylib +*.exe +*.so +.DS_Store +.idea +.vscode +leopard.nims +TODO diff --git a/.gitmodules b/.gitmodules 
index 5e8fa86..57cc6a9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +1,5 @@ [submodule "vendor/leopard"] path = vendor/leopard - url = https://github.com/catid/leopard.git + url = https://github.com/status-im/leopard.git ignore = untracked branch = master diff --git a/README.md b/README.md index cb9b139..24e4446 100644 --- a/README.md +++ b/README.md @@ -3,20 +3,92 @@ [![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg)](https://github.com/status-im/nim-leopard#stability) +[![Tests (GitHub Actions)](https://github.com/status-im/nim-leopard/workflows/Tests/badge.svg?branch=main)](https://github.com/status-im/nim-leopard/actions?query=workflow%3ATests+branch%3Amain) Nim wrapper for [Leopard-RS](https://github.com/catid/leopard): a fast library for [Reed-Solomon](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction) erasure correction coding. +## Requirements + +* Same as Leopard-RS' requirements, e.g. CMake 3.7 or newer. +* Nim 1.2 or newer. + + +## Installation + +With [Nimble](https://github.com/nim-lang/nimble) +```text +$ nimble install leopard +``` +In a project's `.nimble` file +```nim +requires "leopard >= 0.0.1 & < 0.0.2" +``` +In a [nimbus-build-system](https://github.com/status-im/nimbus-build-system) project +```text +$ git submodule add https://github.com/status-im/nim-leopard.git vendor/nim-leopard +$ make update +``` + +### Submodule + +#### Init + +[status-im/leopard](https://github.com/status-im/leopard), a fork of [catid/leopard](https://github.com/catid/leopard) (Leopard-RS), is a submodule of nim-leopard. 
+ +When nim-leopard is installed with `nimble install leopard`, or as a dependency in a Nimble project, or vendored in a nimbus-build-system project, submodule init is handled automatically. + +If the nim-leopard repo is cloned directly, then before running `nimble develop` or `nimble install` in the root of the clone, it's necessary to init the submodule +```text +$ git submodule update --init +``` + +#### Build + +The submodule is automatically built (in the `nimcache` dir) and statically linked during compilation of any Nim module that has `import leopard` or `import leopard/wrapper`. + +If the `nimcache` dir is set to a custom value, it must be an absolute path. + +For the build to work on Windows, `nimble` or `nim c` must be run from a Bash shell, e.g. Git Bash or an MSYS2 shell, and all needed tools (e.g. `cmake` and `make`) must be available in and suitable for that environment. + +##### OpenMP + +Leopard-RS' `CMakeLists.txt` checks for [OpenMP](https://en.wikipedia.org/wiki/OpenMP) support. If it is available then it is enabled in the build of `libleopard.a`. + +Build toolchains commonly installed on Linux and Windows come with support for OpenMP. + +The clang/++ compiler in Apple's Xcode does not support OpenMP, but the one installed with `brew install llvm` does support it, though it's also necessary to `brew install libomp`. + +So, on macOS, when running `nimble test` of nim-leopard or compiling a project that imports nim-leopard: +* If libomp is not installed and Apple's clang is used, no extra flags need to be passed to the Nim compiler. OpenMP support will not be enabled in `libleopard.a`. 
+* If libomp is installed and Apple's clang is used, this flag should be passed to `nim c` + ```text + -d:LeopardCmakeFlags="-DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=off" + ``` +* If the intent is to use brew-installed clang + libomp, the shell environment should be modified + ```text + $ export PATH="$(brew --prefix)/opt/llvm/bin:${PATH}" + $ export LDFLAGS="-L$(brew --prefix)/opt/libomp/lib -L$(brew --prefix)/opt/llvm/lib -Wl,-rpath,$(brew --prefix)/opt/llvm/lib" + ``` + and these flags should be passed to `nim c` + ```text + -d:LeopardCmakeFlags="-DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=$(brew --prefix)/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix)/opt/llvm/bin/clang++" -d:LeopardExtraCompilerlags="-fopenmp" -d:LeopardExtraLinkerFlags="-fopenmp -L$(brew --prefix)/opt/libomp/lib" + ``` + ## Usage TODO +### OpenMP + +When OpenMP is enabled, whether or not parallel processing kicks in depends on the symbol and byte counts. On a local machine with an Intel processor `RS(256,239)` with `symbolBytes == 64` seems to be the lower bound for triggering parallel processing. + ## Versioning -nim-leopard generally follows the upstream master branch. +nim-leopard generally follows the upstream `master` branch such that changes there will result in a version bump for this package. ## Stability -The API provided by this package is currently marked as experimental. Until it is marked as stable, it may be subject to breaking changes across any version bump. +This package is currently marked as experimental. Until it is marked as stable, it may be subject to breaking changes across any version bump. 
## License diff --git a/config.nims b/config.nims new file mode 100644 index 0000000..bdad7d5 --- /dev/null +++ b/config.nims @@ -0,0 +1,2 @@ +--threads:on +--tlsEmulation:off diff --git a/leopard.nim b/leopard.nim index e69de29..60de3d7 100644 --- a/leopard.nim +++ b/leopard.nim @@ -0,0 +1,12 @@ +## Nim-Leopard +## Copyright (c) 2022 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. + +import ./leopard/leopard + +export leopard diff --git a/leopard.nimble b/leopard.nimble index 2b30660..1beea63 100644 --- a/leopard.nimble +++ b/leopard.nimble @@ -5,7 +5,9 @@ version = "0.0.1" author = "Status Research & Development GmbH" description = "A wrapper for Leopard-RS" license = "Apache License 2.0 or MIT" +installDirs = @["vendor"] requires "nim >= 1.2.0", - "stew#head", - "unittest2" + "stew", + "unittest2", + "upraises >= 0.1.0 & < 0.2.0" diff --git a/leopard/leopard.nim b/leopard/leopard.nim new file mode 100644 index 0000000..fda40b8 --- /dev/null +++ b/leopard/leopard.nim @@ -0,0 +1,266 @@ +## Nim-Leopard +## Copyright (c) 2022 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. 
+ +import pkg/upraises +push: {.upraises: [].} + +{.deadCodeElim: on.} + +import pkg/stew/results + +import ./wrapper +import ./utils + +export wrapper, results + +const + BuffMultiples* = 64 + +type + LeoBufferPtr* = ptr UncheckedArray[byte] + + LeoCoderKind* {.pure.} = enum + Encoder, + Decoder + + Leo* = object of RootObj + bufSize*: int # size of the buffer in multiples of 64 + buffers*: int # total number of data buffers (K) + parity*: int # total number of parity buffers (M) + dataBufferPtr: seq[LeoBufferPtr] # buffer where data is copied before encoding + workBufferCount: int # number of parity work buffers + workBufferPtr: seq[LeoBufferPtr] # buffer where parity data is written during encoding or before decoding + case kind: LeoCoderKind + of LeoCoderKind.Decoder: + decodeBufferCount: int # number of decoding work buffers + decodeBufferPtr: seq[LeoBufferPtr] # work buffer used for decoding + of LeoCoderKind.Encoder: + discard + + LeoEncoder* = object of Leo + LeoDecoder* = object of Leo + +func encode*( + self: var LeoEncoder, + data, + parity: var openArray[seq[byte]]): Result[void, cstring] = + ## Encode a list of buffers in `data` into a number of `bufSize` sized + ## `parity` buffers + ## + ## `data` - list of original data `buffers` of size `bufSize` + ## `parity` - list of parity `buffers` of size `bufSize` + ## + + if data.len != self.buffers: + return err("Number of data buffers should match!") + + if parity.len != self.parity: + return err("Number of parity buffers should match!") + + # zero encode work buffer to avoid corrupting with previous run + for i in 0.. 0: + copyMem(self.dataBufferPtr[i], addr data[i][0], self.bufSize) + dataPtr[i] = self.dataBufferPtr[i] + else: + dataPtr[i] = nil + + # copy parity into aligned buffer + for i in 0.. 
0: + copyMem(self.workBufferPtr[i], addr parity[i][0], self.bufSize) + parityPtr[i] = self.workBufferPtr[i] + else: + parityPtr[i] = nil + + let + res = leo_decode( + self.bufSize.culonglong, + self.buffers.cuint, + self.parity.cuint, + self.decodeBufferCount.cuint, + cast[ptr pointer](addr dataPtr[0]), + cast[ptr pointer](addr parityPtr[0]), + cast[ptr pointer](addr self.decodeBufferPtr[0])) + + if ord(res) != ord(LeopardSuccess): + return err(leoResultString(res.LeopardResult)) + + for i, p in dataPtr: + if p.isNil: + copyMem(addr recovered[i][0], self.decodeBufferPtr[i], self.bufSize) + + ok() + +func free*(self: var Leo) = + if self.workBufferPtr.len > 0: + for i, p in self.workBufferPtr: + if not isNil(p): + p.leoFree() + self.workBufferPtr[i] = nil + + self.workBufferPtr.setLen(0) + + if self.dataBufferPtr.len > 0: + for i, p in self.dataBufferPtr: + if not isNil(p): + p.leoFree() + self.dataBufferPtr[i] = nil + + self.dataBufferPtr.setLen(0) + + if self.kind == LeoCoderKind.Decoder: + if self.decodeBufferPtr.len > 0: + for i, p in self.decodeBufferPtr: + if not isNil(p): + p.leoFree() + self.decodeBufferPtr[i] = nil + self.decodeBufferPtr.setLen(0) + +# TODO: The destructor doesn't behave as +# I'd expect it, it's called many more times +# than it should. This is however, most +# likely my misinterpretation of how it should +# work. 
+# proc `=destroy`*(self: var Leo) = +# self.free() + +proc init[TT: Leo]( + T: type TT, + bufSize, + buffers, + parity: int, + kind: LeoCoderKind): Result[T, cstring] = + if bufSize mod BuffMultiples != 0: + return err("bufSize should be multiples of 64 bytes!") + + if parity > buffers: + return err("number of parity buffers cannot exceed number of data buffers!") + + if (buffers + parity) > 65536: + return err("number of parity and data buffers cannot exceed 65536!") + + once: + # First, attempt to init the leopard library, + # this happens only once for all threads and + # should be safe as internal tables are only read, + # never written. However instantiation should be + # synchronized, since two instances can attempt to + # concurrently instantiate the library twice, and + # might end up with two distinct versions - not a big + # deal but will defeat the purpose of this `once` block + if (let res = leoinit(); res.ord != LeopardSuccess.ord): + return err(leoResultString(res.LeopardResult)) + + var + self = T( + kind: kind, + bufSize: bufSize, + buffers: buffers, + parity: parity) + + self.workBufferCount = leoEncodeWorkCount( + buffers.cuint, + parity.cuint).int + + # initialize encode work buffers + for _ in 0..".} + # Beware of the arg order! 
+ + proc alignedAlloc(alignment, size: csize_t): pointer = + alignedAllocWindows(size, alignment) + + proc alignedFree*[T](p: ptr T) + {.importc: "_aligned_free", header: "".} +elif defined(osx): + proc posix_memalign(mem: var pointer, alignment, size: csize_t) + {.importc, header:"".} + + proc alignedAlloc(alignment, size: csize_t): pointer {.inline.} = + posix_memalign(result, alignment, size) + + proc alignedFree*[T](p: ptr T) {.inline.} = + c_free(p) +elif defined(unix): + proc alignedAlloc(alignment, size: csize_t): pointer + {.importc: "aligned_alloc", header: "".} + + proc alignedFree*[T](p: ptr T) {.inline.} = + c_free(p) +else: + {.warning: "Falling back to manual pointer alignment, this is highly inefficient!".} + proc alignedAlloc*(size, align: Positive): pointer {.inline.} = + var + data = c_malloc(align + size) + + if not isNil(data): + var + doffset = cast[uint](data) mod align + + data = data.offset((align + doffset).int) + var + offsetPtr = cast[pointer](cast[uint](data) - 1'u) + moveMem(offsetPtr, addr doffset, sizeof(doffset)) + + return data + + proc freeAligned*[T](p: ptr T, align: Positive) {.inline.} = + var data = p + if not isNil(data): + let offset = cast[uint](data) - 1'u + if offset >= align: + return + + data = cast[pointer](cast[uint](data) - (align - offset)) + c_free(data) + +proc leoAlloc*(size: Positive): pointer {.inline.} = + alignedAlloc(LeoAlignBytes, size.csize_t) + +proc leoFree*[T](p: ptr T) = + alignedFree(p) diff --git a/leopard/utils/cpuinfo_x86.nim b/leopard/utils/cpuinfo_x86.nim new file mode 100644 index 0000000..ce31069 --- /dev/null +++ b/leopard/utils/cpuinfo_x86.nim @@ -0,0 +1,793 @@ +## Nim-Leopard +## Copyright (c) 2022 Status Research & Development GmbH +## Licensed under either of +## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE)) +## * MIT license ([LICENSE-MIT](LICENSE-MIT)) +## at your option. +## This file may not be copied, modified, or distributed except according to +## those terms. 
+ +import pkg/upraises +push: {.upraises: [].} + +{.deadCodeElim: on.} + +# From awr1: https://github.com/nim-lang/Nim/pull/11816/files + +proc cpuidX86(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] {.used.}= + when defined(vcc): + # limited inline asm support in vcc, so intrinsics, here we go: + proc cpuidVcc(cpuInfo: ptr int32; functionID, subFunctionID: int32) + {.cdecl, importc: "__cpuidex", header: "intrin.h".} + cpuidVcc(addr result.eax, eaxi, ecxi) + else: + var (eaxr, ebxr, ecxr, edxr) = (0'i32, 0'i32, 0'i32, 0'i32) + asm """ + cpuid + :"=a"(`eaxr`), "=b"(`ebxr`), "=c"(`ecxr`), "=d"(`edxr`) + :"a"(`eaxi`), "c"(`ecxi`)""" + (eaxr, ebxr, ecxr, edxr) + +proc cpuNameX86(): string {.used.}= + var leaves {.global.} = cast[array[48, char]]([ + cpuidX86(eaxi = 0x80000002'i32, ecxi = 0), + cpuidX86(eaxi = 0x80000003'i32, ecxi = 0), + cpuidX86(eaxi = 0x80000004'i32, ecxi = 0)]) + result = $cast[cstring](addr leaves[0]) + +type + X86Feature {.pure.} = enum + HypervisorPresence, Hyperthreading, NoSMT, IntelVtx, Amdv, X87fpu, Mmx, + MmxExt, F3DNow, F3DNowEnhanced, Prefetch, Sse, Sse2, Sse3, Ssse3, Sse4a, + Sse41, Sse42, Avx, Avx2, Avx512f, Avx512dq, Avx512ifma, Avx512pf, + Avx512er, Avx512cd, Avx512bw, Avx512vl, Avx512vbmi, Avx512vbmi2, + Avx512vpopcntdq, Avx512vnni, Avx512vnniw4, Avx512fmaps4, Avx512bitalg, + Avx512bfloat16, Avx512vp2intersect, Rdrand, Rdseed, MovBigEndian, Popcnt, + Fma3, Fma4, Xop, Cas8B, Cas16B, Abm, Bmi1, Bmi2, TsxHle, TsxRtm, Adx, Sgx, + Gfni, Aes, Vaes, Vpclmulqdq, Pclmulqdq, NxBit, Float16c, Sha, Clflush, + ClflushOpt, Clwb, PrefetchWT1, Mpx + +let + leaf1 = cpuidX86(eaxi = 1, ecxi = 0) + leaf7 = cpuidX86(eaxi = 7, ecxi = 0) + leaf8 = cpuidX86(eaxi = 0x80000001'i32, ecxi = 0) + +# The reason why we don't just evaluate these directly in the `let` variable +# list is so that we can internally organize features by their input (leaf) +# and output registers. 
+proc testX86Feature(feature: X86Feature): bool = + ## Reports whether the CPUID results captured in `leaf1`, `leaf7` and `leaf8` + ## (plus leaf 7, subleaf 1 for `Avx512bfloat16`) advertise `feature`. + proc test(input, bit: int): bool = + ((1 shl bit) and input) != 0 + + # see: https://en.wikipedia.org/wiki/CPUID#Calling_CPUID + # see: Intel® Architecture Instruction Set Extensions and Future Features + # Programming Reference + result = case feature + # leaf 1, edx + of X87fpu: + leaf1.edx.test(0) + of Clflush: + leaf1.edx.test(19) + of Mmx: + leaf1.edx.test(23) + of Sse: + leaf1.edx.test(25) + of Sse2: + leaf1.edx.test(26) + of Hyperthreading: + leaf1.edx.test(28) + + # leaf 1, ecx + of Sse3: + leaf1.ecx.test(0) + of Pclmulqdq: + leaf1.ecx.test(1) + of IntelVtx: + leaf1.ecx.test(5) + of Ssse3: + leaf1.ecx.test(9) + of Fma3: + leaf1.ecx.test(12) + of Cas16B: + leaf1.ecx.test(13) + of Sse41: + leaf1.ecx.test(19) + of Sse42: + leaf1.ecx.test(20) + of MovBigEndian: + leaf1.ecx.test(22) + of Popcnt: + leaf1.ecx.test(23) + of Aes: + leaf1.ecx.test(25) + of Avx: + leaf1.ecx.test(28) + of Float16c: + leaf1.ecx.test(29) + of Rdrand: + leaf1.ecx.test(30) + of HypervisorPresence: + leaf1.ecx.test(31) + + # leaf 7, ecx + of PrefetchWT1: + leaf7.ecx.test(0) + of Avx512vbmi: + leaf7.ecx.test(1) + of Avx512vbmi2: + leaf7.ecx.test(6) + of Gfni: + leaf7.ecx.test(8) + of Vaes: + leaf7.ecx.test(9) + of Vpclmulqdq: + leaf7.ecx.test(10) + of Avx512vnni: + leaf7.ecx.test(11) + of Avx512bitalg: + leaf7.ecx.test(12) + of Avx512vpopcntdq: + leaf7.ecx.test(14) + + # leaf 7, subleaf 1, eax + # (`leaf7` was queried with ecx = 0, where eax only reports the highest + # supported subleaf, so subleaf 1 has to be queried explicitly) + of Avx512bfloat16: + leaf7.eax >= 1 and cpuidX86(eaxi = 7, ecxi = 1).eax.test(5) + + # leaf 7, ebx + of Sgx: + leaf7.ebx.test(2) + of Bmi1: + leaf7.ebx.test(3) + of TsxHle: + leaf7.ebx.test(4) + of Avx2: + leaf7.ebx.test(5) + of Bmi2: + leaf7.ebx.test(8) + of TsxRtm: + leaf7.ebx.test(11) + of Mpx: + leaf7.ebx.test(14) + of Avx512f: + leaf7.ebx.test(16) + of Avx512dq: + leaf7.ebx.test(17) + of Rdseed: + leaf7.ebx.test(18) + of Adx: + leaf7.ebx.test(19) + of Avx512ifma: + leaf7.ebx.test(21) + of ClflushOpt: + leaf7.ebx.test(23) + of Clwb: + leaf7.ebx.test(24) + of Avx512pf: + leaf7.ebx.test(26) + of
Avx512er: + leaf7.ebx.test(27) + of Avx512cd: + leaf7.ebx.test(28) + of Sha: + leaf7.ebx.test(29) + of Avx512bw: + leaf7.ebx.test(30) + of Avx512vl: + leaf7.ebx.test(31) + + # leaf 7, edx + of Avx512vnniw4: + leaf7.edx.test(2) + of Avx512fmaps4: + leaf7.edx.test(3) + of Avx512vp2intersect: + leaf7.edx.test(8) + + # leaf 8, edx + of NoSMT: + leaf8.edx.test(1) + of Cas8B: + leaf8.edx.test(8) + of NxBit: + leaf8.edx.test(20) + of MmxExt: + leaf8.edx.test(22) + of F3DNowEnhanced: + leaf8.edx.test(30) + of F3DNow: + leaf8.edx.test(31) + + # leaf 8, ecx + of Amdv: + leaf8.ecx.test(2) + of Abm: + leaf8.ecx.test(5) + of Sse4a: + leaf8.ecx.test(6) + of Prefetch: + leaf8.ecx.test(8) + of Xop: + leaf8.ecx.test(11) + of Fma4: + leaf8.ecx.test(16) + +# Each flag is evaluated once when the module is initialised and cached here; +# every `...Impl` value must test the enum member of the same name. +let + isHypervisorPresentImpl = testX86Feature(HypervisorPresence) + hasSimultaneousMultithreadingImpl = + testX86Feature(Hyperthreading) or not testX86Feature(NoSMT) + hasIntelVtxImpl = testX86Feature(IntelVtx) + hasAmdvImpl = testX86Feature(Amdv) + hasX87fpuImpl = testX86Feature(X87fpu) + hasMmxImpl = testX86Feature(Mmx) + hasMmxExtImpl = testX86Feature(MmxExt) + has3DNowImpl = testX86Feature(F3DNow) + has3DNowEnhancedImpl = testX86Feature(F3DNowEnhanced) + hasPrefetchImpl = testX86Feature(Prefetch) or testX86Feature(F3DNow) + hasSseImpl = testX86Feature(Sse) + hasSse2Impl = testX86Feature(Sse2) + hasSse3Impl = testX86Feature(Sse3) + hasSsse3Impl = testX86Feature(Ssse3) + hasSse4aImpl = testX86Feature(Sse4a) + hasSse41Impl = testX86Feature(Sse41) + hasSse42Impl = testX86Feature(Sse42) + hasAvxImpl = testX86Feature(Avx) + hasAvx2Impl = testX86Feature(Avx2) + hasAvx512fImpl = testX86Feature(Avx512f) + hasAvx512dqImpl = testX86Feature(Avx512dq) + hasAvx512ifmaImpl = testX86Feature(Avx512ifma) + hasAvx512pfImpl = testX86Feature(Avx512pf) + hasAvx512erImpl = testX86Feature(Avx512er) + hasAvx512cdImpl = testX86Feature(Avx512cd) + hasAvx512bwImpl = testX86Feature(Avx512bw) + hasAvx512vlImpl = testX86Feature(Avx512vl) + hasAvx512vbmiImpl
= testX86Feature(Avx512vbmi) + hasAvx512vbmi2Impl = testX86Feature(Avx512vbmi2) + hasAvx512vpopcntdqImpl = testX86Feature(Avx512vpopcntdq) + hasAvx512vnniImpl = testX86Feature(Avx512vnni) + hasAvx512vnniw4Impl = testX86Feature(Avx512vnniw4) + hasAvx512fmaps4Impl = testX86Feature(Avx512fmaps4) + hasAvx512bitalgImpl = testX86Feature(Avx512bitalg) + hasAvx512bfloat16Impl = testX86Feature(Avx512bfloat16) + hasAvx512vp2intersectImpl = testX86Feature(Avx512vp2intersect) + hasRdrandImpl = testX86Feature(Rdrand) + hasRdseedImpl = testX86Feature(Rdseed) + hasMovBigEndianImpl = testX86Feature(MovBigEndian) + hasPopcntImpl = testX86Feature(Popcnt) + hasFma3Impl = testX86Feature(Fma3) + hasFma4Impl = testX86Feature(Fma4) + hasXopImpl = testX86Feature(Xop) + hasCas8BImpl = testX86Feature(Cas8B) + hasCas16BImpl = testX86Feature(Cas16B) + hasAbmImpl = testX86Feature(Abm) + hasBmi1Impl = testX86Feature(Bmi1) + hasBmi2Impl = testX86Feature(Bmi2) + hasTsxHleImpl = testX86Feature(TsxHle) + hasTsxRtmImpl = testX86Feature(TsxRtm) + hasAdxImpl = testX86Feature(Adx) + hasSgxImpl = testX86Feature(Sgx) + hasGfniImpl = testX86Feature(Gfni) + hasAesImpl = testX86Feature(Aes) + hasVaesImpl = testX86Feature(Vaes) + hasVpclmulqdqImpl = testX86Feature(Vpclmulqdq) + hasPclmulqdqImpl = testX86Feature(Pclmulqdq) + hasNxBitImpl = testX86Feature(NxBit) + hasFloat16cImpl = testX86Feature(Float16c) + hasShaImpl = testX86Feature(Sha) + hasClflushImpl = testX86Feature(Clflush) + hasClflushOptImpl = testX86Feature(ClflushOpt) + hasClwbImpl = testX86Feature(Clwb) + hasPrefetchWT1Impl = testX86Feature(PrefetchWT1) + hasMpxImpl = testX86Feature(Mpx) + +# NOTE: We use procedures here (layered over the variables) to keep the API +# consistent and usable against possible future heterogeneous systems with ISA +# differences between cores (a possibility that has historical precedents, for +# instance, the PPU/SPU relationship found on the IBM Cell).
If future systems +# do end up having disparate ISA features across multiple cores, expect there to +# be a "cpuCore" argument added to the feature procs. + +proc isHypervisorPresent*(): bool {.inline.} = + return isHypervisorPresentImpl + ## **(x86 Only)** + ## + ## Reports `true` if this application is running inside of a virtual machine + ## (this is by no means foolproof). + +proc hasSimultaneousMultithreading*(): bool {.inline.} = + return hasSimultaneousMultithreadingImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware is utilizing simultaneous multithreading + ## (branded as *"hyperthreads"* on Intel processors). + +proc hasIntelVtx*(): bool {.inline.} = + return hasIntelVtxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the Intel virtualization extensions (VT-x) are available. + +proc hasAmdv*(): bool {.inline.} = + return hasAmdvImpl + ## **(x86 Only)** + ## + ## Reports `true` if the AMD virtualization extensions (AMD-V) are available. + +proc hasX87fpu*(): bool {.inline.} = + return hasX87fpuImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use x87 floating-point instructions + ## (includes support for single, double, and 80-bit precision floats as per + ## IEEE 754-1985). + ## + ## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be + ## `true` on 64-bit x86 processors. It should be noted that support of these + ## instructions is deprecated on 64-bit versions of Windows - see MSDN_. + ## + ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms + +proc hasMmx*(): bool {.inline.} = + return hasMmxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use MMX SIMD instructions. + ## + ## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be + ## `true` on 64-bit x86 processors.
It should be noted that support of these + ## instructions is deprecated on 64-bit versions of Windows (see MSDN_ for + ## more info). + ## + ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms + +proc hasMmxExt*(): bool {.inline.} = + return hasMmxExtImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use "Extended MMX" SIMD instructions. + ## + ## It should be noted that support of these instructions is deprecated on + ## 64-bit versions of Windows (see MSDN_ for more info). + ## + ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms + +proc has3DNow*(): bool {.inline.} = + return has3DNowImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use 3DNow! SIMD instructions. + ## + ## It should be noted that support of these instructions is deprecated on + ## 64-bit versions of Windows (see MSDN_ for more info), and that the 3DNow! + ## instructions (with an exception made for the prefetch instructions, see the + ## `hasPrefetch` procedure) have been phased out of AMD processors since 2010 + ## (see `AMD Developer Central`_ for more info). + ## + ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms + ## .. _`AMD Developer Central`: https://web.archive.org/web/20131109151245/http://developer.amd.com/community/blog/2010/08/18/3dnow-deprecated/ + +proc has3DNowEnhanced*(): bool {.inline.} = + return has3DNowEnhancedImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use "Enhanced 3DNow!" SIMD instructions. + ## + ## It should be noted that support of these instructions is deprecated on + ## 64-bit versions of Windows (see MSDN_ for more info), and that the 3DNow! 
+ ## instructions (with an exception made for the prefetch instructions, see the + ## `hasPrefetch` procedure) have been phased out of AMD processors since 2010 + ## (see `AMD Developer Central`_ for more info). + ## + ## .. _MSDN: https://docs.microsoft.com/en-us/windows/win32/dxtecharts/sixty-four-bit-programming-for-game-developers#porting-applications-to-64-bit-platforms + ## .. _`AMD Developer Central`: https://web.archive.org/web/20131109151245/http://developer.amd.com/community/blog/2010/08/18/3dnow-deprecated/ + +proc hasPrefetch*(): bool {.inline.} = + return hasPrefetchImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use the `PREFETCH` and `PREFETCHW` + ## instructions. These instructions were originally included as part of + ## 3DNow!, but are potentially independent from the rest of it due to changes + ## in contemporary AMD processors (see above). + +proc hasSse*(): bool {.inline.} = + return hasSseImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use the SSE (Streaming SIMD Extensions) + ## 1.0 instructions, which introduced 128-bit SIMD on x86 machines. + ## + ## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be + ## `true` on 64-bit x86 processors. + +proc hasSse2*(): bool {.inline.} = + return hasSse2Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use the SSE (Streaming SIMD Extensions) + ## 2.0 instructions. + ## + ## By virtue of SSE2 enforced compliance on AMD64 CPUs, this should always be + ## `true` on 64-bit x86 processors. + +proc hasSse3*(): bool {.inline.} = + return hasSse3Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use SSE (Streaming SIMD Extensions) 3.0 + ## instructions. + +proc hasSsse3*(): bool {.inline.} = + return hasSsse3Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use Supplemental SSE (Streaming SIMD + ## Extensions) 3.0 instructions.
+ +proc hasSse4a*(): bool {.inline.} = + return hasSse4aImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use SSE (Streaming SIMD + ## Extensions) 4a instructions. + +proc hasSse41*(): bool {.inline.} = + return hasSse41Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use SSE (Streaming SIMD + ## Extensions) 4.1 instructions. + +proc hasSse42*(): bool {.inline.} = + return hasSse42Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use SSE (Streaming SIMD + ## Extensions) 4.2 instructions. + +proc hasAvx*(): bool {.inline.} = + return hasAvxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 1.0 instructions, which introduced 256-bit SIMD on x86 machines along with + ## added reencoded versions of prior 128-bit SSE instructions into the more + ## code-dense and non-backward compatible VEX (Vector Extensions) format. + +proc hasAvx2*(): bool {.inline.} = + return hasAvx2Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) 2.0 + ## instructions. + +proc hasAvx512f*(): bool {.inline.} = + return hasAvx512fImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit F (Foundation) instructions. + +proc hasAvx512dq*(): bool {.inline.} = + return hasAvx512dqImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit DQ (Doubleword + Quadword) instructions. + +proc hasAvx512ifma*(): bool {.inline.} = + return hasAvx512ifmaImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit IFMA (Integer Fused Multiply Accumulation) instructions.
+ +proc hasAvx512pf*(): bool {.inline.} = + return hasAvx512pfImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit PF (Prefetch) instructions. + +proc hasAvx512er*(): bool {.inline.} = + return hasAvx512erImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit ER (Exponential and Reciprocal) instructions. + +proc hasAvx512cd*(): bool {.inline.} = + return hasAvx512cdImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit CD (Conflict Detection) instructions. + +proc hasAvx512bw*(): bool {.inline.} = + return hasAvx512bwImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit BW (Byte and Word) instructions. + +proc hasAvx512vl*(): bool {.inline.} = + return hasAvx512vlImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit VL (Vector Length) instructions. + +proc hasAvx512vbmi*(): bool {.inline.} = + return hasAvx512vbmiImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit VBMI (Vector Byte Manipulation) 1.0 instructions. + +proc hasAvx512vbmi2*(): bool {.inline.} = + return hasAvx512vbmi2Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit VBMI (Vector Byte Manipulation) 2.0 instructions. + +proc hasAvx512vpopcntdq*(): bool {.inline.} = + return hasAvx512vpopcntdqImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use the AVX (Advanced Vector Extensions) + ## 512-bit `VPOPCNTDQ` (population count, i.e. determine number of flipped + ## bits) instruction. 
+ +proc hasAvx512vnni*(): bool {.inline.} = + return hasAvx512vnniImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit VNNI (Vector Neural Network) instructions. + +proc hasAvx512vnniw4*(): bool {.inline.} = + return hasAvx512vnniw4Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit 4VNNIW (Vector Neural Network Word Variable Precision) + ## instructions. + +proc hasAvx512fmaps4*(): bool {.inline.} = + return hasAvx512fmaps4Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit 4FMAPS (Fused-Multiply-Accumulation Single-precision) instructions. + +proc hasAvx512bitalg*(): bool {.inline.} = + return hasAvx512bitalgImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit BITALG (Bit Algorithms) instructions. + +proc hasAvx512bfloat16*(): bool {.inline.} = + return hasAvx512bfloat16Impl + ## **(x86 Only)** + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit BFLOAT16 (8-bit exponent, 7-bit mantissa) instructions used by + ## Intel DL (Deep Learning) Boost. + +proc hasAvx512vp2intersect*(): bool {.inline.} = + return hasAvx512vp2intersectImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware can use AVX (Advanced Vector Extensions) + ## 512-bit VP2INTERSECT (Compute Intersections between Doublewords + Quadwords) + ## instructions. + +proc hasRdrand*(): bool {.inline.} = + return hasRdrandImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `RDRAND` instruction, + ## i.e. Intel on-CPU hardware random number generation. + +proc hasRdseed*(): bool {.inline.} = + return hasRdseedImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `RDSEED` instruction, + ## i.e.
Intel on-CPU hardware random number generation (used for seeding other + ## PRNGs). + +proc hasMovBigEndian*(): bool {.inline.} = + return hasMovBigEndianImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `MOVBE` instruction for + ## endianness/byte-order switching. + +proc hasPopcnt*(): bool {.inline.} = + return hasPopcntImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `POPCNT` (population + ## count, i.e. determine number of flipped bits) instruction. + +proc hasFma3*(): bool {.inline.} = + return hasFma3Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the FMA3 (Fused Multiply + ## Accumulation 3-operand) SIMD instructions. + +proc hasFma4*(): bool {.inline.} = + return hasFma4Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the FMA4 (Fused Multiply + ## Accumulation 4-operand) SIMD instructions. + +proc hasXop*(): bool {.inline.} = + return hasXopImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the XOP (eXtended + ## Operations) SIMD instructions. These instructions are exclusive to the + ## Bulldozer AMD microarchitecture family (i.e. Bulldozer, Piledriver, + ## Steamroller, and Excavator) and were phased out with the release of the Zen + ## design. + +proc hasCas8B*(): bool {.inline.} = + return hasCas8BImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the (`LOCK`-able) + ## `CMPXCHG8B` 64-bit compare-and-swap instruction. + +proc hasCas16B*(): bool {.inline.} = + return hasCas16BImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the (`LOCK`-able) + ## `CMPXCHG16B` 128-bit compare-and-swap instruction. + +proc hasAbm*(): bool {.inline.} = + return hasAbmImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for ABM (Advanced Bit + ## Manipulation) instructions (i.e.
`POPCNT` and `LZCNT` for counting leading + ## zeroes). + +proc hasBmi1*(): bool {.inline.} = + return hasBmi1Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for BMI (Bit Manipulation) 1.0 + ## instructions. + +proc hasBmi2*(): bool {.inline.} = + return hasBmi2Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for BMI (Bit Manipulation) 2.0 + ## instructions. + +proc hasTsxHle*(): bool {.inline.} = + return hasTsxHleImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for HLE (Hardware Lock Elision) + ## as part of Intel's TSX (Transactional Synchronization Extensions). + +proc hasTsxRtm*(): bool {.inline.} = + return hasTsxRtmImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for RTM (Restricted + ## Transactional Memory) as part of Intel's TSX (Transactional Synchronization + ## Extensions). + +proc hasAdx*(): bool {.inline.} = + return hasAdxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for ADX (Multi-precision + ## Add-Carry Extensions) instructions. + +proc hasSgx*(): bool {.inline.} = + return hasSgxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for SGX (Software Guard + ## eXtensions) memory encryption technology. + +proc hasGfni*(): bool {.inline.} = + return hasGfniImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for GFNI (Galois Field Affine + ## Transformation) instructions. + +proc hasAes*(): bool {.inline.} = + return hasAesImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for AESNI (Advanced Encryption + ## Standard) instructions. + +proc hasVaes*(): bool {.inline.} = + return hasVaesImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for VAES (Vectorized Advanced + ## Encryption Standard) instructions.
+ +proc hasVpclmulqdq*(): bool {.inline.} = + return hasVpclmulqdqImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for `VPCLMULQDQ` (512 and 256-bit + ## Carryless Multiplication) instructions. + +proc hasPclmulqdq*(): bool {.inline.} = + return hasPclmulqdqImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for `PCLMULQDQ` (128-bit + ## Carryless Multiplication) instructions. + +proc hasNxBit*(): bool {.inline.} = + return hasNxBitImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for NX-bit (No-eXecute) + ## technology for marking pages of memory as non-executable. + +proc hasFloat16c*(): bool {.inline.} = + return hasFloat16cImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for F16C instructions, used for + ## converting 16-bit "half-precision" floating-point values to and from + ## single-precision floating-point values. + +proc hasSha*(): bool {.inline.} = + return hasShaImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for SHA (Secure Hash Algorithm) + ## instructions. + +proc hasClflush*(): bool {.inline.} = + return hasClflushImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `CLFLUSH` (Cache-line + ## Flush) instruction. + +proc hasClflushOpt*(): bool {.inline.} = + return hasClflushOptImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `CLFLUSHOPT` (Cache-line + ## Flush Optimized) instruction. + +proc hasClwb*(): bool {.inline.} = + return hasClwbImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `CLWB` (Cache-line Write + ## Back) instruction. + +proc hasPrefetchWT1*(): bool {.inline.} = + return hasPrefetchWT1Impl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for the `PREFETCHWT1` + ## instruction.
+ +proc hasMpx*(): bool {.inline.} = + return hasMpxImpl + ## **(x86 Only)** + ## + ## Reports `true` if the hardware has support for MPX (Memory Protection + ## eXtensions). diff --git a/leopard/wrapper.nim b/leopard/wrapper.nim index e69de29..05a6626 100644 --- a/leopard/wrapper.nim +++ b/leopard/wrapper.nim @@ -0,0 +1,289 @@ +## Copyright (c) 2017 Christopher A. Taylor. All rights reserved. +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## * Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimer. +## * Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## * Neither the name of Leopard-RS nor the names of its contributors may be +## used to endorse or promote products derived from this software without +## specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +## AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +## ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +## LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +## CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +## SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +## INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +## POSSIBILITY OF SUCH DAMAGE. 
+ + +## Leopard-RS +## MDS Reed-Solomon Erasure Correction Codes for Large Data in C +## +## Algorithms are described in LeopardCommon.h +## +## +## Inspired by discussion with: +## +## Sian-Jheng Lin : Author of {1} {3}, basis for Leopard +## Bulat Ziganshin : Author of FastECC +## Yutaka Sawada : Author of MultiPar +## +## +## References: +## +## {1} S.-J. Lin, T. Y. Al-Naffouri, Y. S. Han, and W.-H. Chung, +## "Novel Polynomial Basis with Fast Fourier Transform +## and Its Application to Reed-Solomon Erasure Codes" +## IEEE Trans. on Information Theory, pp. 6284-6299, November, 2016. +## +## {2} D. G. Cantor, "On arithmetical algorithms over finite fields", +## Journal of Combinatorial Theory, Series A, vol. 50, no. 2, pp. 285-300, 1989. +## +## {3} Sian-Jheng Lin, Wei-Ho Chung, "An Efficient (n, k) Information +## Dispersal Algorithm for High Code Rate System over Fermat Fields," +## IEEE Commun. Lett., vol.16, no.12, pp. 2036-2039, Dec. 2012. +## +## {4} Plank, J. S., Greenan, K. M., Miller, E. L., "Screaming fast Galois Field +## arithmetic using Intel SIMD instructions."
In: FAST-2013: 11th Usenix +## Conference on File and Storage Technologies, San Jose, 2013 + + +import pkg/upraises +push: {.upraises: [].} + +## ----------------------------------------------------------------------------- +## Build configuration + +import std/compilesettings +import std/os +import std/strutils + +const + LeopardCmakeFlags {.strdefine.} = + when defined(macosx): + "-DCMAKE_BUILD_TYPE=Release -DENABLE_OPENMP=off" + elif defined(windows): + "-G\"MSYS Makefiles\" -DCMAKE_BUILD_TYPE=Release" + else: + "-DCMAKE_BUILD_TYPE=Release" + + LeopardDir {.strdefine.} = + joinPath(currentSourcePath.parentDir.parentDir, "vendor", "leopard") + + buildDir = joinPath(querySetting(nimcacheDir), "vendor_leopard") + + LeopardHeader {.strdefine.} = "leopard.h" + + LeopardLib {.strdefine.} = joinPath(buildDir, "liblibleopard.a") + + LeopardCompilerFlags {.strdefine.} = + when defined(macosx): + "-I" & LeopardDir + else: + "-I" & LeopardDir & " -fopenmp" + + LeopardLinkerFlags {.strdefine.} = + when defined(macosx): + LeopardLib + else: + LeopardLib & " -fopenmp" + + LeopardExtraCompilerFlags {.strdefine.} = "" + + LeopardExtraLinkerFlags {.strdefine.} = "" + +static: + if defined(windows): + func pathUnix2Win(path: string): string = + gorge("cygpath -w " & path.strip).strip + + func pathWin2Unix(path: string): string = + gorge("cygpath " & path.strip).strip + + proc bash(cmd: varargs[string]): string = + gorge(gorge("which bash").pathUnix2Win & " -c '" & cmd.join(" ") & "'") + + proc bashEx(cmd: varargs[string]): tuple[output: string, exitCode: int] = + gorgeEx(gorge("which bash").pathUnix2Win & " -c '" & cmd.join(" ") & "'") + + let + buildDirUnix = buildDir.pathWin2Unix + leopardDirUnix = LeopardDir.pathWin2Unix + if defined(LeopardRebuild): discard bash("rm -rf", buildDirUnix) + if (bashEx("ls", LeopardLib.pathWin2Unix)).exitCode != 0: + discard bash("mkdir -p", buildDirUnix) + let cmd = + @["cd", buildDirUnix, "&& cmake", leopardDirUnix, LeopardCmakeFlags, + "&& 
make"] + echo "\nBuilding Leopard-RS: " & cmd.join(" ") + let (output, exitCode) = bashEx cmd + echo output + if exitCode != 0: + discard bash("rm -rf", buildDirUnix) + raise (ref Defect)(msg: "Failed to build Leopard-RS") + else: + if defined(LeopardRebuild): discard gorge "rm -rf " & buildDir + if gorgeEx("ls " & LeopardLib).exitCode != 0: + discard gorge "mkdir -p " & buildDir + let cmd = + "cd " & buildDir & " && cmake " & LeopardDir & " " & LeopardCmakeFlags & + " && make" + echo "\nBuilding Leopard-RS: " & cmd + let (output, exitCode) = gorgeEx cmd + echo output + if exitCode != 0: + discard gorge "rm -rf " & buildDir + raise (ref Defect)(msg: "Failed to build Leopard-RS") + +{.passC: LeopardCompilerFlags & " " & LeopardExtraCompilerFlags.} +{.passL: LeopardLinkerFlags & " " & LeopardExtraLinkerFlags.} + +{.pragma: leo, cdecl, header: LeopardHeader.} + +proc leoInit*(): cint {.leo, importcpp: "leo_init".} + +## ------------------------------------------------------------------------------ +## Shared Constants / Datatypes +## Results + +# TODO: For some reason it's only possibly to use the enum with `ord` +type + LeopardResult* = enum + LeopardCallInitialize = -7, ## Call leo_init() first + LeopardPlatform = -6, ## Platform is unsupported + LeopardInvalidInput = -5, ## A function parameter was invalid + LeopardInvalidCounts = -4, ## Invalid counts provided + LeopardInvalidSize = -3, ## Buffer size must be a multiple of 64 bytes + LeopardTooMuchData = -2, ## Buffer counts are too high + LeopardNeedMoreData = -1, ## Not enough recovery data received + LeopardSuccess = 0 ## Operation succeeded + + +## Convert Leopard result to string + +proc leoResultString*(result: LeopardResult): cstring {.leo, importc: "leo_result_string".} +## ------------------------------------------------------------------------------ +## Encoder API +## +## leo_encode_work_count() +## +## Calculate the number of work_data buffers to provide to leo_encode(). 
+## +## The sum of original_count + recovery_count must not exceed 65536. +## +## Returns the work_count value to pass into leo_encode(). +## Returns 0 on invalid input. +## + +proc leoEncodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint + {.leo, importc: "leo_encode_work_count".} +## +## leo_encode() +## +## Generate recovery data. +## +## original_count: Number of original_data[] buffers provided. +## recovery_count: Number of desired recovery data buffers. +## buffer_bytes: Number of bytes in each data buffer. +## original_data: Array of pointers to original data buffers. +## work_count: Number of work_data[] buffers, from leo_encode_work_count(). +## work_data: Array of pointers to work data buffers. +## +## The sum of original_count + recovery_count must not exceed 65536. +## The recovery_count <= original_count. +## +## The buffer_bytes must be a multiple of 64. +## Each buffer should have the same number of bytes. +## Even the last piece must be rounded up to the block size. +## +## Let buffer_bytes = The number of bytes in each buffer: +## +## original_count = static_cast( +## ((uint64_t)total_bytes + buffer_bytes - 1) / buffer_bytes); +## +## Or if the number of pieces is known: +## +## buffer_bytes = static_cast( +## ((uint64_t)total_bytes + original_count - 1) / original_count); +## +## Returns Leopard_Success on success. +## The first set of recovery_count buffers in work_data will be the result. +## Returns other values on errors. 
+## + +proc leoEncode*( + bufferBytes: uint64; + originalCount: cuint; + recoveryCount: cuint; + workCount: cuint; + originalData: ptr pointer; + workData: ptr pointer): LeopardResult {.leo, importc: "leo_encode".} + ## Number of bytes in each data buffer + ## Number of original_data[] buffer pointers + ## Number of recovery_data[] buffer pointers + ## Number of work_data[] buffer pointers, from leo_encode_work_count() + ## Array of pointers to original data buffers + ## + +## Array of work buffers +## ------------------------------------------------------------------------------ +## Decoder API +## +## leo_decode_work_count() +## +## Calculate the number of work_data buffers to provide to leo_decode(). +## +## The sum of original_count + recovery_count must not exceed 65536. +## +## Returns the work_count value to pass into leo_decode(). +## Returns 0 on invalid input. +## + +proc leoDecodeWorkCount*(originalCount: cuint; recoveryCount: cuint): cuint + {.leo, importc: "leo_decode_work_count".} +## +## leo_decode() +## +## Decode original data from recovery data. +## +## buffer_bytes: Number of bytes in each data buffer. +## original_count: Number of original_data[] buffers provided. +## original_data: Array of pointers to original data buffers. +## recovery_count: Number of recovery_data[] buffers provided. +## recovery_data: Array of pointers to recovery data buffers. +## work_count: Number of work_data[] buffers, from leo_decode_work_count(). +## work_data: Array of pointers to work data buffers. +## +## Lost original/recovery data should be set to NULL. +## +## The sum of recovery_count + the number of non-NULL original data must be at +## least original_count in order to perform recovery. +## +## Returns Leopard_Success on success. +## Returns other values on errors.
+## + +proc leoDecode*( + bufferBytes: uint64; + originalCount: cuint; + recoveryCount: cuint; + workCount: cuint; + originalData: ptr pointer; + recoveryData: ptr pointer; + workData: ptr pointer): LeopardResult {.leo, importc: "leo_decode".} + ## Number of bytes in each data buffer + ## Number of original_data[] buffer pointers + ## Number of recovery_data[] buffer pointers + ## Number of buffer pointers in work_data[] + ## Array of original data buffers + ## Array of recovery data buffers + ## Array of work data buffers diff --git a/tests/helpers.nim b/tests/helpers.nim new file mode 100644 index 0000000..847e901 --- /dev/null +++ b/tests/helpers.nim @@ -0,0 +1,106 @@ +import std/random + +import pkg/stew/results +import ../leopard + +proc randomCRCPacket*(data: var openArray[byte]) = + if data.len < 16: + data[0] = rand(data.len).byte + for i in 1.. 0: + dropRandomIdx(dataBuf, dataLosses) + + if parityLosses > 0: + dropRandomIdx(parityBuf, parityLosses) + + decoder.decode(dataBuf, parityBuf, recoveredBuf).tryGet() + + for i, d in dataBuf: + if d.len <= 0: + if not checkCRCPacket(recoveredBuf[i]): + return err(("Check failed for packet " & $i).cstring) + + ok() diff --git a/tests/test_leopard.nim b/tests/test_leopard.nim deleted file mode 100644 index e69de29..0000000 diff --git a/tests/testleopard.nim b/tests/testleopard.nim new file mode 100644 index 0000000..8c5cb00 --- /dev/null +++ b/tests/testleopard.nim @@ -0,0 +1,329 @@ +import std/random +import std/sets + +import pkg/unittest2 +import pkg/stew/results + +import ../leopard +import ./helpers + +randomize() + +suite "Leopard Parametrization": + test "Should not allow invalid buffer multiples": + check: + LeoEncoder.init(63, 4, 2).error == "bufSize should be multiples of 64 bytes!" + LeoEncoder.init(65, 4, 2).error == "bufSize should be multiples of 64 bytes!" 
+ + test "Should not allow invalid data/parity buffer counts": + check: + LeoEncoder.init(64, 1, 2).error == + "number of parity buffers cannot exceed number of data buffers!" + + test "Should not allow data + parity to exceed 65536": + check: + LeoEncoder.init(64, 65536 + 1, 0).error == + "number of parity and data buffers cannot exceed 65536!" + + LeoEncoder.init(64, 32768 + 1, 32768).error == + "number of parity and data buffers cannot exceed 65536!" + + test "Should not allow encoding with invalid data buffer counts": + var + leo = LeoEncoder.init(64, 4, 2).tryGet() + data = newSeq[seq[byte]](3) + parity = newSeq[seq[byte]](2) + + check: + leo.encode(data, parity).error == "Number of data buffers should match!" + + test "Should not allow encoding with invalid parity buffer counts": + var + leo = LeoEncoder.init(64, 4, 2).tryGet() + data = newSeq[seq[byte]](4) + parity = newSeq[seq[byte]](3) + + check: + leo.encode(data, parity).error == "Number of parity buffers should match!" + + test "Should not allow decoding with invalid data buffer counts": + var + leo = LeoDecoder.init(64, 4, 2).tryGet() + data = newSeq[seq[byte]](3) + parity = newSeq[seq[byte]](2) + recovered = newSeq[seq[byte]](3) + + check: + leo.decode(data, parity, recovered).error == "Number of data buffers should match!" + + test "Should not allow decoding with invalid data buffer counts": + var + leo = LeoDecoder.init(64, 4, 2).tryGet() + data = newSeq[seq[byte]](4) + parity = newSeq[seq[byte]](1) + recovered = newSeq[seq[byte]](3) + + check: + leo.decode(data, parity, recovered).error == "Number of parity buffers should match!" + + test "Should not allow decoding with invalid data buffer counts": + var + leo = LeoDecoder.init(64, 4, 2).tryGet() + data = newSeq[seq[byte]](4) + parity = newSeq[seq[byte]](2) + recovered = newSeq[seq[byte]](3) + + check: + leo.decode(data, parity, recovered).error == "Number of recovered buffers should match buffers!" 
+ +suite "Leopard simple Encode/Decode": + const + TestString = "Hello World!" + DataCount = 4 + ParityCount = 2 + BufferSize = 64 + + var + encoder: LeoEncoder + decoder: LeoDecoder + data: seq[seq[byte]] + parity: seq[seq[byte]] + recovered: seq[seq[byte]] + + setup: + encoder = LeoEncoder.init(BufferSize, DataCount, ParityCount).tryGet() + decoder = LeoDecoder.init(BufferSize, DataCount, ParityCount).tryGet() + data = newSeq[seq[byte]](DataCount) + parity = newSeq[seq[byte]](ParityCount) + recovered = newSeq[seq[byte]](DataCount) + + teardown: + encoder.free() + decoder.free() + + test "Test 2 data loses out of 4 possible": + for i in 0..