91 lines
4.3 KiB
Nim
91 lines
4.3 KiB
Nim
|
#
|
||
|
#
|
||
|
# Nim's Runtime Library
|
||
|
# (c) Copyright 2017 Nim Authors
|
||
|
#
|
||
|
# See the file "copying.txt", included in this
|
||
|
# distribution, for details about the copyright.
|
||
|
#
|
||
|
|
||
|
## This module implements a series of low level methods for bit manipulation.
|
||
|
## By default, this module uses compiler intrinsics to improve performance
|
||
|
## on supported compilers: ``GCC``, ``LLVM_GCC``, ``CLANG``, ``VCC``, ``ICC``.
|
||
|
##
|
||
|
## The module will fall back to pure Nim procs in case the backend is not supported.
|
||
|
## You can also use the flag `noIntrinsicsBitOpts` to disable compiler intrinsics.
|
||
|
##
|
||
|
## This module is also compatible with other backends: ``Javascript``, ``Nimscript``
|
||
|
## as well as the ``compiletime VM``.
|
||
|
##
|
||
|
## As a result of using optimized function/intrinsics some functions can return
|
||
|
## undefined results if the input is invalid. You can use the flag `noUndefinedBitOpts`
|
||
|
## to force predictable behaviour for all input, causing a small performance hit.
|
||
|
##
|
||
|
## At this time only `fastLog2`, `firstSetBit`, `countLeadingZeroBits`, `countTrailingZeroBits`
|
||
|
## may return an undefined and/or platform dependent value if given invalid input.
|
||
|
|
||
|
const
  # Compiler intrinsics are enabled unless `-d:noIntrinsicsBitOpts` is given.
  useBuiltins* = not defined(noIntrinsicsBitOpts)

  # True when `-d:noUndefinedBitOpts` is given, i.e. the user asked for
  # predictable results on every input.
  noUndefined* = defined(noUndefinedBitOpts)

  # Per-compiler switches: each one is true only when intrinsics are enabled
  # and the matching C backend is in use.
  useGCC_builtins* = useBuiltins and
    (defined(gcc) or defined(llvm_gcc) or defined(clang))
  useICC_builtins* = useBuiltins and defined(icc)
  useVCC_builtins* = useBuiltins and defined(vcc)

  # True when `int` is 8 bytes wide on the target.
  arch64* = sizeof(int) == 8
|
||
|
|
||
|
|
||
|
proc fastlog2_nim*(x: uint32): int {.inline, nosideeffect.} =
  ## Pure Nim integer log base 2 (index of the highest set bit) of a 32-bit
  ## or smaller unsigned value, computed with a De Bruijn multiply-and-lookup.
  ## An input of `0` yields `0`.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const deBruijnLog2: array[32, uint8] = [
    0'u8, 9, 1, 10, 13, 21, 2, 29,
    11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7,
    19, 27, 23, 6, 26, 5, 4, 31]
  # Smear the highest set bit into every lower position, which rounds the
  # value to one less than a power of two.
  var smeared = x
  for step in [1'u32, 2, 4, 8, 16]:
    smeared = smeared or (smeared shr step)
  # The top five bits of the wrapped 32-bit product select the table slot.
  let slot = uint32(smeared * 0x07C4ACDD'u32) shr 27
  result = int(deBruijnLog2[slot])
|
||
|
|
||
|
proc fastlog2_nim*(x: uint64): int {.inline, nosideeffect.} =
  ## Pure Nim integer log base 2 (index of the highest set bit) of a 64-bit
  ## unsigned value, computed with a De Bruijn multiply-and-lookup.
  ## An input of `0` yields `0`.
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
  const deBruijnLog2: array[64, uint8] = [
    0'u8, 58, 1, 59, 47, 53, 2, 60,
    39, 48, 27, 54, 33, 42, 3, 61,
    51, 37, 40, 49, 18, 28, 20, 55,
    30, 34, 11, 43, 14, 22, 4, 62,
    57, 46, 52, 38, 26, 32, 41, 50,
    36, 17, 19, 29, 10, 13, 21, 56,
    45, 25, 31, 35, 16, 9, 12, 44,
    24, 15, 8, 23, 7, 6, 5, 63]
  # Smear the highest set bit into every lower position, which rounds the
  # value to one less than a power of two.
  var smeared = x
  for step in [1'u64, 2, 4, 8, 16, 32]:
    smeared = smeared or (smeared shr step)
  # The top six bits of the wrapped 64-bit product select the table slot.
  let slot = (smeared * 0x03F6EAF2CD271461'u64) shr 58
  result = int(deBruijnLog2[slot])
|
||
|
|
||
|
|
||
|
when useGCC_builtins:
  # GCC-style count-leading-zeros builtins: number of 0-bits in x counted
  # from the most significant bit. The result is undefined when x is 0.
  proc builtin_clz*(x: cuint): cint {.
    importc: "__builtin_clz", cdecl.}
  proc builtin_clzll*(x: culonglong): cint {.
    importc: "__builtin_clzll", cdecl.}

elif useVCC_builtins:
  # MSVC bit-scan intrinsics: search `mask` from the most significant bit
  # (MSB) towards the least significant bit (LSB) for a set bit (1).
  proc bitScanReverse*(index: ptr culong, mask: culong): cuchar {.
    importc: "_BitScanReverse", header: "<intrin.h>", nosideeffect.}
  proc bitScanReverse64*(index: ptr culong, mask: uint64): cuchar {.
    importc: "_BitScanReverse64", header: "<intrin.h>", nosideeffect.}

  template vcc_scan_impl*(fnc: untyped; v: untyped): int =
    # Calls `fnc` with the address of a fresh local and `v`, ignores the
    # returned status byte, and yields the local converted to `int`.
    var bitPos: culong
    discard fnc(addr(bitPos), v)
    int(bitPos)

elif useICC_builtins:
  # ICC bit-scan-reverse intrinsics from <immintrin.h>; the scan result is
  # delivered through the pointer argument `p`.
  proc bitScanReverse*(p: ptr uint32, b: uint32): cuchar {.
    importc: "_BitScanReverse", header: "<immintrin.h>", nosideeffect.}
  proc bitScanReverse64*(p: ptr uint32, b: uint64): cuchar {.
    importc: "_BitScanReverse64", header: "<immintrin.h>", nosideeffect.}

  template icc_scan_impl*(fnc: untyped; v: untyped): int =
    # Calls `fnc` with the address of a fresh local and `v`, ignores the
    # returned status byte, and yields the local converted to `int`.
    var bitPos: uint32
    discard fnc(addr(bitPos), v)
    int(bitPos)
|