refactor bitops2, add endians2

This commit is contained in:
Jacek Sieka 2019-07-04 20:30:24 +02:00
parent 904f84c8e0
commit 7a87693eaf
No known key found for this signature in database
GPG Key ID: A1B09461ABB656B8
6 changed files with 504 additions and 209 deletions

View File

@ -10,6 +10,12 @@ skipDirs = @["tests"]
requires "nim >= 0.19.0" requires "nim >= 0.19.0"
task test, "Run all tests": task test, "Run all tests":
<<<<<<< HEAD
exec "nim c -r --threads:off tests/all_tests" exec "nim c -r --threads:off tests/all_tests"
exec "nim c -r --threads:on tests/all_tests" exec "nim c -r --threads:on tests/all_tests"
=======
--run
--threads:on
setCommand "c", "tests/all_tests"
>>>>>>> refactor bitops2, add endians2

View File

@ -2,6 +2,7 @@
# #
# Nim's Runtime Library # Nim's Runtime Library
# (c) Copyright 2017 Nim Authors # (c) Copyright 2017 Nim Authors
# (c) Copyright 2019 Status Research
# #
# See the file "copying.txt", included in this # See the file "copying.txt", included in this
# distribution, for details about the copyright. # distribution, for details about the copyright.
@ -16,27 +17,44 @@
## ##
## This module is also compatible with other backends: ``Javascript``, ``Nimscript`` ## This module is also compatible with other backends: ``Javascript``, ``Nimscript``
## as well as the ``compiletime VM``. ## as well as the ``compiletime VM``.
##
## As a result of using optimized functions/intrinsics, some functions can
## return undefined results if the input is invalid. You can use the ``maybe*``
## flags to disable the extra checking.
const useBuiltins = not defined(noIntrinsicsBitOpts) const
const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins useBuiltins = not defined(noIntrinsicsBitOpts)
const useICC_builtins = defined(icc) and useBuiltins arch64 = sizeof(int) == 8
const useVCC_builtins = defined(vcc) and useBuiltins
const arch64 = sizeof(int) == 8 template bitsof*(T: typedesc[SomeInteger]): int = 8 * sizeof(T)
template bitsof*(x: SomeInteger): int = 8 * sizeof(x)
# #### Pure Nim version #### # #### Pure Nim version ####
func nextPow2Nim(x: SomeUnsignedInt): SomeUnsignedInt =
  ## Pure Nim computation of the smallest power of two that is `>= x`.
  ##
  ## A value that already is a power of two is returned unchanged. The
  ## arithmetic is unsigned and wraps, so `0` and any value above the largest
  ## representable power of two both yield `0`.
  # Step down by one so that exact powers of two map onto themselves.
  result = x - 1
  # Smear the highest set bit into every lower position, doubling the reach
  # with each step; steps wider than the type are compiled out.
  result = result or (result shr 1)
  result = result or (result shr 2)
  result = result or (result shr 4)
  when bitsof(x) > 8:
    result = result or (result shr 8)
  when bitsof(x) > 16:
    result = result or (result shr 16)
  when bitsof(x) > 32:
    result = result or (result shr 32)
  # All bits below the threshold are now set: one more gives the power of two.
  result = result + 1
func firstOneNim(x: uint32): int = func firstOneNim(x: uint32): int =
## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
# https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
const lookup = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, const lookup = [0'u8, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15,
25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9] 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9]
let k = not x + 1 # get two's complement if x == 0:
1 + lookup[uint32((x and k) * 0x077CB531'u32) shr 27].int 0
else:
let k = not x + 1 # get two's complement
1 + lookup[((x and k) * 0x077CB531'u32) shr 27].int
func firstOneNim(x: uint8|uint16): int = firstOneNim(x.uint32)
func firstOneNim(x: uint64): int = func firstOneNim(x: uint64): int =
## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero. ## Returns the 1-based index of the least significant set bit of x, or if x is zero, returns zero.
# https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup # https://graphics.stanford.edu/%7Eseander/bithacks.html#ZerosOnRightMultLookup
@ -46,7 +64,7 @@ func firstOneNim(x: uint64): int =
else: else:
firstOneNim(uint32(x)) firstOneNim(uint32(x))
func fastLog2Nim(x: uint32): int = func log2truncNim(x: uint8|uint16|uint32): int =
## Quickly find the log base 2 of a 32-bit or less integer. ## Quickly find the log base 2 of a 32-bit or less integer.
# https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
# https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
@ -60,7 +78,7 @@ func fastLog2Nim(x: uint32): int =
v = v or v shr 16 v = v or v shr 16
lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int lookup[uint32(v * 0x07C4ACDD'u32) shr 27].int
func fastLog2Nim(x: uint64): int = func log2truncNim(x: uint64): int =
## Quickly find the log base 2 of a 64-bit integer. ## Quickly find the log base 2 of a 64-bit integer.
# https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
# https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers # https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers
@ -68,7 +86,7 @@ func fastLog2Nim(x: uint64): int =
33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62,
57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31,
35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63] 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63]
var v = x.uint64 var v = x
v = v or v shr 1 # first round down to one less than a power of 2 v = v or v shr 1 # first round down to one less than a power of 2
v = v or v shr 2 v = v or v shr 2
v = v or v shr 4 v = v or v shr 4
@ -77,128 +95,201 @@ func fastLog2Nim(x: uint64): int =
v = v or v shr 32 v = v or v shr 32
lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
func countOnesNim(n: uint32): int = func countOnesNim(x: uint8|uint16|uint32): int =
## Counts the set bits in integer. (also called Hamming weight.) ## Counts the set bits in integer. (also called Hamming weight.)
# generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
var v = n var v = x.uint32
v = v - ((v shr 1) and 0x55555555) v = v - ((v shr 1) and 0x55555555)
v = (v and 0x33333333) + ((v shr 2) and 0x33333333) v = (v and 0x33333333) + ((v shr 2) and 0x33333333)
(((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int (((v + (v shr 4) and 0xF0F0F0F) * 0x1010101) shr 24).int
func countOnesNim(n: uint64): int = func countOnesNim(x: uint64): int =
## Counts the set bits in integer. (also called Hamming weight.) ## Counts the set bits in integer. (also called Hamming weight.)
# generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
var v = n var v = x
v = v - ((v shr 1'u64) and 0x5555555555555555'u64) v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64) v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64) v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
((v * 0x0101010101010101'u64) shr 56'u64).int ((v * 0x0101010101010101'u64) shr 56'u64).int
func parityNim(value: SomeUnsignedInt): int = func parityNim(x: SomeUnsignedInt): int =
# formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
var v = value var v = x
when sizeof(value) == 8: when sizeof(v) == 8:
v = v xor (v shr 32) v = v xor (v shr 32)
when sizeof(value) >= 4: when sizeof(v) >= 4:
v = v xor (v shr 16) v = v xor (v shr 16)
when sizeof(value) >= 2: when sizeof(v) >= 2:
v = v xor (v shr 8) v = v xor (v shr 8)
v = v xor (v shr 4) v = v xor (v shr 4)
v = v and 0xf v = v and 0xf
((0x6996'u shr v) and 1).int ((0x6996'u shr v) and 1).int
when useGCC_builtins: when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
# Returns the number of set 1-bits in value. # Returns the number of set 1-bits in value.
func builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} func builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", nodecl.}
func builtin_popcountll(x: culonglong): cint {.importc: "__builtin_popcountll", cdecl.} func builtin_popcountll(x: culonglong): cint {.importc: "__builtin_popcountll", nodecl.}
# Returns the bit parity in value # Returns the bit parity in value
func builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} func builtin_parity(x: cuint): cint {.importc: "__builtin_parity", nodecl.}
func builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.} func builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", nodecl.}
# Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero. # Returns one plus the index of the least significant 1-bit of x, or if x is zero, returns zero.
func builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", cdecl.} func builtin_ffs(x: cint): cint {.importc: "__builtin_ffs", nodecl.}
func builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", cdecl.} func builtin_ffsll(x: clonglong): cint {.importc: "__builtin_ffsll", nodecl.}
# Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
func builtin_clz(x: cuint): cint {.importc: "__builtin_clz", cdecl.} func builtin_clz(x: cuint): cint {.importc: "__builtin_clz", nodecl.}
func builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", cdecl.} func builtin_clzll(x: culonglong): cint {.importc: "__builtin_clzll", nodecl.}
# Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. func countOnesBuiltin(x: SomeUnsignedInt): int =
func builtin_ctz(x: cuint): cint {.importc: "__builtin_ctz", cdecl.} when bitsof(x) == bitsof(culonglong):
func builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.} builtin_popcountll(x).int
else:
builtin_popcount(x).int
elif useVCC_builtins: func parityBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(culonglong):
builtin_parityll(x)
else:
builtin_parity(x)
func firstOneBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(culonglong):
builtin_ffsll(x)
else:
builtin_ffs(x.cuint.cint)
func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32)
func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v)
elif defined(icc) and useBuiltins:
# Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer.
func builtin_popcnt16(a2: uint16): uint16 {.importc: "__popcnt16" header: "<intrin.h>".} func builtin_popcnt16(a2: uint16): uint16 {.importc: "__popcnt16" header: "<intrin.h>".}
func builtin_popcnt32(a2: uint32): uint32 {.importc: "__popcnt" header: "<intrin.h>".} func builtin_popcnt32(a2: uint32): uint32 {.importc: "__popcnt" header: "<intrin.h>".}
func builtin_popcnt64(a2: uint64): uint64 {.importc: "__popcnt64" header: "<intrin.h>".}
# Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
func bitScanReverse(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanReverse", header: "<intrin.h>".} func bitScanReverse(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanReverse", header: "<intrin.h>".}
func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".}
# Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1). # Search the mask data from least significant bit (LSB) to the most significant bit (MSB) for a set bit (1).
func bitScanForward(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanForward", header: "<intrin.h>".} func bitScanForward(index: ptr culong, mask: culong): cuchar {.importc: "_BitScanForward", header: "<intrin.h>".}
func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".}
template vcc_scan_impl(fnc: untyped, v: untyped): int = when defined(arch64):
var index: culong func builtin_popcnt64(a2: uint64): uint64 {.importc: "__popcnt64" header: "<intrin.h>".}
func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".}
func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".}
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == 0: def
else: index.int
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
discard fnc(index.addr, v) discard fnc(index.addr, v)
index.int index.int
elif useICC_builtins: func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int
func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int
func countOnesBuiltin(v: uint64): int =
when defined(arch64):
builtin_popcnt64(v).int
else:
builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int +
builtin_popcnt32((v shr 32'u64).uint32).int
func firstOneBuiltin(v: uint8|uint16|uint32): int =
1 + checkedScan(bitScanForward, v.culong, -1)
func firstOneBuiltin(v: uint64): int =
when defined(arch64):
1 + checkedScan(bitScanForward64, v.culong, -1)
else:
firstOneNim(v)
func log2truncBuiltin(v: uint8|uint16|uint32): int =
bitScan(bitScanReverse, v.culong)
func log2truncBuiltin(v: uint64): int =
when defined(arch64):
bitScan(bitScanReverse64, v.culong)
else:
log2truncNim(v)
elif defined(vcc) and useBuiltins:
# Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
# see also: https://software.intel.com/en-us/node/523362 # see also: https://software.intel.com/en-us/node/523362
# Count the number of bits set to 1 in an integer a, and return that count in dst. # Count the number of bits set to 1 in an integer a, and return that count in dst.
func builtin_popcnt32(a: cint): cint {.importc: "_popcnt" header: "<immintrin.h>".} func builtin_popcnt32(x: cint): cint {.importc: "_popcnt" header: "<immintrin.h>".}
func builtin_popcnt64(a: uint64): cint {.importc: "_popcnt64" header: "<immintrin.h>".}
# Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
func bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>".} func bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>".}
func bitScanForward64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanForward64", header: "<immintrin.h>".}
# Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. # Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.
func bitScanReverse(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanReverse", header: "<immintrin.h>".} func bitScanReverse(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanReverse", header: "<immintrin.h>".}
func bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanReverse64", header: "<immintrin.h>".}
template icc_scan_impl(fnc: untyped, v: untyped): int = when defined(arch64):
var index: uint32 func builtin_popcnt64(x: uint64): cint {.importc: "_popcnt64" header: "<immintrin.h>".}
discard fnc(index.addr, v) func bitScanForward64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanForward64", header: "<immintrin.h>".}
index.int func bitScanReverse64(p: ptr uint32, b: uint64): cuchar {.importc: "_BitScanReverse64", header: "<immintrin.h>".}
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == 0: def
else: index.int
template bitScan(fnc: untyped, x: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == 0: 0
else: index.int
func countOnesBuiltin(v: uint8|uint16|uint32): int = builtin_popcnt32(v.uint32).int
func countOnesBuiltin(v: uint64): int =
when defined(arch64):
builtin_popcnt64(v).int
else:
builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int +
builtin_popcnt32((v shr 32'u64).uint32).int
func firstOneBuiltin(v: uint8|uint16|uint32): int =
1 + checkedScan(bitScanForward, v.culong, -1)
func firstOneBuiltin(v: uint64): int =
when defined(arch64):
1 + checkedScan(bitScanForward64, v.culong, -1)
else:
firstOneNim(v)
func log2truncBuiltin(v: uint8|uint16|uint32): int =
bitScan(bitScanReverse, v.culong)
func log2truncBuiltin(v: uint64): int =
when defined(arch64):
bitScan(bitScanReverse64, v.culong)
else:
log2truncNim(v)
func countOnes*(x: SomeUnsignedInt): int {.inline.} = func countOnes*(x: SomeUnsignedInt): int {.inline.} =
## Counts the set bits in integer. (also called `Hamming weight`:idx:.) ## Counts the set bits in integer. (also called `Hamming weight`:idx:.)
## ##
## Example: ## Example:
## doAssert oneBits(0b01000100'u8) == 2 ## doAssert countOnes(0b01000100'u8) == 2
# TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
# like GCC and MSVC # like GCC and MSVC
when nimvm: when nimvm:
when sizeof(x) <= 4: countOnesNim(x.uint32) countOnesNim(x)
else: countOnesNim(x.uint64)
else: else:
when useGCC_builtins: when defined(countOnesBuiltin):
when sizeof(x) <= 4: builtin_popcount(x.cuint).int countOnesBuiltin(x)
else: builtin_popcountll(x.culonglong).int
elif useVCC_builtins:
when sizeof(x) <= 2: builtin_popcnt16(x.uint16).int
elif sizeof(x) <= 4: builtin_popcnt32(x.uint32).int
elif arch64: builtin_popcnt64(x.uint64).int
else: builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32 ).int +
builtin_popcnt32((x.uint64 shr 32'u64).uint32 ).int
elif useICC_builtins:
when sizeof(x) <= 4: builtin_popcnt32(x.cint).int
elif arch64: builtin_popcnt64(x.uint64).int
else: builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint ).int +
builtin_popcnt32((x.uint64 shr 32'u64).cint ).int
else: else:
when sizeof(x) <= 4: countOnesNim(x.uint32) countOnesNim(x)
else: countOnesNim(x.uint64)
func countZeros*(x: SomeUnsignedInt): int {.inline.} =
  ## Counts the cleared (zero) bits in `x`, i.e. the bit width of the type
  ## minus the number of set bits.
  ##
  ## Example:
  ## doAssert countZeros(0b01000100'u8) == 6
  # `sizeof` reports bytes, so it has to be scaled to bits before the set bits
  # are subtracted - otherwise the result is negative for most inputs.
  8 * sizeof(x) - countOnes(x)
func parity*(x: SomeUnsignedInt): int {.inline.} = func parity*(x: SomeUnsignedInt): int {.inline.} =
## Calculate the bit parity in integer. If number of 1-bit ## Calculate the bit parity in integer. If number of 1-bit
@ -209,176 +300,97 @@ func parity*(x: SomeUnsignedInt): int {.inline.} =
# Can be used a base if creating ASM version. # Can be used a base if creating ASM version.
# https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
when nimvm: when nimvm:
when sizeof(x) <= 4: parityNim(x.uint32) parityNim(x)
else: parityNim(x.uint64)
else: else:
when useGCC_builtins: when defined parityBuiltin:
when sizeof(x) <= 4: builtin_parity(x.uint32).int parityBuiltin(x)
else: builtin_parityll(x.uint64).int
else: else:
when sizeof(x) <= 4: parityNim(x.uint32) parityNim(x)
else: parityNim(x.uint64)
func firstOne*(x: SomeUnsignedInt, maybeZero = true): int {.inline.} = func firstOne*(x: SomeUnsignedInt): int {.inline.} =
## Returns the 1-based index of the least significant set bit of x. ## Returns the 1-based index of the least significant set bit of x.
## If `x` is zero and `maybeZero` is true, result is 0 ## If `x` is zero result is 0
## If `x` is zero and `maybeZero` is false, result is undefined ##
## firstOne(x) == trailingZeros(x) + 1
## ##
## Example: ## Example:
## doAssert firstOneBit(0b00000010'u8) == 2 ## doAssert firstOneBit(0b00000010'u8) == 2
## ##
when nimvm: when nimvm:
if maybeZero and x == 0: 0 firstOneNim(x)
elif sizeof(x) <= 4: firstOneNim(x.uint32)
else: firstOneNim(x.uint64)
else: else:
when useGCC_builtins: when defined(firstOneBuiltin):
# GCC builtin 'builtin_ffs' already handle zero input. firstOneBuiltin(x)
when sizeof(x) <= 4: builtin_ffs(cast[cint](x.cuint)).int
else: builtin_ffsll(cast[clonglong](x.culonglong)).int
elif useVCC_builtins:
if maybeZero and x == 0: 0
elif sizeof(x) <= 4: 1 + vcc_scan_impl(bitScanForward, x.culong)
elif arch64: 1 + vcc_scan_impl(bitScanForward64, x.uint64)
else: firstOneBitNim(x.uint64)
elif useICC_builtins:
if maybeZero and x == 0: 0
elif sizeof(x) <= 4: 1 + icc_scan_impl(bitScanForward, x.uint32)
elif arch64: 1 + icc_scan_impl(bitScanForward64, x.uint64)
else: firstOneBitNim(x.uint64)
else: else:
if maybeZero and x == 0: 0 firstOneNim(x)
elif sizeof(x) <= 4: firstOneBitNim(x.uint32)
else: firstOneBitNim(x.uint64)
func fastLog2*(x: SomeUnsignedInt, maybeZero = true): int {.inline.} = func log2trunc*(x: SomeUnsignedInt): int {.inline.} =
## Return the truncated base 2 logarithm of `x` ## Return the truncated base 2 logarithm of `x` - this is the zero-based
## If `x` is zero and `maybeZero` is true, result is -1 ## index of the last set bit.
## If `x` is zero and `maybeZero` is false, result is undefined ##
## If `x` is zero result is -1
##
## log2trunc(x) == bitsof(x) - leadingZeros(x) - 1.
## ##
## Example: ## Example:
## doAssert fastLog2Bit(0b01000000'u8) == 6 ## doAssert log2trunc(0b01001000'u8) == 6
if maybeZero and x == 0: -1 if x == 0: -1
else: else:
when nimvm: when nimvm:
when sizeof(x) <= 4: fastLog2Nim(x.uint32) log2truncNim(x)
else: fastLog2Nim(x.uint64)
else: else:
when useGCC_builtins: when defined(log2truncBuiltin):
when sizeof(x) <= 4: 31 - builtin_clz(x.uint32).int log2truncBuiltin(x)
else: 63 - builtin_clzll(x.uint64).int
elif useVCC_builtins:
when sizeof(x) <= 4: vcc_scan_impl(bitScanReverse, x.culong)
elif arch64: vcc_scan_impl(bitScanReverse64, x.uint64)
else: fastLog2Nim(x.uint64)
elif useICC_builtins:
when sizeof(x) <= 4: icc_scan_impl(bitScanReverse, x.uint32)
elif arch64: icc_scan_impl(bitScanReverse64, x.uint64)
else: fastLog2Nim(x.uint64)
else: else:
when sizeof(x) <= 4: fastLog2Nim(x.uint32) log2truncNim(x)
else: fastLog2Nim(x.uint64)
func leadingZeros*(x: SomeInteger, maybeZero = true): int {.inline.} = func leadingZeros*(x: SomeInteger): int {.inline.} =
## Returns the number of leading zero bits in integer. ## Returns the number of leading zero bits in integer.
## If `x` is zero and maybeZero is true, result is sizeof(x) * 8 ## If `x` is zero, result is bitsof(x)
## If `x` is zero and maybeZero is false, result is undefined
## ##
## Example: ## Example:
## doAssert leadingZeroBits(0b00100000'u8) == 2 ## doAssert leadingZeros(0b00000000'u8) == 8
## doAssert leadingZeros(0b00100000'u8) == 2
## ##
## Performance note: # Performance note:
## On recent x86_64 cpu's, this translates to the LZCNT instruction # On recent x86_64 cpu's, this translates to the LZCNT instruction
if maybeZero and x == 0: sizeof(x) * 8 bitsof(x) - 1 - log2trunc(x)
else:
when nimvm:
when sizeof(x) <= 4: sizeof(x)*8 - 1 - fastLog2Nim(x.uint32)
else: sizeof(x)*8 - 1 - fastLog2Nim(x.uint64)
else:
when useGCC_builtins:
when sizeof(x) <= sizeof(cuint):
builtin_clz(x.cuint).int - (sizeof(cuint) - sizeof(x)) * 8
else:
builtin_clzll(x.culonglong).int
else:
when sizeof(x) <= 4: sizeof(x)*8 - 1 - fastLog2Nim(x.uint32)
else: sizeof(x)*8 - 1 - fastLog2Nim(x.uint64)
func trailingZeros*(x: SomeUnsignedInt, maybeZero = true): int = func trailingZeros*(x: SomeUnsignedInt): int {.inline.} =
## Returns the number of trailing zeros in integer. ## Returns the number of trailing zeros in integer.
## If `x` is zero and maybeZero is true, result is sizeof(x) * 8 ## If `x` is zero, result is sizeof(x) * 8
## If `x` is zero and maybeZero is false, result is undefined
## ##
## Example: ## Example:
## doAssert trailingZeroBits(0b00000010'u8) == 1 ## doAssert trailingZeros(0b00000010'u8) == 1
## ##
## Performance note: # Performance note:
## On recent x86_64 cpu's, this translates to the TZCNT instruction # On recent x86_64 cpu's, this translates to the TZCNT instruction
if maybeZero and x == 0: sizeof(x) * 8 if x == 0:
bitsof(x)
else: else:
when nimvm: firstOne(x) - 1
firstOne(x) - 1
else:
when useGCC_builtins:
when sizeof(x) <= sizeof(cuint): builtin_ctz(x.cuint).int
else: builtin_ctzll(x.culonglong).int
else: firstOneBit(x) - 1
func rotateLeft*(value: uint8, amount: SomeInteger): uint8 = func nextPow2*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
## Left-rotate bits in a 8-bits value. ## Calculate the next power-of-2 of x - wraps to 0
##
## Examples:
## doAssert nextPow2(3) == 4
## doAssert nextPow2(4) == 4
nextPow2Nim(x)
func rotateLeft*(v: SomeUnsignedInt, amount: SomeInteger):
SomeUnsignedInt {.inline.} =
## Left-rotate bits in an unsigned value
# using this form instead of the one below should handle any value # using this form instead of the one below should handle any value
# out of range as well as negative values. # out of range as well as negative values.
# result = (value shl amount) or (value shr (8 - amount))
# taken from: https://en.wikipedia.org/wiki/Circular_shift#Implementing_circular_shifts # taken from: https://en.wikipedia.org/wiki/Circular_shift#Implementing_circular_shifts
let amount = int(amount and 7) const mask = 8 * sizeof(v) - 1
(value shl amount) or (value shr ( (-amount) and 7)) let amount = int(amount and mask)
(v shl amount) or (v shr ( (-amount) and mask))
func rotateLeft*(value: uint16, amount: SomeInteger): uint16 = func rotateRight*(v: SomeUnsignedInt, amount: SomeInteger):
## Left-rotate bits in a 16-bits value. SomeUnsignedInt {.inline.} =
let amount = int(amount and 15) ## Right-rotate bits in an unsigned value.
(value shl amount) or (value shr ( (-amount) and 15)) const mask = bitsof(v) - 1
let amount = int(amount and mask)
func rotateLeft*(value: uint32, amount: SomeInteger): uint32 = (v shr amount) or (v shl ( (-amount) and mask))
## Left-rotate bits in a 32-bits value.
let amount = int(amount and 31)
(value shl amount) or (value shr ( (-amount) and 31))
func rotateLeft*(value: uint64, amount: SomeInteger): uint64 =
## Left-rotate bits in a 64-bits value.
let amount = int(amount and 63)
(value shl amount) or (value shr ( (-amount) and 63))
func rotateRight*(value: uint8, amount: SomeInteger): uint8 =
## Right-rotate bits in a 8-bits value.
let amount = int(amount and 7)
(value shr amount) or (value shl ( (-amount) and 7))
func rotateRight*(value: uint16, amount: SomeInteger): uint16 =
## Right-rotate bits in a 16-bits value.
let amount = int(amount and 15)
(value shr amount) or (value shl ( (-amount) and 15))
func rotateRight*(value: uint32, amount: SomeInteger): uint32 =
## Right-rotate bits in a 32-bits value.
let amount = int(amount and 31)
(value shr amount) or (value shl ( (-amount) and 31))
func rotateRight*(value: uint64, amount: SomeInteger): uint64 =
## Right-rotate bits in a 64-bits value.
let amount = int(amount and 63)
(value shr amount) or (value shl ( (-amount) and 63))
when isMainModule:
template test() =
doAssert countOnes(0b01000100'u8) == 2
doAssert parity(0b00000001'u8) == 1
doAssert firstOne(0b00000010'u8) == 2
doAssert firstOne(0'u8) == 0
doAssert fastLog2(0b01000000'u8) == 6
doAssert leadingZeros(0b00100000'u8) == 2
doAssert trailingZeros(0b00100000'u8) == 5
doAssert leadingZeros(0'u8) == 8
doAssert trailingZeros(0'u8) == 8
test()
static: test()

181
stew/endians2.nim Normal file
View File

@ -0,0 +1,181 @@
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Endian conversion operations for unsigned integers, suitable for serializing
# and deserializing data. The operations are only defined for unsigned
# integers - if you wish to encode signed integers, convert / cast them to
# unsigned first!
#
# Although it would be possible to enforce correctness with endians in the type
# (`BigEndian[uint64]`) this seems like overkill. That said, some
# static analysis tools allow you to annotate fields with endianness - perhaps
# an idea for the future, akin to `TaintedString`?
#
# Keeping the above in mind, it's generally safer to use `array[N, byte]` to
# hold values of specific endianness and read them out with `fromBytes` when the
# integer interpretation of the bytes is needed.
#
# Though it doesn't quite make sense, we include byte swappers for single bytes
# for generic convenience.
# Compiler specific byte swap intrinsics. Every branch exposes the same
# overload set - `swapBytesBuiltin` for uint8/16/32/64 - so that generic code
# can pick an intrinsic by argument type alone. Compilers that match no branch
# get no `swapBytesBuiltin` at all.
when defined(gcc) or defined(llvm_gcc) or defined(clang):
  # Swapping a single byte is the identity; no intrinsic needed.
  func swapBytesBuiltin(x: uint8): uint8 = x
  func swapBytesBuiltin(x: uint16): uint16 {.
      importc: "__builtin_bswap16", nodecl.}
  func swapBytesBuiltin(x: uint32): uint32 {.
      importc: "__builtin_bswap32", nodecl.}
  func swapBytesBuiltin(x: uint64): uint64 {.
      importc: "__builtin_bswap64", nodecl.}

elif defined(icc):
  func swapBytesBuiltin(x: uint8): uint8 = x
  func swapBytesBuiltin(a: uint16): uint16 {.importc: "_bswap16", nodecl.}
  # Pragma spelled `nodecl` (was the unknown pragma `nodec`).
  func swapBytesBuiltin(a: uint32): uint32 {.importc: "_bswap", nodecl.}
  func swapBytesBuiltin(a: uint64): uint64 {.importc: "_bswap64", nodecl.}

elif defined(vcc):
  func swapBytesBuiltin(x: uint8): uint8 = x
  # Declared under the shared `swapBytesBuiltin` name (previously
  # `builtin_bswap16/32/64`), matching the other compiler branches.
  func swapBytesBuiltin(a: uint16): uint16 {.
      importc: "_byteswap_ushort", cdecl, header: "<intrin.h>".}
  func swapBytesBuiltin(a: uint32): uint32 {.
      importc: "_byteswap_ulong", cdecl, header: "<intrin.h>".}
  func swapBytesBuiltin(a: uint64): uint64 {.
      importc: "_byteswap_uint64", cdecl, header: "<intrin.h>".}
# Portable byte swappers, used in the compile-time VM and whenever no compiler
# intrinsic is available for the argument type.
func swapBytesNim(x: uint8): uint8 = x

func swapBytesNim(x: uint16): uint16 = (x shl 8) or (x shr 8)

func swapBytesNim(x: uint32): uint32 =
  # Exchange the 16-bit halves, then the bytes inside each half.
  let v = (x shl 16) or (x shr 16)
  ((v shl 8) and 0xff00ff00'u32) or ((v shr 8) and 0x00ff00ff'u32)

func swapBytesNim(x: uint64): uint64 =
  # Exchange the 32-bit halves, then 16-bit words, then individual bytes.
  let halves = (x shl 32) or (x shr 32)
  let words =
    ((halves and 0x0000ffff0000ffff'u64) shl 16) or
    ((halves and 0xffff0000ffff0000'u64) shr 16)
  ((words and 0x00ff00ff00ff00ff'u64) shl 8) or
    ((words and 0xff00ff00ff00ff00'u64) shr 8)

func swapBytes*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
  ## Reverse the bytes within an integer, such that the most significant byte
  ## changes place with the least significant one, etc
  ##
  ## Example:
  ## doAssert swapBytes(0x01234567'u32) == 0x67452301
  when nimvm:
    # Imported C intrinsics cannot be evaluated by the compile-time VM.
    swapBytesNim(x)
  else:
    # `defined` only looks at `-d:` symbols and was therefore always false
    # here, so the intrinsics were never selected. `compiles` asks whether an
    # intrinsic overload exists for exactly this argument type and falls back
    # to the portable version otherwise.
    when compiles(swapBytesBuiltin(x)):
      swapBytesBuiltin(x)
    else:
      swapBytesNim(x)
func toBytes*(x: uint8|uint16|uint32|uint64, endian: Endianness = system.cpuEndian):
    array[sizeof(x), byte] {.noinit, inline.} =
  ## Convert integer to its corresponding byte sequence using the chosen
  ## endianness. By default, native endianness is used which is not portable!
  ##
  ## Example:
  ## doAssert toBytes(0x0102'u16, bigEndian) == [1'u8, 2]
  # Shifts pick bytes by significance regardless of the host byte order, so
  # the requested order alone decides where each byte lands. Comparing against
  # `cpuEndian` and swapping first produced reversed output on big-endian
  # hosts.
  # Loop since vm can't copymem - every slot is written exactly once, which is
  # what makes `noinit` safe.
  for i in 0 ..< result.len:
    let pos =
      if endian == littleEndian: i
      else: result.high - i
    result[pos] = byte((x shr (i * 8)) and 0xff)
func toBytesLE*(x: uint8|uint16|uint32|uint64):
    array[sizeof(x), byte] {.inline.} =
  ## Convert a native endian integer to a little endian byte sequence.
  ## Shorthand for `toBytes(x, littleEndian)`.
  toBytes(x, littleEndian)
func toBytesBE*(x: uint8|uint16|uint32|uint64):
    array[sizeof(x), byte] {.inline.} =
  ## Convert a native endian integer to a big endian byte sequence.
  ## Shorthand for `toBytes(x, bigEndian)`.
  toBytes(x, bigEndian)
func fromBytes*(
    T: typedesc[uint8|uint16|uint32|uint64],
    x: array[sizeof(T), byte],
    endian: Endianness = system.cpuEndian): T {.inline.} =
  ## Convert a byte sequence to a native endian integer. By default, native
  ## endianness is used which is not portable!
  ##
  ## With `littleEndian`, `x[0]` is the least significant byte of the result,
  ## with `bigEndian` it is the most significant one.
  # The value is assembled arithmetically, so the weight of each byte depends
  # only on the requested order and never on the byte order of the host CPU.
  # `result` starts out zeroed. No copymem in vm, hence the loop.
  for i in 0..x.high:
    let shift =
      if endian == littleEndian: i * 8
      else: (x.high - i) * 8
    result = result or (T(x[i]) shl shift)
func fromBytes*(
    T: type,
    x: openArray[byte],
    endian: Endianness = system.cpuEndian): T {.inline.} =
  ## Read bytes and convert to an integer according to the given endianness. At
  ## runtime, `x` must contain at least sizeof(T) bytes; only the first
  ## sizeof(T) bytes are read. By default, native endianness is used which is
  ## not portable!
  const ts = sizeof(T) # Nim bug: can't use sizeof directly
  # Copy the leading bytes into a fixed-size array so that the conversion can
  # be delegated to the array overload.
  var tmp: array[ts, byte]
  for i in 0..<tmp.len: # Loop since vm can't copymem
    tmp[i] = x[i]
  fromBytes(T, tmp, endian)
func fromBytesBE*(
    T: typedesc[uint8|uint16|uint32|uint64],
    x: array[sizeof(T), byte]): T {.inline.} =
  ## Read big endian bytes and convert to a native endian integer.
  ## Shorthand for `fromBytes(T, x, bigEndian)`.
  fromBytes(T, x, bigEndian)
func fromBytesBE*(
    T: typedesc[uint8|uint16|uint32|uint64],
    x: openArray[byte]): T {.inline.} =
  ## Read big endian bytes and convert to a native endian integer. At runtime,
  ## `x` must contain at least sizeof(T) bytes.
  ## Shorthand for `fromBytes(T, x, bigEndian)`.
  fromBytes(T, x, bigEndian)
func toBE*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
  ## Turn a native endian value into its big endian representation. Prefer
  ## toBytesBE where possible, it tends to be less confusing.
  # `cpuEndian` is a compile-time constant, so the branch is chosen statically:
  # big endian hosts return `x` untouched, all others swap the bytes.
  when cpuEndian == bigEndian:
    x
  else:
    swapBytes(x)
func fromBE*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
  ## Read a big endian value and return the corresponding native endian value.
  # There's no difference between this and toBE, except when reading the code:
  # the conversion is the same operation in both directions.
  toBE(x)
func fromBytesLE*(
    T: typedesc[uint8|uint16|uint32|uint64],
    x: array[sizeof(T), byte]): T {.inline.} =
  ## Read little endian bytes and convert to a native endian integer.
  ## Shorthand for `fromBytes(T, x, littleEndian)`.
  fromBytes(T, x, littleEndian)
func fromBytesLE*(
    T: typedesc[uint8|uint16|uint32|uint64],
    x: openArray[byte]): T {.inline.} =
  ## Read little endian bytes and convert to a native endian integer. At
  ## runtime, `x` must contain at least sizeof(T) bytes.
  ## Shorthand for `fromBytes(T, x, littleEndian)`.
  fromBytes(T, x, littleEndian)
func toLE*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
  ## Turn a native endian value into its little endian representation. Prefer
  ## toBytesLE where possible, it tends to be less confusing.
  # `cpuEndian` is a compile-time constant, so the branch is chosen statically:
  # little endian hosts return `x` untouched, all others swap the bytes.
  when cpuEndian == littleEndian:
    x
  else:
    swapBytes(x)
func fromLE*(x: SomeUnsignedInt): SomeUnsignedInt {.inline.} =
  ## Read a little endian value and return the corresponding native endian
  ## value.
  # There's no difference between this and toLE, except when reading the code:
  # the conversion is the same operation in both directions.
  toLE(x)

View File

@ -10,4 +10,6 @@
import import
ranges/all, ranges/all,
test_byteutils test_bitops2,
test_byteutils,
test_endians2

56
tests/test_bitops2.nim Normal file
View File

@ -0,0 +1,56 @@
import unittest
import ../stew/bitops2
template test() =
  # Assertions shared by the compile-time run (`static:` below) and the
  # runtime unittest suite.

  # bitsof is called with both a value and a type
  doAssert bitsof(8'u8) == 8
  doAssert bitsof(uint64) == 64

  # countOnes: number of set bits
  doAssert countOnes(0b00000000'u8) == 0
  doAssert countOnes(0b01000100'u8) == 2
  doAssert countOnes(0b11111111'u64) == 8

  # firstOne: expected to be the 1-based position of the lowest set bit, with
  # 0 meaning that no bit is set
  doAssert firstOne(0b00000000'u8) == 0
  doAssert firstOne(0b00000001'u64) == 1
  doAssert firstOne(0b00010010'u8) == 2
  doAssert firstOne(0b11111111'u8) == 1
  doAssert firstOne(0b100000000000000000000000000000000'u64) == 33

  # leadingZeros: depends on the width of the type; zero yields the full width
  doAssert leadingZeros(0b00000000'u8) == 8
  doAssert leadingZeros(0b00000001'u8) == 7
  doAssert leadingZeros(0b00100000'u8) == 2
  doAssert leadingZeros(0b10000000'u8) == 0
  doAssert leadingZeros(0b10000000'u16) == 8
  doAssert leadingZeros(0b10000000'u32) == 24
  doAssert leadingZeros(0b10000000'u64) == 56

  # log2trunc: expected to be the index of the highest set bit, -1 for zero
  doAssert log2trunc(0b00000000'u8) == -1
  doAssert log2trunc(0b00000001'u8) == 0
  doAssert log2trunc(0b00000010'u8) == 1
  doAssert log2trunc(0b01000000'u8) == 6
  doAssert log2trunc(0b01001000'u8) == 6
  doAssert log2trunc(0b10001000'u64) == 7

  # nextPow2: zero and exact powers of two are expected to map to themselves
  doAssert nextPow2(0'u64) == 0
  doAssert nextPow2(3'u64) == 4
  doAssert nextPow2(4'u32) == 4

  # parity: 1 for an odd number of set bits, 0 for an even number
  doAssert parity(0b00000001'u8) == 1
  doAssert parity(0b10000001'u64) == 0

  # rotations wrap the bits shifted out back in at the other end
  doAssert rotateLeft(0b01000001'u8, 2) == 0b00000101'u8
  doAssert rotateRight(0b01000001'u8, 2) == 0b01010000'u8

  # trailingZeros: independent of the width, except for zero which yields the
  # full width
  doAssert trailingZeros(0b00000000'u8) == 8
  doAssert trailingZeros(0b00100000'u8) == 5
  doAssert trailingZeros(0b00100001'u8) == 0
  doAssert trailingZeros(0b10000000'u8) == 7
  doAssert trailingZeros(0b10000000'u16) == 7
  doAssert trailingZeros(0b10000000'u32) == 7
  doAssert trailingZeros(0b10000000'u64) == 7

# Run the assertions in the compile-time VM...
static: test()

# ...and once more at runtime, as a regular unittest case.
suite "bitops2":
  test "bitops2_test":
    test() # Cannot use unittest at compile time..

38
tests/test_endians2.nim Normal file
View File

@ -0,0 +1,38 @@
import unittest
import ../stew/endians2
template test() =
  # Assertions shared by the compile-time run (`static:` below) and the
  # runtime unittest suite.

  # integer -> big endian bytes: most significant byte first
  doAssert 0x01'u8.toBytesBE == [0x01'u8]
  doAssert 0x0123'u16.toBytesBE == [0x01'u8, 0x23'u8]
  doAssert 0x01234567'u32.toBytesBE == [0x01'u8, 0x23'u8, 0x45'u8, 0x67'u8]
  doAssert 0x0123456789abcdef'u64.toBytesBE == [
    0x01'u8, 0x23'u8, 0x45'u8, 0x67'u8, 0x89'u8, 0xab'u8, 0xcd'u8, 0xef'u8]

  # integer -> little endian bytes: least significant byte first
  doAssert 0x01'u8.toBytesLE == [0x01'u8]
  doAssert 0x0123'u16.toBytesLE == [0x23'u8, 0x01'u8]
  doAssert 0x01234567'u32.toBytesLE == [0x67'u8, 0x45'u8, 0x23'u8, 0x01'u8]
  doAssert 0x0123456789abcdef'u64.toBytesLE == [
    0xef'u8, 0xcd'u8, 0xab'u8, 0x89'u8, 0x67'u8, 0x45'u8, 0x23'u8, 0x01'u8]

  # big endian bytes -> integer
  doAssert 0x01'u8 == uint8.fromBytesBE([0x01'u8])
  doAssert 0x0123'u16 == uint16.fromBytesBE([0x01'u8, 0x23'u8])
  doAssert 0x01234567'u32 == uint32.fromBytesBE(
    [0x01'u8, 0x23'u8, 0x45'u8, 0x67'u8])
  doAssert 0x0123456789abcdef'u64 == uint64.fromBytesBE(
    [0x01'u8, 0x23'u8, 0x45'u8, 0x67'u8, 0x89'u8, 0xab'u8, 0xcd'u8, 0xef'u8])

  # little endian bytes -> integer
  doAssert 0x01'u8 == uint8.fromBytesLE([0x01'u8])
  doAssert 0x0123'u16 == uint16.fromBytesLE([0x23'u8, 0x01'u8])
  doAssert 0x01234567'u32 == uint32.fromBytesLE(
    [0x67'u8, 0x45'u8, 0x23'u8, 0x01'u8])
  doAssert 0x0123456789abcdef'u64 == uint64.fromBytesLE([
    0xef'u8, 0xcd'u8, 0xab'u8, 0x89'u8, 0x67'u8, 0x45'u8, 0x23'u8, 0x01'u8])

  # plain byte reversal
  doAssert 0x01234567'u32.swapBytes() == 0x67452301

# Run the assertions in the compile-time VM...
static: test()

# ...and once more at runtime, as a regular unittest case.
suite "endians2":
  test "endians2_test":
    test() # Cannot use unittest at compile time..