fix use of compiler intrinsics in endians/bitops (#54)

* fix use of compiler intrinsics in endians/bitops

* windows fix
This commit is contained in:
Jacek Sieka 2020-09-21 08:43:27 +02:00 committed by GitHub
parent 1db43c7234
commit 47ff49aae7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 25 deletions

View File

@ -12,6 +12,7 @@ requires "nim >= 1.2.0"
task test, "Run all tests": task test, "Run all tests":
exec "nim c -r --threads:off tests/all_tests" exec "nim c -r --threads:off tests/all_tests"
exec "nim c -r --threads:on -d:nimTypeNames tests/all_tests" exec "nim c -r --threads:on -d:nimTypeNames tests/all_tests"
exec "nim c -r --threads:on -d:noIntrinsicsBitOpts -d:noIntrinsicsEndians tests/all_tests"
task testvcc, "Run all tests with vcc compiler": task testvcc, "Run all tests with vcc compiler":
exec "nim c -r --cc:vcc --threads:off tests/all_tests" exec "nim c -r --cc:vcc --threads:off tests/all_tests"

View File

@ -169,15 +169,15 @@ when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
builtin_parity(x) builtin_parity(x)
func firstOneBuiltin(x: SomeUnsignedInt): int = func firstOneBuiltin(x: SomeUnsignedInt): int =
when bitsof(x) == bitsof(culonglong): when bitsof(x) == bitsof(clonglong):
builtin_ffsll(x) builtin_ffsll(clonglong(x))
else: else:
builtin_ffs(x.cuint.cint) builtin_ffs(x.cuint.cint)
func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32) func log2truncBuiltin(v: uint8|uint16|uint32): int = 31 - builtin_clz(v.uint32)
func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v) func log2truncBuiltin(v: uint64): int = 63 - builtin_clzll(v)
elif defined(icc) and useBuiltins: elif defined(vcc) and useBuiltins:
const arch64 = sizeof(int) == 8 const arch64 = sizeof(int) == 8
# Counts the number of one bits (population count) in a 16-, 32-, or 64-bit unsigned integer. # Counts the number of one bits (population count) in a 16-, 32-, or 64-bit unsigned integer.
@ -195,16 +195,6 @@ elif defined(icc) and useBuiltins:
func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".} func bitScanReverse64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanReverse64", header: "<intrin.h>".}
func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".} func bitScanForward64(index: ptr culong, mask: uint64): cuchar {.importc: "_BitScanForward64", header: "<intrin.h>".}
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == 0: def
else: index.int
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
discard fnc(index.addr, v)
index.int
func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int func countOnesBuiltin(v: uint8|uint16): int = builtin_popcnt16(v.uint16).int
func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int func countOnesBuiltin(v: uint32): int = builtin_popcnt32(v).int
func countOnesBuiltin(v: uint64): int = func countOnesBuiltin(v: uint64): int =
@ -214,15 +204,25 @@ elif defined(icc) and useBuiltins:
builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int + builtin_popcnt32((v and 0xFFFFFFFF'u64).uint32).int +
builtin_popcnt32((v shr 32'u64).uint32).int builtin_popcnt32((v shr 32'u64).uint32).int
template checkedScan(fnc: untyped, x: typed, def: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v) == cuchar(0): def
else: index.int
func firstOneBuiltin(v: uint8|uint16|uint32): int = func firstOneBuiltin(v: uint8|uint16|uint32): int =
1 + checkedScan(bitScanForward, v.culong, -1) 1 + checkedScan(bitScanForward, v.culong, -1)
func firstOneBuiltin(v: uint64): int = func firstOneBuiltin(v: uint64): int =
when arch64: when arch64:
1 + checkedScan(bitScanForward64, v.culong, -1) 1 + checkedScan(bitScanForward64, v.culonglong, -1)
else: else:
firstOneNim(v) firstOneNim(v)
template bitScan(fnc: untyped, x: typed): int =
var index{.noinit.}: culong
if fnc(index.addr, v).int == 0: 0
else: index.int
func log2truncBuiltin(v: uint8|uint16|uint32): int = func log2truncBuiltin(v: uint8|uint16|uint32): int =
bitScan(bitScanReverse, v.culong) bitScan(bitScanReverse, v.culong)
@ -232,13 +232,13 @@ elif defined(icc) and useBuiltins:
else: else:
log2truncNim(v) log2truncNim(v)
elif defined(vcc) and useBuiltins: elif defined(icc) and useBuiltins:
const arch64 = sizeof(int) == 8 const arch64 = sizeof(int) == 8
# Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
# see also: https://software.intel.com/en-us/node/523362 # see also: https://software.intel.com/en-us/node/523362
# Count the number of bits set to 1 in an integer a, and return that count in dst. # Count the number of bits set to 1 in an integer a, and return that count in dst.
func builtin_popcnt32(x: cint): cint {.importc: "_popcnt" header: "<immintrin.h>".} func builtin_popcnt32(x: cint): cint {.importc: "_popcnt32" header: "<immintrin.h>".}
# Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
func bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>".} func bitScanForward(p: ptr uint32, b: uint32): cuchar {.importc: "_BitScanForward", header: "<immintrin.h>".}
@ -261,7 +261,7 @@ elif defined(vcc) and useBuiltins:
if fnc(index.addr, v).int == 0: 0 if fnc(index.addr, v).int == 0: 0
else: index.int else: index.int
func countOnesBuiltin(v: uint8|uint16|uint32): int = builtin_popcnt32(v.uint32).int func countOnesBuiltin(v: uint8|uint16|uint32): int = builtin_popcnt32(v.cint).int
func countOnesBuiltin(v: uint64): int = func countOnesBuiltin(v: uint64): int =
when arch64: when arch64:
builtin_popcnt64(v).int builtin_popcnt64(v).int
@ -297,7 +297,7 @@ func countOnes*(x: SomeUnsignedInt): int {.inline.} =
when nimvm: when nimvm:
countOnesNim(x) countOnesNim(x)
else: else:
when defined(countOnesBuiltin): when declared(countOnesBuiltin):
countOnesBuiltin(x) countOnesBuiltin(x)
else: else:
countOnesNim(x) countOnesNim(x)
@ -316,7 +316,7 @@ func parity*(x: SomeUnsignedInt): int {.inline.} =
when nimvm: when nimvm:
parityNim(x) parityNim(x)
else: else:
when defined parityBuiltin: when declared(parityBuiltin):
parityBuiltin(x) parityBuiltin(x)
else: else:
parityNim(x) parityNim(x)
@ -333,7 +333,7 @@ func firstOne*(x: SomeUnsignedInt): int {.inline.} =
when nimvm: when nimvm:
firstOneNim(x) firstOneNim(x)
else: else:
when defined(firstOneBuiltin): when declared(firstOneBuiltin):
firstOneBuiltin(x) firstOneBuiltin(x)
else: else:
firstOneNim(x) firstOneNim(x)
@ -353,7 +353,7 @@ func log2trunc*(x: SomeUnsignedInt): int {.inline.} =
when nimvm: when nimvm:
log2truncNim(x) log2truncNim(x)
else: else:
when defined(log2truncBuiltin): when declared(log2truncBuiltin):
log2truncBuiltin(x) log2truncBuiltin(x)
else: else:
log2truncNim(x) log2truncNim(x)

View File

@ -28,7 +28,10 @@ type
## * intX - over and underflow protection in nim might easily cause issues - ## * intX - over and underflow protection in nim might easily cause issues -
## need to consider before adding here ## need to consider before adding here
when defined(gcc) or defined(llvm_gcc) or defined(clang): const
useBuiltins = not defined(noIntrinsicsEndians)
when (defined(gcc) or defined(llvm_gcc) or defined(clang)) and useBuiltins:
func swapBytesBuiltin(x: uint8): uint8 = x func swapBytesBuiltin(x: uint8): uint8 = x
func swapBytesBuiltin(x: uint16): uint16 {. func swapBytesBuiltin(x: uint16): uint16 {.
importc: "__builtin_bswap16", nodecl.} importc: "__builtin_bswap16", nodecl.}
@ -39,13 +42,13 @@ when defined(gcc) or defined(llvm_gcc) or defined(clang):
func swapBytesBuiltin(x: uint64): uint64 {. func swapBytesBuiltin(x: uint64): uint64 {.
importc: "__builtin_bswap64", nodecl.} importc: "__builtin_bswap64", nodecl.}
elif defined(icc): elif defined(icc) and useBuiltins:
func swapBytesBuiltin(x: uint8): uint8 = x func swapBytesBuiltin(x: uint8): uint8 = x
func swapBytesBuiltin(a: uint16): uint16 {.importc: "_bswap16", nodecl.} func swapBytesBuiltin(a: uint16): uint16 {.importc: "_bswap16", nodecl.}
func swapBytesBuiltin(a: uint32): uint32 {.importc: "_bswap", nodec.} func swapBytesBuiltin(a: uint32): uint32 {.importc: "_bswap", nodec.}
func swapBytesBuiltin(a: uint64): uint64 {.importc: "_bswap64", nodecl.} func swapBytesBuiltin(a: uint64): uint64 {.importc: "_bswap64", nodecl.}
elif defined(vcc): elif defined(vcc) and useBuiltins:
func swapBytesBuiltin(x: uint8): uint8 = x func swapBytesBuiltin(x: uint8): uint8 = x
func swapBytesBuiltin(a: uint16): uint16 {. func swapBytesBuiltin(a: uint16): uint16 {.
importc: "_byteswap_ushort", cdecl, header: "<intrin.h>".} importc: "_byteswap_ushort", cdecl, header: "<intrin.h>".}
@ -82,7 +85,7 @@ func swapBytes*[T: SomeEndianInt](x: T): T {.inline.} =
when nimvm: when nimvm:
swapBytesNim(x) swapBytesNim(x)
else: else:
when defined(swapBytesBuiltin): when declared(swapBytesBuiltin):
swapBytesBuiltin(x) swapBytesBuiltin(x)
else: else:
swapBytesNim(x) swapBytesNim(x)