mirror of
https://github.com/status-im/nim-stint.git
synced 2025-02-22 03:48:20 +00:00
Add alternative branchless shift implementation (TODO benchmark on ARM)
This commit is contained in:
parent
6e27069298
commit
5886d76ebc
@ -58,3 +58,17 @@ macro getSubType*(T: typedesc): untyped =
|
|||||||
## Returns the subtype of a generic type
|
## Returns the subtype of a generic type
|
||||||
## MpUint[uint32] --> uint32
|
## MpUint[uint32] --> uint32
|
||||||
getTypeInst(T)[1][1]
|
getTypeInst(T)[1][1]
|
||||||
|
|
||||||
|
|
||||||
|
proc toMpUint*[T: SomeInteger](n: T): auto {.noSideEffect, inline.} =
  ## Reinterpret an unsigned integer as an `MpUint` built on the next
  ## smaller unsigned type:
  ##
  ## * `uint64` --> `MpUint[uint32]`
  ## * `uint32` --> `MpUint[uint16]`
  ## * `uint16` --> `MpUint[uint8]`
  ##
  ## The branch is selected at compile time from `T`. Any other integer
  ## type reaches the `else` branch, which raises a `ValueError`.
  ##
  ## NOTE(review): this presumably relies on `MpUint[X]` occupying exactly
  ## the same number of bytes as the source integer - confirm against the
  ## `MpUint` type declaration.
  # Sometimes direct casting doesn't work and we must cast through a pointer

  when T is uint64:
    return (cast[ptr [MpUint[uint32]]](unsafeAddr n))[]
  elif T is uint32:
    return (cast[ptr [MpUint[uint16]]](unsafeAddr n))[]
  elif T is uint16:
    # `n` is an immutable parameter, so its address has to be taken with
    # `unsafeAddr`, exactly as in the two branches above.
    return (cast[ptr [MpUint[uint8]]](unsafeAddr n))[]
  else:
    raise newException(ValueError, "You can only cast uint16, uint32 or uint64 to multiprecision integers")
|
@ -77,7 +77,7 @@ template naiveMulImpl[T: MpUint](x, y: T): MpUint[T] =
|
|||||||
# and introduce branching
|
# and introduce branching
|
||||||
# - More total operations means more register moves
|
# - More total operations means more register moves
|
||||||
|
|
||||||
let # TODO: should be a const - https://github.com/nim-lang/Nim/pull/5664
|
let # cannot be const, compile-time sizeof only works for simple types
|
||||||
size = (T.sizeof * 8)
|
size = (T.sizeof * 8)
|
||||||
halfSize = size div 2
|
halfSize = size div 2
|
||||||
let
|
let
|
||||||
@ -100,7 +100,7 @@ proc naiveMul[T: BaseUint](x, y: T): MpUint[T] {.noSideEffect, noInit, inline.}=
|
|||||||
|
|
||||||
elif T.sizeof == 8: # uint64 or MpUint[uint32]
|
elif T.sizeof == 8: # uint64 or MpUint[uint32]
|
||||||
# We cannot double uint64 to uint128
|
# We cannot double uint64 to uint128
|
||||||
naiveMulImpl(cast[MpUint[uint32]](a), cast[MpUint[uint32]](b))
|
naiveMulImpl(x.toMpUint, y.toMpUint)
|
||||||
else:
|
else:
|
||||||
# Case: at least uint128 * uint128 --> uint256
|
# Case: at least uint128 * uint128 --> uint256
|
||||||
naiveMulImpl(a, b)
|
naiveMulImpl(x, y)
|
@ -34,7 +34,7 @@ proc `shl`*[T: MpUint](x: T, y: SomeInteger): T {.noInit, noSideEffect.}=
|
|||||||
if y == 0:
|
if y == 0:
|
||||||
return x
|
return x
|
||||||
|
|
||||||
let # TODO: should be a const - https://github.com/nim-lang/Nim/pull/5664
|
let # cannot be const, compile-time sizeof only works for simple types
|
||||||
size = (T.sizeof * 8)
|
size = (T.sizeof * 8)
|
||||||
halfSize = size div 2
|
halfSize = size div 2
|
||||||
|
|
||||||
@ -53,7 +53,7 @@ proc `shr`*[T: MpUint](x: T, y: SomeInteger): T {.noInit, noSideEffect.}=
|
|||||||
if y == 0:
|
if y == 0:
|
||||||
return x
|
return x
|
||||||
|
|
||||||
let # TODO: should be a const - https://github.com/nim-lang/Nim/pull/5664
|
let # cannot be const, compile-time sizeof only works for simple types
|
||||||
size = (T.sizeof * 8)
|
size = (T.sizeof * 8)
|
||||||
halfSize = size div 2
|
halfSize = size div 2
|
||||||
|
|
||||||
@ -65,3 +65,56 @@ proc `shr`*[T: MpUint](x: T, y: SomeInteger): T {.noInit, noSideEffect.}=
|
|||||||
else:
|
else:
|
||||||
result.hi = x.hi shr (y - halfSize)
|
result.hi = x.hi shr (y - halfSize)
|
||||||
result.lo = 0.Sub
|
result.lo = 0.Sub
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ########################################################################
|
||||||
|
# TODO Benchmarks (especially on ARM)
|
||||||
|
# Alternative shift implementations without branching
|
||||||
|
#
|
||||||
|
# Quick testing on MpUint[uint32] on x86_64 with Clang shows that it is somewhat slower
|
||||||
|
# Fast shifting is key to fast division and modulo operations
|
||||||
|
|
||||||
|
# proc `shl`*[T: MpUint](x: T, y: SomeInteger): T {.noInit, noSideEffect.}=
|
||||||
|
# ## Compute the `shift left` operation of x and y
|
||||||
|
# type Sub = getSubType T
|
||||||
|
#
|
||||||
|
# let # cannot be const, compile-time sizeof only works for simple types
|
||||||
|
# size = Sub(T.sizeof * 8)
|
||||||
|
# halfSize = size div 2
|
||||||
|
#
|
||||||
|
# var S = y.Sub and (size-1) # y mod size
|
||||||
|
#
|
||||||
|
# let
|
||||||
|
# M1 = Sub( ((((S + size-1) or S) and halfSize) div halfSize) - 1)
|
||||||
|
# M2 = Sub( (S div halfSize) - 1)
|
||||||
|
#
|
||||||
|
# S = S and (halfSize-1) # y mod halfsize
|
||||||
|
#
|
||||||
|
# result.hi = (x.lo shl S) and not M2
|
||||||
|
# result.lo = (x.lo shl S) and M2
|
||||||
|
# result.hi = result.hi or ((
|
||||||
|
# x.hi shl S or (x.lo shr (size - S) and M1)
|
||||||
|
# ) and M2)
|
||||||
|
|
||||||
|
# proc `shr`*[T: MpUint](x: T, y: SomeInteger): T {.noInit, noSideEffect.}=
|
||||||
|
# ## Compute the `shift right` operation of x and y
|
||||||
|
# type Sub = getSubType T
|
||||||
|
#
|
||||||
|
# let # cannot be const, compile-time sizeof only works for simple types
|
||||||
|
# size = Sub(T.sizeof * 8)
|
||||||
|
# halfSize = size div 2
|
||||||
|
#
|
||||||
|
# var S = y.Sub and (size-1) # y mod size
|
||||||
|
#
|
||||||
|
# let
|
||||||
|
# M1 = Sub( ((((S + size-1) or S) and halfSize) div halfSize) - 1)
|
||||||
|
# M2 = Sub( (S div halfSize) - 1)
|
||||||
|
#
|
||||||
|
# S = S and (halfSize-1) # y mod halfsize
|
||||||
|
#
|
||||||
|
# result.lo = (x.hi shr S) and not M2
|
||||||
|
# result.hi = (x.hi shr S) and M2
|
||||||
|
# result.lo = result.lo or ((
|
||||||
|
# x.lo shr S or (x.hi shl (size - S) and M1)
|
||||||
|
# ) and M2)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user