diff --git a/src/private/bithacks.nim b/src/private/bithacks.nim
index 51aff33..6fbed1b 100644
--- a/src/private/bithacks.nim
+++ b/src/private/bithacks.nim
@@ -7,43 +7,12 @@
 #
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
-import ./uint_type
+import ./uint_type, bitops
 
-# Compiler defined const: https://github.com/nim-lang/Nim/wiki/Consts-defined-by-the-compiler
-const withBuiltins = defined(gcc) or defined(clang)
-
-when withBuiltins:
-  proc builtin_clz(n: cuint): cint {.importc: "__builtin_clz", nodecl.}
-  proc builtin_clz(n: culong): cint {.importc: "__builtin_clzl", nodecl.}
-  proc builtin_clz(n: culonglong): cint {.importc: "__builtin_clzll", nodecl.}
-  type TbuiltinSupported = cuint or culong or culonglong
-    # Warning ⚠: if n = 0, clz is undefined
-
-template bit_length_impl[T: SomeUnsignedInt or Natural or int](n: T, result: int) =
-  # For some reason using "SomeUnsignedInt or Natural" directly makes Nim compiler
-  # throw a type mismatch in a proc, we use a template as a workaround
-  # Plus the template doesn't match natural with int :/
-  when withBuiltins and T is TbuiltinSupported:
-    result = if n == T(0): 0 # Removing this branch would make divmod 4x faster :/
-             else: T.sizeof * 8 - builtin_clz(n)
-
-  else:
-    # The biggest optimization for the naive implementation
-    # is making sure this is inline
-    # This is the difference between returning in 2ms or 10+ second
-    # on a 1000000000 times test loop.
-    var x = n
-    while x != T(0):
-      x = x shr 1
-      inc(result)
-
-proc bit_length*(n: SomeUnsignedInt): int {.noSideEffect, inline.}=
-  ## Calculates how many bits are necessary to represent the number
-  bit_length_impl(n, result)
-
-proc bit_length*(n: Natural): int {.noSideEffect, inline.}=
-  ## Calculates how many bits are necessary to represent the number
-  bit_length_impl(n, result)
+proc bit_length*(x: SomeInteger): int {.inline, noSideEffect.}=
+  ## Calculates how many bits are necessary to represent the number
+  if x == 0: 0
+  else: fastlog2(x) + 1 # fastlog2 is floor(log2 x), the index of the top set bit; the bit count is one more
 
 proc bit_length*(n: MpUintImpl): int {.noSideEffect.}=
   ## Calculates how many bits are necessary to represent the number
diff --git a/src/private/conversion.nim b/src/private/conversion.nim
index ad4f517..3430853 100644
--- a/src/private/conversion.nim
+++ b/src/private/conversion.nim
@@ -10,15 +10,13 @@
 import ./uint_type,
         macros
 
-template convBool(typ: typedesc): untyped =
-  # needed for carry conversion
-  converter boolMpUint*(b: bool): MpUintImpl[typ] {.noSideEffect, inline.}=
-    result.lo = b.typ
 
-convBool(uint8)
-convBool(uint16)
-convBool(uint32)
-convBool(uint64)
+proc toSubtype*[T: SomeInteger](b: bool, typ: typedesc[T]): T {.noSideEffect, inline.}=
+  b.T
+
+proc toSubtype*[T: MpUintImpl](b: bool, typ: typedesc[T]): T {.noSideEffect, inline.}=
+  type SubTy = type result.lo
+  result.lo = toSubtype(b, SubTy)
 
 proc zero*(typ: typedesc[BaseUint]): typ {.compileTime.} =
   typ()
diff --git a/src/private/uint_binary_ops.nim b/src/private/uint_binary_ops.nim
index fb471d3..f041280 100644
--- a/src/private/uint_binary_ops.nim
+++ b/src/private/uint_binary_ops.nim
@@ -21,7 +21,7 @@ proc `+=`*(x: var MpUintImpl, y: MpUintImpl) {.noSideEffect, inline.}=
   let tmp = x.lo
 
   x.lo += y.lo
-  x.hi += SubTy(x.lo < tmp) + y.hi
+  x.hi += (x.lo < tmp).toSubtype(SubTy) + y.hi
 
 proc `+`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit, inline.}=
   # Addition for multi-precision unsigned int
@@ -37,14 +37,33 @@ proc `-=`*(x: var MpUintImpl, y: MpUintImpl) {.noSideEffect, inline.}=
   let tmp = x.lo
 
   x.lo -= y.lo
-  x.hi -= SubTy(x.lo > tmp) + y.hi
+  x.hi -= (x.lo > tmp).toSubtype(SubTy) + y.hi
 
 proc `-`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit, inline.}=
   # Substraction for multi-precision unsigned int
   result = x
   result -= y
 
-template naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] =
+
+proc naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}
+  # Forward declaration
+
+proc naiveMul[T: BaseUint](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
+  ## Naive multiplication algorithm with extended precision
+
+  when T.sizeof in {1, 2, 4}:
+    # Use types twice bigger to do the multiplication
+    cast[type result](x.asDoubleUint * y.asDoubleUint)
+
+  elif T.sizeof == 8: # uint64 or MpUint[uint32]
+    # We cannot double uint64 to uint128
+    naiveMulImpl(x.toMpUintImpl, y.toMpUintImpl)
+  else:
+    # Case: at least uint128 * uint128 --> uint256
+    naiveMulImpl(x, y)
+
+
+proc naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
   # See details at
   # https://en.wikipedia.org/wiki/Karatsuba_algorithm
   # https://locklessinc.com/articles/256bit_arithmetic/
@@ -66,27 +85,12 @@ template naiveMulImpl[T: MpUintImpl](x, y: T): MpUintImpl[T] =
 
   var z1 = tmp
   z1 += naiveMul(x.hi, y.lo)
-  let z2 = (z1 < tmp).T + naiveMul(x.hi, y.hi)
+  let z2 = (z1 < tmp).toSubtype(T) + naiveMul(x.hi, y.hi)
 
   let tmp2 = z1.lo shl halfSize
   result.lo = tmp2
   result.lo += z0
-  result.hi = (result.lo < tmp2).T + z2 + z1.hi
-
-proc naiveMul[T: BaseUint](x, y: T): MpUintImpl[T] {.noSideEffect, noInit, inline.}=
-  ## Naive multiplication algorithm with extended precision
-
-  when T.sizeof in {1, 2, 4}:
-    # Use types twice bigger to do the multiplication
-    cast[type result](x.asDoubleUint * y.asDoubleUint)
-
-  elif T.sizeof == 8: # uint64 or MpUint[uint32]
-    # We cannot double uint64 to uint128
-    naiveMulImpl(x.toMpUint, y.toMpUint)
-  else:
-    # Case: at least uint128 * uint128 --> uint256
-    naiveMulImpl(x, y)
-
+  result.hi = (result.lo < tmp2).toSubtype(T) + z2 + z1.hi
 
 proc `*`*(x, y: MpUintImpl): MpUintImpl {.noSideEffect, noInit.}=
   ## Multiplication for multi-precision unsigned uint
diff --git a/src/private/uint_comparison.nim b/src/private/uint_comparison.nim
index 36ae35e..919a62b 100644
--- a/src/private/uint_comparison.nim
+++ b/src/private/uint_comparison.nim
@@ -17,8 +17,8 @@ proc `<=`*(x, y: MpUintImpl): bool {.noSideEffect, noInit, inline.}=
   result = if x == y: true
            else: x < y
 
-proc isZero[T: SomeUnsignedInt](n: T): bool {.noSideEffect,inline.} =
-  n == 0.T
+proc isZero*(n: SomeUnsignedInt): bool {.noSideEffect,inline.} =
+  n == 0
 
 proc isZero*(n: MpUintImpl): bool {.noSideEffect,inline.} =
   n.lo.isZero and n.hi.isZero
diff --git a/src/uint_init.nim b/src/uint_init.nim
index b8861cb..9aa8986 100644
--- a/src/uint_init.nim
+++ b/src/uint_init.nim
@@ -14,15 +14,15 @@ import ./private/bithacks, ./private/conversion,
 
 import typetraits
 
-proc initMpUint*(n: SomeUnsignedInt, bits: static[int]): MpUint[bits] {.noSideEffect.} =
-
+proc initMpUint*(n: SomeInteger, bits: static[int]): MpUint[bits] {.noSideEffect.} =
+  assert n >= 0
   when result.data is MpuintImpl:
     type SubTy = type result.data.lo
 
     let len = n.bit_length
     if len > bits:
-      # Todo print n
-      raise newException(ValueError, "Input cannot be stored in a multi-precision " & $bits & "-bit integer." &
+      raise newException(ValueError, "Input " & $n & " cannot be stored in a multi-precision " &
+                                     $bits & "-bit integer." &
                                      "\nIt requires at least " & $len & " bits of precision")
-    elif len < bits div 2:
+    elif len <= bits div 2: # a value of exactly bits/2 bits still fits entirely in the low word
       result.data.lo = SubTy(n)
@@ -35,12 +35,6 @@ proc initMpUint*(n: SomeUnsignedInt, bits: static[int]): MpUint[bits] {.noSideEf
       elif bits == 64:
         result.data = toMpUintImpl n.uint64
       else:
-        {.fatal, "unreachable".}
+        raise newException(ValueError, "Fatal")
   else:
     result.data = (type result.data)(n)
-
-proc u128*(n: SomeUnsignedInt): MpUint[128] {.noSideEffect, inline, noInit.}=
-  initMpUint[128](n)
-
-proc u256*(n: SomeUnsignedInt): MpUint[256] {.noSideEffect, inline, noInit.}=
-  initMpUint[256](n)
diff --git a/src/uint_public.nim b/src/uint_public.nim
index 51e18e5..83adac5 100644
--- a/src/uint_public.nim
+++ b/src/uint_public.nim
@@ -14,6 +14,13 @@ type
   UInt128* = MpUint[128]
   UInt256* = MpUint[256]
 
+template make_conv(conv_name: untyped, size: int): untyped =
+  proc `convname`*(n: SomeInteger): MpUint[size] {.noSideEffect, inline, noInit.}=
+    n.initMpUint(size)
+
+make_conv(u128, 128)
+make_conv(u256, 256)
+
 template make_unary(op, ResultTy): untyped =
   proc `op`*(x: MpUint): ResultTy {.noInit, inline, noSideEffect.} =
     when resultTy is MpUint: