From cd1adc84c91b960ace245bac2041cf46998ae579 Mon Sep 17 00:00:00 2001 From: mratsim Date: Tue, 20 Mar 2018 16:04:19 +0100 Subject: [PATCH] ported-addition, bug in non-release mode for mul --- src/binary_ops/addsub_impl.nim | 45 ++++++++++ src/binary_ops/div_impl.nim | 56 ++++++++++++ src/binary_ops/mul_impl.nim | 99 +++++++++++++++++++++ src/mpint.nim | 16 ++-- src/private/casting.nim | 59 ------------ src/private/conversion.nim | 8 ++ src/uint_binary_ops.nim | 158 +++------------------------------ src/uint_init.nim | 66 +++++++++----- src/uint_type.nim | 46 ++++++---- 9 files changed, 303 insertions(+), 250 deletions(-) create mode 100644 src/binary_ops/addsub_impl.nim create mode 100644 src/binary_ops/div_impl.nim create mode 100644 src/binary_ops/mul_impl.nim delete mode 100644 src/private/casting.nim create mode 100644 src/private/conversion.nim diff --git a/src/binary_ops/addsub_impl.nim b/src/binary_ops/addsub_impl.nim new file mode 100644 index 0000000..80aa3c0 --- /dev/null +++ b/src/binary_ops/addsub_impl.nim @@ -0,0 +1,45 @@ +# Mpint +# Copyright 2018 Status Research & Development GmbH +# Licensed under either of +# +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) +# +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import ../uint_type + +proc `+=`*(x: var MpUint, y: MpUint) {.noSideEffect.}= + ## In-place addition for multi-precision unsigned int + # + # Optimized assembly should contain adc instruction (add with carry) + # Clang on MacOS does with the -d:release switch and MpUint[uint32] (uint64) + type SubT = type x.lo + let tmp = x.lo + + x.lo += y.lo + x.hi += SubT(x.lo < tmp) + y.hi + +proc `+`*(x, y: MpUint): MpUint {.noSideEffect, noInit, inline.}= + # Addition for multi-precision unsigned int + result = x + result += y + + debugEcho "+: " & $result + +proc `-=`*(x: var MpUint, y: MpUint) {.noSideEffect.}= + ## In-place substraction for multi-precision unsigned int + # + # Optimized assembly should contain sbb instruction (substract with borrow) + # Clang on MacOS does with the -d:release switch and MpUint[uint32] (uint64) + type SubT = type x.lo + let tmp = x.lo + + x.lo -= y.lo + x.hi -= SubT(x.lo > tmp) + y.hi + +proc `-`*(x, y: MpUint): MpUint {.noSideEffect, noInit, inline.}= + # Substraction for multi-precision unsigned int + result = x + result -= y + diff --git a/src/binary_ops/div_impl.nim b/src/binary_ops/div_impl.nim new file mode 100644 index 0000000..da9e639 --- /dev/null +++ b/src/binary_ops/div_impl.nim @@ -0,0 +1,56 @@ +# Mpint +# Copyright 2018 Status Research & Development GmbH +# Licensed under either of +# +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) +# +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# proc divmod*(x, y: MpUint): tuple[quot, rem: MpUint] {.noSideEffect.}= +# ## Division for multi-precision unsigned uint +# ## Returns quotient + reminder in a (quot, rem) tuple +# # +# # Implementation through binary shift division +# const zero = T() + +# when x.lo is MpUInt: +# const one = T(lo: getSubType(T)(1)) +# else: +# const one: getSubType(T) = 1 + +# if unlikely(y.isZero): +# raise newException(DivByZeroError, "You attempted to divide by zero") + +# var +# shift = x.bit_length - y.bit_length +# d = y shl shift + +# result.rem = x + +# while shift >= 0: +# result.quot += result.quot +# if result.rem >= d: +# result.rem -= d +# result.quot.lo = result.quot.lo or one + +# d = d shr 1 +# dec(shift) + +# # Performance note: +# # The performance of this implementation is extremely dependant on shl and shr. +# # +# # Probably the most efficient algorithm that can benefit from MpUInt data structure is +# # the recursive fast division by Burnikel and Ziegler (http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz): +# # - Python implementation: https://bugs.python.org/file11060/fast_div.py and discussion https://bugs.python.org/issue3451 +# # - C++ implementation: https://github.com/linbox-team/givaro/blob/master/src/kernel/recint/rudiv.h +# # - The Handbook of Elliptic and Hyperelliptic Cryptography Algorithm 10.35 on page 188 has a more explicit version of the div2NxN algorithm. This algorithm is directly recursive and avoids the mutual recursion of the original paper's calls between div2NxN and div3Nx2N. +# # - Comparison of fast division algorithms fro large integers: http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Hasselstrom2003.pdf + +# proc `div`*(x, y: MpUint): MpUint {.inline, noSideEffect.} = +# ## Division operation for multi-precision unsigned uint +# divmod(x,y).quot + +# proc `mod`*(x, y: MpUint): MpUint {.inline, noSideEffect.} = +# ## Division operation for multi-precision unsigned uint +# divmod(x,y).rem diff --git a/src/binary_ops/mul_impl.nim b/src/binary_ops/mul_impl.nim new file mode 100644 index 0000000..8f278e5 --- /dev/null +++ b/src/binary_ops/mul_impl.nim @@ -0,0 +1,99 @@ +# Mpint +# Copyright 2018 Status Research & Development GmbH +# Licensed under either of +# +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) +# +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import ../uint_type, ../uint_init +import ./addsub_impl + +## Implementation of multiplication + +# For our representation, it is similar to school grade multiplication +# Consider hi and lo as if they were digits +# +# 12 +# X 15 +# ------ +# 10 lo*lo -> z0 +# 5 hi*lo -> z1 +# 2 lo*hi -> z1 +# 10 hi*hi -- z2 +# ------ +# 180 +# +# If T is a type +# For T * T --> T we don't need to compute z2 as it always overflow +# For T * T --> 2T (uint64 * uint64 --> uint128) we use extra precision multiplication + +# See details at +# https://en.wikipedia.org/wiki/Karatsuba_algorithm +# https://locklessinc.com/articles/256bit_arithmetic/ +# https://www.miracl.com/press/missing-a-trick-karatsuba-variations-michael-scott +# +# We use the naive school grade multiplication instead of Karatsuba I.e. +# z1 = x.hi * y.lo + x.lo * y.hi (Naive) = (x.lo - x.hi)(y.hi - y.lo) + z0 + z2 (Karatsuba) +# +# On modern architecture: +# - addition and multiplication have the same cost +# - Karatsuba would require to deal with potentially negative intermediate result +# and introduce branching +# - More total operations means more register moves + +import typetraits + +proc `*`*(x, y: MpUint): MpUint {.noSideEffect.}= + ## Multiplication for multi-precision unsigned uint + + mixin naiveMul + + result = naiveMul(x.lo, y.lo) + result.hi += (naiveMul(x.hi, y.lo) + naiveMul(x.lo, y.hi)).lo + + debugEcho "*: " & $result + debugEcho "* type: " & $result.type.name + +# proc naiveMulImpl[bits: static[int]](x, y: MpUint[bits]): MpUint[bits * 2] {.noSideEffect.}= +# ## Naive multiplication algorithm with extended precision +# ## i.e. we use types twice bigger to do the multiplication +# ## and only keep the bottom part + +# mixin naiveMul + +# const +# halfSize = bits div 2 + +# let +# z0 = naiveMul(x.lo, y.lo) +# tmp = naiveMul(x.hi, y.lo) + +# var z1 = tmp +# z1 += naiveMul(x.hi, y.lo) +# let z2 = (z1 < tmp).T + naiveMul(x.hi, y.hi) + +# let tmp2 = z1.lo shl halfSize +# result.lo = tmp2 +# result.lo += z0 +# result.hi = (result.lo < tmp2).T + z2 + z1.hi + +# proc naiveMul*[bits: static[int]](x, y: MpUint[bits]): MpUint[bits * 2] {.noSideEffect.}= +# naiveMulImpl(x, y) + +proc naiveMul*(x, y: float): MpUint[16] {.noSideEffect.}= + result = toMpuint(x.uint16 * y.uint16) + +proc naiveMul*(x, y: uint16): MpUint[32] {.noSideEffect.}= + result = toMpuint(x.uint32 * y.uint32) + debugEcho "naiveMul cast:" & $result + +proc naiveMul*(x, y: uint32): MpUint[64] {.noSideEffect.}= + toMpuint(x.uint64 * y.uint64) + +# proc naiveMul*(x, y: uint64): MpUint[128] {.noSideEffect, noInit, inline.}= +# let x = x.toMpUint +# let y = y.toMpUint + +# naiveMulImpl[64](x, y) diff --git a/src/mpint.nim b/src/mpint.nim index 475409d..de75aeb 100644 --- a/src/mpint.nim +++ b/src/mpint.nim @@ -8,13 +8,13 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import ./uint_type, - ./uint_init, - ./uint_bitwise_ops, - ./uint_binary_ops, - ./uint_comparison + ./uint_init + #./uint_bitwise_ops, + #./uint_binary_ops, + #./uint_comparison export uint_type, - uint_init, - uint_bitwise_ops, - uint_binary_ops, - uint_comparison + uint_init + # uint_bitwise_ops, + # uint_binary_ops, + # uint_comparison diff --git a/src/private/casting.nim b/src/private/casting.nim deleted file mode 100644 index 9dfc288..0000000 --- a/src/private/casting.nim +++ /dev/null @@ -1,59 +0,0 @@ -# Mpint -# Copyright 2018 Status Research & Development GmbH -# Licensed under either of -# -# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) -# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) -# -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import ../uint_type, - macros - -macro getSubType*(T: typedesc): untyped = - ## Returns the subtype of a generic type - ## MpUint[uint32] --> uint32 - getTypeInst(T)[1][1] - -proc asUint*[T: MpUInt](n: T): auto {.noSideEffect, inline.}= - ## Casts a multiprecision integer to an uint of the same size - - when T.sizeof > 8: - raise newException("Unreachable. You are trying to cast a MpUint with more than 64-bit of precision") - elif T.sizeof == 8: - cast[uint64](n) - elif T.sizeof == 4: - cast[uint32](n) - elif T.sizeof == 2: - cast[uint16](n) - else: - raise newException("Unreachable. MpUInt must be 16-bit minimum and a power of 2") - -proc asUint*[T: SomeUnsignedInt](n: T): T {.noSideEffect, inline.}= - ## No-op overload of multi-precision int casting - n - -proc asDoubleUint*[T: BaseUint](n: T): auto {.noSideEffect, inline.} = - ## Convert an integer or MpUint to an uint with double the size - - type Double = ( - when T.sizeof == 4: uint64 - elif T.sizeof == 2: uint32 - else: uint16 - ) - - n.asUint.Double - - -proc toMpUint*[T: SomeInteger](n: T): auto {.noSideEffect, inline.} = - ## Cast an integer to the corresponding size MpUint - # Sometimes direct casting doesn't work and we must cast through a pointer - - when T is uint64: - return (cast[ptr [MpUint[uint32]]](unsafeAddr n))[] - elif T is uint32: - return (cast[ptr [MpUint[uint16]]](unsafeAddr n))[] - elif T is uint16: - return (cast[ptr [MpUint[uint8]]](unsfddr n))[] - else: - raise newException(ValueError, "You can only cast uint16, uint32 or uint64 to multiprecision integers") diff --git a/src/private/conversion.nim b/src/private/conversion.nim new file mode 100644 index 0000000..7705e19 --- /dev/null +++ b/src/private/conversion.nim @@ -0,0 +1,8 @@ +# Mpint +# Copyright 2018 Status Research & Development GmbH +# Licensed under either of +# +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) +# +# at your option. This file may not be copied, modified, or distributed except according to those terms. diff --git a/src/uint_binary_ops.nim b/src/uint_binary_ops.nim index d2e53e8..6359fa4 100644 --- a/src/uint_binary_ops.nim +++ b/src/uint_binary_ops.nim @@ -7,156 +7,22 @@ # # at your option. This file may not be copied, modified, or distributed except according to those terms. -import ./private/[bithacks, casting], - uint_type, - uint_comparison +import ./binary_ops/addsub_impl, + ./binary_ops/mul_impl -proc `+=`*[T: MpUint](x: var T, y: T) {.noSideEffect.}= - ## In-place addition for multi-precision unsigned int - # - # Optimized assembly should contain adc instruction (add with carry) - # Clang on MacOS does with the -d:release switch and MpUint[uint32] (uint64) - type SubT = type x.lo - let tmp = x.lo +export addsub_impl, mul_impl - x.lo += y.lo - x.hi += SubT(x.lo < tmp) + y.hi +when isMainModule: -proc `+`*[T: MpUint](x, y: T): T {.noSideEffect, noInit, inline.}= - # Addition for multi-precision unsigned int - result = x - result += y + import typetraits + import ./uint_init -proc `-=`*[T: MpUint](x: var T, y: T) {.noSideEffect.}= - ## In-place substraction for multi-precision unsigned int - # - # Optimized assembly should contain sbb instruction (substract with borrow) - # Clang on MacOS does with the -d:release switch and MpUint[uint32] (uint64) - type SubT = type x.lo - let tmp = x.lo + let a = toMpUint(10'u32) - x.lo -= y.lo - x.hi -= SubT(x.lo > tmp) + y.hi + echo "a: " & $a + echo "a+a: " & $(a+a) -proc `-`*[T: MpUint](x, y: T): T {.noSideEffect, noInit, inline.}= - # Substraction for multi-precision unsigned int - result = x - result -= y + let z = a * a + echo z + echo $z.type.name -proc naiveMul[T: BaseUint](x, y: T): MpUint[T] {.noSideEffect, noInit, inline.} - # Forward declaration - -proc `*`*[T: MpUint](x, y: T): T {.noSideEffect, noInit.}= - ## Multiplication for multi-precision unsigned uint - # - # For our representation, it is similar to school grade multiplication - # Consider hi and lo as if they were digits - # - # 12 - # X 15 - # ------ - # 10 lo*lo -> z0 - # 5 hi*lo -> z1 - # 2 lo*hi -> z1 - # 10 hi*hi -- z2 - # ------ - # 180 - # - # If T is a type - # For T * T --> T we don't need to compute z2 as it always overflow - # For T * T --> 2T (uint64 * uint64 --> uint128) we use extra precision multiplication - - result = naiveMul(x.lo, y.lo) - result.hi += (naiveMul(x.hi, y.lo) + naiveMul(x.lo, y.hi)).lo - -template naiveMulImpl[T: MpUint](x, y: T): MpUint[T] = - # See details at - # https://en.wikipedia.org/wiki/Karatsuba_algorithm - # https://locklessinc.com/articles/256bit_arithmetic/ - # https://www.miracl.com/press/missing-a-trick-karatsuba-variations-michael-scott - # - # We use the naive school grade multiplication instead of Karatsuba I.e. - # z1 = x.hi * y.lo + x.lo * y.hi (Naive) = (x.lo - x.hi)(y.hi - y.lo) + z0 + z2 (Karatsuba) - # - # On modern architecture: - # - addition and multiplication have the same cost - # - Karatsuba would require to deal with potentially negative intermediate result - # and introduce branching - # - More total operations means more register moves - - let - halfSize = T.sizeof * 4 - z0 = naiveMul(x.lo, y.lo) - tmp = naiveMul(x.hi, y.lo) - - var z1 = tmp - z1 += naiveMul(x.hi, y.lo) - let z2 = (z1 < tmp).T + naiveMul(x.hi, y.hi) - - let tmp2 = z1.lo shl halfSize - result.lo = tmp2 - result.lo += z0 - result.hi = (result.lo < tmp2).T + z2 + z1.hi - -proc naiveMul[T: BaseUint](x, y: T): MpUint[T] {.noSideEffect, noInit, inline.}= - ## Naive multiplication algorithm with extended precision - - when T.sizeof in {1, 2, 4}: - # Use types twice bigger to do the multiplication - cast[type result](x.asDoubleUint * y.asDoubleUint) - - elif T.sizeof == 8: # uint64 or MpUint[uint32] - # We cannot double uint64 to uint128 - naiveMulImpl(x.toMpUint, y.toMpUint) - else: - # Case: at least uint128 * uint128 --> uint256 - naiveMulImpl(x, y) - - -proc divmod*[T: BaseUint](x, y: T): tuple[quot, rem: T] {.noSideEffect.}= - ## Division for multi-precision unsigned uint - ## Returns quotient + reminder in a (quot, rem) tuple - # - # Implementation through binary shift division - const zero = T() - - when x.lo is MpUInt: - const one = T(lo: getSubType(T)(1)) - else: - const one: getSubType(T) = 1 - - if unlikely(y.isZero): - raise newException(DivByZeroError, "You attempted to divide by zero") - - var - shift = x.bit_length - y.bit_length - d = y shl shift - - result.rem = x - - while shift >= 0: - result.quot += result.quot - if result.rem >= d: - result.rem -= d - result.quot.lo = result.quot.lo or one - - d = d shr 1 - dec(shift) - - # Performance note: - # The performance of this implementation is extremely dependant on shl and shr. - # - # Probably the most efficient algorithm that can benefit from MpUInt data structure is - # the recursive fast division by Burnikel and Ziegler (http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz): - # - Python implementation: https://bugs.python.org/file11060/fast_div.py and discussion https://bugs.python.org/issue3451 - # - C++ implementation: https://github.com/linbox-team/givaro/blob/master/src/kernel/recint/rudiv.h - # - The Handbook of Elliptic and Hyperelliptic Cryptography Algorithm 10.35 on page 188 has a more explicit version of the div2NxN algorithm. This algorithm is directly recursive and avoids the mutual recursion of the original paper's calls between div2NxN and div3Nx2N. - # - Comparison of fast division algorithms fro large integers: http://bioinfo.ict.ac.cn/~dbu/AlgorithmCourses/Lectures/Hasselstrom2003.pdf - -proc `div`*[T: BaseUint](x, y: T): T {.inline, noSideEffect.} = - ## Division operation for multi-precision unsigned uint - divmod(x,y).quot - -proc `mod`*[T: BaseUint](x, y: T): T {.inline, noSideEffect.} = - ## Division operation for multi-precision unsigned uint - divmod(x,y).rem diff --git a/src/uint_init.nim b/src/uint_init.nim index d0604a3..2898941 100644 --- a/src/uint_init.nim +++ b/src/uint_init.nim @@ -9,27 +9,53 @@ import typetraits -import ./private/[bithacks, casting], - ./uint_type +import ./uint_type -proc initMpUint*[T: BaseUint; U: BaseUInt](n: T, base_type: typedesc[U]): MpUint[U] {.noSideEffect.} = - when not (T is type result): - let len = n.bit_length - const size = sizeof(U) * 8 - if len >= 2 * size: - # Todo print n - raise newException(ValueError, "Input cannot be stored in a multi-precision integer of base " & $T.name & - "\nIt requires at least " & $len & " bits of precision") - elif len < size: - result.lo = n.U # TODO: converter for MpInts - else: # Both have the same size and memory representation - assert len == size - n.toMpUint +proc toMpUint*(n: uint16): MpUint[16] {.noInit, inline, noSideEffect.}= + ## Cast an integer to the corresponding size MpUint + cast[MpUint[16]](n) + +proc toMpUint*(n: uint32): MpUint[32] {.noInit, inline, noSideEffect.}= + ## Cast an integer to the corresponding size MpUint + cast[MpUint[32]](n) + +proc toMpUint*(n: uint64): MpUint[64] {.noInit, inline, noSideEffect.}= + ## Cast an integer to the corresponding size MpUint + cast[MpUint[64]](n) + +proc initMpUint*(n: SomeUnsignedInt, bits: static[int]): MpUint[bits] {.noSideEffect.} = + + const nb_bits = n.sizeof * 8 + + static: + assert (bits and (bits-1)) == 0, $bits & " is not a power of 2" + assert bits >= 16, "The number of bits in a should be greater or equal to 16" + assert nb_bits <= bits, "Input cannot be stored in a " & $bits "-bit multi-precision unsigned integer\n" & + "It requires at least " & nb_bits & " bits of precision" + + when nb_bits <= bits div 2: + result.lo = (type result.lo)(n) else: - n + result = n.toMpUint -proc u128*[T: BaseUInt](n: T): UInt128 {.noSideEffect, inline.}= - initMpUint(n, uint64) + ## TODO: The current initMpUint prevents the following: + ## let a = 10 + ## let b = initMpUint[16](a) + ## + ## It should use bit_length to get the number of bits needed instead -proc u256*[T: BaseUInt](n: T): UInt256 {.noSideEffect, inline.}= - initMpUint(n, UInt256) +# proc u128*[T: BaseUInt](n: T): UInt128 {.noSideEffect, inline.}= +# initMpUint(n, uint64) + +# proc u256*[T: BaseUInt](n: T): UInt256 {.noSideEffect, inline.}= +# initMpUint(n, UInt256) + + +when isMainModule: + + let a = 10'u16 + + let b = a.toMpUint + + echo b.repr + echo b diff --git a/src/uint_type.nim b/src/uint_type.nim index 05732d2..674e2d3 100644 --- a/src/uint_type.nim +++ b/src/uint_type.nim @@ -8,24 +8,36 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. type - MpUint*{.packed.}[BaseUint] = object + # TODO: The following is a hacky workaround + # due to: + # - https://github.com/nim-lang/Nim/issues/7230 + # - https://github.com/nim-lang/Nim/issues/7378 + # - https://github.com/nim-lang/Nim/issues/7379 + + BitsHolder[bits: static[int]] = object + +type + MpUintImpl[bh] = object + # TODO: when gcc/clang defines it use the builtin uint128 when system.cpuEndian == littleEndian: - lo*, hi*: BaseUint + when bh is BitsHolder[128]: lo*, hi*: uint64 + elif bh is BitsHolder[64]: lo*, hi*: uint32 + elif bh is BitsHolder[32]: lo*, hi*: uint16 + elif bh is BitsHolder[16]: lo*, hi*: uint8 + + # The following cannot be implemented recursively yet + elif bh is BitsHolder[256]: lo*, hi*: MpUintImpl[BitsHolder[128]] + # else: + # Not implemented else: - hi*, lo*: BaseUint + when bh is BitsHolder[128]: hi*, lo*: uint64 + elif bh is BitsHolder[64]: hi*, lo*: uint32 + elif bh is BitsHolder[32]: hi*, lo*: uint16 + elif bh is BitsHolder[16]: hi*, lo*: uint8 - BaseUint* = SomeUnsignedInt or MpUint + # The following cannot be implemented recursively yet + elif bh is BitsHolder[256]: hi*, lo*: MpUintImpl[BitsHolder[128]] + # else: + # Not implemented - - UInt128* = MpUint[uint64] - UInt256* = MpUint[UInt128] - - -template convBool(typ: typedesc): untyped = - converter boolMpUint*(b: bool): MpUint[typ] {.noSideEffect, inline.}= - result.lo = b.typ - -convBool(uint8) -convBool(uint16) -convBool(uint32) -convBool(uint64) + MpUint*[bits: static[int]] = MpUintImpl[BitsHolder[bits]]