# Stint
# Copyright 2018 Status Research & Development GmbH
# Licensed under either of
#
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
#
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# ############################################################
#
# Extended precision primitives
#
# ############################################################
import
|
|
|
|
../datatypes,
|
|
|
|
./addcarry_subborrow
|
|
|
|
|
|
|
|
# ############################################################
#
# 32-bit words
#
# ############################################################

func div2n1n*(q, r: var uint32, n_hi, n_lo, d: uint32) {.inline.}=
  ## Divides the 64-bit number (n_hi, n_lo) by the 32-bit divisor `d`,
  ## storing the quotient in `q` and the remainder in `r`.
  ## Warning ⚠️ :
  ##   - if n_hi == d, the quotient does not fit in an uint32
  ##   - if n_hi > d the result is undefined
  ##
  ## To avoid issues, n_hi, n_lo, d should be normalized,
  ## i.e. shifted (== multiplied by the same power of 2)
  ## so that the most significant bit in d is set.
  let
    num = (uint64(n_hi) shl 32) or uint64(n_lo)
    den = uint64(d)
  q = uint32(num div den)
  r = uint32(num mod den)
func mul*(hi, lo: var uint32, a, b: uint32) {.inline.} =
  ## Extended precision multiplication:
  ## (hi, lo) <- a*b, the full 64-bit product split in two words.
  let product = uint64(a) * uint64(b)
  when nimvm:
    # The compile-time VM rejects lossy narrowing conversions,
    # so mask the low word explicitly before converting.
    lo = uint32(product and uint32.high)
  else:
    lo = uint32(product)
  hi = uint32(product shr 32)
func muladd1*(hi, lo: var uint32, a, b, c: uint32) {.inline.} =
  ## Extended precision multiply-add:
  ## (hi, lo) <- a*b + c
  ##
  ## Overflow-free: 0xFFFFFFFF² = (hi: 0xFFFFFFFE, lo: 0x00000001),
  ## so adding any 32-bit c cannot overflow the double-word result.
  let acc = uint64(a) * uint64(b) + uint64(c)
  when nimvm:
    # Mask before narrowing: the compile-time VM rejects lossy conversions.
    lo = uint32(acc and uint32.high)
  else:
    lo = uint32(acc)
  hi = uint32(acc shr 32)
func muladd2*(hi, lo: var uint32, a, b, c1, c2: uint32) {.inline.}=
  ## Extended precision multiply-add with two addends:
  ## (hi, lo) <- a*b + c1 + c2
  ## This is constant-time on most hardware except some specific one like Cortex M0
  ##
  ## Overflow-free: 0xFFFFFFFF² = (hi: 0xFFFFFFFE, lo: 0x00000001);
  ## adding 0xFFFFFFFF gives (hi: 0xFFFFFFFF, lo: 0x00000000), leaving
  ## room to add another 0xFFFFFFFF without overflowing the double word.
  let acc = uint64(a) * uint64(b) + uint64(c1) + uint64(c2)
  when nimvm:
    # Mask before narrowing: the compile-time VM rejects lossy conversions.
    lo = uint32(acc and uint32.high)
  else:
    lo = uint32(acc)
  hi = uint32(acc shr 32)
# ############################################################
#
# 64-bit words
#
# ############################################################

when sizeof(int) == 8 and not defined(Stint32):
  # 64-bit limbs: runtime code uses 128-bit extended-precision primitives
  # (per-compiler implementations imported below), while the compile-time
  # VM (nimvm) uses pure-Nim fallbacks.
  from ./compiletime_fallback import div2n1n_nim, mul_nim, muladd1_nim, muladd2_nim

  when defined(vcc):
    # MSVC: dedicated x86-64 wrappers (MSVC has no __uint128_t).
    from ./extended_precision_x86_64_msvc import div2n1n_128, mul_128, muladd1_128, muladd2_128
  elif GCC_Compatible:
    when X86:
      # GCC/Clang on x86: division comes from the x86-64-specific module,
      # the multiply variants from the generic uint128 module.
      from ./extended_precision_x86_64_gcc import div2n1n_128
      from ./extended_precision_64bit_uint128 import mul_128, muladd1_128, muladd2_128
    else:
      # Other GCC-compatible 64-bit targets: everything from the uint128 module.
      from ./extended_precision_64bit_uint128 import div2n1n_128, mul_128, muladd1_128, muladd2_128

  func mul*(hi, lo: var uint64, u, v: uint64) {.inline.}=
    ## Extended precision multiplication
    ## (hi, lo) <- u * v
    when nimvm:
      mul_nim(hi, lo, u, v)     # pure-Nim fallback for the compile-time VM
    else:
      mul_128(hi, lo, u, v)     # 128-bit-backed implementation at runtime

  func muladd1*(hi, lo: var uint64, a, b, c: uint64) {.inline.}=
    ## Extended precision multiplication + addition
    ## (hi, lo) <- a*b + c
    ##
    ## Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFFFFFFFFFE, lo: 0x0000000000000001)
    ## so adding any c cannot overflow
    when nimvm:
      muladd1_nim(hi, lo, a, b, c)
    else:
      muladd1_128(hi, lo, a, b, c)

  func muladd2*(hi, lo: var uint64, a, b, c1, c2: uint64) {.inline.}=
    ## Extended precision multiplication + addition + addition
    ## (hi, lo) <- a*b + c1 + c2
    ##
    ## Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFFFFFFFFFE, lo: 0x0000000000000001)
    ## so adding 0xFFFFFFFFFFFFFFFF leads to (hi: 0xFFFFFFFFFFFFFFFF, lo: 0x0000000000000000)
    ## and we have enough space to add again 0xFFFFFFFFFFFFFFFF without overflowing
    when nimvm:
      muladd2_nim(hi, lo, a, b, c1, c2)
    else:
      muladd2_128(hi, lo, a, b, c1, c2)

  func div2n1n*(q, r: var uint64, n_hi, n_lo, d: uint64) {.inline.}=
    ## Division uint128 by uint64
    ## Warning ⚠️ :
    ## - if n_hi == d, quotient does not fit in an uint64 and will throw SIGFPE
    ## - if n_hi > d result is undefined
    when nimvm:
      div2n1n_nim(q, r, n_hi, n_lo, d)
    else:
      div2n1n_128(q, r, n_hi, n_lo, d)
# ############################################################
#
# Composite primitives
#
# ############################################################

func mulDoubleAdd2*[T: uint32|uint64](r2: var Carry, r1, r0: var T, a, b, c: T, dHi: Carry, dLo: T) {.inline.} =
  ## (r2, r1, r0) <- 2*a*b + c + (dHi, dLo)
  ## with r = (r2, r1, r0) a triple-word number
  ## and d = (dHi, dLo) a double-word number
  ## r2 and dHi are carries, either 0 or 1
  ##
  ## The comments below track the worst-case (all-ones inputs) value of
  ## the accumulator after each step to show that no step can overflow
  ## past r2.

  var carry: Carry

  # (r1, r0) <- a*b
  # Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001)
  mul(r1, r0, a, b)

  # (r2, r1, r0) <- 2*a*b
  # Then (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001) * 2
  # (carry: 1, hi: 0xFFFFFFFF_FFFFFFFC, lo: 0x00000000_00000002)
  # Doubling is done by adding each word to itself; the top carry lands in r2.
  addC(carry, r0, r0, r0, Carry(0))
  addC(r2, r1, r1, r1, carry)

  # (r1, r0) <- (r1, r0) + c
  # Adding any uint64 cannot overflow into r2 for example Adding 2^64-1
  # (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
  addC(carry, r0, r0, c, Carry(0))
  addC(carry, r1, r1, T(0), carry)  # carry-out is provably 0 here, safe to discard

  # (r1, r0) <- (r1, r0) + (dHi, dLo) with dHi a carry (previous limb r2)
  # (dHi, dLo) is at most (dhi: 1, dlo: 0xFFFFFFFF_FFFFFFFF)
  # summing into (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
  # result at most in (carry: 1, hi: 0xFFFFFFFF_FFFFFFFF, lo: 0x00000000_00000000)
  addC(carry, r0, r0, dLo, Carry(0))
  addC(carry, r1, r1, T(dHi), carry)  # carry-out is provably 0 here, safe to discard
func mulAcc*[T: uint32|uint64](t, u, v: var T, a, b: T) {.inline.} =
  ## Multiply-accumulate into a triple-word accumulator:
  ## (t, u, v) <- (t, u, v) + a * b
  var pHi, pLo: T   # double-word product of a and b
  var cc: Carry
  mul(pHi, pLo, a, b)
  # Add the low product word into v, then ripple the carry
  # through u and finally into t.
  addC(cc, v, v, pLo, Carry(0))
  addC(cc, u, u, pHi, cc)
  t += T(cc)