2020-02-10 18:16:34 +01:00
|
|
|
# Constantine
|
|
|
|
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
|
|
|
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
|
|
|
|
# Licensed and distributed under either of
|
|
|
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
|
|
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
|
|
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# BigInt Raw representation and operations
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# This file holds the raw operations done on big ints
|
|
|
|
|
# The representation is optimized for:
|
|
|
|
|
# - constant-time (not leaking secret data via side-channel)
|
2020-02-11 14:28:21 +01:00
|
|
|
# - generated code size, datatype size and stack usage
|
2020-02-10 18:16:34 +01:00
|
|
|
# - performance
|
|
|
|
|
# in this order
|
|
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
# Design
|
|
|
|
|
|
|
|
|
|
# To avoid carry issues we don't use the
|
|
|
|
|
# most significant bit of each machine word.
|
|
|
|
|
# i.e. for a uint64 base we only use 63-bit.
|
|
|
|
|
# More info: https://github.com/status-im/nim-constantine/wiki/Constant-time-arithmetics#guidelines
|
|
|
|
|
# Especially:
|
|
|
|
|
# - https://bearssl.org/bigint.html
|
|
|
|
|
# - https://cryptojedi.org/peter/data/pairing-20131122.pdf
|
|
|
|
|
# - http://docs.milagro.io/en/amcl/milagro-crypto-library-white-paper.html
|
|
|
|
|
#
|
|
|
|
|
# Note that this might also be beneficial in terms of performance.
|
|
|
|
|
# Due to opcode latency, on Nehalem ADC is 6 times slower than ADD
|
|
|
|
|
# if it has dependencies (i.e the ADC depends on a previous ADC result)
|
|
|
|
|
#
|
|
|
|
|
# Control flow should only depend on the static maximum number of bits
|
|
|
|
|
# This number is defined per Finite Field/Prime/Elliptic Curve
|
|
|
|
|
#
|
|
|
|
|
# We internally order the limbs in little-endian
|
|
|
|
|
# So the least significant limb is limb[0]
|
|
|
|
|
# This is independent from the base type endianness.
|
|
|
|
|
#
|
|
|
|
|
# Constantine uses Nim generic integer to prevent mixing
|
|
|
|
|
# BigInts of different bitlength at compile-time and
|
|
|
|
|
# properly statically size the BigInt buffers.
|
|
|
|
|
#
|
|
|
|
|
# To avoid code-bloat due to monomorphization (i.e. duplicating code per announced bitlength)
|
|
|
|
|
# actual computation is deferred to type-erased routines.
|
|
|
|
|
|
|
|
|
|
import
|
2020-02-12 00:20:31 +01:00
|
|
|
../primitives/constant_time,
|
|
|
|
|
../primitives/extended_precision,
|
|
|
|
|
../config/common
|
2020-02-10 18:16:34 +01:00
|
|
|
from sugar import distinctBase
|
|
|
|
|
|
2020-02-12 00:20:31 +01:00
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# BigInts type-erased API
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
2020-02-10 18:16:34 +01:00
|
|
|
|
2020-02-12 00:20:31 +01:00
|
|
|
# The "checked" API is exported as building blocks
|
|
|
|
|
# with enforced compile-time checking of BigInt bitsize
|
|
|
|
|
# and memory ownership.
|
|
|
|
|
#
|
|
|
|
|
# The "raw" compute API uses views to avoid code duplication
|
|
|
|
|
# due to generic/static monomorphization.
|
|
|
|
|
#
|
|
|
|
|
# The "checked" API is a thin wrapper above the "raw" API to get the best of both worlds:
|
|
|
|
|
# - small code footprint
|
|
|
|
|
# - compiler enforced checks: types, bitsizes
|
|
|
|
|
# - compiler enforced memory: stack allocation and buffer ownership
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
type
  BigIntView* = ptr object
    ## Type-erased fixed-precision big integer
    ##
    ## This type mirrors the BigInt type and is used
    ## for the low-level computation API
    ## This design
    ## - avoids code bloat due to generic monomorphization
    ##   otherwise each bigint routine would have an instantiation for
    ##   each static `bits` parameter.
    ## - while not forcing the caller to preallocate computation buffers
    ##   for the high-level API and enforcing bitsizes
    ## - avoids runtime bound-checks on the view
    ##   for performance
    ##   and to ensure exception-free code
    ##   even when compiled in non "-d:danger" mode
    ##
    ## As with the BigInt type:
    ## - "bitLength" is the internal bitlength of the integer
    ##   This differs from the canonical bit-length as
    ##   Constantine word-size is smaller than a machine word.
    ##   This value should never be used as-is to prevent leaking secret data.
    ##   Computing this value requires constant-time operations.
    ##   Using this value requires converting it to the # of limbs in constant-time
    ##
    ## - "limbs" is an internal field that holds the internal representation
    ##   of the big integer. Least-significant limb first. Within limbs words are native-endian.
    ##
    ## This internal representation can be changed
    ## without notice and should not be used by external applications or libraries.
    ##
    ## Accesses should be done via BigIntViewConst / BigIntViewMut
    ## to have the compiler check for mutability
    bitLength: uint32
    limbs: UncheckedArray[Word]

  # "Indirection" to enforce pointer types deep immutability:
  # the two distinct types below wrap the same pointer type, and the accessors
  # defined for each decide whether limbs can be written through the view.
  BigIntViewConst* = distinct BigIntView
    ## Immutable view into a BigInt
  BigIntViewMut* = distinct BigIntView
    ## Mutable view into a BigInt
  BigIntViewAny* = BigIntViewConst or BigIntViewMut
|
|
|
|
|
|
|
|
|
|
# No exceptions allowed:
# the pragma below applies an empty `raises` list to the routines that follow,
# so the compiler rejects any of them that could raise a tracked exception.
{.push raises: [].}
|
|
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
2020-02-12 00:20:31 +01:00
|
|
|
# Deep Mutability safety
|
2020-02-10 18:16:34 +01:00
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
|
|
|
|
|
template `[]`*(v: BigIntViewConst, limbIdx: int): Word =
  ## Read the limb at position `limbIdx` of an immutable view.
  ## Limb 0 is the least-significant limb.
  ## `limbs` is an `UncheckedArray`: `limbIdx` is not bounds-checked.
  distinctBase(type v)(v).limbs[limbIdx]
|
|
|
|
|
|
|
|
|
|
template `[]`*(v: BigIntViewMut, limbIdx: int): var Word =
  ## Access the limb at position `limbIdx` of a mutable view.
  ## The limb is returned as `var`, so it can be updated in place.
  ## `limbs` is an `UncheckedArray`: `limbIdx` is not bounds-checked.
  distinctBase(type v)(v).limbs[limbIdx]
|
|
|
|
|
|
|
|
|
|
template `[]=`*(v: BigIntViewMut, limbIdx: int, val: Word) =
  ## Store `val` in the limb at position `limbIdx` of a mutable view.
  ## `limbs` is an `UncheckedArray`: `limbIdx` is not bounds-checked.
  distinctBase(type v)(v).limbs[limbIdx] = val
|
|
|
|
|
|
|
|
|
|
template bitSizeof(v: BigIntViewAny): uint32 =
  # Returns the **internal** bit length stored in the header of the view
  # (the `bitLength` field), for either a const or a mutable view.
  # `bind` forces `BigIntView` to be resolved in this module's scope
  # rather than at the template instantiation site.
  bind BigIntView
  distinctBase(type v)(v).bitLength
|
|
|
|
|
|
|
|
|
|
# Right-shift amount used by `numLimbs` in place of a division
# by the physical word size.
# NOTE(review): shift == division only if WordPhysBitSize is a power of two — confirm.
const divShiftor = log2(WordPhysBitSize)
template numLimbs*(v: BigIntViewAny): int =
  ## Compute the number of limbs from
  ## the **internal** bitlength
  ##
  ## The internal bitlength is rounded up to the next multiple
  ## of `WordPhysBitSize` before being shifted right by `divShiftor`.
  (bitSizeof(v).int + WordPhysBitSize - 1) shr divShiftor
|
|
|
|
|
|
|
|
|
|
template setBitLength(v: BigIntViewMut, internalBitLength: uint32) =
  ## Overwrite the **internal** bit length stored in the header of a mutable view.
  ## The limbs are not touched.
  distinctBase(type v)(v).bitLength = internalBitLength
|
|
|
|
|
|
|
|
|
|
# TODO: Check if repeated v.numLimbs calls are optimized away
|
|
|
|
|
|
|
|
|
|
template `[]`*(v: BigIntViewConst, limbIdxFromEnd: BackwardsIndex): Word =
  ## Read a limb of an immutable view, counting from the most-significant end:
  ## `v[^1]` is the limb at index `numLimbs(v) - 1`.
  ## `numLimbs(v)` is re-evaluated at every use. No bounds check is performed.
  distinctBase(type v)(v).limbs[numLimbs(v).int - int limbIdxFromEnd]
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
template `[]`*(v: BigIntViewMut, limbIdxFromEnd: BackwardsIndex): var Word =
  ## Access a limb of a mutable view, counting from the most-significant end:
  ## `v[^1]` is the limb at index `numLimbs(v) - 1`.
  ## The limb is returned as `var`, so it can be updated in place.
  ## `numLimbs(v)` is re-evaluated at every use. No bounds check is performed.
  distinctBase(type v)(v).limbs[numLimbs(v).int - int limbIdxFromEnd]
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
template `[]=`*(v: BigIntViewMut, limbIdxFromEnd: BackwardsIndex, val: Word) =
  ## Store `val` in a limb of a mutable view, counting from the most-significant end:
  ## `v[^1] = val` writes the limb at index `numLimbs(v) - 1`.
  ## `numLimbs(v)` is re-evaluated at every use. No bounds check is performed.
  distinctBase(type v)(v).limbs[numLimbs(v).int - int limbIdxFromEnd] = val
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# Checks and debug/test only primitives
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
|
|
|
|
|
template checkMatchingBitlengths(a, b: distinct BigIntViewAny) =
  ## Check that the internal bitlengths of two bigint views match.
  ## `a` and `b` may each be a const or a mutable view.
  ## This is only checked
  ## with "-d:debugConstantine" and when assertions are on.
  debug:
    assert distinctBase(type a)(a).bitLength ==
      distinctBase(type b)(b).bitLength, "Internal Error: operands bitlength do not match"
|
|
|
|
|
|
|
|
|
|
template checkValidModulus(m: BigIntViewConst) =
  ## Check that the modulus is valid
  ## The check is approximate, it only checks that
  ## the most-significant word is non-zero instead of
  ## checking that the last announced bit is 1.
  ## This is only checked
  ## with "-d:debugConstantine" and when assertions are on.
  debug:
    assert not isZero(m[^1]).bool, "Internal Error: the modulus must use all declared bits"
|
2020-02-10 18:16:34 +01:00
|
|
|
|
2020-02-15 00:26:40 +01:00
|
|
|
template checkOddModulus(m: BigIntViewConst) =
  ## Check that the modulus is odd
  ## and valid for use in the Montgomery n-residue representation.
  ## Only the lowest bit of the least-significant limb is tested.
  ## The assertion sits in a `debug:` block, like the other checks of this file.
  debug:
    assert bool(BaseType(m[0]) and 1), "Internal Error: the modulus must be odd to use the Montgomery representation."
|
|
|
|
|
|
2020-02-17 00:13:42 +01:00
|
|
|
template checkWordShift(k: int) =
  ## Checks that the shift does not exceed the word bit size
  ##
  ## NOTE(review): the assertion accepts `k == WordBitSize` while its message
  ## says "less than"; negative `k` is not rejected either — confirm the
  ## intended range with the callers.
  debug:
    assert k <= WordBitSize, "Internal Error: the shift must be less than the word bit size"
|
|
|
|
|
|
2020-02-22 16:37:31 +01:00
|
|
|
template checkPowScratchSpaceLen(len: int) =
  ## Checks that there is a minimum of scratchspace to hold the temporaries:
  ## `len` is the number of big integers available and must be at least 2.
  debug:
    assert len >= 2, "Internal Error: the scratchspace for powmod should be equal or greater than 2"
|
2020-02-17 00:13:42 +01:00
|
|
|
|
2020-02-10 18:16:34 +01:00
|
|
|
debug:
  func `$`*(a: BigIntViewAny): string =
    ## Debug-only textual dump of a view:
    ## "[limb0, limb1, ..., limbN] (<internal bitlength> bits)"
    ## with the least-significant limb first.
    ##
    ## A view announcing zero limbs is rendered as "[] (0 bits)":
    ## `limbs` is an UncheckedArray, so reading index `len-1 == -1`
    ## would be an out-of-bounds access that nothing traps.
    let len = a.numLimbs()
    result = "["
    if len > 0:
      # All limbs but the last are followed by a separator
      for i in 0 ..< len - 1:
        result.add $a[i]
        result.add ", "
      result.add $a[len-1]
    result.add "] ("
    result.add $a.bitSizeof
    result.add " bits)"
|
|
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# BigInt primitives
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
|
|
|
|
|
func isZero*(a: BigIntViewAny): CTBool[Word] =
  ## Constant-time test for zero.
  ## Every limb is OR-ed into a single word, whatever its value,
  ## and the answer is whether that word is zero.
  var orOfLimbs: Word
  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    orOfLimbs = orOfLimbs or a[idx]
  orOfLimbs.isZero()
|
|
|
|
|
|
2020-02-15 02:59:08 +01:00
|
|
|
func setZero*(a: BigIntViewMut) =
  ## Overwrite every limb of `a` with 0.
  ## Its announced bit size is left unchanged.
  let byteCount = a.numLimbs() * sizeof(Word)
  zeroMem(a[0].unsafeAddr, byteCount)
|
|
|
|
|
|
2020-02-16 22:18:22 +01:00
|
|
|
func cmov*(a: BigIntViewMut, b: BigIntViewAny, ctl: CTBool[Word]) =
  ## Constant-time conditional copy of `b` into `a`
  ## - ctl true:  each limb of `a` receives the matching limb of `b`
  ## - ctl false: each limb of `a` is rewritten with its own value
  ## Both operands are read and `a` is written limb by limb in either case,
  ## so time and memory accesses do not depend on `ctl`.
  checkMatchingBitlengths(a, b)
  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    let incoming = b[idx]
    let kept = a[idx]
    a[idx] = ctl.mux(incoming, kept)
|
|
|
|
|
|
2020-02-10 18:16:34 +01:00
|
|
|
# The arithmetic primitives all accept a control input that indicates
|
|
|
|
|
# if it is a placebo operation. It still performs the
|
|
|
|
|
# same memory accesses to be side-channel attack resistant.
|
|
|
|
|
|
|
|
|
|
func add*(a: BigIntViewMut, b: BigIntViewAny, ctl: CTBool[Word]): CTBool[Word] =
  ## Constant-time in-place optional addition: a <- a + b when `ctl` is true.
  ## When `ctl` is false `a` keeps its value, but the carry out
  ## of a + b is still computed and returned.
  ##
  ## a and b MAY be the same buffer
  ## a and b MUST have the same announced bitlength (i.e. `bits` static parameters)
  checkMatchingBitlengths(a, b)

  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    # `result` holds the carry of the previous limb (zero on the first one)
    let total = a[idx] + b[idx] + Word(result)
    # The carry out of a limb lands in the otherwise unused top bit
    result = total.isMsbSet()
    a[idx] = ctl.mux(total.mask(), a[idx])
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
func sub*(a: BigIntViewMut, b: BigIntViewAny, ctl: CTBool[Word]): CTBool[Word] =
  ## Constant-time in-place optional subtraction: a <- a - b when `ctl` is true.
  ## When `ctl` is false `a` keeps its value, but the borrow out
  ## of a - b is still computed and returned.
  ##
  ## a and b MAY be the same buffer
  ## a and b MUST have the same announced bitlength (i.e. `bits` static parameters)
  checkMatchingBitlengths(a, b)

  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    # `result` holds the borrow of the previous limb (zero on the first one)
    let diff = a[idx] - b[idx] - Word(result)
    # A borrow shows up as the otherwise unused top bit being set
    result = diff.isMsbSet()
    a[idx] = ctl.mux(diff.mask(), a[idx])
|
|
|
|
|
|
|
|
|
|
func dec*(a: BigIntViewMut, w: Word): CTBool[Word] =
  ## Subtract the small word `w` from the big int `a`, in place.
  ## Returns the borrow out of the most-significant limb.

  # Least-significant limb: subtract w itself
  let low = a[0] - w
  result = low.isMsbSet()
  a[0] = low.mask()

  # Remaining limbs: propagate the borrow
  for idx in 1 ..< a.numLimbs():
    let limb = a[idx] - Word(result)
    result = limb.isMsbSet()
    a[idx] = limb.mask()
|
|
|
|
|
|
|
|
|
|
func shiftRight*(a: BigIntViewMut, k: int) =
  ## In-place right shift of `a` by `k` bits.
  ##
  ## `k` is validated by `checkWordShift` (debug builds only):
  ## the shift is meant to stay within a single word.
  # This in-place operation does not reuse `shr` on the whole big int.
  # Open question: should the shifted-out part be returned?
  #
  # Note: for speed, each iteration loads a[i] and a[i+1]
  # rather than a[i-1] and a[i]: this is probably easier
  # for the compiler to parallelize
  # (antidependence WAR vs loop-carried dependence RAW)
  checkWordShift(k)

  let top = a.numLimbs() - 1
  let spill = WordBitSize - k
  for idx in 0 ..< top:
    let lowPart = a[idx] shr k
    let highPart = mask(a[idx+1] shl spill) # low bits of the next limb move in
    a[idx] = lowPart or highPart
  a[top] = a[top] shr k
|
2020-02-10 18:16:34 +01:00
|
|
|
|
|
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# Modular BigInt
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
|
|
|
|
|
func shlAddMod(a: BigIntViewMut, c: Word, M: BigIntViewConst) =
  ## Fused modular left-shift + add
  ## Shift input `a` by a word and add `c` modulo `M`
  ##
  ## With a word W = 2^WordBitSize and a modulus M
  ## Does a <- a * W + c (mod M)
  ##
  ## The announced most-significant bit of the modulus `M` MUST be set.
  ## (Only approximated, in debug builds, by `checkValidModulus`.)
  checkValidModulus(M)

  let aLen = a.numLimbs()
  let mBits = bitSizeof(M)

  if mBits <= WordBitSize:
    # If M fits in a single limb
    var q: Word                                      # quotient, computed but discarded

    # (hi, lo) = a * 2^63 + c
    let hi = a[0] shr 1                              # 64 - 63 = 1
    let lo = (a[0] shl WordBitSize) or c             # Assumes most-significant bit in c is not set
    unsafeDiv2n1n(q, a[0], hi, lo, M[0])             # (hi, lo) mod M, remainder stored in a[0]
    return

  else:
    ## Multiple limbs
    let hi = a[^1]                                   # Save the high word to detect carries
    # R = mBits mod 64, obtained with a bitwise AND.
    # NOTE(review): this equals a modulo only if WordBitSize + 1 is a power of two
    # (63 -> mod 64) — confirm for the other supported word sizes.
    let R = mBits and WordBitSize

    # a0: top word of `a` before the shift
    # a1: top word of `a` after the shift
    # m0: top word of `M`
    # When R != 0, all three are re-aligned on the top announced bit of M.
    var a0, a1, m0: Word
    if R == 0:                                       # If the number of mBits is a multiple of 64
      a0 = a[^1]                                     #
      moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words
      a[0] = c                                       # and replace the first one by c
      a1 = a[^1]
      m0 = M[^1]
    else:                                            # Else: need to deal with partial word shifts at the edge.
      a0 = mask((a[^1] shl (WordBitSize-R)) or (a[^2] shr R))
      moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof)
      a[0] = c
      a1 = mask((a[^1] shl (WordBitSize-R)) or (a[^2] shr R))
      m0 = mask((M[^1] shl (WordBitSize-R)) or (M[^2] shr R))

    # m0 has its high bit set. (a0, a1)/m0 fits in a limb.
    # Get a quotient q, at most we will be 2 iterations off
    # from the true quotient

    let
      a_hi = a0 shr 1                                # 64 - 63 = 1
      a_lo = (a0 shl WordBitSize) or a1
    var q, r: Word
    unsafeDiv2n1n(q, r, a_hi, a_lo, m0)              # Estimate quotient (the remainder r is unused)
    q = mux(                                         # If a0 == m0
          a0 == m0, MaxWord,                         # Quotient == MaxWord (0b0111...1111)
          mux(
            q.isZero, Zero,                          # elif q == 0, true quotient = 0
            q - One                                  # else instead of being off by 0, 1 or 2
          )                                          # we return q-1 to be off by -1, 0 or 1
        )

    # Now subtract a*2^63 - q*M
    var carry = Zero
    # Limb-wise comparison of the updated `a` against M, least-significant limb first:
    # on equal limbs the previous verdict is kept, otherwise the current limb decides.
    # It starts at true, so it ends up true when a >= M over the limbs of M.
    var over_p = CtTrue

    for i in 0 ..< M.numLimbs():
      var qp_lo: Word

      block: # q*M
        # q * M[i] + carry (doubleword) carry from previous limb
        let qp = unsafeExtPrecMul(q, M[i]) + carry.DoubleWord
        carry = Word(qp shr WordBitSize)             # New carry: high digit besides LSB
        qp_lo = qp.Word.mask()                       # Normalize to u63

      block: # a*2^63 - q*M
        a[i] -= qp_lo
        carry += Word(a[i].isMsbSet)                 # Adjust if borrow
        a[i] = a[i].mask()                           # Normalize to u63

      over_p = mux(
                a[i] == M[i], over_p,
                a[i] > M[i]
              )

    # Fix quotient, the true quotient is either q-1, q or q+1
    #
    # if carry < hi or carry == hi and over_p we must do "a -= M"
    # if carry > hi (negative result) we must do "a += M"

    let neg = carry > hi
    let tooBig = not neg and (over_p or (carry < hi))

    # Both corrections are always executed; `ctl` selects which one takes effect.
    discard a.add(M, ctl = neg)
    discard a.sub(M, ctl = tooBig)
    return
|
|
|
|
|
|
|
|
|
|
func reduce*(r: BigIntViewMut, a: BigIntViewAny, M: BigIntViewConst) =
  ## Compute r <- a mod M
  ##
  ## The announced most-significant bit of the modulus `M` MUST be set.
  ## The result `r` buffer size MUST be at least the size of `M` buffer
  ##
  ## CT: Depends only on the bitlength of `a` and the modulus `M`

  # Note: for all cryptographic intents and purposes the modulus is known at compile-time
  # but we don't want to inline it as it would increase codesize, better have Nim
  # pass a pointer+length to a fixed section of the binary.
  checkValidModulus(M)

  let
    srcBits = bitSizeof(a)
    modBits = bitSizeof(M)
    srcLimbs = a.numLimbs()

  # The result always announces the bitlength of the modulus
  r.setBitLength(modBits)

  if srcBits < modBits:
    # `a` uses fewer bits than the modulus: it is guaranteed < modulus.
    # This relies on the precondition that the modulus uses all declared bits.
    # Copy `a` as-is and clear the limbs of `r` that `a` does not cover.
    copyMem(r[0].addr, a[0].unsafeAddr, srcLimbs * sizeof(Word))
    for idx in srcLimbs ..< r.numLimbs():
      r[idx] = Zero
    return

  # `a` is at least as long as the modulus:
  # seed `r` with the top (modulus limbs - 1) words of `a`,
  # then feed the remaining words one at a time, most-significant first,
  # through a modular shift-left-add.
  let modLimbs = M.numLimbs()
  let firstFed = srcLimbs - modLimbs
  copyMem(r[0].addr, a[firstFed+1].unsafeAddr, (modLimbs-1) * sizeof(Word))
  r[^1] = Zero
  for idx in countdown(firstFed, 0):
    r.shlAddMod(a[idx], M)
|
2020-02-15 00:26:40 +01:00
|
|
|
|
2020-02-15 02:59:08 +01:00
|
|
|
# ############################################################
|
|
|
|
|
#
|
|
|
|
|
# Montgomery Arithmetic
|
|
|
|
|
#
|
|
|
|
|
# ############################################################
|
|
|
|
|
|
2020-02-15 19:22:40 +01:00
|
|
|
template wordMul(a, b: Word): Word =
  ## Single-word multiplication: the product `a * b` is passed through `mask`,
  ## so only the low part of the product is kept.
  ## NOTE(review): `mask` is presumably the "normalize to u63" operation
  ## used elsewhere in this file — confirm in primitives/constant_time.
  mask(a * b)
|
2020-02-15 19:22:40 +01:00
|
|
|
|
|
|
|
|
func montyMul*(
       r: BigIntViewMut, a, b: distinct BigIntViewAny,
       M: BigIntViewConst, negInvModWord: Word) =
  ## Compute r <- a*b (mod M) in the Montgomery domain
  ## `negInvModWord` = -1/M (mod Word). Our words are 2^31 or 2^63
  ##
  ## This resets r to zero before processing. Use {.noInit.}
  ## to avoid duplicating with Nim zero-init policy
  ## The result `r` buffer size MUST be at least the size of `M` buffer
  ##
  ## `r` is zeroed before `a` and `b` are read,
  ## so `r` must not be the same buffer as `a` or `b`.
  ##
  ## Assuming 63-bit words:
  ##
  ## - the magic constant is µ ≡ -1/M[0] (mod 2^63)
  ##   It is passed precomputed as `negInvModWord`
  ## - multiplying by 1 converts from Montgomery to canonical representation
  ##   (this is how `redc` calls this function)
  ## - multiplying by R^2 (mod M) converts from canonical to Montgomery representation
  ##   (this is how `montyResidue` calls this function)
  ##
  # i.e. c'R <- a'R b'R * R^-1 (mod M) in the natural domain
  # as in the Montgomery domain all numbers are scaled by R

  checkValidModulus(M)
  checkOddModulus(M)
  checkMatchingBitlengths(a, M)
  checkMatchingBitlengths(b, M)

  let nLen = M.numLimbs()
  r.setBitLength(bitSizeof(M))
  setZero(r)

  var r_hi = Zero   # represents the high word that is used in intermediate computation before reduction mod M
  for i in 0 ..< nLen:

    # zi is picked so that the lowest word of r + a[i]*b + zi*M is zero
    # once masked: that word is dropped below (nothing is stored for j == 0).
    let zi = (r[0] + wordMul(a[i], b[0])).wordMul(negInvModWord)
    var carry = Zero

    for j in 0 ..< nLen:
      let z = DoubleWord(r[j]) + unsafeExtPrecMul(a[i], b[j]) +
              unsafeExtPrecMul(zi, M[j]) + DoubleWord(carry)
      carry = Word(z shr WordBitSize)
      if j != 0:
        # Store one position lower: this is the division by one word
        r[j-1] = Word(z).mask()

    r_hi += carry
    r[^1] = r_hi.mask()
    r_hi = r_hi shr WordBitSize

  # If the extra word is not zero or if r-M does not borrow (i.e. r >= M)
  # Then subtract M
  # The inner `sub` runs with a false control: it only computes the borrow
  # and leaves r untouched.
  discard r.sub(M, r_hi.isNonZero() or not r.sub(M, CtFalse))
|
2020-02-15 02:59:08 +01:00
|
|
|
|
2020-02-16 18:59:10 +01:00
|
|
|
func redc*(r: BigIntViewMut, a: BigIntViewAny, one, N: BigIntViewConst, negInvModWord: Word) {.inline.} =
  ## Montgomery Reduction:
  ## bring the bigint ``a`` back from its Montgomery N-residue
  ## representation (mod N) to the regular natural representation (mod N)
  ##
  ## with W = N.numLimbs()
  ## and R = (2^WordBitSize)^W
  ##
  ## Does "a * R^-1 (mod N)"
  ##
  ## The Montgomery magic constant µ ≡ -1/N[0] (mod 2^WordBitSize)
  ## is used internally, it is passed as ``negInvModWord``
  ## and can be precomputed with negInvModWord(Curve)
  ##
  ## ``one`` is the second operand handed to the Montgomery multiplication.
  # References:
  #   - https://eprint.iacr.org/2017/1057.pdf (Montgomery)
  #     page: Radix-r interleaved multiplication algorithm
  #   - https://en.wikipedia.org/wiki/Montgomery_modular_multiplication#Montgomery_arithmetic_on_multiprecision_(variable-radix)_integers
  #   - http://langevin.univ-tln.fr/cours/MLC/extra/montgomery.pdf
  #     Montgomery original paper
  #
  checkValidModulus(N)
  checkOddModulus(N)
  checkMatchingBitlengths(a, N)

  # TODO: a Montgomery multiplication by 1 could get a specialized routine
  r.montyMul(a, one, N, negInvModWord)
|
2020-02-15 02:59:08 +01:00
|
|
|
|
2020-02-15 19:22:40 +01:00
|
|
|
func montyResidue*(
       r: BigIntViewMut, a: BigIntViewAny,
       N, r2modN: BigIntViewConst, negInvModWord: Word) {.inline.} =
  ## Convert the bigint ``a`` from its natural representation (mod N)
  ## to the Montgomery n-residue representation.
  ##
  ## Implemented as a single Montgomery multiplication of `a` by `r2modN`
  ##
  ## with W = N.numLimbs()
  ## and R = (2^WordBitSize)^W
  ##
  ## Does "a * R (mod N)"
  ##
  ## `a`: The source BigInt in the natural representation. `a` in [0, N) range
  ## `N`: The field modulus. N must be odd.
  ## `r2modN`: R² mod `N`. Can be precomputed with `r2mod` function
  ##
  ## Important: `r` is overwritten
  ## The result `r` buffer size MUST be at least the size of `N` buffer
  # Reference: https://eprint.iacr.org/2017/1057.pdf
  checkValidModulus(N)
  checkOddModulus(N)
  checkMatchingBitlengths(a, N)

  r.montyMul(a, r2modN, N, negInvModWord)
|
2020-02-17 00:13:42 +01:00
|
|
|
|
2020-02-22 16:37:31 +01:00
|
|
|
# Montgomery Modular Exponentiation
|
|
|
|
|
# ------------------------------------------
|
|
|
|
|
# We use fixed-window based exponentiation
|
|
|
|
|
# that is constant-time: i.e. the number of multiplications
|
|
|
|
|
# does not depend on the number of set bits in the exponents
|
|
|
|
|
# those are always done and conditionally copied.
|
2020-02-17 00:13:42 +01:00
|
|
|
#
|
2020-02-22 16:37:31 +01:00
|
|
|
# TODO: analyze cost difference with naive exponentiation
|
|
|
|
|
# with n being the number of words to represent a number in Fp
|
|
|
|
|
# and k the window-size
|
|
|
|
|
# - we always multiply even for unused multiplications
|
|
|
|
|
# - conditional copy only save a small fraction of time
|
|
|
|
|
# (multiplication O(n²), cmov O(n), doing nothing i.e. non constant-time O(n))
|
|
|
|
|
# - Table lookup is O(kn) copy time since we need to access the whole table to
|
|
|
|
|
# defeat cache attacks. Without windows, we don't have table lookups at all.
|
2020-02-17 00:13:42 +01:00
|
|
|
#
|
2020-02-22 16:37:31 +01:00
|
|
|
# The exponent MUST NOT be private data (until audited otherwise)
|
|
|
|
|
# - Power attack on RSA, https://www.di.ens.fr/~fouque/pub/ches06.pdf
|
|
|
|
|
# - Flush-and-reload on Sliding window exponentiation: https://tutcris.tut.fi/portal/files/8966761/p1639_pereida_garcia.pdf
|
|
|
|
|
# - Sliding right into disaster, https://eprint.iacr.org/2017/627.pdf
|
|
|
|
|
# - Fixed window leak: https://www.scirp.org/pdf/JCC_2019102810331929.pdf
|
|
|
|
|
# - Constructing sliding-windows leak, https://easychair.org/publications/open/fBNC
|
|
|
|
|
#
|
|
|
|
|
# For pairing curves, this is the case since exponentiation is only
|
|
|
|
|
# used for inversion via the Little Fermat theorem.
|
|
|
|
|
# For RSA, some exponentiations use private exponents.
|
|
|
|
|
#
|
|
|
|
|
# Note:
|
|
|
|
|
# - Implementation closely follows Thomas Pornin's BearSSL
|
|
|
|
|
# - Apache Milagro Crypto has an alternative implementation
|
|
|
|
|
# that is more straightforward however:
|
|
|
|
|
# - the exponent hamming weight is used as loop bounds
|
|
|
|
|
# - the base^k is stored at each index of a temp table of size k
|
|
|
|
|
# - the base^k to use is indexed by the hamming weight
|
|
|
|
|
# of the exponent, leaking this to cache attacks
|
|
|
|
|
# - in contrast BearSSL touches the whole table to
|
|
|
|
|
# hide the actual selection
|
|
|
|
|
#
|
|
|
|
|
# Directly using the Hamming weight would probably
|
|
|
|
|
# significantly improve pairing-friendly curves as
|
|
|
|
|
# they are chosen for their low Hamming-Weight (see BLS12-381 x factor)
|
|
|
|
|
# --> Expose an exponent-leaky powMod?
|
|
|
|
|
# If so, create distinct type for leaked bits and BigInt
|
|
|
|
|
# so that sensitive data use is compiler-checked
|
|
|
|
|
|
|
|
|
|
func getWindowLen(bufLen: int): uint =
  ## Compute the maximum window size that fits in the scratchspace buffer
  ##
  ## `bufLen` is the number of big integers available as scratchspace.
  ## A window of k bits, with k >= 2, needs 2^k + 1 big integers.
  ## A 1-bit window (no window optimization) only needs 2 of them,
  ## so the result never goes below 1.
  ##
  ## Returns a window size in the range [1, 5].
  checkPowScratchSpaceLen(bufLen)
  result = 5
  # Stop at 1: with bufLen == 2 the size formula (2^1 + 1 = 3 > 2) would
  # otherwise push the window down to 0, which is not a usable window
  # and is not the special case `window == 1` handled by the callers.
  while result > 1 and (1 shl result) + 1 > bufLen:
    dec result
|
|
|
|
|
|
2020-02-22 18:18:17 +01:00
|
|
|
func montyPowPrologue(
       a: BigIntViewMut, M, one: BigIntViewConst,
       negInvModWord: Word,
       scratchspace: openarray[BigIntViewMut]
     ): tuple[window: uint, bigIntSize: int] {.inline.}=
  ## Setup step of the Montgomery exponentiation
  ##
  ## - picks the window size allowed by `scratchspace.len`
  ## - computes the size in bytes of a big integer (header + limbs)
  ## - fills the scratchspace with the powers of `a` used by the window
  ## - overwrites `a` with `one`
  ##
  ## Returns the window size and the big integer size in bytes.
  # Due to the high number of parameters,
  # forcing this inline actually reduces the code size

  result.window = scratchspace.len.getWindowLen()

  # Size of everything stored before the first limb. It is measured from the
  # addresses rather than taken as `sizeof(bitLength)`: when `Word` has a
  # stricter alignment than the uint32 header, the limbs start after padding
  # bytes, and a size that ignores the padding makes `copyMem` drop the
  # last bytes of the most-significant limb.
  let headerSize = cast[int](a[0].unsafeAddr) - cast[int](pointer a)
  result.bigIntSize = headerSize + a.numLimbs() * sizeof(Word)

  # Precompute window content, special case for window = 1
  # (i.e scratchspace has only space for 2 temporaries)
  # The content scratchspace[1+k] is set at a^k
  # with scratchspace[0] untouched
  if result.window == 1:
    copyMem(pointer scratchspace[1], pointer a, result.bigIntSize)
  else:
    copyMem(pointer scratchspace[2], pointer a, result.bigIntSize)
    for k in 2 ..< 1 shl result.window:
      scratchspace[k+1].montyMul(scratchspace[k], a, M, negInvModWord)

  # scratchspace[1] is not written when window > 1: announce its bitlength here
  scratchspace[1].setBitLength(bitSizeof(M))

  # Set a to one
  copyMem(pointer a, pointer one, result.bigIntSize)
|
|
|
|
|
|
|
|
|
|
func montyPowSquarings(
        a: BigIntViewMut,
        exponent: openarray[byte],
        M: BigIntViewConst,
        negInvModWord: Word,
        tmp: BigIntViewMut,
        window: uint,
        bigIntSize: int,
        acc, acc_len: var uint,
        e: var int,
      ): tuple[k, bits: uint] {.inline.}=
  ## Squaring step of exponentiation by squaring
  ## Get the next k bits of the exponent:
  ## k is `window`, except once all exponent bytes have been consumed,
  ## where k is the number of bits left in the accumulator.
  ## Square k times
  ## Returns the number of squarings done and the corresponding bits
  ##
  ## Updates iteration variables and accumulators:
  ## - `acc` accumulates the exponent bytes read so far
  ## - `acc_len` is the number of bits of `acc` not consumed yet
  ## - `e` is the index of the next exponent byte to read
  ##
  ## `tmp` receives each square before it is copied back into `a`
  ## (`bigIntSize` bytes are copied).
  # Due to the high number of parameters,
  # forcing this inline actually reduces the code size

  # Get the next bits
  var k = window
  if acc_len < window:
    if e < exponent.len:
      # Refill the accumulator with one more exponent byte
      acc = (acc shl 8) or exponent[e].uint
      inc e
      acc_len += 8
    else: # Drained all exponent bits
      k = acc_len

  # Take the k most-significant bits among the unconsumed ones
  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
  acc_len -= k

  # We have k bits and can do k squaring
  for i in 0 ..< k:
    tmp.montyMul(a, a, M, negInvModWord)
    copyMem(pointer a, pointer tmp, bigIntSize)

  return (k, bits)
|
|
|
|
|
|
2020-02-22 16:37:31 +01:00
|
|
|
func montyPow*(
       a: BigIntViewMut,
       exponent: openarray[byte],
       M, one: BigIntViewConst,
       negInvModWord: Word,
       scratchspace: openarray[BigIntViewMut]
      ) =
  ## Modular exponentiation ``a <- a^exponent (mod M)``
  ## in the Montgomery domain.
  ##
  ## A fixed-window scheme is used when ``scratchspace`` is large enough.
  ##
  ## - ``a``: the base on input, the result on output.
  ##   ``a`` is in the Montgomery domain
  ## - ``exponent``: the exponent as a big-endian canonical octet-string.
  ##   Use ``exportRawUint`` for conversion
  ## - ``M``: the modulus
  ## - ``one``: 1 (mod M) in Montgomery representation
  ## - ``negInvModWord``: the Montgomery magic constant "-1/M[0] mod 2^WordBitSize"
  ## - ``scratchspace``: working buffer. For a window of k bits (k up to 5)
  ##   it must hold 2^k + 1 big-ints.
  ##   A 1-bit window (no window optimization) needs only 2 big-ints.
  ##
  ## The best window size has to be benchmarked, it is a tradeoff between
  ## - performance
  ## - stack usage
  ## - precomputation
  ##
  ## For example on BLS12-381 a window of size 5 is 30% faster than no window,
  ## while windows of size 2, 3, 4 bring no performance benefit,
  ## only increased stack space.
  ## A window of size 5 requires (2^5 + 1)*(381 + 7)/8 = 33 * 48 bytes = 1584 bytes
  ## of scratchspace (on the stack).

  let setup = montyPowPrologue(a, M, one, negInvModWord, scratchspace)

  # Exponent bits are consumed from the most significant to the least
  # significant. `bitsHeld` is the number of bits buffered in `bitBuf`
  # at the start of each iteration.
  # For constant-time operation, the number of iterations, operations
  # and memory accesses must not depend on the buffered bit values.
  var
    bitBuf, bitsHeld: uint
    bytePos = 0
  while bitsHeld > 0 or bytePos < exponent.len:
    let chunk = montyPowSquarings(
      a, exponent, M, negInvModWord,
      scratchspace[0], setup.window, setup.bigIntSize,
      bitBuf, bitsHeld, bytePos
    )

    # Window lookup: scratchspace[1] receives the table entry to multiply by.
    # With a 1-bit window scratchspace[1] already holds it.
    if setup.window > 1:
      # Constant-time lookup: every candidate entry is read on every
      # iteration and only the matching one is kept. Indexing the table
      # directly with the exponent bits would leak them through
      # the cache access pattern.
      let tableEnd = 1 shl chunk.k
      for i in 1 ..< tableEnd:
        scratchspace[1].cmov(scratchspace[1+i], Word(i) == Word(chunk.bits))

    # Always compute the product with the looked-up value,
    # but only commit it to `a` when the exponent bits are not all zero.
    scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord)
    let keepProduct = Word(chunk.bits) != Zero
    a.cmov(scratchspace[0], keepProduct)
|
2020-02-22 18:18:17 +01:00
|
|
|
|
|
|
|
|
func montyPowUnsafeExponent*(
       a: BigIntViewMut,
       exponent: openarray[byte],
       M, one: BigIntViewConst,
       negInvModWord: Word,
       scratchspace: openarray[BigIntViewMut]
      ) =
  ## Modular exponentiation ``a <- a^exponent (mod M)``
  ## in the Montgomery domain, variable-time in the exponent.
  ##
  ## Warning ⚠️ :
  ## This is an optimization meant for public exponents only.
  ## With a secret exponent, its bits can be recovered through:
  ## - memory access analysis
  ## - power analysis
  ## - timing analysis

  # TODO: scratchspace[1] is unused when window > 1

  let setup = montyPowPrologue(
    a, M, one, negInvModWord, scratchspace)

  var
    bitBuf, bitsHeld: uint
    bytePos = 0
  while bitsHeld > 0 or bytePos < exponent.len:
    let chunk = montyPowSquarings(
      a, exponent, M, negInvModWord,
      scratchspace[0], setup.window, setup.bigIntSize,
      bitBuf, bitsHeld, bytePos
    )

    ## Warning ⚠️: Exposes the exponent bits
    # Both the branch below and the table index depend on the exponent.
    if chunk.bits == 0:
      # All-zero chunk: the squarings were enough, skip the multiplication
      continue

    # With a window, index the precomputed table directly with the bits.
    # Without one (window == 1), scratchspace[1] holds the original `a`.
    let factorIdx = if setup.window > 1: 1 + chunk.bits
                    else: 1'u
    scratchspace[0].montyMul(a, scratchspace[factorIdx], M, negInvModWord)
    copyMem(pointer a, pointer scratchspace[0], setup.bigIntSize)
|