Refactoring, optimize code-size: use type-erased views to avoid monomorphization of compute kernels

This commit is contained in:
Mamy André-Ratsimbazafy 2020-02-10 18:16:34 +01:00
parent ade919b003
commit b689223cf5
No known key found for this signature in database
GPG Key ID: 7B88AD1FE79492E1
11 changed files with 577 additions and 367 deletions

View File

@ -1,313 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# BigInt representation
#
# ############################################################
# To avoid carry issues we don't use the
# most significant bit of each word.
# i.e. for a uint64 base we only use 63-bit.
# More info: https://github.com/status-im/nim-constantine/wiki/Constant-time-arithmetics#guidelines
# Especially:
# - https://bearssl.org/bigint.html
# - https://cryptojedi.org/peter/data/pairing-20131122.pdf
# - http://docs.milagro.io/en/amcl/milagro-crypto-library-white-paper.html
#
# Note that this might also be beneficial in terms of performance.
# Due to opcode latency, on Nehalem ADC is 6x times slower than ADD
# if it has dependencies (i.e the ADC depends on a previous ADC result)
# Control flow should only depends on the static maximum number of bits
# This number is defined per Finite Field/Prime/Elliptic Curve
#
# For efficiency, our limbs will use a word size of 63-bit
# Warning ⚠️ : This assumes that u64 + u64 and u64 * u64
# are constant-time even on 32-bit platforms
#
# We internally order the limbs in little-endian
# So the least significant limb is limb[0]
# This is independent from the base type endianness.
import ./primitives
from ./private/primitives_internal import unsafeDiv2n1n, unsafeExtendedPrecMul
type Word* = Ct[uint32]
## Limb type. The most significant bit of each limb is kept clear
## so that carries can be handled without extended-precision arithmetic.
type BaseType* = uint32 # Exported type for conversion in "normal integers"
const WordBitSize* = sizeof(Word) * 8 - 1
## Number of usable bits per limb: the physical word size minus one.
## With the uint32 base declared above this is 31 bits
## (assuming `Ct[T]` has the same size as `T` - TODO confirm in ./primitives).
const
Zero* = Word(0)
One* = Word(1)
MaxWord* = (not Zero) shr 1
## All bits set except the most significant one: 0b0111...1111
## (0x7FFF_FFFF for a 32-bit word).
## This is the biggest representable number in our limbs,
## i.e. the most significant bit is never set at the end of each function.
func wordsRequired(bits: int): int {.compileTime.} =
  ## Number of limbs needed to store `bits` bits,
  ## i.e. `bits / WordBitSize` rounded up.
  result = (bits + (WordBitSize - 1)) div WordBitSize
# TODO: Currently the library instantiates primitives like "add"
# for each "bits" size supported. This will lead to duplication
# if many sizes (for example for secp256k1, bn254 and BLS12-381)
# are required.
# It could be avoided by having the bitsize be a runtime field
# of the bigint. However the tradeoff would be:
# - overhead of this additional field
# - limbs have to be stored in an UncheckedArray instead of an array,
# introducing memory management issues
type
BigInt*[bits: static int] = object
## Fixed-precision big integer
##
## `bits` is the compile-time bit size; it determines the number of limbs
## through `wordsRequired`.
##
## "limbs" is an internal field that holds the internal representation
## of the big integer. This internal representation can be changed
## without notice and should not be used by external applications or libraries.
# Constantine BigInt have a word-size chosen to minimize bigint memory usage
# while allowing carry-less operations in a machine-efficient type like uint32
# uint64 or uint128 if available.
# In practice the usable word size is `WordBitSize` bits,
# one bit less than the physical word.
#
# "Limb-endianess" is little-endian (least significant limb at BigInt.limbs[0])
limbs*: array[bits.wordsRequired, Word]
# No exceptions allowed
# TODO: can we use compile-time "Natural" instead of "int" in that case?
{.push raises: [].}
# ############################################################
#
# Internal
#
# ############################################################
func copyLimbs*[dstBits, srcBits](
dst: var BigInt[dstBits], dstStart: static int,
src: BigInt[srcBits], srcStart: static int,
numLimbs: static int) {.inline.}=
## Copy `numLimbs` limbs from `src`, starting at limb `srcStart`,
## into `dst`, starting at limb `dstStart`.
## Limbs of `dst` outside of the copied range are left untouched
## (nothing is zeroed by this routine).
##
## Limbs ordering is little-endian. limb 0 is the least significant.
##
## This should work at both compile-time and runtime.
##
## `numLimbs + srcStart` must not exceed the number of limbs of `src` and
## `numLimbs + dstStart` must not exceed the number of limbs of `dst`.
## This is checked at compile-time and has no runtime impact
static:
doAssert numLimbs >= 0, "`numLimbs` must be greater or equal zero"
doAssert numLimbs + srcStart <= src.limbs.len,
"The number of limbs to copy (" & $numLimbs &
") must be less or equal to the number of limbs in the `src` buffer (" &
$src.limbs.len & " for " & $srcBits & " bits)"
doAssert numLimbs + dstStart <= dst.limbs.len,
"The number of limbs to copy (" & $numLimbs &
") must be less or equal to the number of limbs in the `dst` buffer (" &
$dst.limbs.len & " for " & $dstBits & " bits)"
# TODO: do we need a copyMem / memcpy specialization for runtime
# or use dst.limbs[0..<numLimbs] = src.toOpenarray(0, numLimbs - 1)
# The loop range is a compile-time constant since `numLimbs` is static.
for i in static(0 ..< numLimbs):
dst.limbs[i+dstStart] = src.limbs[i+srcStart]
func setZero*(a: var BigInt, start, stop: static int) {.inline.} =
## Set limbs to zero
## The [start, stop] range is inclusive
## If stop < start, a is unmodified
##
## Both `start` and `stop` must be valid limb indices of `a`,
## this is checked at compile-time (even when stop < start).
static:
doAssert start in 0 ..< a.limbs.len, $start & " not in 0 ..< " & $a.limbs.len & " (numLimbs)"
doAssert stop in 0 ..< a.limbs.len, $stop & " not in 0 ..< " & $a.limbs.len & " (numLimbs)"
for i in static(start .. stop):
a.limbs[i] = Zero
# ############################################################
#
# BigInt primitives
#
# ############################################################
# TODO: {.inline.} analysis
func isZero*(a: BigInt): CTBool[Word] =
  ## Zero test: every limb is OR-ed into a single word
  ## and that word is compared to zero.
  ## All limbs are always visited.
  var orOfLimbs: Word
  for idx in static(0 ..< a.limbs.len):
    orOfLimbs = orOfLimbs or a.limbs[idx]
  orOfLimbs.isZero()
func `==`*(a, b: BigInt): CTBool[Word] =
  ## Equality test: the XOR of each pair of limbs is OR-ed
  ## into a single word which is zero only if all limbs match.
  ## All limbs are always visited.
  var diffBits: Word
  for idx in static(0 ..< a.limbs.len):
    let limbDiff = a.limbs[idx] xor b.limbs[idx]
    diffBits = diffBits or limbDiff
  diffBits.isZero
# The arithmetic primitives all accept a control input that indicates
# whether the call is a placebo operation. A placebo still performs the
# same memory accesses to be side-channel attack resistant.
func add*[bits](a: var BigInt[bits], b: BigInt[bits], ctl: CTBool[Word]): CTBool[Word] =
  ## In-place optional addition `a += b`.
  ## `a` is only updated when `ctl` is "true", but every limb is
  ## read and written either way.
  ## Returns the carry out of the most significant limb,
  ## which is computed regardless of `ctl`.
  for idx in static(0 ..< a.limbs.len):
    # `result` carries the overflow bit of the previous limb
    let sum = a.limbs[idx] + b.limbs[idx] + Word(result)
    result = sum.isMsbSet()
    a.limbs[idx] = mux(ctl, sum and MaxWord, a.limbs[idx])
func sub*[bits](a: var BigInt[bits], b: BigInt[bits], ctl: CTBool[Word]): CTBool[Word] =
  ## In-place optional subtraction `a -= b`.
  ## `a` is only updated when `ctl` is "true", but every limb is
  ## read and written either way.
  ## Returns the borrow out of the most significant limb,
  ## which is computed regardless of `ctl`.
  for idx in static(0 ..< a.limbs.len):
    # `result` carries the borrow bit of the previous limb
    let diff = a.limbs[idx] - b.limbs[idx] - Word(result)
    result = diff.isMsbSet()
    a.limbs[idx] = mux(ctl, diff and MaxWord, a.limbs[idx])
# ############################################################
#
# Modular BigInt
#
# ############################################################
# TODO: push boundsCheck off. They would be extremely costly.
func shlAddMod[bits](a: var BigInt[bits], c: Word, M: BigInt[bits]) =
  ## Fused modular left-shift + add
  ## Shift input `a` by a word and add `c` modulo `M`
  ##
  ## With a word W = 2^WordBitSize and a modulus M
  ## Does a <- a * W + c (mod M)
  ##
  ## - `a` is updated in place
  ## - `c` is the word shifted in as least significant limb,
  ##   its most significant bit is assumed to be clear
  ## - `M` is the modulus, it **must** use all `bits` declared bits
  ##   (only a non-zero top limb is asserted here)
  assert not M.limbs[^1].isZero.bool, "The modulus must use all declared bits"
  const len = a.limbs.len

  when bits <= WordBitSize:
    # If M fits in a single limb
    var q: Word

    # (hi, lo) = a * 2^WordBitSize + c
    let hi = a.limbs[0] shr 1                         # physical word size - WordBitSize = 1
    let lo = (a.limbs[0] shl WordBitSize) or c        # Assumes most-significant bit in c is not set
    unsafeDiv2n1n(q, a.limbs[0], hi, lo, M.limbs[0])  # (hi, lo) mod M
    return

  else: # TODO replace moveMem with a proc that also works at compile-time
    ## Multiple limbs
    let hi = a.limbs[^1]               # Save the high word to detect carries

    # Number of bits used in the most significant limb (0 means the limb is full).
    # Limbs hold WordBitSize bits, so this is a true modulo and not a bit mask:
    # WordBitSize + 1 is the power of two, not WordBitSize.
    const R = bits mod WordBitSize

    when R == 0:                       # If the number of bits is a multiple of WordBitSize
      let a0 = a.limbs[^1]
      moveMem(a.limbs[1].addr, a.limbs[0].addr, (len-1) * Word.sizeof) # we can just shift words
      a.limbs[0] = c                   # and replace the first one by c
      let a1 = a.limbs[^1]
      let m0 = M.limbs[^1]
    else:                              # Need to deal with partial word shifts at the edge.
      let a0 = ((a.limbs[^1] shl (WordBitSize-R)) or (a.limbs[^2] shr R)) and MaxWord
      moveMem(a.limbs[1].addr, a.limbs[0].addr, (len-1) * Word.sizeof)
      a.limbs[0] = c
      let a1 = ((a.limbs[^1] shl (WordBitSize-R)) or (a.limbs[^2] shr R)) and MaxWord
      let m0 = ((M.limbs[^1] shl (WordBitSize-R)) or (M.limbs[^2] shr R)) and MaxWord

    # m0 has its high bit set. (a0, a1)/p0 fits in a limb.
    # Get a quotient q, at most we will be 2 iterations off
    # from the true quotient
    let
      a_hi = a0 shr 1                  # physical word size - WordBitSize = 1
      a_lo = (a0 shl WordBitSize) or a1
    var q, r: Word
    unsafeDiv2n1n(q, r, a_hi, a_lo, m0) # Estimate quotient
    q = mux(                            # If n_hi == divisor
          a0 == m0, MaxWord,            # Quotient == MaxWord (0b0111...1111)
          mux(
            q.isZero, Zero,             # elif q == 0, true quotient = 0
            q - One                     # else instead of being off by 0, 1 or 2
          )                             # we return q-1 to be off by -1, 0 or 1
        )

    # Now substract a*W - q*p
    var carry = Zero
    var over_p = ctrue(Word)            # Track if quotient greater than the modulus

    for i in static(0 ..< M.limbs.len):
      var qp_lo: Word

      block: # q*p
        var qp_hi: Word
        unsafeExtendedPrecMul(qp_hi, qp_lo, q, M.limbs[i]) # q * p
        qp_lo += carry                                     # Add carry from previous limb
        carry = qp_hi shl 1 + qp_lo.isMsbSet.Word          # New carry
        qp_lo = qp_lo and MaxWord                          # Normalize to WordBitSize bits

      block: # a*W - q*p
        a.limbs[i] -= qp_lo
        carry += Word(a.limbs[i].isMsbSet)                 # Adjust if borrow
        a.limbs[i] = a.limbs[i] and MaxWord                # Normalize to WordBitSize bits

      over_p = mux(
                a.limbs[i] == M.limbs[i], over_p,
                a.limbs[i] > M.limbs[i]
              )

    # Fix quotient, the true quotient is either q-1, q or q+1
    #
    # if carry < hi or carry == hi and over_p we must do "a -= p"
    # if carry > hi (negative result) we must do "a += p"
    let neg = carry > hi
    let tooBig = not neg and (over_p or (carry < hi))

    discard a.add(M, ctl = neg)
    discard a.sub(M, ctl = tooBig)
    return
func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBits]) =
  ## Reduce `a` modulo `M` and store the result in `r`
  ##
  ## The modulus `M` **must** use `mBits` bits.
  ##
  ## CT: Depends only on the length of the modulus `M`
  # Note: for all cryptographic intents and purposes the modulus is known at compile-time
  # but we don't want to inline it as it would increase codesize, better have Nim
  # pass a pointer+length to a fixed session of the BSS.
  assert not M.limbs[^1].isZero.bool, "The modulus must use all declared bits"

  when aBits < mBits:
    # if a uses less bits than the modulus,
    # it is guaranteed < modulus.
    # This relies on the precondition that the modulus uses all declared bits
    copyLimbs(r, 0, a, 0, a.limbs.len)
    when a.limbs.len < r.limbs.len:
      # Only clear the tail when there is one: `setZero` statically
      # rejects a start index equal to the number of limbs.
      r.setZero(a.limbs.len, r.limbs.len-1)
  else:
    # a length is at least equal to the modulus.
    # We seed `r` with the (M.limbs.len - 1) most significant limbs of `a`,
    # i.e. limbs aOffset+1 ..< a.limbs.len, which is guaranteed < M,
    # and modular shift-left-add every remaining limb, from limb aOffset
    # down to limb 0, so that all limbs of `a` are consumed.
    const aOffset = a.limbs.len - M.limbs.len

    copyLimbs(r, 0, a, aOffset + 1, M.limbs.len - 1)
    r.limbs[^1] = Zero
    for i in countdown(aOffset, 0):
      r.shlAddMod(a.limbs[i], M)

View File

@ -0,0 +1,54 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
./bigints_raw,
./primitives
# ############################################################
#
# BigInts Public API
#
# ############################################################
# The "public" API, exported for finite field computations,
# enforces compile-time checking of the BigInt bitsize.
#
# The "raw" compute API uses views to avoid code duplication due to generic/static monomorphization.
# No exceptions allowed
{.push raises: [].}
{.push inline.}
func isZero*(a: BigInt): CTBool[Word] =
  ## Returns "true" when the big int is equal to zero.
  ## Thin typed wrapper: the work is done by the
  ## type-erased `isZero` operating on a view of `a`.
  result = isZero(a.view)
func add*[bits](a: var BigInt[bits], b: BigInt[bits], ctl: CTBool[Word]): CTBool[Word] =
  ## In-place optional addition `a += b`.
  ## `a` is only updated when `ctl` is "true";
  ## the carry is computed and returned either way.
  ## Both operands share the same static `bits`, the computation itself
  ## is forwarded to the type-erased `add` on views.
  result = a.view.add(b.view, ctl)
func sub*[bits](a: var BigInt[bits], b: BigInt[bits], ctl: CTBool[Word]): CTBool[Word] =
  ## Constant-time big integer in-place optional substraction
  ## The substraction is only performed if ctl is "true"
  ## The result borrow is always computed and returned.
  ##
  ## Both operands share the same static `bits`; the computation itself
  ## is forwarded to the type-erased `sub` operating on views.
  sub(a.view, b.view, ctl)
func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBits]) =
  ## Compute `a mod M` and write it to `r`.
  ##
  ## `M` **must** use all of its `mBits` announced bits
  ## (the bit at position mBits-1 must be set).
  ##
  ## CT: Depends only on the length of the modulus `M`
  # For all cryptographic intents and purposes the modulus is known at
  # compile-time, but it is deliberately not inlined to keep code size down:
  # the type-erased routine receives views (pointers) instead.
  r.view.reduce(a.view, M.view)

424
constantine/bigints_raw.nim Normal file
View File

@ -0,0 +1,424 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# BigInt Raw representation and operations
#
# ############################################################
#
# This file holds the raw operations done on big ints
# The representation is optimized for:
# - constant-time (not leaking secret data via side-channel)
# - generated code size and datatype size
# - performance
# in this order
# ############################################################
# Design
# To avoid carry issues we don't use the
# most significant bit of each machine word.
# i.e. for a uint64 base we only use 63-bit.
# More info: https://github.com/status-im/nim-constantine/wiki/Constant-time-arithmetics#guidelines
# Especially:
# - https://bearssl.org/bigint.html
# - https://cryptojedi.org/peter/data/pairing-20131122.pdf
# - http://docs.milagro.io/en/amcl/milagro-crypto-library-white-paper.html
#
# Note that this might also be beneficial in terms of performance.
# Due to opcode latency, on Nehalem ADC is 6x times slower than ADD
# if it has dependencies (i.e the ADC depends on a previous ADC result)
#
# Control flow should only depends on the static maximum number of bits
# This number is defined per Finite Field/Prime/Elliptic Curve
#
# We internally order the limbs in little-endian
# So the least significant limb is limb[0]
# This is independent from the base type endianness.
#
# Constantine uses Nim generic integer to prevent mixing
# BigInts of different bitlength at compile-time and
# properly statically size the BigInt buffers.
#
# To avoid code-bloat due to monomorphization (i.e. duplicating code per announced bitlength)
# actual computation is deferred to type-erased routines.
import
./primitives, ./common,
./primitives_extprecision
from sugar import distinctBase
type Word* = Ct[uint32]
## Logical BigInt word
## A logical BigInt word is of size physical MachineWord-1
type BaseType* = uint32
## Physical BigInt for conversion in "normal integers"
const
# Size in bits of the physical storage of a `Word`
WordPhysBitSize = sizeof(Word) * 8
# Number of bits of a `Word` actually used to store data (physical size - 1)
WordBitSize* = WordPhysBitSize - 1
const
Zero* = Word(0)
One* = Word(1)
MaxWord* = (not Zero) shr 1
## All bits set except the most significant one: 0b0111...1111
## (0x7FFF_FFFF for the 32-bit base word declared above).
## This is the biggest representable number in our limbs,
## i.e. the most significant bit is never set at the end of each function.
func wordsRequired(bits: int): int {.compileTime.} =
  ## Number of limbs needed to store an integer
  ## of the given **announced** bit length,
  ## i.e. `bits / WordBitSize` rounded up.
  result = (bits + (WordBitSize - 1)) div WordBitSize
type
BigInt*[bits: static int] = object
## Fixed-precision big integer
##
## - "bits" is the announced bit-length of the BigInt
## This is public data, usually equal to the curve prime bitlength.
##
## - "bitLength" is the internal bitlength of the integer
## This differs from the canonical bit-length as
## Constantine word-size is smaller than a machine word.
## This value should never be used as-is to prevent leaking secret data.
## Computing this value requires constant-time operations.
## Using this value requires converting it to the # of limbs in constant-time
##
## - "limbs" is an internal field that holds the internal representation
## of the big integer. Least-significant limb first. Within limbs words are native-endian.
##
## This internal representation can be changed
## without notice and should not be used by external applications or libraries.
bitLength: uint32
limbs*: array[bits.wordsRequired, Word]
BigIntView* = ptr object
## Type-erased fixed-precision big integer
##
## This type mirrors the BigInt type (same field order: `bitLength` then `limbs`)
## and is used for the low-level computation API
## This design
## - avoids code bloat due to generic monomorphization
## otherwise each bigint routines would have an instantiation for
## each static `bits` parameter.
## - while not forcing the caller to preallocate computation buffers
## for the high-level API
##
## As with the BigInt type:
## - "bitLength" is the internal bitlength of the integer
## This differs from the canonical bit-length as
## Constantine word-size is smaller than a machine word.
## This value should never be used as-is to prevent leaking secret data.
## Computing this value requires constant-time operations.
## Using this value requires converting it to the # of limbs in constant-time
##
## - "limbs" is an internal field that holds the internal representation
## of the big integer. Least-significant limb first. Within limbs words are native-endian.
##
## This internal representation can be changed
## without notice and should not be used by external applications or libraries.
##
## Accesses should be done via BigIntViewConst / BigIntViewMut
## to have the compiler check for mutability
bitLength: uint32
limbs: UncheckedArray[Word]
# "Indirection" to enforce pointer types deep immutability
BigIntViewConst* = distinct BigIntView
## Immutable view into a BigInt
BigIntViewMut* = distinct BigIntView
## Mutable view into a BigInt
BigIntViewAny* = BigIntViewConst or BigIntViewMut
## Type class accepting either kind of view
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Mutability safety
#
# ############################################################
template view*(a: BigInt): BigIntViewConst =
## Returns a borrowed type-erased immutable view to a bigint
## The view is the address of `a` reinterpreted as a `BigIntView`:
## nothing is copied, so the view is only usable while `a` is.
BigIntViewConst(cast[BigIntView](a.unsafeAddr))
template view*(a: var BigInt): BigIntViewMut =
## Returns a borrowed type-erased mutable view to a mutable bigint
## The view is the address of `a` reinterpreted as a `BigIntView`:
## nothing is copied, writes through the view modify `a`.
BigIntViewMut(cast[BigIntView](a.addr))
template `[]`*(v: BigIntViewConst, limbIdx: int): Word =
## Read limb `limbIdx` (0 = least significant) of an immutable view.
## `limbs` is an UncheckedArray: the index is not bounds-checked.
distinctBase(type v)(v).limbs[limbIdx]
template `[]`*(v: BigIntViewMut, limbIdx: int): var Word =
## Access limb `limbIdx` (0 = least significant) of a mutable view as a mutable location.
## `limbs` is an UncheckedArray: the index is not bounds-checked.
distinctBase(type v)(v).limbs[limbIdx]
template `[]=`*(v: BigIntViewMut, limbIdx: int, val: Word) =
## Overwrite limb `limbIdx` (0 = least significant) of a mutable view with `val`.
## `limbs` is an UncheckedArray: the index is not bounds-checked.
distinctBase(type v)(v).limbs[limbIdx] = val
template bitSizeof(v: BigIntViewAny): uint32 =
## Read the **internal** bitlength stored in the view's `bitLength` field
distinctBase(type v)(v).bitLength
# Shift amount equivalent to a division by the physical word size
# (log2 of WordPhysBitSize, which is a power of two for machine words)
const divShiftor = log2(WordPhysBitSize)
template numLimbs*(v: BigIntViewAny): int =
## Compute the number of limbs from
## the **internal** bitlength:
## the bitlength divided by the physical word size, rounded up.
(bitSizeof(v).int + WordPhysBitSize - 1) shr divShiftor
template setBitLength(v: BigIntViewMut, internalBitLength: uint32) =
## Overwrite the **internal** bitlength stored in a mutable view.
## This changes what `numLimbs` reports for that view.
distinctBase(type v)(v).bitLength = internalBitLength
# TODO: Check if repeated v.numLimbs calls are optimized away
template `[]`*(v: BigIntViewConst, limbIdxFromEnd: BackwardsIndex): Word =
## Read a limb counted from the most significant end: `v[^1]` is limb `numLimbs - 1`.
## The limb count is derived from the view's internal bitlength at each access.
distinctBase(type v)(v).limbs[v.numLimbs.int - int limbIdxFromEnd]
template `[]`*(v: BigIntViewMut, limbIdxFromEnd: BackwardsIndex): var Word =
## Access, as a mutable location, a limb counted from the most significant end:
## `v[^1]` is limb `numLimbs - 1`.
distinctBase(type v)(v).limbs[v.numLimbs.int - int limbIdxFromEnd]
template `[]=`*(v: BigIntViewMut, limbIdxFromEnd: BackwardsIndex, val: Word) =
## Overwrite a limb counted from the most significant end with `val`:
## `v[^1] = x` writes limb `numLimbs - 1`.
distinctBase(type v)(v).limbs[v.numLimbs.int - int limbIdxFromEnd] = val
# ############################################################
#
# Checks and debug/test only primitives
#
# ############################################################
template checkMatchingBitlengths(a, b: distinct BigIntViewAny) =
  ## Debug-only sanity check: both views must carry
  ## the same internal bitlength.
  ## The check is only compiled in with "-d:debugConstantine"
  ## and only evaluated when assertions are on.
  debug:
    assert bitSizeof(a) == bitSizeof(b), "Internal Error: operands bitlength do not match"
template checkValidModulus(m: BigIntViewConst) =
  ## Debug-only sanity check of a modulus.
  ## This is an approximation: it verifies that the most significant
  ## word is non-zero, not that the last announced bit is 1.
  ## The check is only compiled in with "-d:debugConstantine"
  ## and only evaluated when assertions are on.
  debug:
    assert not bool(isZero(m[^1])), "Internal Error: the modulus must use all declared bits"
# Helpers intended for debugging and tests.
# NOTE(review): indentation is not visible here; presumably both routines
# below are nested in the `debug:` block and thus only exist
# with "-d:debugConstantine" - confirm against the original file.
debug:
func `==`*(a, b: BigInt): CTBool[Word] =
## Returns true if 2 big ints are equal
# XOR of each limb pair is accumulated with OR: the accumulator is zero
# only if all limbs are identical.
var accum: Word
for i in static(0 ..< a.limbs.len):
accum = accum or (a.limbs[i] xor b.limbs[i])
result = accum.isZero
func `$`*(a: BigIntViewAny): string =
## Render a view as "[limb0, limb1, ...] (N bits)", least significant limb first,
## where N is the internal bitlength.
# NOTE(review): when numLimbs is 0 the `a[len-1]` access below reads limb -1;
# presumably views always hold at least one limb - confirm.
let len = a.numLimbs()
result = "["
for i in 0 ..< len - 1:
result.add $a[i]
result.add ", "
result.add $a[len-1]
result.add "] ("
result.add $a.bitSizeof
result.add " bits)"
# ############################################################
#
# BigInt primitives
#
# ############################################################
func setInternalBitLength*(a: var BigInt) {.inline.} =
  ## Store in `a.bitLength` the bitsize used internally,
  ## derived at compile-time from the announced bitsize `a.bits`:
  ## one extra bit is counted for each full group of
  ## `WordBitSize` announced bits.
  a.bitLength = static(a.bits div WordBitSize + a.bits)
func isZero*(a: BigIntViewAny): CTBool[Word] =
  ## Zero test on a view: every limb is OR-ed into a single word
  ## and that word is compared to zero.
  ## All `numLimbs` limbs are always visited.
  var orOfLimbs: Word
  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    orOfLimbs = orOfLimbs or a[idx]
  orOfLimbs.isZero()
# The arithmetic primitives all accept a control input that indicates
# whether the call is a placebo operation. A placebo still performs the
# same memory accesses to be side-channel attack resistant.
func add*(a: BigIntViewMut, b: BigIntViewAny, ctl: CTBool[Word]): CTBool[Word] =
  ## In-place optional addition `a += b` on views.
  ## `a` is only updated when `ctl` is "true", but every limb is
  ## read and written either way.
  ## Returns the carry out of the most significant limb,
  ## which is computed regardless of `ctl`.
  ##
  ## a and b MAY be the same buffer
  ## a and b MUST have the same announced bitlength (i.e. `bits` static parameters)
  checkMatchingBitlengths(a, b)

  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    # `result` carries the overflow bit of the previous limb
    let sum = a[idx] + b[idx] + Word(result)
    result = sum.isMsbSet()
    a[idx] = mux(ctl, sum and MaxWord, a[idx])
func sub*(a: BigIntViewMut, b: BigIntViewAny, ctl: CTBool[Word]): CTBool[Word] =
  ## In-place optional subtraction `a -= b` on views.
  ## `a` is only updated when `ctl` is "true", but every limb is
  ## read and written either way.
  ## Returns the borrow out of the most significant limb,
  ## which is computed regardless of `ctl`.
  ##
  ## a and b MAY be the same buffer
  ## a and b MUST have the same announced bitlength (i.e. `bits` static parameters)
  checkMatchingBitlengths(a, b)

  let limbCount = a.numLimbs()
  for idx in 0 ..< limbCount:
    # `result` carries the borrow bit of the previous limb
    let diff = a[idx] - b[idx] - Word(result)
    result = diff.isMsbSet()
    a[idx] = mux(ctl, diff and MaxWord, a[idx])
# ############################################################
#
# Modular BigInt
#
# ############################################################
# TODO: push boundsCheck off. They would be extremely costly.
func shlAddMod(a: BigIntViewMut, c: Word, M: BigIntViewConst) =
  ## Fused modular left-shift + add
  ## Shift input `a` by a word and add `c` modulo `M`
  ##
  ## With a word W = 2^WordBitSize and a modulus M
  ## Does a <- a * W + c (mod M)
  ##
  ## - `a` is updated in place
  ## - `c` is the word shifted in as least significant limb,
  ##   its most significant bit is assumed to be clear
  ## - `M` is the modulus, its announced most-significant bit MUST be set
  ##   (only a non-zero top limb is checked, in debug mode)
  checkValidModulus(M)

  let aLen = a.numLimbs()
  let mBits = bitSizeof(M)

  if mBits <= WordBitSize:
    # If M fits in a single limb
    var q: Word

    # (hi, lo) = a * 2^WordBitSize + c
    let hi = a[0] shr 1                     # physical word size - WordBitSize = 1
    let lo = (a[0] shl WordBitSize) or c    # Assumes most-significant bit in c is not set
    unsafeDiv2n1n(q, a[0], hi, lo, M[0])    # (hi, lo) mod M
    return

  else:
    ## Multiple limbs
    let hi = a[^1]                          # Save the high word to detect carries
    let R = mBits and WordBitSize           # R = internal bitlength mod physical word size

    var a0, a1, m0: Word
    if R == 0:                              # If the internal bitlength is a multiple of the physical word size
      a0 = a[^1]
      moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words
      a[0] = c                              # and replace the first one by c
      a1 = a[^1]
      m0 = M[^1]
    else:                                   # Else: need to deal with partial word shifts at the edge.
      a0 = ((a[^1] shl (WordBitSize-R)) or (a[^2] shr R)) and MaxWord
      moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof)
      a[0] = c
      a1 = ((a[^1] shl (WordBitSize-R)) or (a[^2] shr R)) and MaxWord
      m0 = ((M[^1] shl (WordBitSize-R)) or (M[^2] shr R)) and MaxWord

    # m0 has its high bit set. (a0, a1)/p0 fits in a limb.
    # Get a quotient q, at most we will be 2 iterations off
    # from the true quotient
    let
      a_hi = a0 shr 1                       # physical word size - WordBitSize = 1
      a_lo = (a0 shl WordBitSize) or a1
    var q, r: Word
    unsafeDiv2n1n(q, r, a_hi, a_lo, m0)     # Estimate quotient
    q = mux(                                # If n_hi == divisor
          a0 == m0, MaxWord,                # Quotient == MaxWord (0b0111...1111)
          mux(
            q.isZero, Zero,                 # elif q == 0, true quotient = 0
            q - One                         # else instead of being off by 0, 1 or 2
          )                                 # we return q-1 to be off by -1, 0 or 1
        )

    # Now substract a*W - q*p
    var carry = Zero
    var over_p = ctrue(Word)                # Track if quotient greater than the modulus

    for i in 0 ..< M.numLimbs():
      var qp_lo: Word

      block: # q*p
        var qp_hi: Word
        unsafeExtendedPrecMul(qp_hi, qp_lo, q, M[i]) # q * p
        # NOTE(review): if `qp_lo` is a full physical word, this addition
        # may wrap without being reflected in `qp_hi` - confirm against
        # the contract of unsafeExtendedPrecMul.
        qp_lo += carry                               # Add carry from previous limb
        carry = qp_hi shl 1 + qp_lo.isMsbSet.Word    # New carry
        qp_lo = qp_lo and MaxWord                    # Normalize to WordBitSize bits

      block: # a*W - q*p
        a[i] -= qp_lo
        carry += Word(a[i].isMsbSet)                 # Adjust if borrow
        a[i] = a[i] and MaxWord                      # Normalize to WordBitSize bits

      over_p = mux(
                a[i] == M[i], over_p,
                a[i] > M[i]
              )

    # Fix quotient, the true quotient is either q-1, q or q+1
    #
    # if carry < hi or carry == hi and over_p we must do "a -= p"
    # if carry > hi (negative result) we must do "a += p"
    let neg = carry > hi
    let tooBig = not neg and (over_p or (carry < hi))

    discard a.add(M, ctl = neg)
    discard a.sub(M, ctl = tooBig)
    return
func reduce*(r: BigIntViewMut, a: BigIntViewAny, M: BigIntViewConst) =
  ## Reduce `a` modulo `M` and store the result in `r`
  ##
  ## The announced most-significant bit of the modulus `M` MUST be set.
  ## The result `r` buffer size MUST be at least the size of `M` buffer;
  ## its internal bitlength is overwritten with the one of `M`.
  ##
  ## CT: Depends only on the bitlength of `a` and the modulus `M`
  # Note: for all cryptographic intents and purposes the modulus is known at compile-time
  # but we don't want to inline it as it would increase codesize, better have Nim
  # pass a pointer+length to a fixed session of the BSS.
  checkValidModulus(M)

  let aBits = bitSizeof(a)
  let mBits = bitSizeof(M)
  let aLen = a.numLimbs()

  r.setBitLength(bitSizeof(M))

  if aBits < mBits:
    # if a uses less bits than the modulus,
    # it is guaranteed < modulus.
    # This relies on the precondition that the modulus uses all declared bits
    copyMem(r[0].addr, a[0].unsafeAddr, aLen * sizeof(Word))
    for i in aLen ..< r.numLimbs():
      r[i] = Zero
  else:
    # a length is at least equal to the modulus.
    # We seed `r` with the (mLen - 1) most significant limbs of `a`,
    # i.e. limbs aOffset+1 ..< aLen, which is guaranteed < M,
    # and modular shift-left-add every remaining limb, from limb aOffset
    # down to limb 0, so that all limbs of `a` are consumed.
    let mLen = M.numLimbs()
    let aOffset = aLen - mLen

    copyMem(r[0].addr, a[aOffset+1].unsafeAddr, (mLen-1) * sizeof(Word))
    r[^1] = Zero
    for i in countdown(aOffset, 0):
      r.shlAddMod(a[i], M)

13
constantine/common.nim Normal file
View File

@ -0,0 +1,13 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Common configuration
template debug*(body: untyped): untyped =
  ## Compile `body` only when the `debugConstantine` symbol is defined
  ## (i.e. with "-d:debugConstantine"); otherwise `body` is dropped.
  when not defined(debugConstantine):
    discard
  else:
    body

View File

@ -12,7 +12,7 @@
import
endians,
./primitives, ./bigints
./primitives, ./bigints_raw
# ############################################################
#
@ -23,14 +23,16 @@ import
# TODO: tag/remove exceptions raised.
func fromRawUintLE(
T: type BigInt,
src: openarray[byte]): T =
dst: var BigInt,
src: openarray[byte]) =
## Parse an unsigned integer from its canonical
## little-endian unsigned representation
## And store it into a BigInt of size bits
##
## CT:
## Constant-Time:
## - no leaks
##
## Can work at compile-time
var
dst_idx = 0
@ -46,13 +48,33 @@ func fromRawUintLE(
# if full, dump
if acc_len >= WordBitSize:
result.limbs[dst_idx] = acc and MaxWord
dst.limbs[dst_idx] = acc and MaxWord
inc dst_idx
acc_len -= WordBitSize
acc = src_byte shr (8 - acc_len)
if dst_idx < result.limbs.len:
result.limbs[dst_idx] = acc
if dst_idx < dst.limbs.len:
dst.limbs[dst_idx] = acc
func fromRawUint*(
dst: var BigInt,
src: openarray[byte],
srcEndianness: static Endianness) {.inline.}=
## Parse an unsigned integer from its canonical
## big-endian or little-endian unsigned representation
## And store it into a BigInt of size `bits`
##
## Constant-Time:
## - no leaks
##
## Can work at compile-time to embed curve moduli
## from a canonical integer representation
when srcEndianness == littleEndian:
dst.fromRawUintLE(src)
else:
{.error: "Not implemented at the moment".}
dst.setInternalBitLength()
func fromRawUint*(
T: type BigInt,
@ -62,20 +84,19 @@ func fromRawUint*(
## big-endian or little-endian unsigned representation
## And store it into a BigInt of size `bits`
##
## CT:
## Constant-Time:
## - no leaks
when srcEndianness == littleEndian:
fromRawUintLE(T, src)
else:
{.error: "Not implemented at the moment".}
##
## Can work at compile-time to embed curve moduli
## from a canonical integer representation
result.fromRawUint(src, srcEndianness)
func fromUint*(
T: type BigInt,
src: SomeUnsignedInt): T =
## Parse a regular unsigned integer
## and store it into a BigInt of size `bits`
fromRawUint(T, cast[array[sizeof(src), byte]](src), cpuEndian)
result.fromRawUint(cast[array[sizeof(src), byte]](src), cpuEndian)
# ############################################################
#
@ -128,7 +149,6 @@ func dumpRawUintLE(
if tail >= sizeof(Word):
# Unrolled copy
# debugecho src.repr
littleEndianXX(dst[dst_idx].addr, lo.unsafeAddr)
dst_idx += sizeof(Word)
tail -= sizeof(Word)
@ -293,6 +313,8 @@ func fromHex*(T: type BigInt, s: string): T =
##
## This API is intended for configuration and debugging purposes
## Do not pass secret or private data to it.
##
## Can work at compile-time to declare curve moduli from their hex strings
# 1. Convert to canonical uint
const canonLen = (T.bits + 8 - 1) div 8
@ -300,7 +322,7 @@ func fromHex*(T: type BigInt, s: string): T =
hexToPaddedByteArray(s, bytes, littleEndian)
# 2. Convert canonical uint to Big Int
result = T.fromRawUint(bytes, littleEndian)
result.fromRawUint(bytes, littleEndian)
func dumpHex*(big: BigInt, order: static Endianness = bigEndian): string =
## Stringify an int to hex.

View File

@ -116,6 +116,21 @@ func isMsbSet*[T: Ct](x: T): CTBool[T] =
const msb_pos = T.sizeof * 8 - 1
result = (CTBool[T])(x shr msb_pos)
func log2*(x: uint32): uint32 =
  ## Integer base-2 logarithm (index of the highest set bit) of a value
  ## that fits in 32 bits, computed with a De Bruijn multiplication.
  ##
  ## The body is branch-free on `x`: a fixed sequence of shifts/ORs,
  ## one multiplication and one table lookup, so it is usable both at
  ## compile-time and where constant-time behaviour is required.
  ## An input of 0 yields 0 (table slot 0).
  # Reference:
  # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn
  const deBruijnBitPos: array[32, uint8] = [
    0'u8, 9, 1, 10, 13, 21, 2, 29,
    11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7,
    19, 27, 23, 6, 26, 5, 4, 31]

  # Smear the highest set bit into every lower position, which rounds
  # the value to "one less than a power of 2".
  var smeared = x
  for shift in [1, 2, 4, 8, 16]:
    smeared = smeared or (smeared shr shift)

  # The wrapping 32-bit product leaves a unique 5-bit pattern in the
  # top bits for each possible smeared value; use it as the table index.
  let slot = (smeared * 0x07C4ACDD'u32) shr 27
  uint32(deBruijnBitPos[slot])
# ############################################################
#
# Hardened Boolean primitives

View File

@ -12,7 +12,7 @@
#
# ############################################################
import ../primitives
import ./primitives
func asm_x86_64_extMul(hi, lo: var uint64, a, b: uint64) {.inline.}=
## Extended precision multiplication uint64 * uint64 --> uint128
@ -161,34 +161,34 @@ when isMainModule:
doAssert q == 6148914691236517205'u64
doAssert r == 1
block: # TODO - support Quotient that doesn't fit in the result
# The usual way with normalization by the bitSize difference
# is fundamentally non constant-time
# it is probable that division is not constant-time at the hardware level as well
# as it throws sigfpe when the quotient doesn't fit in the result size
# block: # TODO - support Quotient that doesn't fit in the result
# # The usual way with normalization by the bitSize difference
# # is fundamentally non constant-time
# # it is probable that division is not constant-time at the hardware level as well
# # as it throws sigfpe when the quotient doesn't fit in the result size
var q, r: uint64
# var q, r: uint64
let n_hi = 1'u64
let n_lo = 0'u64
let d = 1'u64
# let n_hi = 1'u64
# let n_lo = 0'u64
# let d = 1'u64
asm_x86_64_div2n1n(q, r, n_hi, n_lo, d)
# asm_x86_64_div2n1n(q, r, n_hi, n_lo, d)
echo "quotient: ", q
echo "remainder: ", r
# echo "quotient: ", q
# echo "remainder: ", r
block:
var q, r: uint64
# block:
# var q, r: uint64
let n_hi = 4186590388502004879'u64
let n_lo = 17852795547484522084'u64
let d = 327340459940166448'u64
# let n_hi = 4186590388502004879'u64
# let n_lo = 17852795547484522084'u64
# let d = 327340459940166448'u64
asm_x86_64_div2n1n(q, r, n_hi, n_lo, d)
# asm_x86_64_div2n1n(q, r, n_hi, n_lo, d)
echo "quotient: ", q
echo "remainder: ", r
# echo "quotient: ", q
# echo "remainder: ", r
# ##############################################################
#

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import unittest, random, strutils,
../constantine/[io, bigints, primitives]
../constantine/[io, bigints_public, bigints_raw, primitives]
suite "isZero":
test "isZero for zero":
@ -128,12 +128,12 @@ suite "Modular operations - small modulus":
# Vectors taken from Stint - https://github.com/status-im/nim-stint
test "100 mod 13":
let a = BigInt[32].fromUint(100'u32)
let m = BigInt[8].fromUint(13'u8)
let m = BigInt[4].fromUint(13'u8)
var r: BigInt[8]
var r: BigInt[4]
r.reduce(a, m)
check:
bool(r == BigInt[8].fromUint(100'u8 mod 13))
bool(r == BigInt[4].fromUint(100'u8 mod 13))
test "2^64 mod 3":
let a = BigInt[65].fromHex("0x1_00000000_00000000")
@ -160,29 +160,23 @@ suite "Modular operations - small modulus - Stint specific failures highlighted
let v = 174261910798982'u64
let a = BigInt[64].fromUint(u)
let m = BigInt[49].fromUint(v)
let m = BigInt[48].fromUint(v)
var r: BigInt[49]
var r: BigInt[48]
r.reduce(a, m)
# Copy the result in a conveniently sized buffer
var rr: BigInt[49]
copyLimbs(rr, 0, r, 0, r.limbs.len)
check:
bool(rr == BigInt[49].fromUint(u mod v))
bool(r == BigInt[48].fromUint(u mod v))
test "Modulo: 15080397990160655 mod 600432699691":
let u = 15080397990160655'u64
let v = 600432699691'u64
let a = BigInt[64].fromUint(u)
let m = BigInt[41].fromUint(v)
let m = BigInt[40].fromUint(v)
var r: BigInt[41]
var r: BigInt[40]
r.reduce(a, m)
# Copy the result in a conveniently sized buffer
var rr: BigInt[41]
copyLimbs(rr, 0, r, 0, r.limbs.len)
check:
bool(rr == BigInt[41].fromUint(u mod v))
bool(r == BigInt[40].fromUint(u mod v))

View File

@ -0,0 +1 @@
-d:debugConstantine

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import unittest, random,
../constantine/[io, bigints]
../constantine/[io, bigints_raw]
randomize(0xDEADBEEF) # Random seed for reproducibility
type T = BaseType