Skeleton of modular exponentiation

This commit is contained in:
Mamy André-Ratsimbazafy 2020-02-22 16:37:31 +01:00
parent 236047767f
commit 4b65d0d723
No known key found for this signature in database
GPG Key ID: 7B88AD1FE79492E1
10 changed files with 332 additions and 140 deletions

View File

@ -6,7 +6,7 @@ license = "MIT or Apache License 2.0"
srcDir = "src"
### Dependencies
requires "nim >= 1.0.6"
requires "nim >= 1.1.0"
### Helper functions
proc test(fakeCurves: string, path: string, lang = "c") =

View File

@ -70,6 +70,9 @@ else:
curve P256: # secp256r1 / NIST P-256
bitsize: 256
modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"
curve BLS12_381:
bitsize: 381
modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"
# ############################################################
#
@ -103,22 +106,30 @@ macro genMontyMagics(T: typed): untyped =
let E = T.getImpl[2]
for i in 1 ..< E.len:
let curve = E[i]
# const MyCurve_R2modP = r2mod(MyCurve_Modulus)
result.add newConstStmt(
ident($curve & "_R2modP"), newCall(
bindSym"r2mod",
# The curve parser creates modulus
# under symbol "MyCurve_Modulus"
nnkDotExpr.newTree(
bindSym($curve & "_Modulus"),
ident"mres"
)
)
)
# const MyCurve_NegInvModWord = negInvModWord(MyCurve_Modulus)
result.add newConstStmt(
ident($curve & "_NegInvModWord"), newCall(
bindSym"negInvModWord",
# The curve parser creates modulus
# under symbol "MyCurve_Modulus"
nnkDotExpr.newTree(
bindSym($curve & "_Modulus"),
ident"mres"
)
)
)
# const MyCurve_montyOne = montyOne(MyCurve_Modulus)
result.add newConstStmt(
ident($curve & "_MontyOne"), newCall(
bindSym"montyOne",
nnkDotExpr.newTree(
bindSym($curve & "_Modulus"),
ident"mres"
@ -138,6 +149,10 @@ macro getNegInvModWord*(C: static Curve): untyped =
## Get the Montgomery "-1/P[0] mod 2^WordBitSize" constant associated to a curve field modulus
result = bindSym($C & "_NegInvModWord")
macro getMontyOne*(C: static Curve): untyped =
## Get one in Montgomery representation (i.e. R mod P)
result = bindSym($C & "_MontyOne")
# ############################################################
#
# Debug info printed at compile-time

View File

@ -1,78 +0,0 @@
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# From https://github.com/status-im/nim-stew/blob/master/stew/endians2.nim
#
# Nim standard library "endians" work with pointers which doesn't work at compile-time
# For auditing purpose and to ensure constant-time safety
# it's better not to introduce a dependency for such a small piece of code
type
SomeEndianInt* = uint8|uint16|uint32|uint64
## types that we support endian conversions for - uint8 is there for
## for syntactic / generic convenience. Other candidates:
## * int/uint - uncertain size, thus less suitable for binary interop
## * intX - over and underflow protection in nim might easily cause issues -
## need to consider before adding here
when defined(gcc) or defined(llvm_gcc) or defined(clang):
func swapBytesBuiltin(x: uint8): uint8 = x
func swapBytesBuiltin(x: uint16): uint16 {.
importc: "__builtin_bswap16", nodecl.}
func swapBytesBuiltin(x: uint32): uint32 {.
importc: "__builtin_bswap32", nodecl.}
func swapBytesBuiltin(x: uint64): uint64 {.
importc: "__builtin_bswap64", nodecl.}
elif defined(icc):
func swapBytesBuiltin(x: uint8): uint8 = x
func swapBytesBuiltin(a: uint16): uint16 {.importc: "_bswap16", nodecl.}
func swapBytesBuiltin(a: uint32): uint32 {.importc: "_bswap", nodec.}
func swapBytesBuiltin(a: uint64): uint64 {.importc: "_bswap64", nodecl.}
elif defined(vcc):
func swapBytesBuiltin(x: uint8): uint8 = x
proc builtin_bswap16(a: uint16): uint16 {.
importc: "_byteswap_ushort", cdecl, header: "<intrin.h>".}
proc builtin_bswap32(a: uint32): uint32 {.
importc: "_byteswap_ulong", cdecl, header: "<intrin.h>".}
proc builtin_bswap64(a: uint64): uint64 {.
importc: "_byteswap_uint64", cdecl, header: "<intrin.h>".}
func swapBytesNim(x: uint8): uint8 = x
func swapBytesNim(x: uint16): uint16 = (x shl 8) or (x shr 8)
func swapBytesNim(x: uint32): uint32 =
let v = (x shl 16) or (x shr 16)
((v shl 8) and 0xff00ff00'u32) or ((v shr 8) and 0x00ff00ff'u32)
func swapBytesNim(x: uint64): uint64 =
var v = (x shl 32) or (x shr 32)
v =
((v and 0x0000ffff0000ffff'u64) shl 16) or
((v and 0xffff0000ffff0000'u64) shr 16)
((v and 0x00ff00ff00ff00ff'u64) shl 8) or
((v and 0xff00ff00ff00ff00'u64) shr 8)
template swapBytes*[T: SomeEndianInt](x: T): T =
## Reverse the bytes within an integer, such that the most significant byte
## changes place with the least significant one, etc
##
## Example:
## doAssert swapBytes(0x01234567'u32) == 0x67452301
when nimvm:
swapBytesNim(x)
else:
when defined(swapBytesBuiltin):
swapBytesBuiltin(x)
else:
swapBytesNim(x)

View File

@ -11,7 +11,6 @@
# - Burning memory to ensure secrets are not left after dealloc.
import
./endians2,
../primitives/constant_time,
../math/bigints_checked,
../config/common
@ -150,15 +149,20 @@ func fromUint*(
# Serialising from internal representation to canonical format
#
# ############################################################
import strutils
template blobFrom*(dst: var openArray[byte], src: SomeEndianInt, startIdx: int, endian: static Endianness) =
template blobFrom(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) =
## Write an integer into a raw binary blob
## Swapping endianness if needed
let s = when endian == cpuEndian: src
else: swapBytes(src)
const bits = sizeof(src) * 8
when endian == cpuEndian:
for i in 0 ..< sizeof(src):
dst[startIdx+i] = byte((s shr (i * 8)))
dst[startIdx+i] = byte((src shr (i * 8)))
else:
for i in 0 ..< sizeof(src):
dst[startIdx+sizeof(src)-1-i] = byte((src shr (i * 8)))
func exportRawUintLE(
dst: var openarray[byte],
@ -218,7 +222,6 @@ func exportRawUintBE(
var
src_idx = 0
dst_idx = dst.len - 1
acc: BaseType = 0
acc_len = 0
@ -240,9 +243,8 @@ func exportRawUintBE(
if tail >= sizeof(Word):
# Unrolled copy
dst.blobFrom(src = lo, dst_idx, littleEndian)
dst_idx -= sizeof(Word)
tail -= sizeof(Word)
dst.blobFrom(src = lo, tail, bigEndian)
else:
# Process the tail and exit
when cpuEndian == littleEndian:
@ -250,11 +252,11 @@ func exportRawUintBE(
# we can just copy each byte
# tail is inclusive
for i in 0 ..< tail:
dst[dst_idx-i] = byte(lo shr (i*8))
dst[tail-i] = byte(lo shr (i*8))
else: # TODO check this
# We need to copy from the end
for i in 0 ..< tail:
dst[dst_idx-i] = byte(lo shr ((tail-i)*8))
dst[tail-i] = byte(lo shr ((tail-i)*8))
return
func exportRawUint*(

View File

@ -157,3 +157,31 @@ func montyMul*[mBits](r: var BigInt[mBits], a, b, M: BigInt[mBits], negInvModWor
## This resets r to zero before processing. Use {.noInit.}
## to avoid duplicating with Nim zero-init policy
montyMul(r.view, a.view, b.view, M.view, Word(negInvModWord))
import stew/byteutils
func montyPow*[mBits, eBits: static int](
a: var BigInt[mBits], exponent: BigInt[eBits],
M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int) =
## Compute a <- a^exponent (mod M)
## ``a`` in the Montgomery domain
## ``exponent`` is any BigInt, in the canonical domain
##
## This uses fixed window optimization
## A window size in the range [1, 5] must be chosen
##
## This is constant-time: the window optimization does
## not reveal the exponent bits or hamming weight
mixin exportRawUint # exported in io_bigints which depends on this module ...
var expBE {.noInit.}: array[(ebits + 7) div 8, byte]
expBE.exportRawUint(exponent, bigEndian)
const scratchLen = if windowSize == 1: 2
else: (1 shl windowSize) + 1
var scratchSpace {.noInit.}: array[scratchLen, BigInt[mBits]]
var scratchPtrs {.noInit.}: array[scratchLen, BigIntViewMut]
for i in 0 ..< scratchLen:
scratchPtrs[i] = scratchSpace[i].view()
montyPow(a.view, expBE, M.view, one.view, Word(negInvModWord), scratchPtrs)

View File

@ -54,7 +54,6 @@ import
../primitives/extended_precision,
../config/common
from sugar import distinctBase
from bitops import countSetBits # only used on modulus and public values
# ############################################################
#
@ -117,16 +116,6 @@ type
## Mutable view into a BigInt
BigIntViewAny* = BigIntViewConst or BigIntViewMut
BigIntLeakedConst* = distinct BigIntViewConst
## BigInt which information will be leaked
## besides the announced bit length
## This is only suitable for values
## that are publicly known
SensitiveInt* = distinct int
## Integer that contains sensitive information
## and will not be manipulated in a constant-time manner
# No exceptions allowed
{.push raises: [].}
@ -146,6 +135,7 @@ template `[]=`*(v: BigIntViewMut, limbIdx: int, val: Word) =
distinctBase(type v)(v).limbs[limbIdx] = val
template bitSizeof(v: BigIntViewAny): uint32 =
bind BigIntView
distinctBase(type v)(v).bitLength
const divShiftor = log2(WordPhysBitSize)
@ -203,6 +193,10 @@ template checkWordShift(k: int) =
debug:
assert k <= WordBitSize, "Internal Error: the shift must be less than the word bit size"
template checkPowScratchSpaceLen(len: int) =
## Checks that there is a minimum of scratchspace to hold the temporaries
debug:
assert len >= 2, "Internal Error: the scratchspace for powmod should be equal or greater than 2"
debug:
func `$`*(a: BigIntViewAny): string =
@ -222,9 +216,6 @@ debug:
#
# ############################################################
template mask*(w: Word): Word =
w and MaxWord
func isZero*(a: BigIntViewAny): CTBool[Word] =
## Returns true if a big int is equal to zero
var accum: Word
@ -559,39 +550,147 @@ func montyResidue*(
montyMul(r, a, r2ModN, N, negInvModWord)
# ############################################################
# Montgomery Modular Exponentiation
# ------------------------------------------
# We use fixed-window based exponentiation
# that is constant-time: i.e. the number of multiplications
# does not depend on the number of set bits in the exponents
# those are always done and conditionally copied.
#
# Sensitive Primitives
# TODO: analyze cost difference with naive exponentiation
# with n being the number of words to represent a number in Fp
# and k the window-size
# - we always multiply even for unused multiplications
# - conditional copy only save a small fraction of time
# (multiplication O(n²), cmov O(n), doing nothing i.e. non constant-time O(n))
# - Table lookup is O(kn) copy time since we need to access the whole table to
# defeat cache attacks. Without windows, we don't have table lookups at all.
#
# ############################################################
# Warning: Primitives that expose bits of information
# due to non-constant time.
# Proper usage is enforced by compiler.
# Only apply explicitly to public data like the field modulus
# The exponent MUST NOT be private data (until audited otherwise)
# - Power attack on RSA, https://www.di.ens.fr/~fouque/pub/ches06.pdf
# - Flush-and-reload on Sliding window exponentiation: https://tutcris.tut.fi/portal/files/8966761/p1639_pereida_garcia.pdf
# - Sliding right into disaster, https://eprint.iacr.org/2017/627.pdf
# - Fixed window leak: https://www.scirp.org/pdf/JCC_2019102810331929.pdf
# - Constructing sliding-windows leak, https://easychair.org/publications/open/fBNC
#
# For pairing curves, this is the case since exponentiation is only
# used for inversion via the Little Fermat theorem.
# For RSA, some exponentiations uses private exponents.
#
# Note:
# - Implementation closely follows Thomas Pornin's BearSSL
# - Apache Milagro Crypto has an alternative implementation
# that is more straightforward however:
# - the exponent hamming weight is used as loop bounds
# - the base^k is stored at each index of a temp table of size k
# - the base^k to use is indexed by the hamming weight
# of the exponent, leaking this to cache attacks
# - in contrast BearSSL touches the whole table to
# hide the actual selection
#
# Directly using the Hamming weight would probably
# significantly improve pairing-friendly curves as
# they are chosen for their low Hamming-Weight (see BLS12-381 x factor)
# --> Expose an exponent-leaky powMod?
# If so, create distinct type for leaked bits and BigInt
# so that sensitive data use is compiler-checked
template bitSizeof(v: BigIntLeakedConst): uint32 =
distinctBase(type v)(v).bitLength
func getWindowLen(bufLen: int): uint =
## Compute the maximum window size that fits in the scratchspace buffer
checkPowScratchSpaceLen(bufLen)
result = 5
while (1 shl result) + 1 > bufLen:
dec result
template numLimbs*(v: BigIntLeakedConst): int =
## Compute the number of limbs from
## the **internal** bitlength
(bitSizeof(v).int + WordPhysBitSize - 1) shr divShiftor
func montyPow*(
a: BigIntViewMut,
exponent: openarray[byte],
M, one: BigIntViewConst,
negInvModWord: Word,
scratchspace: openarray[BigIntViewMut]
) =
## Modular exponentiation r = a^exponent mod M
## in the montgomery domain
##
## This uses fixed-window optimization if possible
##
## - On input ``a`` is the base, on ``output`` a = a^exponent (mod M)
## ``a`` is in the Montgomery domain
## - ``exponent`` is the exponent in big-endian canonical format (octet-string)
## Use ``exportRawUint`` for conversion
## - ``M`` is the modulus
## - ``one`` is 1 (mod M) in montgomery representation
## - ``negInvModWord`` is the montgomery magic constant "-1/M[0] mod 2^WordBitSize"
## - ``scratchspace`` with k the window bitsize of size up to 5
## This is a buffer that can hold between 2^k + 1 big-ints
## A window of of 1-bit (no window optimization) requires only 2 big-ints
##
## Note that the best window size require benchmarking and is a tradeoff between
## - performance
## - stack usage
## - precomputation
##
## For example BLS12-381 window size of 5 is 30% faster than no window,
## but windows of size 2, 3, 4 bring no performance benefit, only increased stack space.
## A window of size 5 requires (2^5 + 1)*(381 + 7)/8 = 33 * 48 bytes = 1584 bytes
## of scratchspace (on the stack).
template `[]`*(v: BigIntLeakedConst, limbIdx: int): SensitiveInt =
SensitiveInt distinctBase(type v)(v).limbs[limbIdx]
let window = scratchspace.len.getWindowLen()
let bigIntSize = a.numLimbs() * sizeof(Word) + sizeof(BigIntView.bitLength)
func popcount(a: BigIntLeakedConst): SensitiveInt =
## Count the number of bits set in an integer
## also called popcount or Hamming Weight
## ⚠️⚠️⚠️: This is only intended for use on public data
var accum: int
for i in 0 ..< a.numLimbs:
accum += countSetBits(a[i].BaseType)
return SensitiveInt accum
# Precompute window content, special case for window = 1
# (i.e scratchspace has only space for 2 temporaries)
# The content scratchspace[2+k] is set at a^k
# with scratchspace[0] untouched
if window == 1:
copyMem(pointer scratchspace[1], pointer a, bigIntSize)
else:
copyMem(pointer scratchspace[2], pointer a, bigIntSize)
for k in 2 ..< 1 shl window:
scratchspace[k+1].montyMul(scratchspace[k], a, M, negInvModWord)
func lastBits(a: BigIntLeakedConst, k: int): SensitiveInt =
## Returns the last bits of an integer
## k MUST be less than the base word size (2^31 or 2^63)
checkWordShift(k)
let mask = BaseType((1 shl k) - 1)
return SensitiveInt(a[0].BaseType and mask)
scratchspace[1].setBitLength(bitSizeof(M))
# Set a to one
copyMem(pointer a, pointer one, bigIntSize)
# We process bits with from most to least significant.
# At each loop iteration with have acc_len bits in acc.
# To maintain constant-time the number of iterations
# or the number of operations or memory accesses should be the same
# regardless of acc & acc_len
var
acc, acc_len: uint
e = 0
while acc_len > 0 or e < exponent.len:
# Get the next bits
var k = window
if acc_len < window:
if e < exponent.len:
acc = (acc shl 8) or exponent[e].uint
inc e
acc_len += 8
else: # Drained all exponent bits
k = acc_len
let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
acc_len -= k
# We have k bits and can do k squaring
for i in 0 ..< k:
scratchspace[0].montyMul(a, a, M, negInvModWord)
copyMem(pointer a, pointer scratchspace[0], bigIntSize)
# Window lookup: we set scratchspace[1] to the lookup value.
# If the window length is 1, then it's already set.
if window > 1:
# otherwise we need a constant-time lookup
# in particular we need the same memory accesses, we can't
# just index the openarray with the bits to avoid cache attacks.
for i in 1 ..< 1 shl k:
let ctl = Word(i) == Word(bits)
scratchspace[1].cmov(scratchspace[1+i], ctl)
# Multiply with the looked-up value
# we keep the product only if the exponent bits are not all zero
scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord)
a.cmov(scratchspace[0], Word(bits) != Zero)

View File

@ -25,6 +25,8 @@ import
../config/[common, curves],
./bigints_checked
from ../io/io_bigints import exportRawUint # for "pow"
# type
# `Fq`*[C: static Curve] = object
# ## All operations on a field are modulo P
@ -126,3 +128,10 @@ func `*`*(a, b: Fq): Fq {.noInit.} =
## routine will zero init internally the result.
result.mres.setInternalBitLength()
result.mres.montyMul(a.mres, b.mres, Fq.C.Mod.mres, Fq.C.getNegInvModWord())
func pow*(a: var Fq, exponent: BigInt) =
## Exponentiation over Fq
## ``a``: a field element to be exponentiated
## ``exponent``: a big integer
const windowSize = 5 # TODO: find best window size for each curves
a.mres.montyPow(exponent, Fq.C.Mod.mres, Fq.C.getMontyOne(), Fq.C.getNegInvModWord(), windowSize)

View File

@ -43,6 +43,10 @@ func double(a: var BigInt): bool =
func sub(a: var BigInt, b: BigInt, ctl: bool): bool =
## In-place optional substraction
##
## It is NOT constant-time and is intended
## only for compile-time precomputation
## of non-secret data.
for i in 0 ..< a.limbs.len:
let new_a = BaseType(a.limbs[i]) - BaseType(b.limbs[i]) - BaseType(result)
result = new_a.isMsbSet()
@ -132,9 +136,11 @@ func negInvModWord*(M: BigInt): BaseType =
# Our actual word size is 2^63 not 2^64
result = result and BaseType(MaxWord)
func r2mod*(M: BigInt): BigInt =
func r_powmod(n: static int, M: BigInt): BigInt =
## Returns the Montgomery domain magic constant for the input modulus:
##
## R ≡ R (mod M) with R = (2^WordBitSize)^numWords
## or
## R² ≡ R² (mod M) with R = (2^WordBitSize)^numWords
##
## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
@ -162,8 +168,22 @@ func r2mod*(M: BigInt): BigInt =
w = M.limbs.len
msb = M.bits-1 - WordBitSize * (w - 1)
start = (w-1)*WordBitSize + msb
stop = 2*WordBitSize*w
stop = n*WordBitSize*w
result.limbs[^1] = Word(1 shl msb) # C0 = 2^(wn-1), the power of 2 immediatly less than the modulus
for _ in start ..< stop:
result.doubleMod(M)
func r2mod*(M: BigInt): BigInt =
## Returns the Montgomery domain magic constant for the input modulus:
##
## R² ≡ R² (mod M) with R = (2^WordBitSize)^numWords
##
## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
## R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
r_powmod(2, M)
func montyOne*(M: BigInt): BigInt =
## Returns "1 (mod M)" in the Montgomery domain.
## This is equivalent to R (mod M) in the natural domain
r_powmod(1, M)

View File

@ -0,0 +1,95 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import unittest, random,
../constantine/math/[bigints_checked, finite_fields],
../constantine/io/io_fields,
../constantine/config/curves
import ../constantine/io/io_bigints
static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option"
proc main() =
suite "Modular exponentiation over finite fields":
test "n² mod 101":
let exponent = BigInt[64].fromUint(2'u64)
block: # 1^2 mod 101
var n, expected: Fq[Fake101]
n.fromUint(1'u32)
expected = n
n.pow(exponent)
var n_bytes: array[8, byte]
n_bytes.exportRawUint(n, cpuEndian)
let r = cast[uint64](n_bytes)
check:
# Check equality in the Montgomery domain
bool(n == expected)
# Check equality when converting back to natural domain
1'u64 == r
block: # 2^2 mod 101
var n, expected: Fq[Fake101]
n.fromUint(2'u32)
expected = n
n.pow(exponent)
var n_bytes: array[8, byte]
n_bytes.exportRawUint(n, cpuEndian)
let r = cast[uint64](n_bytes)
check:
# Check equality in the Montgomery domain
bool(n == expected)
# Check equality when converting back to natural domain
4'u64 == r
block: # 10^2 mod 101
var n, expected: Fq[Fake101]
n.fromUint(10'u32)
expected = n
n.pow(exponent)
var n_bytes: array[8, byte]
n_bytes.exportRawUint(n, cpuEndian)
let r = cast[uint64](n_bytes)
check:
# Check equality in the Montgomery domain
bool(n == expected)
# Check equality when converting back to natural domain
100'u64 == r
block: # 11^2 mod 101
var n, expected: Fq[Fake101]
n.fromUint(10'u32)
expected = n
n.pow(exponent)
var n_bytes: array[8, byte]
n_bytes.exportRawUint(n, cpuEndian)
let r = cast[uint64](n_bytes)
check:
# Check equality in the Montgomery domain
bool(n == expected)
# Check equality when converting back to natural domain
20'u64 == r
main()

View File

@ -0,0 +1,2 @@
-d:testingCurves
-d:debugConstantine