From 4970572393b9d5c04deec27cd257d419d94f7220 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mamy=20Andr=C3=A9-Ratsimbazafy?= <mamy_github@numforge.co>
Date: Sat, 15 Feb 2020 19:22:40 +0100
Subject: [PATCH] Introduce alternate conversion to Montgomery Residue based on
 Montgomery Multiplication

---
 constantine/io/io_fields.nim             |   8 +-
 constantine/math/bigints_checked.nim     |  13 +-
 constantine/math/bigints_raw.nim         | 157 ++++++++++++++---------
 constantine/math/finite_fields.nim       |   7 +-
 constantine/math/precomputed.nim         |  95 +++++++++++++-
 constantine/primitives/constant_time.nim |   2 +-
 6 files changed, 212 insertions(+), 70 deletions(-)

diff --git a/constantine/io/io_fields.nim b/constantine/io/io_fields.nim
index 2434848..cf22c6b 100644
--- a/constantine/io/io_fields.nim
+++ b/constantine/io/io_fields.nim
@@ -9,7 +9,9 @@
 import
   ./io_bigints,
   ../config/curves,
-  ../math/[bigints_checked, finite_fields]
+  ../math/[bigints_checked, finite_fields],
+  # TODO: should be a const/proc in curves.nim
+  ../math/precomputed
 
 # ############################################################
 #
@@ -21,8 +23,8 @@ func fromUint*(dst: var Fq,
                src: SomeUnsignedInt) =
   ## Parse a regular unsigned integer
   ## and store it into a BigInt of size `bits`
-  dst.mres.fromRawUint(cast[array[sizeof(src), byte]](src), cpuEndian)
-  dst.mres.unsafeMontyResidue(Fq.C.Mod.mres)
+  let raw = (type dst.mres).fromRawUint(cast[array[sizeof(src), byte]](src), cpuEndian)
+  dst.mres.unsafeMontyResidue(raw, Fq.C.Mod.mres, r2mod(Fq.C.Mod.mres), neginvModWord(Fq.C.Mod.mres))
 
 func serializeRawUint*(dst: var openarray[byte],
                        src: Fq,
diff --git a/constantine/math/bigints_checked.nim b/constantine/math/bigints_checked.nim
index 57e5e11..44383a0 100644
--- a/constantine/math/bigints_checked.nim
+++ b/constantine/math/bigints_checked.nim
@@ -111,16 +111,23 @@ func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBi
   # pass a pointer+length to a fixed session of the BSS.
   reduce(r.view, a.view, M.view)
 
-func unsafeMontyResidue*[mBits](mres: var BigInt[mBits], N: BigInt[mBits]) =
+func unsafeMontyResidue*[mBits](mres: var BigInt[mBits], a, N, r2modN: BigInt[mBits], negInvModWord: static BaseType) =
   ## Convert a BigInt from its natural representation
   ## to the Montgomery n-residue form
   ##
-  ## `mres` is modified in-place
+  ## `mres` is overwritten. It's bitlength must be properly set before calling this procedure.
   ##
   ## Caller must take care of properly switching between
   ## the natural and montgomery domain.
   ## Nesting Montgomery form is possible by applying this function twice.
-  montyResidue(mres.view, N.view)
+  # TODO: benchmark
+  when true:
+    # Montgomery multiplication based
+    montyResidue(mres.view, a.view, N.view, r2modN.view, Word(negInvModWord))
+  else:
+    # Modular left shift based
+    mres = a
+    montyResidue(mres.view, N.view)
 
 func unsafeRedc*[mBits](mres: var BigInt[mBits], N: BigInt[mBits], negInvModWord: static BaseType) =
   ## Convert a BigInt from its Montgomery n-residue form
diff --git a/constantine/math/bigints_raw.nim b/constantine/math/bigints_raw.nim
index 3ee6c8b..aa8b8d0 100644
--- a/constantine/math/bigints_raw.nim
+++ b/constantine/math/bigints_raw.nim
@@ -393,31 +393,61 @@ func reduce*(r: BigIntViewMut, a: BigIntViewAny, M: BigIntViewConst) =
 #
 # ############################################################
 
-func montyResidue*(a: BigIntViewMut, N: BigIntViewConst) =
-  ## Transform a bigint ``a`` from it's natural representation (mod N)
-  ## to a the Montgomery n-residue representation
-  ##
-  ## with W = N.numLimbs()
-  ## and R = (2^WordBitSize)^W
-  ##
-  ## Does "a * R (mod N)"
-  ##
-  ## `a`: The source BigInt in the natural representation. `a` in [0, N) range
-  ## `N`: The field modulus. N must be odd.
-  ##
-  ## Important: `a` is overwritten
-  # Reference: https://eprint.iacr.org/2017/1057.pdf
-  checkValidModulus(N)
-  checkOddModulus(N)
-  checkMatchingBitlengths(a, N)
-
-  let nLen = N.numLimbs()
-  for i in countdown(nLen, 1):
-    a.shlAddMod(Zero, N)
-
 template wordMul(a, b: Word): Word =
   (a * b) and MaxWord
 
+func montyMul*(
+       r: BigIntViewMut, a, b: distinct BigIntViewAny,
+       M: BigIntViewConst, negInvModWord: Word) =
+  ## Compute r <- a*b (mod M) in the Montgomery domain
+  ## `negInvModWord` = -1/M (mod Word). Our words are 2^31 or 2^63
+  ##
+  ## This resets r to zero before processing. Use {.noInit.}
+  ## to avoid duplicating with Nim zero-init policy
+  ## The result `r` buffer size MUST be at least the size of `M` buffer
+  ##
+  ##
+  ## Assuming 63-bit wors, the magic constant should be:
+  ##
+  ## - µ ≡ -1/M[0] (mod 2^63) for a general multiplication
+  ##   This can be precomputed with `negInvModWord`
+  ## - 1 for conversion from Montgomery to canonical representation
+  ##   The library implements a faster `redc` primitive for that use-case
+  ## - R^2 (mod M) for conversion from canonical to Montgomery representation
+  ##
+  # i.e. c'R <- a'R b'R * R^-1 (mod M) in the natural domain
+  # as in the Montgomery domain all numbers are scaled by R
+
+  checkValidModulus(M)
+  checkOddModulus(M)
+  checkMatchingBitlengths(a, M)
+  checkMatchingBitlengths(b, M)
+
+  let nLen = M.numLimbs()
+  r.setBitLength(bitSizeof(M))
+  setZero(r)
+
+  var r_hi = Zero   # represents the high word that is used in intermediate computation before reduction mod M
+  for i in 0 ..< nLen:
+
+    let zi = (r[0] + wordMul(a[i], b[0])).wordMul(negInvModWord)
+    var carry = Zero
+
+    for j in 0 ..< nLen:
+      let z = DoubleWord(r[j]) + unsafeExtPrecMul(a[i], b[j]) +
+              unsafeExtPrecMul(zi, M[j]) + DoubleWord(carry)
+      carry = Word(z shr WordBitSize)
+      if j != 0:
+        r[j-1] = Word(z) and MaxWord
+
+    r_hi += carry
+    r[^1] = r_hi and MaxWord
+    r_hi = r_hi shr WordBitSize
+
+  # If the extra word is not zero or if r-M does not borrow (i.e. r > M)
+  # Then substract M
+  discard r.sub(M, r_hi.isNonZero() or not r.sub(M, CtFalse))
+
 func redc*(a: BigIntViewMut, N: BigIntViewConst, negInvModWord: Word) =
   ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)
   ## to the regular natural representation (mod N)
@@ -455,43 +485,54 @@ func redc*(a: BigIntViewMut, N: BigIntViewConst, negInvModWord: Word) =
 
     a[^1] = Word(carry)
 
-func montyMul*(
-       r: BigIntViewMut, a, b: distinct BigIntViewAny,
-       M: BigIntViewConst, negInvModWord: Word) =
-  ## Compute r <- a*b (mod M) in the Montgomery domain
-  ## `negInvModWord` = -1/M (mod Word). Our words are 2^31 or 2^63
+# TODO: benchmark Montgomery Multiplication vs Shift-Left (after constant-time division)
+
+func montyResidue*(
+       r: BigIntViewMut, a: BigIntViewAny,
+       N, r2modN: BigIntViewConst, negInvModWord: Word) =
+  ## Transform a bigint ``a`` from it's natural representation (mod N)
+  ## to a the Montgomery n-residue representation
   ##
-  ## This resets r to zero before processing. Use {.noInit.}
-  ## to avoid duplicating with Nim zero-init policy
-  # i.e. c'R <- a'R b'R * R^-1 (mod M) in the natural domain
-  # as in the Montgomery domain all numbers are scaled by R
+  ## Montgomery-Multiplication - based
+  ##
+  ## with W = N.numLimbs()
+  ## and R = (2^WordBitSize)^W
+  ##
+  ## Does "a * R (mod N)"
+  ##
+  ## `a`: The source BigInt in the natural representation. `a` in [0, N) range
+  ## `N`: The field modulus. N must be odd.
+  ## `r2modN`: 2^WordBitSize mod `N`. Can be precomputed with `r2mod` function
+  ##
+  ## Important: `r` is overwritten
+  ## The result `r` buffer size MUST be at least the size of `M` buffer
+  # Reference: https://eprint.iacr.org/2017/1057.pdf
+  checkValidModulus(N)
+  checkOddModulus(N)
+  checkMatchingBitlengths(a, N)
 
-  checkValidModulus(M)
-  checkOddModulus(M)
-  checkMatchingBitlengths(r, M)
-  checkMatchingBitlengths(a, M)
-  checkMatchingBitlengths(b, M)
+  montyMul(r, a, r2ModN, N, negInvModWord)
 
-  let nLen = M.numLimbs()
-  setZero(r)
+func montyResidue*(a: BigIntViewMut, N: BigIntViewConst) =
+  ## Transform a bigint ``a`` from it's natural representation (mod N)
+  ## to a the Montgomery n-residue representation
+  ##
+  ## Modular shift - based
+  ##
+  ## with W = N.numLimbs()
+  ## and R = (2^WordBitSize)^W
+  ##
+  ## Does "a * R (mod N)"
+  ##
+  ## `a`: The source BigInt in the natural representation. `a` in [0, N) range
+  ## `N`: The field modulus. N must be odd.
+  ##
+  ## Important: `a` is overwritten
+  # Reference: https://eprint.iacr.org/2017/1057.pdf
+  checkValidModulus(N)
+  checkOddModulus(N)
+  checkMatchingBitlengths(a, N)
 
-  var r_hi = Zero   # represents the high word that is used in intermediate computation before reduction mod M
-  for i in 0 ..< nLen:
-
-    let zi = (r[0] + wordMul(a[i], b[0])).wordMul(negInvModWord)
-    var carry = Zero
-
-    for j in 0 ..< nLen:
-      let z = DoubleWord(r[j]) + unsafeExtPrecMul(a[i], b[j]) +
-              unsafeExtPrecMul(zi, M[j]) + DoubleWord(carry)
-      carry = Word(z shr WordBitSize)
-      if j != 0:
-        r[j-1] = Word(z) and MaxWord
-
-    r_hi += carry
-    r[^1] = r_hi and MaxWord
-    r_hi = r_hi shr WordBitSize
-
-  # If the extra word is not zero or if r-M does not borrow (i.e. r > M)
-  # Then substract M
-  discard r.sub(M, r_hi.isNonZero() or not r.sub(M, CtFalse))
+  let nLen = N.numLimbs()
+  for i in countdown(nLen, 1):
+    a.shlAddMod(Zero, N)
diff --git a/constantine/math/finite_fields.nim b/constantine/math/finite_fields.nim
index 5cd172c..627af72 100644
--- a/constantine/math/finite_fields.nim
+++ b/constantine/math/finite_fields.nim
@@ -23,7 +23,9 @@
 import
   ../primitives/constant_time,
   ../config/[common, curves],
-  ./bigints_checked, ./precomputed
+  ./bigints_checked,
+  # TODO: should be a const/proc in curves.nim
+  ./precomputed
 
 # type
 #   `Fq`*[C: static Curve] = object
@@ -51,8 +53,7 @@ debug:
 
 func fromBig*(T: type Fq, src: BigInt): T =
   ## Convert a BigInt to its Montgomery form
-  result.mres = src
-  result.mres.unsafeMontyResidue(Fq.C.Mod)
+  result.mres.unsafeMontyResidue(src, Fq.C.Mod.mres, r2mod(Fq.C.Mod.mres), neginvModWord(Fq.C.Mod.mres))
 
 func toBig*(src: Fq): auto =
   ## Convert a finite-field element to a BigInt in natral representation
diff --git a/constantine/math/precomputed.nim b/constantine/math/precomputed.nim
index 1a5c8a5..7c98f3d 100644
--- a/constantine/math/precomputed.nim
+++ b/constantine/math/precomputed.nim
@@ -14,19 +14,73 @@ import
 # Precomputed constants
 # ############################################################
 
+# ############################################################
+#
+#                   Modular primitives
+#
+# ############################################################
+#
+# Those primitives are intended to be compile-time only
+# They are generic over the bitsize: enabling them at runtime
+# would create a copy for each bitsize used (monomorphization)
+# leading to code-bloat.
+
+func double(a: var BigInt): CTBool[Word] =
+  ## In-place multiprecision double
+  ##   a -> 2a
+  for i in 0 ..< a.limbs.len:
+    BaseType(a.limbs[i]) *= 2
+    a.limbs[i] += Word(result)
+    result = a.limbs[i].isMsbSet()
+    a.limbs[i] = a.limbs[i] and MaxWord
+
+func sub(a: var BigInt, b: BigInt, ctl: CTBool[Word]): CTBool[Word] =
+  ## In-place optional substraction
+  for i in 0 ..< a.limbs.len:
+    let new_a = a.limbs[i] - b.limbs[i] - Word(result)
+    result = new_a.isMsbSet()
+    a.limbs[i] = ctl.mux(new_a and MaxWord, a.limbs[i])
+
+func doubleMod(a: var BigInt, M: BigInt) =
+  ## In-place modular double
+  ##   a -> 2a (mod M)
+  ##
+  ## It is NOT constant-time and is intended
+  ## only for compile-time precomputation
+  ## of non-secret data.
+  var ctl = double(a)
+  ctl = ctl or not a.sub(M, CtFalse)
+  discard a.sub(M, ctl)
+
 # ############################################################
 #
 #          Montgomery Magic Constants precomputation
 #
 # ############################################################
 
+func checkOddModulus(M: BigInt) =
+  doAssert bool(BaseType(M.limbs[0]) and 1), "Internal Error: the modulus must be odd to use the Montgomery representation."
+
+import strutils
+
+func checkValidModulus(M: BigInt) =
+  const expectedMsb = M.bits-1 - WordBitSize * (M.limbs.len - 1)
+  let msb = log2(BaseType(M.limbs[^1]))
+
+  doAssert msb == expectedMsb, "Internal Error: the modulus must use all declared bits and only those"
+
 func negInvModWord*(M: BigInt): BaseType =
   ## Returns the Montgomery domain magic constant for the input modulus:
   ##
-  ##   µ = -1/M[0] mod M
+  ##   µ ≡ -1/M[0] (mod Word)
   ##
   ## M[0] is the least significant limb of M
   ## M must be odd and greater than 2.
+  ##
+  ## Assuming 63-bit words:
+  ##
+  ## µ ≡ -1/M[0] (mod 2^63)
+
   # We use BaseType for return value because static distinct type
   # confuses Nim semchecks [UPSTREAM BUG]
   # We don't enforce compile-time evaluation here
@@ -43,7 +97,7 @@ func negInvModWord*(M: BigInt): BaseType =
   # - http://marc-b-reynolds.github.io/math/2017/09/18/ModInverse.html
 
   # For Montgomery magic number, we are in a special case
-  # where a = M and m = 2^LimbSize.
+  # where a = M and m = 2^WordBitsize.
   # For a and m to be coprimes, a must be odd.
 
   # We have the following relation
@@ -56,6 +110,9 @@ func negInvModWord*(M: BigInt): BaseType =
   # To get the the modular inverse of 2^k' with arbitrary k' (like k=63 in our case)
   # we can do modInv(a, 2^64) mod 2^63 as mentionned in Koc paper.
 
+  checkOddModulus(M)
+  checkValidModulus(M)
+
   let
     M0 = BaseType(M.limbs[0])
     k = log2(WordPhysBitSize)
@@ -66,3 +123,37 @@ func negInvModWord*(M: BigInt): BaseType =
 
   # Our actual word size is 2^63 not 2^64
   result = result and BaseType(MaxWord)
+
+func r2mod*(M: BigInt): BigInt =
+  ## Returns the Montgomery domain magic constant for the input modulus:
+  ##
+  ##   R² ≡ R² (mod M) with R = (2^WordBitSize)^numWords
+  ##
+  ## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
+  ##   R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
+
+  # Algorithm
+  # Bos and Montgomery, Montgomery Arithmetic from a Software Perspective
+  # https://eprint.iacr.org/2017/1057.pdf
+  #
+  # For R = r^n = 2^wn and 2^(wn − 1) ≤ N < 2^wn
+  # r^n = 2^63 in on 64-bit and w the number of words
+  #
+  # 1. C0 = 2^(wn - 1), the power of two immediately less than N
+  # 2. for i in 1 ... wn+1
+  #      Ci = C(i-1) + C(i-1) (mod M)
+  #
+  # Thus: C(wn+1) ≡ 2^(wn+1) C0 ≡ 2^(wn + 1) 2^(wn - 1) ≡ 2^(2wn) ≡ (2^wn)^2 ≡ R² (mod M)
+
+  checkOddModulus(M)
+  checkValidModulus(M)
+
+  result.setInternalBitLength()
+
+  const
+    w = M.limbs.len
+    msb = M.bits-1 - WordBitSize * (w - 1)
+
+  result.limbs[^1] = Word(1 shl msb) # C0 = 2^(wn-1), the power of 2 immediatly less than the modulus
+  for _ in (w-1)*WordBitSize + msb ..< w * WordBitSize * 2:
+    result.doubleMod(M)
diff --git a/constantine/primitives/constant_time.nim b/constantine/primitives/constant_time.nim
index 3a1f341..89372bb 100644
--- a/constantine/primitives/constant_time.nim
+++ b/constantine/primitives/constant_time.nim
@@ -151,7 +151,7 @@ template isMsbSet*[T: Ct](x: T): CTBool[T] =
   const msb_pos = T.sizeof * 8 - 1
   (CTBool[T])(x shr msb_pos)
 
-func log2*(x: uint32): uint32 {.compileTime.} =
+func log2*(x: uint32): uint32 =
   ## Find the log base 2 of a 32-bit or less integer.
   ## using De Bruijn multiplication
   ## Works at compile-time, guaranteed constant-time.