Cleanup in Montgomery Mul, Square, Pow

2020-03-22 13:24:37 +01:00 · 2020-03-22 13:24:37 +01:00 · 8b7374f405
parent 2d5b173a39
commit 8b7374f405
4 changed files with 61 additions and 45 deletions
--- a/benchmarks/bench_fp.nim
+++ b/benchmarks/bench_fp.nim
@ -9,6 +9,7 @@
 import
  # Internals
  ../constantine/config/curves,
  ../constantine/arithmetic,
  # Helpers
  ../helpers/static_for,
  ./bench_fields_template,
--- a/constantine/arithmetic/bigints.nim
+++ b/constantine/arithmetic/bigints.nim
@ -315,7 +315,7 @@ func montySquare*(r: var BigInt, a, M: BigInt, negInvModWord: static BaseType, c
 func montyPow*[mBits, eBits: static int](
       a: var BigInt[mBits], exponent: BigInt[eBits],
       M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,
-       canUseNoCarryMontyMul: static bool
+       canUseNoCarryMontyMul, canUseNoCarryMontySquare: static bool
      ) =
  ## Compute a <- a^exponent (mod M)
  ## ``a`` in the Montgomery domain
@ -334,12 +334,12 @@ func montyPow*[mBits, eBits: static int](
  const scratchLen = if windowSize == 1: 2
                     else: (1 shl windowSize) + 1
  var scratchSpace {.noInit.}: array[scratchLen, Limbs[mBits.wordsRequired]]
-  montyPow(a.limbs, expBE, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul)
+  montyPow(a.limbs, expBE, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul, canUseNoCarryMontySquare)
 func montyPowUnsafeExponent*[mBits, eBits: static int](
       a: var BigInt[mBits], exponent: BigInt[eBits],
       M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,
-       canUseNoCarryMontyMul: static bool
+       canUseNoCarryMontyMul, canUseNoCarryMontySquare: static bool
      ) =
  ## Compute a <- a^exponent (mod M)
  ## ``a`` in the Montgomery domain
@ -362,12 +362,12 @@ func montyPowUnsafeExponent*[mBits, eBits: static int](
  const scratchLen = if windowSize == 1: 2
                     else: (1 shl windowSize) + 1
  var scratchSpace {.noInit.}: array[scratchLen, Limbs[mBits.wordsRequired]]
-  montyPowUnsafeExponent(a.limbs, expBE, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul)
+  montyPowUnsafeExponent(a.limbs, expBE, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul, canUseNoCarryMontySquare)
 func montyPowUnsafeExponent*[mBits: static int](
       a: var BigInt[mBits], exponent: openarray[byte],
       M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,
-       canUseNoCarryMontyMul: static bool
+       canUseNoCarryMontyMul, canUseNoCarryMontySquare: static bool
      ) =
  ## Compute a <- a^exponent (mod M)
  ## ``a`` in the Montgomery domain
@ -386,7 +386,7 @@ func montyPowUnsafeExponent*[mBits: static int](
  const scratchLen = if windowSize == 1: 2
                     else: (1 shl windowSize) + 1
  var scratchSpace {.noInit.}: array[scratchLen, Limbs[mBits.wordsRequired]]
-  montyPowUnsafeExponent(a.limbs, exponent, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul)
+  montyPowUnsafeExponent(a.limbs, exponent, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul, canUseNoCarryMontySquare)
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/arithmetic/finite_fields.nim
+++ b/constantine/arithmetic/finite_fields.nim
@ -170,7 +170,8 @@ func pow*(a: var Fp, exponent: BigInt) =
    exponent,
    Fp.C.Mod, Fp.C.getMontyOne(),
    Fp.C.getNegInvModWord(), windowSize,
-    Fp.C.canUseNoCarryMontyMul()
+    Fp.C.canUseNoCarryMontyMul(),
    Fp.C.canUseNoCarryMontySquare()
  )
 func powUnsafeExponent*(a: var Fp, exponent: BigInt) =
@ -189,9 +190,29 @@ func powUnsafeExponent*(a: var Fp, exponent: BigInt) =
    exponent,
    Fp.C.Mod, Fp.C.getMontyOne(),
    Fp.C.getNegInvModWord(), windowSize,
-    Fp.C.canUseNoCarryMontyMul()
+    Fp.C.canUseNoCarryMontyMul(),
    Fp.C.canUseNoCarryMontySquare()
  )
 func powUnsafeExponent*(a: var Fp, exponent: openarray[byte]) =
  ## Exponentiation modulo p, in place: ``a`` <- ``a``^``exponent`` (mod p)
  ## ``a``: a field element to be exponentiated
  ## ``exponent``: the exponent, given as a raw byte buffer
  ##   NOTE(review): presumably big-endian, as the BigInt overloads
  ##   hand an ``expBE`` buffer to the same routine - confirm
  ##
  ## Warning ⚠️ :
  ## This is an optimization for public exponent,
  ## it must not be used with a secret exponent.
  ## Otherwise bits of the exponent can be retrieved with:
  ## - memory access analysis
  ## - power analysis
  ## - timing analysis
  const windowSize = 5 # TODO: find best window size for each curves
  # Forward to the BigInt-level routine on the Montgomery residue of ``a``,
  # with the curve constants (modulus, Montgomery one, magic constant)
  # and the two compile-time "no-carry" flags for multiplication and squaring.
  a.mres.montyPowUnsafeExponent(
    exponent,
    Fp.C.Mod, Fp.C.getMontyOne(),
    Fp.C.getNegInvModWord(), windowSize,
    Fp.C.canUseNoCarryMontyMul(),
    Fp.C.canUseNoCarryMontySquare()
  )
 # ############################################################
 #
--- a/constantine/arithmetic/limbs_montgomery.nim
+++ b/constantine/arithmetic/limbs_montgomery.nim
@ -85,9 +85,6 @@ macro staticFor(idx: untyped{nkIdent}, start, stopEx: static int, body: untyped)
 # Implementation
 # ------------------------------------------------------------
 # Note: the low-level implementations should not use static parameter
 #       the code generated is already big enough for curve with different
 #       limb sizes, we want to use the same codepath when limb lengths are compatible.
 func montyMul_CIOS_nocarry(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) =
  ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS)
@ -266,7 +263,7 @@ func montySquare_CIOS(r: var Limbs, a, M: Limbs, m0ninv: BaseType) =
 func montyMul*(
        r: var Limbs, a, b, M: Limbs,
-        m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
+        m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
  ## Compute r <- a*b (mod M) in the Montgomery domain
  ## `m0ninv` = -1/M (mod Word). Our words are 2^32 or 2^64
  ##
@ -278,7 +275,7 @@ func montyMul*(
  ## Assuming 64-bit words, the magic constant should be:
  ##
  ## - µ ≡ -1/M[0] (mod 2^64) for a general multiplication
-  ##   This can be precomputed with `negInvModWord`
+  ##   This can be precomputed with `m0ninv`
  ## - 1 for conversion from Montgomery to canonical representation
  ##   The library implements a faster `redc` primitive for that use-case
  ## - R^2 (mod M) for conversion from canonical to Montgomery representation
@ -286,27 +283,22 @@ func montyMul*(
  # i.e. c'R <- a'R b'R * R^-1 (mod M) in the natural domain
  # as in the Montgomery domain all numbers are scaled by R
-  # Nim doesn't like static Word, so we pass static BaseType up to here
+  # Many curve moduli are "Montgomery-friendly" which means that m0ninv is 1
  # Then we passe them as Word again for the final processing.
  # Many curve moduli are "Montgomery-friendly" which means that m0inv is 1
  # This saves N basic type multiplication and potentially many register mov
  # as well as unless using "mulx" instruction, x86 "mul" requires very specific registers.
-  # Compilers should be able to constant-propagate, but this prevents reusing code
+  #
-  # for example between secp256k1 (friendly) and BN254 (non-friendly).
+  # The implementation is visible from here, the compiler can make decision whether to:
-  # Here, as "montyMul" is inlined at the call site, the compiler shouldn't constant fold, saving size.
+  # - specialize/duplicate code for m0ninv == 1 (especially if only 1 curve is needed)
-  # Inlining the implementation instead (and no-inline this "montyMul" proc) would allow constant propagation
+  # - keep it generic and optimize code size
  # of Montgomery-friendly m0ninv if the compiler deems it interesting,
  # or we use `when m0ninv == 1` and enforce the inlining.
  when canUseNoCarryMontyMul:
    montyMul_CIOS_nocarry(r, a, b, M, m0ninv)
  else:
    montyMul_CIOS(r, a, b, M, m0ninv)
 func montySquare*(r: var Limbs, a, M: Limbs,
-                  m0ninv: static BaseType, canUseNoCarryMontySquare: static bool) {.inline.} =
+                  m0ninv: static BaseType, canUseNoCarryMontySquare: static bool) =
  ## Compute r <- a^2 (mod M) in the Montgomery domain
-  ## `negInvModWord` = -1/M (mod Word). Our words are 2^31 or 2^63
+  ## `m0ninv` = -1/M (mod Word). Our words are 2^31 or 2^63
  when canUseNoCarryMontySquare:
    montySquare_CIOS_nocarry(r, a, M, m0ninv)
@ -314,7 +306,7 @@ func montySquare*(r: var Limbs, a, M: Limbs,
    montySquare_CIOS(r, a, M, m0ninv)
 func redc*(r: var Limbs, a, one, M: Limbs,
-           m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
+           m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
  ## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N)
  ## to the regular natural representation (mod N)
  ##
@ -325,7 +317,7 @@ func redc*(r: var Limbs, a, one, M: Limbs,
  ##
  ## This is called a Montgomery Reduction
  ## The Montgomery Magic Constant is µ = -1/N mod M
-  ## is used internally and can be precomputed with negInvModWord(Curve)
+  ## is used internally and can be precomputed with m0ninv(Curve)
  # References:
  #   - https://eprint.iacr.org/2017/1057.pdf (Montgomery)
  #     page: Radix-r interleaved multiplication algorithm
@ -336,7 +328,7 @@ func redc*(r: var Limbs, a, one, M: Limbs,
  montyMul(r, a, one, M, m0ninv, canUseNoCarryMontyMul)
 func montyResidue*(r: var Limbs, a, M, r2modM: Limbs,
-                   m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
+                   m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
  ## Transform a bigint ``a`` from its natural representation (mod N)
  ## to the Montgomery n-residue representation
  ##
@ -424,12 +416,12 @@ func montyPowSquarings(
        a: var Limbs,
        exponent: openarray[byte],
        M: Limbs,
-        negInvModWord: static BaseType,
+        m0ninv: static BaseType,
        tmp: var Limbs,
        window: uint,
        acc, acc_len: var uint,
        e: var int,
-        canUseNoCarryMontyMul: static bool
+        canUseNoCarryMontySquare: static bool
      ): tuple[k, bits: uint] {.inline.}=
  ## Squaring step of exponentiation by squaring
  ## Get the next k bits in range [1, window)
@ -455,7 +447,7 @@ func montyPowSquarings(
  # We have k bits and can do k squaring
  for i in 0 ..< k:
-    tmp.montySquare(a, M, negInvModWord, canUseNoCarryMontyMul)
+    tmp.montySquare(a, M, m0ninv, canUseNoCarryMontySquare)
    a = tmp
  return (k, bits)
@ -464,9 +456,10 @@ func montyPow*(
       a: var Limbs,
       exponent: openarray[byte],
       M, one: Limbs,
-       negInvModWord: static BaseType,
+       m0ninv: static BaseType,
       scratchspace: var openarray[Limbs],
-       canUseNoCarryMontyMul: static bool
+       canUseNoCarryMontyMul: static bool,
       canUseNoCarryMontySquare: static bool
      ) =
  ## Modular exponentiation r = a^exponent mod M
  ## in the Montgomery domain
@ -479,7 +472,7 @@ func montyPow*(
  ##   Use ``exportRawUint`` for conversion
  ## - ``M`` is the modulus
  ## - ``one`` is 1 (mod M) in montgomery representation
-  ## - ``negInvModWord`` is the montgomery magic constant "-1/M[0] mod 2^WordBitSize"
+  ## - ``m0ninv`` is the montgomery magic constant "-1/M[0] mod 2^WordBitSize"
  ## - ``scratchspace`` with k the window bitsize of size up to 5
  ##   This is a buffer that can hold between 2^k + 1 big-ints
  ##   A window of 1-bit (no window optimization) requires only 2 big-ints
@ -494,7 +487,7 @@ func montyPow*(
  ## A window of size 5 requires (2^5 + 1)*(381 + 7)/8 = 33 * 48 bytes = 1584 bytes
  ## of scratchspace (on the stack).
-  let window = montyPowPrologue(a, M, one, negInvModWord, scratchspace, canUseNoCarryMontyMul)
+  let window = montyPowPrologue(a, M, one, m0ninv, scratchspace, canUseNoCarryMontyMul)
  # We process bits from most to least significant.
  # At each loop iteration we have acc_len bits in acc.
@ -506,10 +499,10 @@ func montyPow*(
    e = 0
  while acc_len > 0 or e < exponent.len:
    let (k, bits) = montyPowSquarings(
-      a, exponent, M, negInvModWord,
+      a, exponent, M, m0ninv,
      scratchspace[0], window,
      acc, acc_len, e,
-      canUseNoCarryMontyMul
+      canUseNoCarryMontySquare
    )
    # Window lookup: we set scratchspace[1] to the lookup value.
@ -524,16 +517,17 @@ func montyPow*(
    # Multiply with the looked-up value
    # we keep the product only if the exponent bits are not all zero
-    scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord, canUseNoCarryMontyMul)
+    scratchspace[0].montyMul(a, scratchspace[1], M, m0ninv, canUseNoCarryMontyMul)
    a.ccopy(scratchspace[0], Word(bits).isNonZero())
 func montyPowUnsafeExponent*(
       a: var Limbs,
       exponent: openarray[byte],
       M, one: Limbs,
-       negInvModWord: static BaseType,
+       m0ninv: static BaseType,
       scratchspace: var openarray[Limbs],
-       canUseNoCarryMontyMul: static bool
+       canUseNoCarryMontyMul: static bool,
       canUseNoCarryMontySquare: static bool
      ) =
  ## Modular exponentiation r = a^exponent mod M
  ## in the Montgomery domain
@ -547,26 +541,26 @@ func montyPowUnsafeExponent*(
  # TODO: scratchspace[1] is unused when window > 1
-  let window = montyPowPrologue(a, M, one, negInvModWord, scratchspace, canUseNoCarryMontyMul)
+  let window = montyPowPrologue(a, M, one, m0ninv, scratchspace, canUseNoCarryMontyMul)
  var
    acc, acc_len: uint
    e = 0
  while acc_len > 0 or e < exponent.len:
    let (k, bits) = montyPowSquarings(
-      a, exponent, M, negInvModWord,
+      a, exponent, M, m0ninv,
      scratchspace[0], window,
      acc, acc_len, e,
-      canUseNoCarryMontyMul
+      canUseNoCarryMontySquare
    )
    ## Warning ⚠️: Exposes the exponent bits
    if bits != 0:
      if window > 1:
-        scratchspace[0].montyMul(a, scratchspace[1+bits], M, negInvModWord, canUseNoCarryMontyMul)
+        scratchspace[0].montyMul(a, scratchspace[1+bits], M, m0ninv, canUseNoCarryMontyMul)
      else:
        # scratchspace[1] holds the original `a`
-        scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord, canUseNoCarryMontyMul)
+        scratchspace[0].montyMul(a, scratchspace[1], M, m0ninv, canUseNoCarryMontyMul)
      a = scratchspace[0]
 {.pop.} # raises no exceptions