Skeleton of modular exponentiation

2020-02-22 16:37:31 +01:00 · 2020-02-22 16:37:31 +01:00 · 4b65d0d723
parent 236047767f
commit 4b65d0d723
10 changed files with 332 additions and 140 deletions
--- a/constantine.nimble
+++ b/constantine.nimble
@ -6,7 +6,7 @@ license       = "MIT or Apache License 2.0"
 srcDir        = "src"

 ### Dependencies
-requires "nim >= 1.0.6"
+requires "nim >= 1.1.0"

 ### Helper functions
 proc test(fakeCurves: string, path: string, lang = "c") =
--- a/constantine/config/curves.nim
+++ b/constantine/config/curves.nim
@ -70,6 +70,9 @@ else:
    curve P256: # secp256r1 / NIST P-256
      bitsize: 256
      modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"
+    curve BLS12_381:
+      bitsize: 381
+      modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"

 # ############################################################
 #
@ -103,22 +106,30 @@ macro genMontyMagics(T: typed): untyped =
  let E = T.getImpl[2]
  for i in 1 ..< E.len:
    let curve = E[i]
+    # const MyCurve_R2modP = r2mod(MyCurve_Modulus)
    result.add newConstStmt(
      ident($curve & "_R2modP"), newCall(
        bindSym"r2mod",
-        # The curve parser creates modulus
-        # under symbol "MyCurve_Modulus"
        nnkDotExpr.newTree(
          bindSym($curve & "_Modulus"),
          ident"mres"
        )
      )
    )
+    # const MyCurve_NegInvModWord = negInvModWord(MyCurve_Modulus)
    result.add newConstStmt(
      ident($curve & "_NegInvModWord"), newCall(
        bindSym"negInvModWord",
-        # The curve parser creates modulus
-        # under symbol "MyCurve_Modulus"
+        nnkDotExpr.newTree(
+          bindSym($curve & "_Modulus"),
+          ident"mres"
+        )
+      )
+    )
+    # const MyCurve_montyOne = montyOne(MyCurve_Modulus)
+    result.add newConstStmt(
+      ident($curve & "_MontyOne"), newCall(
+        bindSym"montyOne",
        nnkDotExpr.newTree(
          bindSym($curve & "_Modulus"),
          ident"mres"
@ -138,6 +149,10 @@ macro getNegInvModWord*(C: static Curve): untyped =
  ## Get the Montgomery "-1/P[0] mod 2^WordBitSize" constant associated to a curve field modulus
  result = bindSym($C & "_NegInvModWord")

+macro getMontyOne*(C: static Curve): untyped =
+  ## Get the constant one in Montgomery representation (i.e. R mod P) associated to a curve field modulus
+  result = bindSym($C & "_MontyOne") # binds the "<Curve>_MontyOne" constant emitted by genMontyMagics
+
 # ############################################################
 #
 #                Debug info printed at compile-time
--- a/constantine/io/endians2.nim
+++ b/constantine/io/endians2.nim
@ -1,78 +0,0 @@
-# Copyright (c) 2018-2019 Status Research & Development GmbH
-# Licensed and distributed under either of
-#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
-#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
-# at your option. This file may not be copied, modified, or distributed except according to those terms.
-
-# From https://github.com/status-im/nim-stew/blob/master/stew/endians2.nim
-#
-# Nim standard library "endians" work with pointers which doesn't work at compile-time
-# For auditing purpose and to ensure constant-time safety
-# it's better not to introduce a dependency for such a small piece of code
-
-type
-  SomeEndianInt* = uint8|uint16|uint32|uint64
-    ## types that we support endian conversions for - uint8 is there for
-    ## for syntactic / generic convenience. Other candidates:
-    ## * int/uint - uncertain size, thus less suitable for binary interop
-    ## * intX - over and underflow protection in nim might easily cause issues -
-    ##          need to consider before adding here
-
-when defined(gcc) or defined(llvm_gcc) or defined(clang):
-  func swapBytesBuiltin(x: uint8): uint8 = x
-  func swapBytesBuiltin(x: uint16): uint16 {.
-      importc: "__builtin_bswap16", nodecl.}
-
-  func swapBytesBuiltin(x: uint32): uint32 {.
-      importc: "__builtin_bswap32", nodecl.}
-
-  func swapBytesBuiltin(x: uint64): uint64 {.
-      importc: "__builtin_bswap64", nodecl.}
-
-elif defined(icc):
-  func swapBytesBuiltin(x: uint8): uint8 = x
-  func swapBytesBuiltin(a: uint16): uint16 {.importc: "_bswap16", nodecl.}
-  func swapBytesBuiltin(a: uint32): uint32 {.importc: "_bswap", nodec.}
-  func swapBytesBuiltin(a: uint64): uint64 {.importc: "_bswap64", nodecl.}
-
-elif defined(vcc):
-  func swapBytesBuiltin(x: uint8): uint8 = x
-  proc builtin_bswap16(a: uint16): uint16 {.
-      importc: "_byteswap_ushort", cdecl, header: "<intrin.h>".}
-
-  proc builtin_bswap32(a: uint32): uint32 {.
-      importc: "_byteswap_ulong", cdecl, header: "<intrin.h>".}
-
-  proc builtin_bswap64(a: uint64): uint64 {.
-      importc: "_byteswap_uint64", cdecl, header: "<intrin.h>".}
-
-func swapBytesNim(x: uint8): uint8 = x
-func swapBytesNim(x: uint16): uint16 = (x shl 8) or (x shr 8)
-
-func swapBytesNim(x: uint32): uint32 =
-  let v = (x shl 16) or (x shr 16)
-
-  ((v shl 8) and 0xff00ff00'u32) or ((v shr 8) and 0x00ff00ff'u32)
-
-func swapBytesNim(x: uint64): uint64 =
-  var v = (x shl 32) or (x shr 32)
-  v =
-    ((v and 0x0000ffff0000ffff'u64) shl 16) or
-    ((v and 0xffff0000ffff0000'u64) shr 16)
-
-  ((v and 0x00ff00ff00ff00ff'u64) shl 8) or
-    ((v and 0xff00ff00ff00ff00'u64) shr 8)
-
-template swapBytes*[T: SomeEndianInt](x: T): T =
-  ## Reverse the bytes within an integer, such that the most significant byte
-  ## changes place with the least significant one, etc
-  ##
-  ## Example:
-  ## doAssert swapBytes(0x01234567'u32) == 0x67452301
-  when nimvm:
-    swapBytesNim(x)
-  else:
-    when defined(swapBytesBuiltin):
-      swapBytesBuiltin(x)
-    else:
-      swapBytesNim(x)
--- a/constantine/io/io_bigints.nim
+++ b/constantine/io/io_bigints.nim
@ -11,7 +11,6 @@
 #   - Burning memory to ensure secrets are not left after dealloc.

 import
-  ./endians2,
  ../primitives/constant_time,
  ../math/bigints_checked,
  ../config/common
@ -150,15 +149,20 @@ func fromUint*(
 # Serialising from internal representation to canonical format
 #
 # ############################################################
+import strutils

-template blobFrom*(dst: var openArray[byte], src: SomeEndianInt, startIdx: int, endian: static Endianness) =
+template blobFrom(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) =
  ## Write an integer into a raw binary blob
  ## Swapping endianness if needed
-  let s = when endian == cpuEndian: src
-          else: swapBytes(src)

+  # Bytes are extracted with shifts (least significant first), so the output
+  # layout depends only on the requested ``endian``, never on the host CPU.
+  when endian == littleEndian:
    for i in 0 ..< sizeof(src):
-    dst[startIdx+i] = byte((s shr (i * 8)))
+      dst[startIdx+i] = byte((src shr (i * 8)))
+  else:
+    for i in 0 ..< sizeof(src):
+      dst[startIdx+sizeof(src)-1-i] = byte((src shr (i * 8)))

 func exportRawUintLE(
        dst: var openarray[byte],
@ -218,7 +222,6 @@ func exportRawUintBE(

  var
    src_idx = 0
-    dst_idx = dst.len - 1
    acc: BaseType = 0
    acc_len = 0

@ -240,9 +243,8 @@ func exportRawUintBE(

      if tail >= sizeof(Word):
        # Unrolled copy
-        dst.blobFrom(src = lo, dst_idx, littleEndian)
-        dst_idx -= sizeof(Word)
        tail -= sizeof(Word)
+        dst.blobFrom(src = lo, tail, bigEndian)
      else:
        # Process the tail and exit
        when cpuEndian == littleEndian:
@ -250,11 +252,11 @@ func exportRawUintBE(
          # we can just copy each byte
          # tail is inclusive
          for i in 0 ..< tail:
-            dst[dst_idx-i] = byte(lo shr (i*8))
+            dst[tail-i] = byte(lo shr (i*8))
        else: # TODO check this
          # We need to copy from the end
          for i in 0 ..< tail:
-            dst[dst_idx-i] = byte(lo shr ((tail-i)*8))
+            dst[tail-i] = byte(lo shr ((tail-i)*8))
        return

 func exportRawUint*(
--- a/constantine/math/bigints_checked.nim
+++ b/constantine/math/bigints_checked.nim
@ -157,3 +157,31 @@ func montyMul*[mBits](r: var BigInt[mBits], a, b, M: BigInt[mBits], negInvModWor
  ## This resets r to zero before processing. Use {.noInit.}
  ## to avoid duplicating with Nim zero-init policy
  montyMul(r.view, a.view, b.view, M.view, Word(negInvModWord))
+
+import stew/byteutils
+
+func montyPow*[mBits, eBits: static int](
+       a: var BigInt[mBits], exponent: BigInt[eBits],
+       M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int) =
+  ## Compute a <- a^exponent (mod M)
+  ## ``a`` in the Montgomery domain, ``exponent`` is any BigInt in the canonical domain
+  ## ``M`` is the modulus and ``one`` is 1 in the Montgomery domain
+  ##
+  ## This uses fixed window optimization
+  ## A window size in the range [1, 5] must be chosen
+  ##
+  ## This is constant-time: the window optimization does
+  ## not reveal the exponent bits or hamming weight
+  mixin exportRawUint # exported in io_bigints which depends on this module ...
+
+  var expBE {.noInit.}: array[(eBits + 7) div 8, byte]
+  expBE.exportRawUint(exponent, bigEndian)
+
+  const scratchLen = if windowSize == 1: 2 else: (1 shl windowSize) + 1
+  var scratchSpace {.noInit.}: array[scratchLen, BigInt[mBits]]
+  var scratchPtrs {.noInit.}: array[scratchLen, BigIntViewMut]
+  for i in 0 ..< scratchLen:
+    scratchSpace[i].setInternalBitLength() # noInit storage: set the bit length before use as a montyMul destination
+    scratchPtrs[i] = scratchSpace[i].view()
+
+  montyPow(a.view, expBE, M.view, one.view, Word(negInvModWord), scratchPtrs)
--- a/constantine/math/bigints_raw.nim
+++ b/constantine/math/bigints_raw.nim
@ -54,7 +54,6 @@ import
  ../primitives/extended_precision,
  ../config/common
 from sugar import distinctBase
-from bitops import countSetBits # only used on modulus and public values

 # ############################################################
 #
@ -117,16 +116,6 @@ type
    ## Mutable view into a BigInt
  BigIntViewAny* = BigIntViewConst or BigIntViewMut

-  BigIntLeakedConst* = distinct BigIntViewConst
-    ## BigInt which information will be leaked
-    ## besides the announced bit length
-    ## This is only suitable for values
-    ## that are publicly known
-
-  SensitiveInt* = distinct int
-    ## Integer that contains sensitive information
-    ## and will not be manipulated in a constant-time manner
-
 # No exceptions allowed
 {.push raises: [].}

@ -146,6 +135,7 @@ template `[]=`*(v: BigIntViewMut, limbIdx: int, val: Word) =
  distinctBase(type v)(v).limbs[limbIdx] = val

 template bitSizeof(v: BigIntViewAny): uint32 =
+  bind BigIntView
  distinctBase(type v)(v).bitLength

 const divShiftor = log2(WordPhysBitSize)
@ -203,6 +193,10 @@ template checkWordShift(k: int) =
  debug:
    assert k <= WordBitSize, "Internal Error: the shift must be less than the word bit size"

+template checkPowScratchSpaceLen(len: int) =
+  ## Debug-only check that the scratchspace can hold at least the 2 temporaries needed by modular exponentiation
+  debug:
+    assert len >= 2, "Internal Error: the scratchspace for powmod should be equal or greater than 2"

 debug:
  func `$`*(a: BigIntViewAny): string =
@ -222,9 +216,6 @@ debug:
 #
 # ############################################################

-template mask*(w: Word): Word =
-  w and MaxWord
-
 func isZero*(a: BigIntViewAny): CTBool[Word] =
  ## Returns true if a big int is equal to zero
  var accum: Word
@ -559,39 +550,147 @@ func montyResidue*(

  montyMul(r, a, r2ModN, N, negInvModWord)

-# ############################################################
+# Montgomery Modular Exponentiation
+# ------------------------------------------
+# We use fixed-window based exponentiation
+# that is constant-time: i.e. the number of multiplications
+# does not depend on the number of set bits in the exponents
+# those are always done and conditionally copied.
 #
-#                  Sensitive Primitives
+# TODO: analyze cost difference with naive exponentiation
+#       with n being the number of words to represent a number in Fp
+#       and k the window-size
+#       - we always multiply even for unused multiplications
+#       - conditional copy only save a small fraction of time
+#         (multiplication O(n²), cmov O(n), doing nothing i.e. non constant-time O(n))
+#       - Table lookup is O(kn) copy time since we need to access the whole table to
+#         defeat cache attacks. Without windows, we don't have table lookups at all.
 #
-# ############################################################
-# Warning: Primitives that expose bits of information
-#          due to non-constant time.
-# Proper usage is enforced by compiler.
-# Only apply explicitly to public data like the field modulus
+# The exponent MUST NOT be private data (until audited otherwise)
+# - Power attack on RSA, https://www.di.ens.fr/~fouque/pub/ches06.pdf
+# - Flush-and-reload on Sliding window exponentiation: https://tutcris.tut.fi/portal/files/8966761/p1639_pereida_garcia.pdf
+# - Sliding right into disaster, https://eprint.iacr.org/2017/627.pdf
+# - Fixed window leak: https://www.scirp.org/pdf/JCC_2019102810331929.pdf
+# - Constructing sliding-windows leak, https://easychair.org/publications/open/fBNC
+#
+# For pairing curves, the exponent is public since exponentiation is only
+# used for inversion via Fermat's little theorem.
+# For RSA, some exponentiations use private exponents.
+#
+# Note:
+# - Implementation closely follows Thomas Pornin's BearSSL
+# - Apache Milagro Crypto has an alternative implementation
+#   that is more straightforward however:
+#   - the exponent hamming weight is used as loop bounds
+#   - the base^k is stored at each index of a temp table of size k
+#   - the base^k to use is indexed by the hamming weight
+#     of the exponent, leaking this to cache attacks
+#   - in contrast BearSSL touches the whole table to
+#     hide the actual selection
+#
+# Directly exploiting the exponent Hamming weight would probably
+# significantly speed up exponentiation on pairing-friendly curves as
+# their parameters are chosen for a low Hamming weight (see BLS12-381 x factor)
+# --> Expose an exponent-leaky powMod?
+#     If so, create distinct type for leaked bits and BigInt
+#     so that sensitive data use is compiler-checked

-template bitSizeof(v: BigIntLeakedConst): uint32 =
-  distinctBase(type v)(v).bitLength
+func getWindowLen(bufLen: int): uint =
+  ## Compute the maximum window size in bits, in the range [1, 5], that fits in a scratchspace of ``bufLen`` big-ints
+  checkPowScratchSpaceLen(bufLen)
+  result = 5
+  while result > 1 and (1 shl result) + 1 > bufLen: # stop at 1: a 1-bit window only needs 2 big-ints, not 2^1 + 1
+    dec result

-template numLimbs*(v: BigIntLeakedConst): int =
-  ## Compute the number of limbs from
-  ## the **internal** bitlength
-  (bitSizeof(v).int + WordPhysBitSize - 1) shr divShiftor
+func montyPow*(
+       a: BigIntViewMut,
+       exponent: openarray[byte],
+       M, one: BigIntViewConst,
+       negInvModWord: Word,
+       scratchspace: openarray[BigIntViewMut]
+      ) =
+  ## Modular exponentiation a <- a^exponent (mod M)
+  ## in the montgomery domain
+  ##
+  ## This uses fixed-window optimization if possible
+  ##
+  ## - On input ``a`` is the base, on output ``a`` = a^exponent (mod M)
+  ##   ``a`` is in the Montgomery domain
+  ## - ``exponent`` is the exponent in big-endian canonical format (octet-string)
+  ##   Use ``exportRawUint`` for conversion
+  ## - ``M`` is the modulus
+  ## - ``one`` is 1 (mod M) in montgomery representation
+  ## - ``negInvModWord`` is the montgomery magic constant "-1/M[0] mod 2^WordBitSize"
+  ## - ``scratchspace`` is a buffer of temporaries. With k the window bitsize (up to 5),
+  ##   a window of k-bit (k > 1) requires 2^k + 1 big-ints
+  ##   A window of 1-bit (no window optimization) requires only 2 big-ints
+  ##
+  ## Note that the best window size requires benchmarking and is a tradeoff between
+  ## - performance
+  ## - stack usage
+  ## - precomputation
+  ##
+  ## For example BLS12-381 window size of 5 is 30% faster than no window,
+  ## but windows of size 2, 3, 4 bring no performance benefit, only increased stack space.
+  ## A window of size 5 requires (2^5 + 1)*(381 + 7)/8 = 33 * 48 bytes = 1584 bytes
+  ## of scratchspace (on the stack).

-template `[]`*(v: BigIntLeakedConst, limbIdx: int): SensitiveInt =
-  SensitiveInt distinctBase(type v)(v).limbs[limbIdx]
+  let window = scratchspace.len.getWindowLen()
+  let bigIntSize = a.numLimbs() * sizeof(Word) + sizeof(BigIntView.bitLength) # bytes per raw copy: limbs + bit length field

-func popcount(a: BigIntLeakedConst): SensitiveInt =
-  ## Count the number of bits set in an integer
-  ## also called popcount or Hamming Weight
-  ## ⚠️⚠️⚠️: This is only intended for use on public data
-  var accum: int
-  for i in 0 ..< a.numLimbs:
-    accum += countSetBits(a[i].BaseType)
-  return SensitiveInt accum
+  # Precompute window content, special case for window = 1
+  # (i.e scratchspace has only space for 2 temporaries)
+  # For window > 1, scratchspace[1+k] is set at a^k
+  # with scratchspace[0] kept free as a temporary for products
+  if window == 1:
+    copyMem(pointer scratchspace[1], pointer a, bigIntSize)
+  else:
+    copyMem(pointer scratchspace[2], pointer a, bigIntSize)
+    for k in 2 ..< 1 shl window:
+      scratchspace[k+1].montyMul(scratchspace[k], a, M, negInvModWord)

-func lastBits(a: BigIntLeakedConst, k: int): SensitiveInt =
-  ## Returns the last bits of an integer
-  ## k MUST be less than the base word size (2^31 or 2^63)
-  checkWordShift(k)
-  let mask = BaseType((1 shl k) - 1)
-  return SensitiveInt(a[0].BaseType and mask)
+  scratchspace[1].setBitLength(bitSizeof(M))
+
+  # Set a to one
+  copyMem(pointer a, pointer one, bigIntSize)
+
+  # We process bits from most to least significant.
+  # At each loop iteration we have acc_len bits in acc.
+  # To maintain constant-time the number of iterations
+  # or the number of operations or memory accesses should be the same
+  # regardless of acc & acc_len
+  var
+    acc, acc_len: uint
+    e = 0
+  while acc_len > 0 or e < exponent.len:
+    # Get the next bits
+    var k = window
+    if acc_len < window:
+      if e < exponent.len:
+        acc = (acc shl 8) or exponent[e].uint
+        inc e
+        acc_len += 8
+      else: # Drained all exponent bits
+        k = acc_len
+
+    let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+    acc_len -= k
+
+    # We have k bits and can do k squarings
+    for i in 0 ..< k:
+      scratchspace[0].montyMul(a, a, M, negInvModWord)
+      copyMem(pointer a, pointer scratchspace[0], bigIntSize)
+    # Window lookup: we set scratchspace[1] to the lookup value.
+    # If the window length is 1, then it's already set.
+    if window > 1:
+      # otherwise we need a constant-time lookup
+      # in particular we need the same memory accesses, we can't
+      # just index the openarray with the bits to avoid cache attacks.
+      for i in 1 ..< 1 shl k:
+        let ctl = Word(i) == Word(bits)
+        scratchspace[1].cmov(scratchspace[1+i], ctl)
+
+    # Multiply with the looked-up value
+    # we keep the product only if the exponent bits are not all zero
+    scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord)
+    a.cmov(scratchspace[0], Word(bits) != Zero)
--- a/constantine/math/finite_fields.nim
+++ b/constantine/math/finite_fields.nim
@ -25,6 +25,8 @@ import
  ../config/[common, curves],
  ./bigints_checked

+from ../io/io_bigints import exportRawUint # for "pow"
+
 # type
 #   `Fq`*[C: static Curve] = object
 #     ## All operations on a field are modulo P
@ -126,3 +128,10 @@ func `*`*(a, b: Fq): Fq {.noInit.} =
  ## routine will zero init internally the result.
  result.mres.setInternalBitLength()
  result.mres.montyMul(a.mres, b.mres, Fq.C.Mod.mres, Fq.C.getNegInvModWord())
+
+func pow*(a: var Fq, exponent: BigInt) =
+  ## In-place exponentiation over Fq: a <- a^exponent
+  ## ``a``: the field element, overwritten with the result
+  ## ``exponent``: a big integer
+  const window = 5 # TODO: find best window size for each curves
+  montyPow(a.mres, exponent, Fq.C.Mod.mres, Fq.C.getMontyOne(), Fq.C.getNegInvModWord(), window)
--- a/constantine/math/precomputed.nim
+++ b/constantine/math/precomputed.nim
@ -43,6 +43,10 @@ func double(a: var BigInt): bool =

 func sub(a: var BigInt, b: BigInt, ctl: bool): bool =
  ## In-place optional substraction
+  ##
+  ## It is NOT constant-time and is intended
+  ## only for compile-time precomputation
+  ## of non-secret data.
  for i in 0 ..< a.limbs.len:
    let new_a = BaseType(a.limbs[i]) - BaseType(b.limbs[i]) - BaseType(result)
    result = new_a.isMsbSet()
@ -132,9 +136,11 @@ func negInvModWord*(M: BigInt): BaseType =
  # Our actual word size is 2^63 not 2^64
  result = result and BaseType(MaxWord)

-func r2mod*(M: BigInt): BigInt =
+func r_powmod(n: static int, M: BigInt): BigInt =
  ## Returns the Montgomery domain magic constant for the input modulus:
  ##
+  ##   R ≡ R (mod M) with R = (2^WordBitSize)^numWords
+  ##   or
  ##   R² ≡ R² (mod M) with R = (2^WordBitSize)^numWords
  ##
  ## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
@ -162,8 +168,22 @@ func r2mod*(M: BigInt): BigInt =
    w = M.limbs.len
    msb = M.bits-1 - WordBitSize * (w - 1)
    start = (w-1)*WordBitSize + msb
-    stop = 2*WordBitSize*w
+    stop = n*WordBitSize*w

  result.limbs[^1] = Word(1 shl msb) # C0 = 2^(wn-1), the power of 2 immediatly less than the modulus
  for _ in start ..< stop:
    result.doubleMod(M)
+
+func r2mod*(M: BigInt): BigInt =
+  ## Montgomery domain magic constant R² (mod M) for the input modulus,
+  ##
+  ##   with R = (2^WordBitSize)^numWords
+  ##
+  ## Example: a 256-bit field modulus with 63-bit words requires 5 words, hence
+  ##   R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
+  result = r_powmod(2, M)
+
+func montyOne*(M: BigInt): BigInt =
+  ## The value "1 (mod M)" expressed in the Montgomery domain,
+  ## i.e. R (mod M) when read in the natural domain
+  result = r_powmod(1, M)
--- a/tests/test_finite_fields_powinv.nim
+++ b/tests/test_finite_fields_powinv.nim
@ -0,0 +1,95 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import  unittest, random,
+        ../constantine/math/[bigints_checked, finite_fields],
+        ../constantine/io/io_fields,
+        ../constantine/config/curves
+
+import ../constantine/io/io_bigints
+
+static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option"
+
+proc main() = # squares small values modulo 101 and checks the result in both representations
+  suite "Modular exponentiation over finite fields":
+    test "n² mod 101":
+      let exponent = BigInt[64].fromUint(2'u64)
+
+      block: # 1^2 mod 101
+        var n, expected: Fq[Fake101]
+
+        n.fromUint(1'u32)
+        expected.fromUint(1'u32)
+
+        n.pow(exponent)
+
+        var n_bytes: array[8, byte]
+        n_bytes.exportRawUint(n, cpuEndian)
+        let r = cast[uint64](n_bytes)
+
+        check:
+          # Check equality in the Montgomery domain
+          bool(n == expected)
+          # Check equality when converting back to natural domain
+          1'u64 == r
+
+      block: # 2^2 mod 101
+        var n, expected: Fq[Fake101]
+
+        n.fromUint(2'u32)
+        expected.fromUint(4'u32)
+
+        n.pow(exponent)
+
+        var n_bytes: array[8, byte]
+        n_bytes.exportRawUint(n, cpuEndian)
+        let r = cast[uint64](n_bytes)
+
+        check:
+          # Check equality in the Montgomery domain
+          bool(n == expected)
+          # Check equality when converting back to natural domain
+          4'u64 == r
+
+      block: # 10^2 mod 101
+        var n, expected: Fq[Fake101]
+
+        n.fromUint(10'u32)
+        expected.fromUint(100'u32)
+
+        n.pow(exponent)
+
+        var n_bytes: array[8, byte]
+        n_bytes.exportRawUint(n, cpuEndian)
+        let r = cast[uint64](n_bytes)
+
+        check:
+          # Check equality in the Montgomery domain
+          bool(n == expected)
+          # Check equality when converting back to natural domain
+          100'u64 == r
+
+      block: # 11^2 mod 101 (121 mod 101 = 20)
+        var n, expected: Fq[Fake101]
+
+        n.fromUint(11'u32)
+        expected.fromUint(20'u32)
+
+        n.pow(exponent)
+
+        var n_bytes: array[8, byte]
+        n_bytes.exportRawUint(n, cpuEndian)
+        let r = cast[uint64](n_bytes)
+
+        check:
+          # Check equality in the Montgomery domain
+          bool(n == expected)
+          # Check equality when converting back to natural domain
+          20'u64 == r
+
+main()
--- a/tests/test_finite_fields_powinv.nim.cfg
+++ b/tests/test_finite_fields_powinv.nim.cfg
@ -0,0 +1,2 @@
+-d:testingCurves
+-d:debugConstantine