Rework towering (#148)

* naive removal of out-of-place mul by non residue * Use {.inline.} in a consistent manner across the codebase * Handle aliasing for quadratic multiplication * reorg optimization * Handle aliasing for quadratic squaring * handle aliasing in mul_sparse_complex_by_0y * Rework multiplication by nonresidue, assume tower and twist use same non-residue * continue rework * continue on non-residues * Remove "NonResidue *" calls * handle aliasing in Chung-Hasan SQR2 * Handla aliasing in Chung-Hasan SQR3 * Use one less temporary in Chung Hasan sqr2 * handle aliasing in cubic extensions * merge extension tower in the same file to reduce duplicate proc and allow better inlining * handle aliasing in cubic inversion * drop out-of-place proc from BigInt and finite fields as well * less copies in line_projective * remove a copy in fp12 by lines
2021-02-06 16:28:38 +01:00 · 2021-02-06 16:28:38 +01:00 · c312210878
parent 2c5e12d5f8
commit c312210878
28 changed files with 1417 additions and 1285 deletions
--- a/constantine/arithmetic/bigints.nim
+++ b/constantine/arithmetic/bigints.nim
@ -10,7 +10,7 @@ import
  ../config/[common, type_bigint],
  ../primitives,
  ./limbs,
-  ./limbs_double_width,
+  ./limbs_extmul,
  ./limbs_modular,
  ./limbs_montgomery
@ -347,7 +347,7 @@ func bit0*(a: BigInt): Ct[uint8] =
 # Multiplication by small cosntants
 # ------------------------------------------------------------
-func `*=`*(a: var BigInt, b: static int) {.inline.} =
+func `*=`*(a: var BigInt, b: static int) =
  ## Multiplication by a small integer known at compile-time
  # Implementation:
  #
@ -420,11 +420,6 @@ func `*=`*(a: var BigInt, b: static int) {.inline.} =
  else:
    {.error: "Multiplication by this small int not implemented".}
 func `*`*(b: static int, a: BigInt): BigInt {.noinit, inline.} =
  ## Multiplication by a small integer known at compile-time
  result = a
  result *= b
 # Division by constants
 # ------------------------------------------------------------
--- a/constantine/arithmetic/finite_fields.nim
+++ b/constantine/arithmetic/finite_fields.nim
@ -43,6 +43,7 @@ export Fp, Fr, FF
 # No exceptions allowed
 {.push raises: [].}
 {.push inline.}
 # ############################################################
 #
@ -50,14 +51,14 @@ export Fp, Fr, FF
 #
 # ############################################################
-func fromBig*(dst: var FF, src: BigInt) {.inline.}=
+func fromBig*(dst: var FF, src: BigInt) =
  ## Convert a BigInt to its Montgomery form
  when nimvm:
    dst.mres.montyResidue_precompute(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord())
  else:
    dst.mres.montyResidue(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
-func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit, inline.} =
+func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit.} =
  ## Convert a BigInt to its Montgomery form
  result.fromBig(src)
@ -70,14 +71,14 @@ func toBig*(src: FF): auto {.noInit, inline.} =
 # Copy
 # ------------------------------------------------------------
-func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline, meter.} =
+func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
  ## Constant-time conditional copy
  ## If ctl is true: b is copied into a
  ## if ctl is false: b is not copied and a is unmodified
  ## Time and memory accesses are the same whether a copy occurs or not
  ccopy(a.mres, b.mres, ctl)
-func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
+func cswap*(a, b: var FF, ctl: CTBool) {.meter.} =
  ## Swap ``a`` and ``b`` if ``ctl`` is true
  ##
  ## Constant-time:
@ -105,34 +106,34 @@ func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
 # In practice I'm not aware of such prime being using in elliptic curves.
 # 2^127 - 1 and 2^521 - 1 are used but 127 and 521 are not multiple of 32/64
-func `==`*(a, b: FF): SecretBool {.inline.} =
+func `==`*(a, b: FF): SecretBool =
  ## Constant-time equality check
  a.mres == b.mres
-func isZero*(a: FF): SecretBool {.inline.} =
+func isZero*(a: FF): SecretBool =
  ## Constant-time check if zero
  a.mres.isZero()
-func isOne*(a: FF): SecretBool {.inline.} =
+func isOne*(a: FF): SecretBool =
  ## Constant-time check if one
  a.mres == FF.getMontyOne()
-func isMinusOne*(a: FF): SecretBool {.inline.} =
+func isMinusOne*(a: FF): SecretBool =
  ## Constant-time check if -1 (mod p)
  a.mres == FF.getMontyPrimeMinus1()
-func setZero*(a: var FF) {.inline.} =
+func setZero*(a: var FF) =
  ## Set ``a`` to zero
  a.mres.setZero()
-func setOne*(a: var FF) {.inline.} =
+func setOne*(a: var FF) =
  ## Set ``a`` to one
  # Note: we need 1 in Montgomery residue form
  # TODO: Nim codegen is not optimal it uses a temporary
  #       Check if the compiler optimizes it away
  a.mres = FF.getMontyOne()
-func `+=`*(a: var FF, b: FF) {.inline, meter.} =
+func `+=`*(a: var FF, b: FF) {.meter.} =
  ## In-place addition modulo p
  when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
    addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -141,7 +142,7 @@ func `+=`*(a: var FF, b: FF) {.inline, meter.} =
    overflowed = overflowed or not(a.mres < FF.fieldMod())
    discard csub(a.mres, FF.fieldMod(), overflowed)
-func `-=`*(a: var FF, b: FF) {.inline, meter.} =
+func `-=`*(a: var FF, b: FF) {.meter.} =
  ## In-place substraction modulo p
  when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
    submod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -149,7 +150,7 @@ func `-=`*(a: var FF, b: FF) {.inline, meter.} =
    let underflowed = sub(a.mres, b.mres)
    discard cadd(a.mres, FF.fieldMod(), underflowed)
-func double*(a: var FF) {.inline, meter.} =
+func double*(a: var FF) {.meter.} =
  ## Double ``a`` modulo p
  when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
    addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@ -158,7 +159,7 @@ func double*(a: var FF) {.inline, meter.} =
    overflowed = overflowed or not(a.mres < FF.fieldMod())
    discard csub(a.mres, FF.fieldMod(), overflowed)
-func sum*(r: var FF, a, b: FF) {.inline, meter.} =
+func sum*(r: var FF, a, b: FF) {.meter.} =
  ## Sum ``a`` and ``b`` into ``r`` modulo p
  ## r is initialized/overwritten
  when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -168,11 +169,11 @@ func sum*(r: var FF, a, b: FF) {.inline, meter.} =
    overflowed = overflowed or not(r.mres < FF.fieldMod())
    discard csub(r.mres, FF.fieldMod(), overflowed)
-func sumNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
+func sumNoReduce*(r: var FF, a, b: FF) {.meter.} =
  ## Sum ``a`` and ``b`` into ``r`` without reduction
  discard r.mres.sum(a.mres, b.mres)
-func diff*(r: var FF, a, b: FF) {.inline, meter.} =
+func diff*(r: var FF, a, b: FF) {.meter.} =
  ## Substract `b` from `a` and store the result into `r`.
  ## `r` is initialized/overwritten
  ## Requires r != b
@ -182,12 +183,12 @@ func diff*(r: var FF, a, b: FF) {.inline, meter.} =
    var underflowed = r.mres.diff(a.mres, b.mres)
    discard cadd(r.mres, FF.fieldMod(), underflowed)
-func diffNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
+func diffNoReduce*(r: var FF, a, b: FF) {.meter.} =
  ## Substract `b` from `a` and store the result into `r`
  ## without reduction
  discard r.mres.diff(a.mres, b.mres)
-func double*(r: var FF, a: FF) {.inline, meter.} =
+func double*(r: var FF, a: FF) {.meter.} =
  ## Double ``a`` into ``r``
  ## `r` is initialized/overwritten
  when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -197,16 +198,16 @@ func double*(r: var FF, a: FF) {.inline, meter.} =
    overflowed = overflowed or not(r.mres < FF.fieldMod())
    discard csub(r.mres, FF.fieldMod(), overflowed)
-func prod*(r: var FF, a, b: FF) {.inline, meter.} =
+func prod*(r: var FF, a, b: FF) {.meter.} =
  ## Store the product of ``a`` by ``b`` modulo p into ``r``
  ## ``r`` is initialized / overwritten
  r.mres.montyMul(a.mres, b.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
-func square*(r: var FF, a: FF) {.inline, meter.} =
+func square*(r: var FF, a: FF) {.meter.} =
  ## Squaring modulo p
  r.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
-func neg*(r: var FF, a: FF) {.inline, meter.} =
+func neg*(r: var FF, a: FF) {.meter.} =
  ## Negate modulo p
  when UseASM_X86_64:
    negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@ -221,11 +222,11 @@ func neg*(r: var FF, a: FF) {.inline, meter.} =
    t.mres.czero(isZero)
    r = t
-func neg*(a: var FF) {.inline, meter.} =
+func neg*(a: var FF) {.meter.} =
  ## Negate modulo p
  a.neg(a)
-func div2*(a: var FF) {.inline, meter.} =
+func div2*(a: var FF) {.meter.} =
  ## Modular division by 2
  a.mres.div2_modular(FF.getPrimePlus1div2())
@ -269,7 +270,7 @@ func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
 #
 # Internally those procedures will allocate extra scratchspace on the stack
-func pow*(a: var FF, exponent: BigInt) {.inline.} =
+func pow*(a: var FF, exponent: BigInt) =
  ## Exponentiation modulo p
  ## ``a``: a field element to be exponentiated
  ## ``exponent``: a big integer
@ -282,7 +283,7 @@ func pow*(a: var FF, exponent: BigInt) {.inline.} =
    FF.canUseNoCarryMontySquare()
  )
-func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
+func pow*(a: var FF, exponent: openarray[byte]) =
  ## Exponentiation modulo p
  ## ``a``: a field element to be exponentiated
  ## ``exponent``: a big integer in canonical big endian representation
@ -295,7 +296,7 @@ func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
    FF.canUseNoCarryMontySquare()
  )
-func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
+func powUnsafeExponent*(a: var FF, exponent: BigInt) =
  ## Exponentiation modulo p
  ## ``a``: a field element to be exponentiated
  ## ``exponent``: a big integer
@ -315,7 +316,7 @@ func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
    FF.canUseNoCarryMontySquare()
  )
-func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
+func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) =
  ## Exponentiation modulo p
  ## ``a``: a field element to be exponentiated
  ## ``exponent``: a big integer a big integer in canonical big endian representation
@ -342,47 +343,27 @@ func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
 # ############################################################
 #
 # This implements extra primitives for ergonomics.
 # The in-place ones should be preferred as they avoid copies on assignment
 # Two kinds:
 # - Those that return a field element
 # - Those that internally allocate a temporary field element
-func `+`*(a, b: FF): FF {.noInit, inline, meter.} =
+func `*=`*(a: var FF, b: FF) {.meter.} =
  ## Addition modulo p
  result.sum(a, b)
 func `-`*(a, b: FF): FF {.noInit, inline, meter.} =
  ## Substraction modulo p
  result.diff(a, b)
 func `*`*(a, b: FF): FF {.noInit, inline, meter.} =
  ## Multiplication modulo p
  ##
  ## It is recommended to assign with {.noInit.}
  ## as FF elements are usually large and this
  ## routine will zero init internally the result.
  result.prod(a, b)
 func `*=`*(a: var FF, b: FF) {.inline, meter.} =
  ## Multiplication modulo p
  a.prod(a, b)
-func square*(a: var FF) {.inline, meter.} =
+func square*(a: var FF) {.meter.} =
  ## Squaring modulo p
  a.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
-func square_repeated*(r: var FF, num: int) {.inline, meter.} =
+func square_repeated*(r: var FF, num: int) {.meter.} =
  ## Repeated squarings
  for _ in 0 ..< num:
    r.square()
-func square_repeated*(r: var FF, a: FF, num: int) {.inline, meter.} =
+func square_repeated*(r: var FF, a: FF, num: int) {.meter.} =
  ## Repeated squarings
  r.square(a)
  for _ in 1 ..< num:
    r.square()
-func `*=`*(a: var FF, b: static int) {.inline.} =
+func `*=`*(a: var FF, b: static int) =
  ## Multiplication by a small integer known at compile-time
  # Implementation:
  # We don't want to go convert the integer to the Montgomery domain (O(n²))
@ -464,7 +445,27 @@ func `*=`*(a: var FF, b: static int) {.inline.} =
  else:
    {.error: "Multiplication by this small int not implemented".}
-func `*`*(b: static int, a: FF): FF {.noinit, inline.} =
+func prod*(r: var FF, a: FF, b: static int) =
  ## Multiplication by a small integer known at compile-time
-  result = a
+  const negate = b < 0
-  result *= b
+  const b = if negate: -b
            else: b
  when negate:
    r.neg(a)
  else:
    r = a
  r *= b
 template mulCheckSparse*(a: var Fp, b: Fp) =
  ## Multiplication with optimization for sparse inputs
  when b.isOne().bool:
    discard
  elif b.isZero().bool:
    a.setZero()
  elif b.isMinusOne().bool:
    a.neg()
  else:
    a *= b
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/arithmetic/finite_fields_double_width.nim
+++ b/constantine/arithmetic/finite_fields_double_width.nim
@ -12,7 +12,7 @@ import
  ./bigints,
  ./finite_fields,
  ./limbs,
-  ./limbs_double_width,
+  ./limbs_extmul,
  ./limbs_montgomery
 when UseASM_X86_64:
@ -28,18 +28,22 @@ template doubleWidth*(T: typedesc[Fp]): typedesc =
  ## Return the double-width type matching with Fp
  FpDbl[T.C]
-func `==`*(a, b: FpDbl): SecretBool {.inline.} =
+# No exceptions allowed
 {.push raises: [].}
 {.push inline.}
 func `==`*(a, b: FpDbl): SecretBool =
  a.limbs2x == b.limbs2x
-func mulNoReduce*(r: var FpDbl, a, b: Fp) {.inline.} =
+func mulNoReduce*(r: var FpDbl, a, b: Fp) =
  ## Store the product of ``a`` by ``b`` into ``r``
  r.limbs2x.prod(a.mres.limbs, b.mres.limbs)
-func squareNoReduce*(r: var FpDbl, a: Fp) {.inline.} =
+func squareNoReduce*(r: var FpDbl, a: Fp) =
  ## Store the square of ``a`` into ``r``
  r.limbs2x.square(a.mres.limbs)
-func reduce*(r: var Fp, a: FpDbl) {.inline.} =
+func reduce*(r: var Fp, a: FpDbl) =
  ## Reduce a double-width field element into r
  const N = r.mres.limbs.len
  montyRed(
@ -54,7 +58,7 @@ func diffNoReduce*(r: var FpDbl, a, b: FpDbl) =
  ## Double-width substraction without reduction
  discard r.limbs2x.diff(a.limbs2x, b.limbs2x)
-func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
+func diff*(r: var FpDbl, a, b: FpDbl) =
  ## Double-width modular substraction
  when UseASM_X86_64:
    sub2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
@ -69,6 +73,9 @@ func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
      addC(carry, sum, r.limbs2x[i+N], M.limbs[i], carry)
      underflowed.ccopy(r.limbs2x[i+N], sum)
-func `-=`*(a: var FpDbl, b: FpDbl) {.inline.}=
+func `-=`*(a: var FpDbl, b: FpDbl) =
  ## Double-width modular substraction
  a.diff(a, b)
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/arithmetic/finite_fields_inversion.nim
+++ b/constantine/arithmetic/finite_fields_inversion.nim
@ -19,13 +19,17 @@ export zoo_inversions
 #
 # ############################################################
-func inv_euclid*(r: var Fp, a: Fp) {.inline.} =
+# No exceptions allowed
 {.push raises: [].}
 {.push inline.}
 func inv_euclid*(r: var Fp, a: Fp) =
  ## Inversion modulo p via
  ## Niels Moller constant-time version of
  ## Stein's GCD derived from extended binary Euclid algorithm
  r.mres.steinsGCD(a.mres, Fp.getR2modP(), Fp.C.Mod, Fp.getPrimePlus1div2())
-func inv*(r: var Fp, a: Fp) {.inline.} =
+func inv*(r: var Fp, a: Fp) =
  ## Inversion modulo p
  ##
  ## The inverse of 0 is 0.
@ -41,7 +45,7 @@ func inv*(r: var Fp, a: Fp) {.inline.} =
  else:
    r.inv_euclid(a)
-func inv*(a: var Fp) {.inline.} =
+func inv*(a: var Fp) =
  ## Inversion modulo p
  ##
  ## The inverse of 0 is 0.
@ -52,3 +56,6 @@ func inv*(a: var Fp) {.inline.} =
    a.inv_addchain(a)
  else:
    a.inv_euclid(a)
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/arithmetic/finite_fields_square_root.nim
+++ b/constantine/arithmetic/finite_fields_square_root.nim
@ -18,10 +18,14 @@ import
 #
 # ############################################################
 # No exceptions allowed
 {.push raises: [].}
 {.push inline.}
 # Legendre symbol / Euler's Criterion / Kronecker's symbol
 # ------------------------------------------------------------
-func isSquare*(a: Fp): SecretBool {.inline.} =
+func isSquare*(a: Fp): SecretBool =
  ## Returns true if ``a`` is a square (quadratic residue) in 𝔽p
  ##
  ## Assumes that the prime modulus ``p`` is public.
@ -50,7 +54,7 @@ func hasP3mod4_primeModulus(C: static Curve): static bool =
  ## Returns true iff p ≡ 3 (mod 4)
  (BaseType(C.Mod.limbs[0]) and 3) == 3
-func sqrt_p3mod4(a: var Fp) {.inline.} =
+func sqrt_p3mod4(a: var Fp) =
  ## Compute the square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -64,7 +68,7 @@ func sqrt_p3mod4(a: var Fp) {.inline.} =
  static: doAssert BaseType(Fp.C.Mod.limbs[0]) mod 4 == 3
  a.powUnsafeExponent(Fp.getPrimePlus1div4_BE())
-func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
+func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) =
  ## If ``a`` is a square, compute the square root of ``a`` in sqrt
  ## and the inverse square root of a in invsqrt
  ##
@ -85,7 +89,7 @@ func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
  # √a ≡ a * 1/√a ≡ a^((p+1)/4) (mod p)
  sqrt.prod(invsqrt, a)
-func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} =
+func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a`` in sqrt
  ## and the inverse square root of a in invsqrt
  ##
@ -97,7 +101,7 @@ func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.i
  test.square(sqrt)
  result = test == a
-func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} =
+func sqrt_if_square_p3mod4*(a: var Fp): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a``
  ## if not, ``a`` is unmodified.
  ##
@ -115,7 +119,7 @@ func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} =
 # Specialized routines for addchain-based square roots
 # ------------------------------------------------------------
-func sqrt_addchain(a: var Fp) {.inline.} =
+func sqrt_addchain(a: var Fp) =
  ## Compute the square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -128,13 +132,13 @@ func sqrt_addchain(a: var Fp) {.inline.} =
  invsqrt.invsqrt_addchain(a)
  a *= invsqrt
-func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
+func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) =
  ## If ``a`` is a square, compute the square root of ``a`` in sqrt
  ## and the inverse square root of a in invsqrt
  invsqrt.invsqrt_addchain(a)
  sqrt.prod(invsqrt, a)
-func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} =
+func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a`` in sqrt
  ## and the inverse square root of a in invsqrt
  ##
@ -144,7 +148,7 @@ func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {
  test.square(sqrt)
  result = test == a
-func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} =
+func sqrt_if_square_addchain*(a: var Fp): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a``
  ## if not, ``a`` is unmodified.
  ##
@ -155,6 +159,8 @@ func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} =
  result = sqrt_invsqrt_if_square_addchain(sqrt, invsqrt, a)
  a.ccopy(sqrt, result)
 {.pop.} # inline
 # Tonelli Shanks for any prime
 # ------------------------------------------------------------
@ -228,7 +234,7 @@ func sqrt_invsqrt_tonelli_shanks_pre(
 # ----------------------------------------------
-func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} =
+func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) =
  ## Compute the square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -244,7 +250,7 @@ func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} =
  sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
  a = sqrt
-func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) {.inline.} =
+func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) =
  ## Compute the square root and inverse square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -258,7 +264,7 @@ func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: stat
  a_pre_exp.precompute_tonelli_shanks(a, useAddChain)
  sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
-func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool  {.inline.} =
+func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool =
  ## Compute the square root and ivnerse square root of ``a``
  ##
  ## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root
@ -274,7 +280,7 @@ func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddC
  sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
  a = sqrt
-func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool {.inline.} =
+func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a``
  ## if not, ``a`` is unmodified.
  ##
@ -293,7 +299,9 @@ func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): Secret
 # Note: we export the inner sqrt_invsqrt_IMPL
 #       for benchmarking purposes.
-func sqrt*[C](a: var Fp[C]) {.inline.} =
+{.push inline.}
 func sqrt*[C](a: var Fp[C]) =
  ## Compute the square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -311,7 +319,7 @@ func sqrt*[C](a: var Fp[C]) {.inline.} =
  else:
    sqrt_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain())
-func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} =
+func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) =
  ## Compute the square root and inverse square root of ``a``
  ##
  ## This requires ``a`` to be a square
@ -328,7 +336,7 @@ func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} =
  else:
    sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain())
-func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool  {.inline.} =
+func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool  =
  ## Compute the square root and ivnerse square root of ``a``
  ##
  ## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root
@ -345,7 +353,7 @@ func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool
  else:
    result = sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain())
-func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} =
+func sqrt_if_square*[C](a: var Fp[C]): SecretBool =
  ## If ``a`` is a square, compute the square root of ``a``
  ## if not, ``a`` is unmodified.
  ##
@ -359,3 +367,6 @@ func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} =
    result = sqrt_if_square_p3mod4(a)
  else:
    result = sqrt_if_square_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain())
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/arithmetic/limbs.nim
+++ b/constantine/arithmetic/limbs.nim
@ -174,7 +174,7 @@ func isEven*(a: Limbs): SecretBool =
 # Bit manipulation
 # ------------------------------------------------------------
-func shiftRight*(a: var Limbs, k: int) {.inline.}=
+func shiftRight*(a: var Limbs, k: int) =
  ## Shift right by k.
  ##
  ## k MUST be less than the base word size (2^32 or 2^64)
--- a/constantine/arithmetic/limbs_double_width.nim
+++ b/constantine/arithmetic/limbs_double_width.nim
@ -21,13 +21,46 @@ when UseASM_X86_64:
 #
 # ############################################################
 # Inlining note:
 # - The public dispatch functions are inlined.
 #   This allows the compiler to check CPU features only once
 #   in the high-level proc and also removes an intermediate function call
 #   to the wrapper function
 # - The ASM procs or internal fallbacks are not inlined
 #   to save on code size.
 # No exceptions allowed
 {.push raises: [].}
 # Multiplication
 # ------------------------------------------------------------
-func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
+func prod_comba[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
  ## Extended precision multiplication
  # We use Product Scanning / Comba multiplication
  var t, u, v = Zero
  const stopEx = min(a.len+b.len, r.len)
  staticFor i, 0, stopEx:
    # Invariant for product scanning:
    # if we multiply accumulate by a[k1] * b[k2]
    # we have k1+k2 == i
    const ib = min(b.len-1, i)
    const ia = i - ib
    staticFor j, 0, min(a.len - ia, ib+1):
      mulAcc(t, u, v, a[ia+j], b[ib-j])
    r[i] = v
    when i < stopEx-1:
      v = u
      u = t
      t = Zero
  if aLen+bLen < rLen:
    for i in aLen+bLen ..< rLen:
      r[i] = Zero
 func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) {.inline.} =
  ## Multi-precision multiplication
  ## r <- a*b
  ##
@ -46,28 +79,7 @@ func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b:
  elif UseASM_X86_64:
    mul_asm(r, a, b)
  else:
-    # We use Product Scanning / Comba multiplication
+    prod_comba(r, a, b)
    var t, u, v = Zero
    const stopEx = min(a.len+b.len, r.len)
    staticFor i, 0, stopEx:
      # Invariant for product scanning:
      # if we multiply accumulate by a[k1] * b[k2]
      # we have k1+k2 == i
      const ib = min(b.len-1, i)
      const ia = i - ib
      staticFor j, 0, min(a.len - ia, ib+1):
        mulAcc(t, u, v, a[ia+j], b[ib-j])
      r[i] = v
      when i < stopEx-1:
        v = u
        u = t
        t = Zero
    if aLen+bLen < rLen:
      for i in aLen+bLen ..< rLen:
        r[i] = Zero
 func prod_high_words*[rLen, aLen, bLen](
       r: var Limbs[rLen],
@ -178,7 +190,7 @@ func square_operandScan[rLen, aLen](
 func square*[rLen, aLen](
       r: var Limbs[rLen],
-       a: Limbs[aLen]) =
+       a: Limbs[aLen]) {.inline.} =
  ## Multi-precision squaring
  ## r <- a²
  ##
--- a/constantine/arithmetic/limbs_modular.nim
+++ b/constantine/arithmetic/limbs_modular.nim
@ -20,7 +20,7 @@ import
 #
 # ############################################################
-func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.}=
+func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.} =
  ## Modular Division by 2
  ## `a` will be divided in-place
  ## `mp1div2` is the modulus (M+1)/2
--- a/constantine/arithmetic/limbs_montgomery.nim
+++ b/constantine/arithmetic/limbs_montgomery.nim
@ -40,7 +40,13 @@ when UseASM_X86_64:
 #       - pairing final exponentiation
 #       are bottlenecked by Montgomery multiplications or squarings
 #
-# Hence we use inline assembly where possible
+# Inlining note:
 # - The public dispatch functions are inlined.
 #   This allows the compiler to check CPU features only once
 #   in the high-level proc and also removes an intermediate function call
 #   to the wrapper function
 # - The ASM procs or internal fallbacks are not inlined
 #   to save on code size.
 # No exceptions allowed
 {.push raises: [].}
@ -381,31 +387,12 @@ func montyRed_Comba[N: static int](
  discard z.csub(M, SecretBool(carry) or not(z < M))
  r = z
 # TODO upstream, using Limbs[N] breaks semcheck
 func montyRed*[N: static int](
       r: var array[N, SecretWord],
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType, canUseNoCarryMontyMul: static bool) =
  ## Montgomery reduce a double-width bigint modulo M
  when UseASM_X86_64 and r.len <= 6:
    if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()):
      montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul)
    else:
      montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
  elif UseASM_X86_32 and r.len <= 6:
    # TODO: Assembly faster than GCC but slower than Clang
    montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
  else:
    montyRed_CIOS(r, a, M, m0ninv)
    # montyRed_Comba(r, a, M, m0ninv)
 # Exported API
 # ------------------------------------------------------------
 func montyMul*(
        r: var Limbs, a, b, M: Limbs,
-        m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
+        m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
  ## Compute r <- a*b (mod M) in the Montgomery domain
  ## `m0ninv` = -1/M (mod SecretWord). Our words are 2^32 or 2^64
  ##
@ -471,6 +458,25 @@ func montySquare*(r: var Limbs, a, M: Limbs,
  #   # montySquare_CIOS(r, a, M, m0ninv) # TODO <--- Fix this
  #   montyMul_FIPS(r, a, a, M, m0ninv)
 # TODO upstream, using Limbs[N] breaks semcheck
 func montyRed*[N: static int](
       r: var array[N, SecretWord],
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
  ## Montgomery reduce a double-width bigint modulo M
  when UseASM_X86_64 and r.len <= 6:
    if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()):
      montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul)
    else:
      montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
  elif UseASM_X86_32 and r.len <= 6:
    # TODO: Assembly faster than GCC but slower than Clang
    montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
  else:
    montyRed_CIOS(r, a, M, m0ninv)
    # montyRed_Comba(r, a, M, m0ninv)
 func redc*(r: var Limbs, a, one, M: Limbs,
           m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
  ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)
--- a/constantine/config/curves_declaration.nim
+++ b/constantine/config/curves_declaration.nim
@ -83,12 +83,11 @@ declareCurves:
    eq_form: ShortWeierstrass
    coef_a: 0
    coef_b: 2
-    nonresidue_quad_fp: -1       #      -1   is not a square in 𝔽p
+    nonresidue_fp: -1       #      -1   is not a square in 𝔽p
-    nonresidue_cube_fp2: (1, 1)  # 1+𝑖   1+𝑖  is not a cube in 𝔽p²
+    nonresidue_fp2: (1, 1)  # 1+𝑖   1+𝑖  is not a square or cube in 𝔽p²
    embedding_degree: 12
    sexticTwist: D_Twist
    sexticNonResidue_fp2: (1, 1) # 1+𝑖
  curve BN254_Snarks: # Zero-Knowledge proofs curve (SNARKS, STARKS, Ethereum)
    bitwidth: 254
@ -103,12 +102,11 @@ declareCurves:
    eq_form: ShortWeierstrass
    coef_a: 0
    coef_b: 3
-    nonresidue_quad_fp: -1       #      -1   is not a square in 𝔽p
+    nonresidue_fp: -1       #      -1   is not a square in 𝔽p
-    nonresidue_cube_fp2: (9, 1)  # 9+𝑖   9+𝑖  is not a cube in 𝔽p²
+    nonresidue_fp2: (9, 1)  # 9+𝑖   9+𝑖  is not a square or cube in 𝔽p²
    embedding_degree: 12
    sexticTwist: D_Twist
    sexticNonResidue_fp2: (9, 1) # 9+𝑖
  curve Curve25519: # Bernstein curve
    bitwidth: 255
@ -136,12 +134,11 @@ declareCurves:
    eq_form: ShortWeierstrass
    coef_a: 0
    coef_b: 1
-    nonresidue_quad_fp: -5       #      -5   is not a square in 𝔽p
+    nonresidue_fp: -5       #      -5   is not a square in 𝔽p
-    nonresidue_cube_fp2: (0, 1)  # √-5  √-5  is not a cube in 𝔽p²
+    nonresidue_fp2: (0, 1)  # √-5  √-5  is not a square or cube in 𝔽p²
    embedding_degree: 12
    sexticTwist: D_Twist
    sexticNonResidue_fp2: (0, 1) # √-5
  curve BLS12_381:
    bitwidth: 381
@ -157,12 +154,11 @@ declareCurves:
    eq_form: ShortWeierstrass
    coef_a: 0
    coef_b: 4
-    nonresidue_quad_fp: -1       #      -1   is not a square in 𝔽p
+    nonresidue_fp: -1       #      -1   is not a square in 𝔽p
-    nonresidue_cube_fp2: (1, 1)  # 1+𝑖   1+𝑖  is not a cube in 𝔽p²
+    nonresidue_fp2: (1, 1)  # 1+𝑖   1+𝑖  is not a square or cube in 𝔽p²
    embedding_degree: 12
    sexticTwist: M_Twist
    sexticNonResidue_fp2: (1, 1) # 1+𝑖
  curve BW6_761:
    bitwidth: 761
@ -182,9 +178,8 @@ declareCurves:
    coef_b: -1
    # TODO: rework the quad/cube/sextic non residue declaration
-    nonresidue_quad_fp: -4       # -4   is not a square in 𝔽p
+    nonresidue_fp: -4       # -4   is not a square or cube in 𝔽p
-    nonresidue_cube_fp2: (0, 1)  # -4   is not a cube in 𝔽p²
+    nonresidue_fp2: (0, 1)  # -4   is not a cube in 𝔽p²
    embedding_degree: 6
    sexticTwist: M_Twist
    sexticNonResidue_fp2: (0, 1)  # -4
--- a/constantine/config/curves_parser.nim
+++ b/constantine/config/curves_parser.nim
@ -96,8 +96,8 @@ type
    modulus: NimNode  # nnkStrLit (hex)
    # Towering
-    nonresidue_quad_fp: NimNode # nnkIntLit
+    nonresidue_fp: NimNode # nnkIntLit
-    nonresidue_cube_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
+    nonresidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
    # Curve parameters
    eq_form: CurveEquationForm
@ -108,7 +108,6 @@ type
    embedding_degree: int
    sexticTwist: SexticTwist
    sexticNonResidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
    family: CurveFamily
@ -198,17 +197,15 @@ proc parseCurveDecls(defs: var seq[CurveParams], curves: NimNode) =
        params.orderBitwidth = sectionVal
      elif sectionId.eqIdent"cofactor":
        discard "TODO"
-      elif sectionId.eqIdent"nonresidue_quad_fp":
+      elif sectionId.eqIdent"nonresidue_fp":
-        params.nonresidue_quad_fp = sectionVal
+        params.nonresidue_fp = sectionVal
-      elif sectionId.eqIdent"nonresidue_cube_fp2":
+      elif sectionId.eqIdent"nonresidue_fp2":
-        params.nonresidue_cube_fp2 = sectionVal
+        params.nonresidue_fp2 = sectionVal
      elif sectionId.eqIdent"embedding_degree":
        params.embedding_degree = sectionVal.intVal.int
      elif sectionId.eqIdent"sexticTwist":
        params.sexticTwist = parseEnum[SexticTwist]($sectionVal)
      elif sectionId.eqIdent"sexticNonResidue_fp2":
        params.sexticNonResidue_fp2 = sectionVal
      else:
        error "Invalid section: \n", curveParams[i].toStrLit()
@ -323,12 +320,12 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode =
      # Towering
      # -----------------------------------------------
      curveEllipticStmts.add newConstStmt(
-        exported($curve & "_nonresidue_quad_fp"),
+        exported($curve & "_nonresidue_fp"),
-        curveDef.nonresidue_quad_fp
+        curveDef.nonresidue_fp
      )
      curveEllipticStmts.add newConstStmt(
-        exported($curve & "_nonresidue_cube_fp2"),
+        exported($curve & "_nonresidue_fp2"),
-        curveDef.nonresidue_cube_fp2
+        curveDef.nonresidue_fp2
      )
      # Pairing
@ -341,10 +338,6 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode =
        exported($curve & "_sexticTwist"),
        newLit curveDef.sexticTwist
      )
      curveEllipticStmts.add newConstStmt(
        exported($curve & "_sexticNonResidue_fp2"),
        curveDef.sexticNonResidue_fp2
      )
  # end for ---------------------------------------------------
--- a/constantine/config/curves_prop_core.nim
+++ b/constantine/config/curves_prop_core.nim
@ -82,21 +82,27 @@ macro getCoefB*(C: static Curve): untyped =
  ## or a bigInt depending on the curve
  result = bindSym($C & "_coef_B")
-macro get_QNR_Fp*(C: static Curve): untyped =
+macro getNonResidueFp*(C: static Curve): untyped =
-  ## Returns the tower extension quadratic non-residue in 𝔽p
+  ## Returns the tower extension (and twist) non-residue for 𝔽p
-  ## i.e. a number that is not a square in 𝔽p
+  ## Depending on the curve it might be:
-  result = bindSym($C & "_nonresidue_quad_fp")
+  ## - not a square (quadratic non-residue to construct Fp2)
  ## - not a cube (cubic non-residue to construct Fp3)
  ## - neither a square or cube (sextic non-residue to construct Fp2, Fp3 or Fp6)
  result = bindSym($C & "_nonresidue_fp")
-macro get_CNR_Fp2*(C: static Curve): untyped =
+macro getNonResidueFp2*(C: static Curve): untyped =
-  ## Returns the tower extension cubic non-residue 𝔽p²
+  ## Returns the tower extension (and twist) non-residue for 𝔽p²
-  ## i.e. a number that is not a cube in 𝔽p²
+  ## Depending on the curve it might be:
  ## - not a square (quadratic non-residue to construct Fp4)
  ## - not a cube (cubic non-residue to construct Fp6)
  ## - neither a square or cube (sextic non-residue to construct Fp4, Fp6 or Fp12)
  ##
  ## The return value is a tuple (a, b)
  ## that corresponds to the number a + b𝑗
  ## with 𝑗 choosen for 𝑗² - QNR_Fp == 0
  ## i.e. if -1 is chosen as a quadratic non-residue 𝑗 = √-1
  ##      if -2 is chosen as a quadratic non-residue 𝑗 = √-2
-  result = bindSym($C & "_nonresidue_cube_fp2")
+  result = bindSym($C & "_nonresidue_fp2")
 macro getEmbeddingDegree*(C: static Curve): untyped =
  ## Returns the prime embedding degree,
@ -108,9 +114,3 @@ macro getEmbeddingDegree*(C: static Curve): untyped =
 macro getSexticTwist*(C: static Curve): untyped =
  ## Returns if D-Twist or M-Twist
  result = bindSym($C & "_sexticTwist")
 macro get_SNR_Fp2*(C: static Curve): untyped =
  ## Returns the sextic non-residue in 𝔽p²
  ## choosen to build the twisted curve E'(𝔽p²)
  ## i.e. a number µ so that x⁶ - µ is irreducible
  result = bindSym($C & "_sexticNonResidue_fp2")
--- a/constantine/elliptic/ec_shortweierstrass_affine.nim
+++ b/constantine/elliptic/ec_shortweierstrass_affine.nim
@ -32,6 +32,8 @@ type
    ## over a field F
    x*, y*: F
  SexticNonResidue* = NonResidue
 func isInf*(P: ECP_ShortW_Aff): SecretBool =
  ## Returns true if P is an infinity point
  ## and false otherwise
--- a/constantine/isogeny/frobenius.nim
+++ b/constantine/isogeny/frobenius.nim
@ -49,48 +49,6 @@ func frobenius_map*(r: var Fp2, a: Fp2, k: static int = 1) {.inline.} =
  else:
    r = a
 template mulCheckSparse(a: var Fp, b: Fp) =
  when b.isOne().bool:
    discard
  elif b.isZero().bool:
    a.setZero()
  elif b.isMinusOne().bool:
    a.neg()
  else:
    a *= b
 template mulCheckSparse(a: var Fp2, b: Fp2) =
  when b.isOne().bool:
    discard
  elif b.isMinusOne().bool:
    a.neg()
  elif b.c0.isZero().bool and b.c1.isOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t.neg(a.c1)
      a.c1 = a.c0
      a.c0 = t
    else:
      t = NonResidue * a.c1
      a.c1 = a.c0
      a.c0 = t
  elif b.c0.isZero().bool and b.c1.isMinusOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t = a.c1
      a.c1.neg(a.c0)
      a.c0 = t
    else:
      t = NonResidue * a.c1
      a.c1.neg(a.c0)
      a.c0.neg(t)
  elif b.c0.isZero().bool:
    a.mul_sparse_by_0y(b)
  elif b.c1.isZero().bool:
    a.mul_sparse_by_x0(b)
  else:
    a *= b
 # Frobenius map - on extension fields
 # -----------------------------------------------------------------
--- a/constantine/pairing/lines_common.nim
+++ b/constantine/pairing/lines_common.nim
@ -35,6 +35,22 @@ type
    ##   with Fp2 coordinates: xyz000
    x*, y*, z*: F
  SexticNonResidue* = NonResidue
    ## The Sextic non-residue to build
    ## 𝔽p2 -> 𝔽p12 towering and the G2 sextic twist
    ## or
    ## 𝔽p -> 𝔽p6 towering and the G2 sextic twist
    ##
    ## Note:
    ## while the non-residues for
    ## - 𝔽p2 -> 𝔽p4
    ## - 𝔽p2 -> 𝔽p6
    ## are also sextic non-residues by construction.
    ## the non-residues for
    ## - 𝔽p4 -> 𝔽p12
    ## - 𝔽p6 -> 𝔽p12
    ## are not.
 func toHex*(line: Line, order: static Endianness = bigEndian): string =
  result = static($line.typeof.genericHead() & '(')
  for fieldName, fieldValue in fieldPairs(line):
--- a/constantine/pairing/lines_projective.nim
+++ b/constantine/pairing/lines_projective.nim
@ -92,15 +92,10 @@ func line_eval_double[F](
  template B: untyped = line.y
  template C: untyped = line.z
-  A = T.y               # A = Y
+  A.prod(T.y, T.z)      # A = Y.Z
-  v = T.y               # v = Y
+  C.square(T.x)         # C = X²
-  B = T.z               # B = Z
+  v.square(T.y)         # v = Y²
-  C = T.x               # C = X
+  B.square(T.z)         # B = Z²
  A *= B                # A = Y.Z
  C.square()            # C = X²
  v.square()            # v = Y²
  B.square()            # B = Z²
  A.double()            # A =  2 Y.Z
  A.neg()               # A = -2 Y.Z
@ -148,28 +143,21 @@ func line_eval_add[F](
  template B: untyped = line.y
  template C: untyped = line.z
-  A = T.x     # A = X₁
+  v.prod(T.z, Q.y) # v = Z₁Y₂
-  v = T.z     # v = Z₁
+  B.prod(T.z, Q.x) # B = Z₁X₂
  B = T.z     # B = Z₁
  C = T.y     # C = Y₁
-  v *= Q.y    # v = Z₁Y₂
+  A.diff(T.x, B)   # A = X₁-Z₁X₂
-  B *= Q.x    # B = Z₁X₂
+  C.diff(T.y, v)   # C = Y₁-Z₁Y₂
-  A -= B      # A = X₁-Z₁X₂
+  v.prod(A, Q.y)   # v = (X₁-Z₁X₂) Y₂
-  C -= v      # C = Y₁-Z₁Y₂
+  B.prod(C, Q.x)   # B = (Y₁-Z₁Y₂) X₂
  B -= v           # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂
  C.neg()          # C = -(Y₁-Z₁Y₂)
  v = A       # v = X₁-Z₁X₂
  when F.C.getSexticTwist() == M_Twist:
    A *= SexticNonResidue # A = ξ (X₁ - Z₁X₂)
  v *= Q.y    # v = (X₁-Z₁X₂) Y₂
  B = C       # B = Y₁-Z₁Y₂
  B *= Q.x    # B = (Y₁-Z₁Y₂) X₂
  B -= v      # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂
  C.neg()     # C = -(Y₁-Z₁Y₂)
 func line_eval_fused_double[F](
       line: var Line[F],
       T: var ECP_ShortW_Proj[F, OnTwist]) =
@ -193,13 +181,11 @@ func line_eval_fused_double[F](
  when F.C.getSexticTwist() == D_Twist:
    snrB *= SexticNonResidue
-  E = C
+  E.prod(C, b3)
  E *= b3
  when F.C.getSexticTwist() == M_Twist:
    E *= SexticNonResidue # E = 3b'Z² = 3bξ Z²
-  F = E
+  F.prod(E, 3)      # F = 3E = 9bZ²
  F *= 3            # F = 3E = 9bZ²
  G.sum(snrB, F)
  G.div2()          # G = (B+F)/2
  H.sum(T.y, T.z)
--- a/constantine/pairing/mul_fp12_by_lines.nim
+++ b/constantine/pairing/mul_fp12_by_lines.nim
@ -56,7 +56,7 @@ func mul_by_line_xy0*[C: static Curve](
  v0.prod(a.c0, b.x)
  v1.prod(a.c1, b.y)
  r.c0.prod(a.c2, b.y)
-  r.c0 *= ξ
+  r.c0 *= SexticNonResidue
  r.c0 += v0
  r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
@ -93,10 +93,9 @@ func mul_sparse_by_line_xy00z0*[C: static Curve](
  f.c1 += f.c0
  v3.mul_by_line_xy0(f.c1, v2)
  v3 -= v0
-  v3 -= v1
+  f.c1.diff(v3, v1)
  f.c1 = v3
-  v3.c0 = ξ * v1.c2
+  v3.c0.prod(v1.c2, SexticNonResidue)
  v3.c0 += v0.c0
  v3.c1.sum(v0.c1, v1.c0)
  v3.c2.sum(v0.c2, v1.c1)
@ -150,7 +149,7 @@ func mul_sparse_by_line_xyz000*[C: static Curve](
  # r0 = ξ a2 b1 + v0
  f.c0.mul_sparse_by_y0(f.c2, l.z)
-  f.c0 *= NonResidue
+  f.c0 *= SexticNonResidue
  f.c0 += v0
  # r2 = a2 b0 + v1
@ -201,12 +200,12 @@ func mul_sparse_by_line_xy000z*[C: static Curve](
  # r0 = ξ a1 b2 + v0
  f.c0.mul_sparse_by_0y(f.c1, l.z)
-  f.c0 *= NonResidue
+  f.c0 *= SexticNonResidue
  f.c0 += v0
  # r1 = a1 b0 + ξ v2
  f.c1 *= b0
-  v2 *= NonResidue
+  v2 *= SexticNonResidue
  f.c1 += v2
 func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =
--- a/constantine/pairing/pairing_bls12.nim
+++ b/constantine/pairing/pairing_bls12.nim
@ -89,7 +89,8 @@ func millerLoopGenericBLS12*[C](
  f.setOne()
  template u: untyped = C.pairing(ate_param)
-  let u3 = 3*C.pairing(ate_param)
+  var u3 = C.pairing(ate_param)
  u3 *= 3
  for i in countdown(u3.bits - 2, 1):
    f.square()
    line.line_double(T, P)
--- a/constantine/pairing/pairing_bn.nim
+++ b/constantine/pairing/pairing_bn.nim
@ -87,7 +87,8 @@ func millerLoopGenericBN*[C](
  f.setOne()
  template u: untyped = C.pairing(ate_param)
-  let u3 = 3*C.pairing(ate_param)
+  var u3 = C.pairing(ate_param)
  u3 *= 3
  for i in countdown(u3.bits - 2, 1):
    f.square()
    line.line_double(T, P)
--- a/constantine/tower_field_extensions/cubic_extensions.nim
+++ b/constantine/tower_field_extensions/cubic_extensions.nim
@ -1,228 +0,0 @@
 # Constantine
 # Copyright (c) 2018-2019    Status Research & Development GmbH
 # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 import
  ../arithmetic,
  ../primitives,
  ./tower_common
 # Commutative ring implementation for Cubic Extension Fields
 # -------------------------------------------------------------------
 # Cubic extensions can use specific squaring procedures
 # beyond Schoolbook and Karatsuba:
 # - Chung-Hasan (3 different algorithms)
 # - Toom-Cook-3x
 #
 # Chung-Hasan papers
 # http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf
 # https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf
 #
 # The papers focus on polynomial squaring, they have been adapted
 # to towered extension fields with the relevant costs in
 #
 # - Multiplication and Squaring on Pairing-Friendly Fields
 #   Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006
 #   https://eprint.iacr.org/2006/471
 #
 # Costs in the underlying field
 # M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue
 #
 # | Method      | > Linear | Linear            |
 # |-------------|----------|-------------------|
 # | Schoolbook  | 3M + 3S  | 6A + 2B           |
 # | Karatsuba   | 6S       | 13A + 2B          |
 # | Tom-Cook-3x | 5S       | 33A + 2B          |
 # | CH-SQR1     | 3M + 2S  | 11A + 2B          |
 # | CH-SQR2     | 2M + 3S  | 10A + 2B          |
 # | CH-SQR3     | 1M + 4S  | 11A + 2B + 1 Div2 |
 # | CH-SQR3x    | 1M + 4S  | 14A + 2B          |
 func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}=
  ## Returns r = a²
  mixin prod, square, sum
  var v3{.noInit.}, v4{.noInit.}, v5{.noInit.}: typeof(r.c0)
  v4.prod(a.c0, a.c1)
  v4.double()
  v5.square(a.c2)
  r.c1 = NonResidue * v5
  r.c1 += v4
  r.c2.diff(v4, v5)
  v3.square(a.c0)
  v4.diff(a.c0, a.c1)
  v4 += a.c2
  v5.prod(a.c1, a.c2)
  v5.double()
  v4.square()
  r.c0 = NonResidue * v5
  r.c0 += v3
  r.c2 += v4
  r.c2 += v5
  r.c2 -= v3
 func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
  ## Returns r = a²
  mixin prod, square, sum
  var v0{.noInit.}, v2{.noInit.}: typeof(r.c0)
  r.c1.sum(a.c0, a.c2)    # r1 = a0 + a2
  v2.diff(r.c1, a.c1)     # v2 = a0 - a1 + a2
  r.c1 += a.c1            # r1 = a0 + a1 + a2
  r.c1.square()           # r1 = (a0 + a1 + a2)²
  v2.square()             # v2 = (a0 - a1 + a2)²
  r.c2.sum(r.c1, v2)      # r2 = (a0 + a1 + a2)² + (a0 - a1 + a2)²
  r.c2.div2()             # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2
  r.c0.prod(a.c1, a.c2)   # r0 = a1 a2
  r.c0.double()           # r0 = 2 a1 a2
  v2.square(a.c2)         # v2 = a2²
  r.c1 += NonResidue * v2 # r1 = (a0 + a1 + a2)² + β a2²
  r.c1 -= r.c0            # r1 = (a0 + a1 + a2)² - 2 a1 a2 + β a2²
  r.c1 -= r.c2            # r1 = (a0 + a1 + a2)² - 2 a1 a2 - ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 + β a2²
  v0.square(a.c0)         # v0 = a0²
  r.c0 *= NonResidue      # r0 = β 2 a1 a2
  r.c0 += v0              # r0 = a0² + β 2 a1 a2
  r.c2 -= v0              # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0²
  r.c2 -= v2              # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0² - a2²
 func square*(r: var CubicExt, a: CubicExt) {.inline.} =
  ## Returns r = a²
  square_Chung_Hasan_SQR3(r, a)
 func prod*(r: var CubicExt, a, b: CubicExt) =
  ## Returns r = a * b
  ##
  ## r MUST not share a buffer with a
  # Algorithm is Karatsuba
  var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}, t{.noInit.}: typeof(r.c0)
  v0.prod(a.c0, b.c0)
  v1.prod(a.c1, b.c1)
  v2.prod(a.c2, b.c2)
  # r.c0 = β ((a.c1 + a.c2) * (b.c1 + b.c2) - v1 - v2) + v0
  r.c0.sum(a.c1, a.c2)
  t.sum(b.c1, b.c2)
  r.c0 *= t
  r.c0 -= v1
  r.c0 -= v2
  r.c0 *= NonResidue
  r.c0 += v0
  # r.c1 = (a.c0 + a.c1) * (b.c0 + b.c1) - v0 - v1 + β v2
  r.c1.sum(a.c0, a.c1)
  t.sum(b.c0, b.c1)
  r.c1 *= t
  r.c1 -= v0
  r.c1 -= v1
  r.c1 += NonResidue * v2
  # r.c2 = (a.c0 + a.c2) * (b.c0 + b.c2) - v0 - v2 + v1
  r.c2.sum(a.c0, a.c2)
  t.sum(b.c0, b.c2)
  r.c2 *= t
  r.c2 -= v0
  r.c2 -= v2
  r.c2 += v1
 func inv*(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  #
  # Algorithm 5.23
  #
  # Arithmetic of Finite Fields
  # Chapter 5 of Guide to Pairing-Based Cryptography
  # Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\
  # https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields
  #
  # We optimize for stack usage and use 4 temporaries (+r as temporary)
  # instead of 9, because 5 * 2 (𝔽p2) * Bitsize would be:
  # - ~2540 bits for BN254
  # - ~3810 bits for BLS12-381
  var v1 {.noInit.}, v2 {.noInit.}, v3 {.noInit.}: typeof(r.c0)
  # A in r0
  # A <- a0² - β a1 a2
  r.c0.square(a.c0)
  v1.prod(a.c1, a.c2)
  v1 *= NonResidue
  r.c0 -= v1
  # B in v1
  # B <- β a2² - a0 a1
  v1.square(a.c2)
  v1 *= NonResidue
  v2.prod(a.c0, a.c1)
  v1 -= v2
  # C in v2
  # C <- a1² - a0 a2
  v2.square(a.c1)
  v3.prod(a.c0, a.c2)
  v2 -= v3
  # F in v3
  # F <- β a1 C + a0 A + β a2 B
  r.c1.prod(v1, NonResidue * a.c2)
  r.c2.prod(v2, NonResidue * a.c1)
  v3.prod(r.c0, a.c0)
  v3 += r.c1
  v3 += r.c2
  let t = v3 # TODO, support aliasing in all primitives
  v3.inv(t)
  # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
  r.c0 *= v3
  r.c1.prod(v1, v3)
  r.c2.prod(v2, v3)
 func inv*(a: var CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  let t = a
  a.inv(t)
 func `*=`*(a: var CubicExt, b: CubicExt) {.inline.} =
  ## In-place multiplication
  # On higher extension field like 𝔽p6,
  # if `prod` is called on shared in and out buffer, the result is wrong
  let t = a
  a.prod(t, b)
 func conj*(a: var CubicExt) {.inline.} =
  ## Computes the conjugate in-place
  mixin conj, conjneg
  a.c0.conj()
  a.c1.conjneg()
  a.c2.conj()
 func conj*(r: var CubicExt, a: CubicExt) {.inline.} =
  ## Computes the conjugate out-of-place
  mixin conj, conjneg
  r.c0.conj(a.c0)
  r.c1.conjneg(a.c1)
  r.c2.conj(a.c2)
 func square*(a: var CubicExt) {.inline.} =
  ## In-place squaring
  let t = a
  a.square(t)
--- a/constantine/tower_field_extensions/exponentiations.nim
+++ b/constantine/tower_field_extensions/exponentiations.nim
@ -12,9 +12,7 @@ import
  ../config/common,
  ../primitives,
  ../io/io_bigints,
-  ./tower_common,
+  ./extension_fields
  ./quadratic_extensions,
  ./cubic_extensions
 # ############################################################
 #
--- a/constantine/tower_field_extensions/extension_fields.nim
+++ b/constantine/tower_field_extensions/extension_fields.nim
@ -0,0 +1,977 @@
 # Constantine
 # Copyright (c) 2018-2019    Status Research & Development GmbH
 # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 import
  ../config/common,
  ../primitives,
  ../arithmetic
 # Note: to avoid burdening the Nim compiler, we rely on generic extension
 # to complain if the base field procedures don't exist
 # No exceptions allowed
 {.push raises: [].}
 {.push inline.}
 # ############################################################
 #                                                            #
 #                    Extension Fields                        #
 #                                                            #
 # ############################################################
 type
  NonResidue* = object
    ## Non-Residue
    ##
    ## Placeholder for the appropriate quadratic, cubic or sectic non-residue
  CubicExt* = concept x
    ## Cubic Extension field concept
    type BaseField = auto
    x.c0 is BaseField
    x.c1 is BaseField
    x.c2 is BaseField
  QuadraticExt* = concept x
    ## Quadratic Extension field concept
    not(x is CubicExt)
    type BaseField = auto
    x.c0 is BaseField
    x.c1 is BaseField
  ExtensionField* = QuadraticExt or CubicExt
 # Initialization
 # -------------------------------------------------------------------
 func setZero*(a: var ExtensionField) =
  ## Set ``a`` to 0 in the extension field
  for field in fields(a):
    field.setZero()
 func setOne*(a: var ExtensionField) =
  ## Set ``a`` to 1 in the extension field
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      fA.setOne()
    else:
      fA.setZero()
 func fromBig*(a: var ExtensionField, src: BigInt) =
  ## Set ``a`` to the bigint value in the extension field
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      fA.fromBig(src)
    else:
      fA.setZero()
 # Comparison
 # -------------------------------------------------------------------
 func `==`*(a, b: ExtensionField): SecretBool =
  ## Constant-time equality check
  result = CtTrue
  for fA, fB in fields(a, b):
    result = result and (fA == fB)
 func isZero*(a: ExtensionField): SecretBool =
  ## Constant-time check if zero
  result = CtTrue
  for fA in fields(a):
    result = result and fA.isZero()
 func isOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if one
  result = CtTrue
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      result = result and fA.isOne()
    else:
      result = result and fA.isZero()
 func isMinusOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if -1
  result = CtTrue
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      result = result and fA.isMinusOne()
    else:
      result = result and fA.isZero()
 # Copies
 # -------------------------------------------------------------------
 func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time conditional copy
  ## If ctl is true: b is copied into a
  ## if ctl is false: b is not copied and a is unmodified
  ## Time and memory accesses are the same whether a copy occurs or not
  for fA, fB in fields(a, b):
    ccopy(fA, fB, ctl)
 # Abelian group
 # -------------------------------------------------------------------
 func neg*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place negation
  for fR, fA in fields(r, a):
    fR.neg(fA)
 func neg*(a: var ExtensionField) =
  ## Field in-place negation
  for fA in fields(a):
    fA.neg()
 func `+=`*(a: var ExtensionField, b: ExtensionField) =
  ## Addition in the extension field
  for fA, fB in fields(a, b):
    fA += fB
 func `-=`*(a: var ExtensionField, b: ExtensionField) =
  ## Substraction in the extension field
  for fA, fB in fields(a, b):
    fA -= fB
 func double*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place doubling
  for fR, fA in fields(r, a):
    fR.double(fA)
 func double*(a: var ExtensionField) =
  ## Field in-place doubling
  for fA in fields(a):
    fA.double()
 func div2*(a: var ExtensionField) =
  ## Field in-place division by 2
  for fA in fields(a):
    fA.div2()
 func sum*(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Sum ``a`` and ``b`` into ``r``
  r.c0.sum(a.c0, b.c0)
  r.c1.sum(a.c1, b.c1)
 func sum*(r: var CubicExt, a, b: CubicExt) =
  ## Sum ``a`` and ``b`` into ``r``
  r.c0.sum(a.c0, b.c0)
  r.c1.sum(a.c1, b.c1)
  r.c2.sum(a.c2, b.c2)
 func diff*(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Diff ``a`` and ``b`` into ``r``
  r.c0.diff(a.c0, b.c0)
  r.c1.diff(a.c1, b.c1)
 func diff*(r: var CubicExt, a, b: CubicExt) =
  ## Diff ``a`` and ``b`` into ``r``
  r.c0.diff(a.c0, b.c0)
  r.c1.diff(a.c1, b.c1)
  r.c2.diff(a.c2, b.c2)
 func conj*(a: var QuadraticExt) =
  ## Computes the conjugate in-place
  a.c1.neg()
 func conj*(r: var QuadraticExt, a: QuadraticExt) =
  ## Computes the conjugate out-of-place
  r.c0 = a.c0
  r.c1.neg(a.c1)
 func conjneg*(a: var QuadraticExt) =
  ## Computes the negated conjugate in-place
  a.c0.neg()
 func conjneg*(r: var QuadraticExt, a: QuadraticExt) =
  ## Computes the negated conjugate out-of-place
  r.c0.neg(a.c0)
  r.c1 = a.c1
 func conj*(a: var CubicExt) =
  ## Computes the conjugate in-place
  a.c0.conj()
  a.c1.conjneg()
  a.c2.conj()
 func conj*(r: var CubicExt, a: CubicExt) =
  ## Computes the conjugate out-of-place
  r.c0.conj(a.c0)
  r.c1.conjneg(a.c1)
  r.c2.conj(a.c2)
 # Conditional arithmetic
 # -------------------------------------------------------------------
 func cneg*(a: var ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional negation
  ## Only negate if ctl is true
  for fA in fields(a):
    fA.cneg(ctl)
 func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional addition
  for fA, fB in fields(a, b):
    fA.cadd(fB, ctl)
 func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional substraction
  for fA, fB in fields(a, b):
    fA.csub(fB, ctl)
 # Multiplication by a small integer known at compile-time
 # -------------------------------------------------------------------
 func `*=`*(a: var ExtensionField, b: static int) =
  ## Multiplication by a small integer known at compile-time
  const negate = b < 0
  const b = if negate: -b
            else: b
  when negate:
    a.neg(a)
  when b == 0:
    a.setZero()
  elif b == 1:
    return
  elif b == 2:
    a.double()
  elif b == 3:
    let t1 = a
    a.double()
    a += t1
  elif b == 4:
    a.double()
    a.double()
  elif b == 5:
    let t1 = a
    a.double()
    a.double()
    a += t1
  elif b == 6:
    a.double()
    let t2 = a
    a.double() # 4
    a += t2
  elif b == 7:
    let t1 = a
    a.double()
    let t2 = a
    a.double() # 4
    a += t2
    a += t1
  elif b == 8:
    a.double()
    a.double()
    a.double()
  elif b == 9:
    let t1 = a
    a.double()
    a.double()
    a.double() # 8
    a += t1
  elif b == 10:
    a.double()
    let t2 = a
    a.double()
    a.double() # 8
    a += t2
  elif b == 11:
    let t1 = a
    a.double()
    let t2 = a
    a.double()
    a.double() # 8
    a += t2
    a += t1
  elif b == 12:
    a.double()
    a.double() # 4
    let t4 = a
    a.double() # 8
    a += t4
  else:
    {.error: "Multiplication by this small int not implemented".}
 func prod*(r: var ExtensionField, a: ExtensionField, b: static int) =
  ## Multiplication by a small integer known at compile-time
  const negate = b < 0
  const b = if negate: -b
            else: b
  when negate:
    r.neg(a)
  else:
    r = a
  r *= b
 {.pop.} # inline
 # ############################################################
 #                                                            #
 #                Quadratic extensions                        #
 #                                                            #
 # ############################################################
 # Commutative ring implementation for complex quadratic extension fields
 # ----------------------------------------------------------------------
 func square_complex(r: var QuadraticExt, a: QuadraticExt) =
  ## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (c0, c1)² => (c0 + c1𝑖)²
  #           => c0² + 2 c0 c1𝑖 + (c1𝑖)²
  #           => c0²-c1² + 2 c0 c1𝑖
  #           => (c0²-c1², 2 c0 c1)
  #           or
  #           => ((c0-c1)(c0+c1), 2 c0 c1)
  #           => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1)
  #
  # Costs (naive implementation)
  # - 1 Multiplication 𝔽p
  # - 2 Squarings 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 4 * ModulusBitSize (4x 𝔽p element)
  #
  # Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space)
  #
  # - 2 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  #
  # To handle aliasing between r and a, we need
  # r to be used only when a is unneeded
  # so we can't use r fields as a temporary
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  v0.diff(a.c0, a.c1)    # v0 = c0 - c1               [1 Sub]
  v1.sum(a.c0, a.c1)     # v1 = c0 + c1               [1 Dbl, 1 Sub]
  r.c1.prod(a.c0, a.c1)  # r.c1 = c0 c1               [1 Mul, 1 Dbl, 1 Sub]
  # aliasing: a unneeded now
  r.c1.double()          # r.c1 = 2 c0 c1             [1 Mul, 2 Dbl, 1 Sub]
  r.c0.prod(v0, v1)      # r.c0 = (c0 + c1)(c0 - c1)  [2 Mul, 2 Dbl, 1 Sub]
 func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖)
  #                   => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖
  #
  # In Fp, multiplication has cost O(n²) with n the number of limbs
  # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
  # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
  #
  # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
  # where addition/substraction are significantly cheaper than multiplication
  #
  # So we always reframe the imaginary part using Karatsuba approach to save a multiplication
  # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
  #
  # Costs (naive implementation)
  # - 4 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries)
  #
  # Costs (Karatsuba)
  # - 3 Multiplications 𝔽p
  # - 3 Substraction 𝔽p (2 are fused)
  # - 2 Addition 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  # TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down)
  #       for seemingly no reason
  when false: # Single-width implementation - BLS12-381
              # Clang 348 cycles on i9-9980XE @3.9 GHz
    var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
    a0b0.prod(a.c0, b.c0)                                         # [1 Mul]
    a1b1.prod(a.c1, b.c1)                                         # [2 Mul]
    r.c0.sum(a.c0, a.c1)  # r0 = (a0 + a1)                        # [2 Mul, 1 Add]
    r.c1.sum(b.c0, b.c1)  # r1 = (b0 + b1)                        # [2 Mul, 2 Add]
    # aliasing: a and b unneeded now
    r.c1 *= r.c0          # r1 = (b0 + b1)(a0 + a1)               # [3 Mul, 2 Add] - 𝔽p temporary
    r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1                    # [3 Mul, 2 Add, 1 Sub]
    r.c1 -= a0b0          # r1 = (b0 + b1)(a0 + a1) - a0b0        # [3 Mul, 2 Add, 2 Sub]
    r.c1 -= a1b1          # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
  else: # Double-width implementation with lazy reduction
        # Clang 341 cycles on i9-9980XE @3.9 GHz
    var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0))
    var d {.noInit.}: doubleWidth(typeof(r.c0))
    const msbSet = r.c0.typeof.canUseNoCarryMontyMul()
    a0b0.mulNoReduce(a.c0, b.c0)     # 44 cycles - cumul 44
    a1b1.mulNoReduce(a.c1, b.c1)     # 44 cycles - cumul 88
    when msbSet:
      r.c0.sum(a.c0, a.c1)
      r.c1.sum(b.c0, b.c1)
    else:
      r.c0.sumNoReduce(a.c0, a.c1)   # 5 cycles  - cumul 93
      r.c1.sumNoReduce(b.c0, b.c1)   # 5 cycles  - cumul 98
    # aliasing: a and b unneeded now
    d.mulNoReduce(r.c0, r.c1)        # 44 cycles - cumul 142
    when msbSet:
      d -= a0b0
      d -= a1b1
    else:
      d.diffNoReduce(d, a0b0)        # 11 cycles - cumul 153
      d.diffNoReduce(d, a1b1)        # 11 cycles - cumul 164
    a0b0.diff(a0b0, a1b1)            # 19 cycles - cumul 183
    r.c0.reduce(a0b0)                # 50 cycles - cumul 233
    r.c1.reduce(d)                   # 50 cycles - cumul 288
  # Single-width [3 Mul, 2 Add, 3 Sub]
  #    3*88 + 2*14 + 3*14 = 334 theoretical cycles
  #    348 measured
  # Double-Width
  #    288 theoretical cycles
  #    329 measured
  #    Unexplained 40 cycles diff between theo and measured
  #    and unexplained 30 cycles between Clang and GCC
  #    - Function calls?
  #    - push/pop stack?
 func mul_sparse_complex_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖]
  #
  # r0 = a0 b0 - a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = - a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}
  var t{.noInit.}: typeof(a.c0)
  t.prod(a.c1, b)
  r.c1.prod(a.c0, b)
  r.c0.neg(t)
 # Commutative ring implementation for generic quadratic extension fields
 # ----------------------------------------------------------------------
 func square_generic(r: var QuadraticExt, a: QuadraticExt) =
  ## Return a² in ``r``
  ## ``r`` is initialized/overwritten
  # Algorithm (with β the non-residue in the base field)
  #
  # (c0, c1)² => (c0 + c1 w)²
  #           => c0² + 2 c0 c1 w + c1²w²
  #           => c0² + β c1² + 2 c0 c1 w
  #           => (c0² + β c1², 2 c0 c1)
  # We have 2 squarings and 1 multiplication in the base field
  # which are significantly more costly than additions.
  # For example when construction 𝔽p12 from 𝔽p6:
  # - 4 limbs like BN254:     multiplication is 20x slower than addition/substraction
  # - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction
  #
  # We can save operations with one of the following expressions
  # of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate
  #
  # Alternative 1:
  #   c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1
  #
  # Alternative 2:
  #   c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
  mixin prod
  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  # v1 <- (c0 + β c1)
  v1.prod(a.c1, NonResidue)
  v1 += a.c0
  # v0 <- (c0 + c1)(c0 + β c1)
  v0.sum(a.c0, a.c1)
  v0 *= v1
  # v1 <- c0 c1
  v1.prod(a.c0, a.c1)
  # aliasing: a unneeded now
  # r0 = (c0 + c1)(c0 + β c1) - c0c1
  v0 -= v1
  # r1 = 2 c0c1
  r.c1.double(v1)
  # r0 = (c0 + c1)(c0 + β c1) - c0c1 - β c0c1
  v1 *= NonResidue
  r.c0.diff(v0, v1)
 func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Returns r = a * b
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  mixin prod
  var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: typeof(r.c0)
  # v2 <- (a0 + a1)(b0 + b1)
  v0.sum(a.c0, a.c1)
  v1.sum(b.c0, b.c1)
  v2.prod(v0, v1)
  # v0 <- a0 b0
  # v1 <- a1 b1
  v0.prod(a.c0, b.c0)
  v1.prod(a.c1, b.c1)
  # aliasing: a and b unneeded now
  # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
  r.c1.diff(v2, v1)
  r.c1 -= v0
  # r0 <- a0 b0 + β a1 b1
  v1 *= NonResidue
  r.c0.sum(v0, v1)
 func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) =
  ## Multiply `a` by `b` with sparse coordinates (x, 0)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b1 = 0, hence
  #
  # r0 = a0 b0
  # r1 = (a0 + a1) b0 - a0 b0 = a1 b0
  mixin prod
  template b(): untyped = sparseB
  r.c0.prod(a.c0, b.c0)
  r.c1.prod(a.c1, b.c0)
 func mul_sparse_generic_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: auto) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  when typeof(sparseB) is typeof(a):
    template b(): untyped = sparseB.c1
  elif typeof(sparseB) is typeof(a.c0):
    template b(): untyped = sparseB
  else:
    {.error: "sparseB type is " & $typeof(sparseB) &
      " which does not match with either a (" & $typeof(a) &
      ") or a.c0 (" & $typeof(a.c0) & ")".}
  var t{.noInit.}: typeof(a.c0)
  t.prod(a.c1, b)
  t *= NonResidue
  r.c1.prod(a.c0, b)
  # aliasing: a unneeded now
  r.c0 = t
 func mul_sparse_generic_by_0y(
       r: var QuadraticExt, a: QuadraticExt,
       sparseB: static int) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  template b(): untyped = sparseB
  var t{.noInit.}: typeof(a.c0)
  t.prod(a.c1, b)
  t *= NonResidue
  r.c1.prod(a.c0, b)
  # aliasing: a unneeded now
  r.c0 = t
 func invImpl(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  #
  # Algorithm:
  #
  # 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w)
  #                 <=> (a0 - a1 w) / (a0² - a1² w²)
  # with w being our coordinate system and β the quadratic non-residue
  # we have w² = β
  # So the inverse is (a0 - a1 w) / (a0² - β a1²)
  mixin fromComplexExtension
  # [2 Sqr, 1 Add]
  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  v0.square(a.c0)
  v1.square(a.c1)
  when r.fromComplexExtension():
    v0 += v1
  else:
    v1 *= NonResidue
    v0 -= v1              # v0 = a0² - β a1² (the norm / squared magnitude of a)
  # [1 Inv, 2 Sqr, 1 Add]
  v1.inv(v0)              # v1 = 1 / (a0² - β a1²)
  # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
  r.c0.prod(a.c0, v1)     # r0 = a0 / (a0² - β a1²)
  v0.neg(v1)              # v0 = -1 / (a0² - β a1²)
  r.c1.prod(a.c1, v0)     # r1 = -a1 / (a0² - β a1²)
 # Exported quadratic symbols
 # -------------------------------------------------------------------
 {.push inline.}
 func square*(r: var QuadraticExt, a: QuadraticExt) =
  mixin fromComplexExtension
  when r.fromComplexExtension():
    r.square_complex(a)
  else:
    r.square_generic(a)
 func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
  mixin fromComplexExtension
  when r.fromComplexExtension():
    r.prod_complex(a, b)
  else:
    r.prod_generic(a, b)
 func inv*(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  r.invImpl(a)
 func inv*(a: var QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  a.invImpl(a)
 func `*=`*(a: var QuadraticExt, b: QuadraticExt) =
  ## In-place multiplication
  a.prod(a, b)
 func square*(a: var QuadraticExt) =
  ## In-place squaring
  a.square(a)
 func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) =
  ## Sparse multiplication
  mixin fromComplexExtension
  when a.fromComplexExtension():
    r.mul_sparse_complex_by_0y(a, sparseB)
  else:
    r.mul_sparse_generic_by_0y(a, sparseB)
 func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: static int) =
  ## Sparse multiplication
  mixin fromComplexExtension
  when a.fromComplexExtension():
    {.error: "Not implemented.".}
  else:
    r.mul_sparse_generic_by_0y(a, sparseB)
 func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: auto) =
  ## Sparse in-place multiplication
  a.mul_sparse_by_0y(a, sparseB)
 func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) =
  ## Sparse in-place multiplication
  a.mul_sparse_generic_by_x0(a, sparseB)
 {.pop.} # inline
 # ############################################################
 #                                                            #
 #                    Cubic extensions                        #
 #                                                            #
 # ############################################################
 # Commutative ring implementation for Cubic Extension Fields
 # -------------------------------------------------------------------
 # Cubic extensions can use specific squaring procedures
 # beyond Schoolbook and Karatsuba:
 # - Chung-Hasan (3 different algorithms)
 # - Toom-Cook-3x
 #
 # Chung-Hasan papers
 # http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf
 # https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf
 #
 # The papers focus on polynomial squaring, they have been adapted
 # to towered extension fields with the relevant costs in
 #
 # - Multiplication and Squaring on Pairing-Friendly Fields
 #   Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006
 #   https://eprint.iacr.org/2006/471
 #
 # Costs in the underlying field
 # M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue
 #
 # | Method      | > Linear | Linear            |
 # |-------------|----------|-------------------|
 # | Schoolbook  | 3M + 3S  | 6A + 2B           |
 # | Karatsuba   | 6S       | 13A + 2B          |
 # | Tom-Cook-3x | 5S       | 33A + 2B          |
 # | CH-SQR1     | 3M + 2S  | 11A + 2B          |
 # | CH-SQR2     | 2M + 3S  | 10A + 2B          |
 # | CH-SQR3     | 1M + 4S  | 11A + 2B + 1 Div2 |
 # | CH-SQR3x    | 1M + 4S  | 14A + 2B          |
 func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}=
  ## Returns r = a²
  mixin prod, square, sum
  var s0{.noInit.}, m01{.noInit.}, m12{.noInit.}: typeof(r.c0)
  # precomputations that use a
  m01.prod(a.c0, a.c1)
  m01.double()
  m12.prod(a.c1, a.c2)
  m12.double()
  s0.square(a.c2)
  # aliasing: a₂ unused
  # r₂ = (a₀ - a₁ + a₂)²
  r.c2.sum(a.c2, a.c0)
  r.c2 -= a.c1
  r.c2.square()
  # aliasing, a almost unneeded now
  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₂²
  r.c2 += m01
  r.c2 += m12
  r.c2 -= s0
  # r₁ = 2a₀a₁ + β a₂²
  r.c1.prod(s0, NonResidue)
  r.c1 += m01
  # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₀² - a₂²
  s0.square(a.c0)
  r.c2 -= s0
  # r₀ = a₀² + β 2a₁a₂
  r.c0.prod(m12, NonResidue)
  r.c0 += s0
 func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
  ## Returns r = a²
  mixin prod, square, sum
  var s0{.noInit.}, t{.noInit.}, m12{.noInit.}: typeof(r.c0)
  # s₀ = (a₀ + a₁ + a₂)²
  # t = ((a₀ + a₁ + a₂)² + (a₀ - a₁ + a₂)²) / 2
  s0.sum(a.c0, a.c2)
  t.diff(s0, a.c1)
  s0 += a.c1
  s0.square()
  t.square()
  t += s0
  t.div2()
  # m12 = 2a₁a₂ and r₁ = a₂²
  # then a₁ and a₂ are unused for aliasing
  m12.prod(a.c1, a.c2)
  m12.double()
  r.c1.square(a.c2)       # r₁ = a₂²
  r.c2.diff(t, r.c1)      # r₂ = t - a₂²
  r.c1 *= NonResidue      # r₁ = β a₂²
  r.c1 += s0              # r₁ = (a₀ + a₁ + a₂)² + β a₂²
  r.c1 -= m12             # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ + β a₂²
  r.c1 -= t               # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ - t + β a₂²
  s0.square(a.c0)
  # aliasing: a₀ unused
  r.c2 -= s0
  r.c0.prod(m12, NonResidue)
  r.c0 += s0
 func prodImpl(r: var CubicExt, a, b: CubicExt) =
  ## Returns r = a * b  # Algorithm is Karatsuba
  var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(r.c0)
  var t0{.noInit.}, t1{.noInit.}, t2{.noInit.}: typeof(r.c0)
  v0.prod(a.c0, b.c0)
  v1.prod(a.c1, b.c1)
  v2.prod(a.c2, b.c2)
  # r₀ = β ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
  t0.sum(a.c1, a.c2)
  t1.sum(b.c1, b.c2)
  t0 *= t1
  t0 -= v1
  t0 -= v2
  t0 *= NonResidue
  # r₀ = t₀ + v₀ at the end to handle aliasing
  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
  t1.sum(a.c0, a.c1)
  t2.sum(b.c0, b.c1)
  r.c1.prod(t1, t2)
  r.c1 -= v0
  r.c1 -= v1
  t1.prod(v2, NonResidue)
  r.c1 += t1
  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
  t1.sum(a.c0, a.c2)
  t2.sum(b.c0, b.c2)
  r.c2.prod(t1, t2)
  r.c2 -= v0
  r.c2 -= v2
  r.c2 += v1
  # Finish r₀
  r.c0.sum(t0, v0)
 func invImpl(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  #
  # Algorithm 5.23
  #
  # Arithmetic of Finite Fields
  # Chapter 5 of Guide to Pairing-Based Cryptography
  # Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\
  # https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields
  #
  # We optimize for stack usage and use 4 temporaries
  # instead of 9, because 4 * 2 (𝔽p2) * Bitsize would be:
  # - ~2032 bits for BN254
  # - ~3048 bits for BLS12-381
  var A {.noInit.}, B {.noInit.}, C {.noInit.}: typeof(r.c0)
  var t {.noInit.}: typeof(r.c0)
  # A <- a₀² - β a₁ a₂
  A.square(a.c0)
  t.prod(a.c1, a.c2)
  t *= NonResidue
  A -= t
  # B <- β a₂² - a₀ a₁
  B.square(a.c2)
  B *= NonResidue
  t.prod(a.c0, a.c1)
  B -= t
  # C <- a₁² - a₀ a₂
  C.square(a.c1)
  t.prod(a.c0, a.c2)
  C -= t
  # F in t
  # F <- β a₁ C + a₀ A + β a₂ B
  t.prod(a.c1, C)
  r.c2.prod(a.c2, B) # aliasing: last use of a₂, destroy r₂
  t += r.c2
  t *= NonResidue
  r.c0.prod(a.c0, A) # aliasing: last use of a₀, destroy r₀
  t += r.c0
  t.inv()
  # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
  r.c0.prod(A, t)
  r.c1.prod(B, t)
  r.c2.prod(C, t)
 # Exported cubic symbols
 # -------------------------------------------------------------------
 {.push inline.}
 func square*(r: var CubicExt, a: CubicExt) =
  ## Returns r = a²
  square_Chung_Hasan_SQR3(r, a)
 func square*(a: var CubicExt) =
  ## In-place squaring
  a.square(a)
 func prod*(r: var CubicExt, a, b: CubicExt) =
  ## In-place multiplication
  r.prodImpl(a, b)
 func `*=`*(a: var CubicExt, b: CubicExt) =
  ## In-place multiplication
  a.prodImpl(a, b)
 func inv*(r: var CubicExt, a: CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  r.invImpl(a)
 func inv*(a: var CubicExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  a.invImpl(a)
 {.pop.} # inline
 {.pop.} # raises no exceptions
--- a/constantine/tower_field_extensions/quadratic_extensions.nim
+++ b/constantine/tower_field_extensions/quadratic_extensions.nim
@ -1,365 +0,0 @@
 # Constantine
 # Copyright (c) 2018-2019    Status Research & Development GmbH
 # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 import
  ../arithmetic,
  ../primitives,
  ./tower_common
 # Commutative ring implementation for complex extension fields
 # -------------------------------------------------------------------
 func square_complex(r: var QuadraticExt, a: QuadraticExt) =
  ## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (c0, c1)² => (c0 + c1𝑖)²
  #           => c0² + 2 c0 c1𝑖 + (c1𝑖)²
  #           => c0²-c1² + 2 c0 c1𝑖
  #           => (c0²-c1², 2 c0 c1)
  #           or
  #           => ((c0-c1)(c0+c1), 2 c0 c1)
  #           => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1)
  #
  # Costs (naive implementation)
  # - 1 Multiplication 𝔽p
  # - 2 Squarings 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 4 * ModulusBitSize (4x 𝔽p element)
  #
  # Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space)
  #
  # - 2 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Doubling 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 1 named temporaries + 1 in-place multiplication temporary)
  # as in-place multiplications require a (shared) internal temporary
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  var c0mc1 {.noInit.}: typeof(r.c0)
  c0mc1.diff(a.c0, a.c1) # c0mc1 = c0 - c1                            [1 Sub]
  r.c1.double(a.c1)      # result.c1 = 2 c1                           [1 Dbl, 1 Sub]
  r.c0.sum(c0mc1, r.c1)  # result.c0 = c0 - c1 + 2 c1                 [1 Add, 1 Dbl, 1 Sub]
  r.c0 *= c0mc1          # result.c0 = (c0 + c1)(c0 - c1) = c0² - c1² [1 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary
  r.c1 *= a.c0           # result.c1 = 2 c1 c0                        [2 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary
 func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
  ## ``r`` is initialized/overwritten
  ##
  ## Requires a complex extension field
  # (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖)
  #                   => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖
  #
  # In Fp, multiplication has cost O(n²) with n the number of limbs
  # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
  # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
  #
  # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
  # where addition/substraction are significantly cheaper than multiplication
  #
  # So we always reframe the imaginary part using Karatsuba approach to save a multiplication
  # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
  #
  # Costs (naive implementation)
  # - 4 Multiplications 𝔽p
  # - 1 Addition 𝔽p
  # - 1 Substraction 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries)
  #
  # Costs (Karatsuba)
  # - 3 Multiplications 𝔽p
  # - 3 Substraction 𝔽p (2 are fused)
  # - 2 Addition 𝔽p
  # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  # TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down)
  #       for seemingly no reason
  when false: # Single-width implementation - BLS12-381
              # Clang 348 cycles on i9-9980XE @3.9 GHz
    var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
    a0b0.prod(a.c0, b.c0)                                         # [1 Mul]
    a1b1.prod(a.c1, b.c1)                                         # [2 Mul]
    r.c0.sum(a.c0, a.c1)  # r0 = (a0 + a1)                        # [2 Mul, 1 Add]
    r.c1.sum(b.c0, b.c1)  # r1 = (b0 + b1)                        # [2 Mul, 2 Add]
    r.c1 *= r.c0          # r1 = (b0 + b1)(a0 + a1)               # [3 Mul, 2 Add] - 𝔽p temporary
    r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1                    # [3 Mul, 2 Add, 1 Sub]
    r.c1 -= a0b0          # r1 = (b0 + b1)(a0 + a1) - a0b0        # [3 Mul, 2 Add, 2 Sub]
    r.c1 -= a1b1          # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
  else: # Double-width implementation with lazy reduction
        # Clang 341 cycles on i9-9980XE @3.9 GHz
    var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0))
    var d {.noInit.}: doubleWidth(typeof(r.c0))
    const msbSet = r.c0.typeof.canUseNoCarryMontyMul()
    a0b0.mulNoReduce(a.c0, b.c0)     # 44 cycles - cumul 44
    a1b1.mulNoReduce(a.c1, b.c1)     # 44 cycles - cumul 88
    when msbSet:
      r.c0.sum(a.c0, a.c1)
      r.c1.sum(b.c0, b.c1)
    else:
      r.c0.sumNoReduce(a.c0, a.c1)   # 5 cycles  - cumul 93
      r.c1.sumNoReduce(b.c0, b.c1)   # 5 cycles  - cumul 98
    d.mulNoReduce(r.c0, r.c1)        # 44 cycles - cumul 142
    when msbSet:
      d -= a0b0
      d -= a1b1
    else:
      d.diffNoReduce(d, a0b0)        # 11 cycles - cumul 153
      d.diffNoReduce(d, a1b1)        # 11 cycles - cumul 164
    a0b0.diff(a0b0, a1b1)            # 19 cycles - cumul 183
    r.c0.reduce(a0b0)                # 50 cycles - cumul 233
    r.c1.reduce(d)                   # 50 cycles - cumul 288
  # Single-width [3 Mul, 2 Add, 3 Sub]
  #    3*88 + 2*14 + 3*14 = 334 theoretical cycles
  #    348 measured
  # Double-Width
  #    288 theoretical cycles
  #    329 measured
  #    Unexplained 40 cycles diff between theo and measured
  #    and unexplained 30 cycles between Clang and GCC
  #    - Function calls?
  #    - push/pop stack?
 func mul_sparse_complex_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖]
  #
  # r0 = a0 b0 - a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = - a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  mixin fromComplexExtension
  static: doAssert r.fromComplexExtension()
  template b(): untyped = sparseB
  r.c0.prod(a.c1, b.c1)
  r.c0.neg(r.c0)
  r.c1.prod(a.c0, b.c1)
 # Commutative ring implementation for generic quadratic extension fields
 # -------------------------------------------------------------------
 func square_generic(r: var QuadraticExt, a: QuadraticExt) =
  ## Return a² in ``r``
  ## ``r`` is initialized/overwritten
  # Algorithm (with β the non-residue in the base field)
  #
  # (c0, c1)² => (c0 + c1 w)²
  #           => c0² + 2 c0 c1 w + c1²w²
  #           => c0² + β c1² + 2 c0 c1 w
  #           => (c0² + β c1², 2 c0 c1)
  # We have 2 squarings and 1 multiplication in the base field
  # which are significantly more costly than additions.
  # For example when construction 𝔽p12 from 𝔽p6:
  # - 4 limbs like BN254:     multiplication is 20x slower than addition/substraction
  # - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction
  #
  # We can save operations with one of the following expressions
  # of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate
  #
  # Alternative 1:
  #   c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1
  #
  # Alternative 2:
  #   c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
  mixin prod
  # r0 <- (c0 + c1)(c0 + β c1)
  r.c0.sum(a.c0, a.c1)
  r.c1.sum(a.c0, NonResidue * a.c1)
  r.c0 *= r.c1
  # r1 <- c0 c1
  r.c1.prod(a.c0, a.c1)
  # r0 = (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
  r.c0 -= NonResidue * r.c1
  r.c0 -= r.c1
  # r1 = 2 c0c1
  r.c1.double()
 func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Returns r = a * b
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  mixin prod
  var t {.noInit.}: typeof(r.c0)
  # r1 <- (a0 + a1)(b0 + b1)
  r.c0.sum(a.c0, a.c1)
  t.sum(b.c0, b.c1)
  r.c1.prod(r.c0, t)
  # r0 <- a0 b0
  # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
  r.c0.prod(a.c0, b.c0)
  t.prod(a.c1, b.c1)
  r.c1 -= r.c0
  r.c1 -= t
  # r0 <- a0 b0 + β a1 b1
  r.c0 += NonResidue * t
 func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) =
  ## Multiply `a` by `b` with sparse coordinates (x, 0)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b1 = 0, hence
  #
  # r0 = a0 b0
  # r1 = (a0 + a1) b0 - a0 b0 = a1 b0
  mixin prod
  template b(): untyped = sparseB
  r.c0.prod(a.c0, b.c0)
  r.c1.prod(a.c1, b.c0)
 func mul_sparse_generic_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) =
  ## Multiply `a` by `b` with sparse coordinates (0, y)
  ## On a generic quadratic extension field
  # Algorithm (with β the non-residue in the base field)
  #
  # r0 = a0 b0 + β a1 b1
  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
  #
  # with b0 = 0, hence
  #
  # r0 = β a1 b1
  # r1 = (a0 + a1) b1 - a1 b1 = a0 b1
  template b(): untyped = sparseB
  r.c0.prod(a.c1, b.c1)
  r.c0 *= NonResidue
  r.c1.prod(a.c0, b.c1)
 # Exported symbols
 # -------------------------------------------------------------------
 func conj*(a: var QuadraticExt) {.inline.} =
  ## Computes the conjugate in-place
  a.c1.neg()
 func conj*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
  ## Computes the conjugate out-of-place
  r.c0 = a.c0
  r.c1.neg(a.c1)
 func conjneg*(a: var QuadraticExt) {.inline.} =
  ## Computes the negated conjugate in-place
  a.c0.neg()
 func conjneg*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
  ## Computes the negated conjugate out-of-place
  r.c0.neg(a.c0)
  r.c1 = a.c1
 func square*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
  mixin fromComplexExtension
  when r.fromComplexExtension():
    r.square_complex(a)
  else:
    r.square_generic(a)
 func prod*(r: var QuadraticExt, a, b: QuadraticExt) {.inline.} =
  mixin fromComplexExtension
  when r.fromComplexExtension():
    r.prod_complex(a, b)
  else:
    r.prod_generic(a, b)
 func inv*(r: var QuadraticExt, a: QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  #
  # Algorithm:
  #
  # 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w)
  #                 <=> (a0 - a1 w) / (a0² - a1² w²)
  # with w being our coordinate system and β the quadratic non-residue
  # we have w² = β
  # So the inverse is (a0 - a1 w) / (a0² - β a1²)
  mixin fromComplexExtension
  # [2 Sqr, 1 Add]
  var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
  v0.square(a.c0)
  v1.square(a.c1)
  when r.fromComplexExtension():
    v0 += v1
  else:
    v0 -= NonResidue * v1 # v0 = a0² - β a1² (the norm / squared magnitude of a)
  # [1 Inv, 2 Sqr, 1 Add]
  v1.inv(v0)              # v1 = 1 / (a0² - β a1²)
  # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
  r.c0.prod(a.c0, v1)     # r0 = a0 / (a0² - β a1²)
  v0.neg(v1)              # v0 = -1 / (a0² - β a1²)
  r.c1.prod(a.c1, v0)     # r1 = -a1 / (a0² - β a1²)
 func inv*(a: var QuadraticExt) =
  ## Compute the multiplicative inverse of ``a``
  ##
  ## The inverse of 0 is 0.
  ## Incidentally this avoids extra check
  ## to convert Jacobian and Projective coordinates
  ## to affine for elliptic curve
  a.inv(a)
 func `*=`*(a: var QuadraticExt, b: QuadraticExt) {.inline.} =
  ## In-place multiplication
  # On higher extension field like 𝔽p12,
  # if `prod` is called on shared in and out buffer, the result is wrong
  let t = a
  a.prod(t, b)
 func square*(a: var QuadraticExt) {.inline.} =
  let t = a
  a.square(t)
 func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} =
  ## Sparse in-place multiplication
  mixin fromComplexExtension
  when a.fromComplexExtension():
    let t = a
    a.mul_sparse_complex_by_0y(t, sparseB)
  else:
    let t = a
    a.mul_sparse_generic_by_0y(t, sparseB)
 func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} =
  ## Sparse in-place multiplication
  let t = a
  a.mul_sparse_generic_by_x0(t, sparseB)
--- a/constantine/tower_field_extensions/tower_common.nim
+++ b/constantine/tower_field_extensions/tower_common.nim
@ -1,263 +0,0 @@
 # Constantine
 # Copyright (c) 2018-2019    Status Research & Development GmbH
 # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
 # Licensed and distributed under either of
 #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
 #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 import
  ../config/common,
  ../primitives,
  ../arithmetic
 # Note: to avoid burdening the Nim compiler, we rely on generic extension
 # to complain if the base field procedures don't exist
 # Common type definition
 # -------------------------------------------------------------------
 type
  NonResidue* = object
    ## Non-Residue
    ##
    ## Placeholder for the appropriate quadratic, cubic or sectic non-residue
  CubicExt* = concept x
    ## Cubic Extension field concept
    type BaseField = auto
    x.c0 is BaseField
    x.c1 is BaseField
    x.c2 is BaseField
  QuadraticExt* = concept x
    ## Quadratic Extension field concept
    not(x is CubicExt)
    type BaseField = auto
    x.c0 is BaseField
    x.c1 is BaseField
  ExtensionField* = QuadraticExt or CubicExt
 # Initialization
 # -------------------------------------------------------------------
 func setZero*(a: var ExtensionField) =
  ## Set ``a`` to 0 in the extension field
  for field in fields(a):
    field.setZero()
 func setOne*(a: var ExtensionField) =
  ## Set ``a`` to 1 in the extension field
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      fA.setOne()
    else:
      fA.setZero()
 func fromBig*(a: var ExtensionField, src: BigInt) =
  ## Set ``a`` to the bigint value in the extension field
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      fA.fromBig(src)
    else:
      fA.setZero()
 # Comparison
 # -------------------------------------------------------------------
 func `==`*(a, b: ExtensionField): SecretBool =
  ## Constant-time equality check
  result = CtTrue
  for fA, fB in fields(a, b):
    result = result and (fA == fB)
 func isZero*(a: ExtensionField): SecretBool =
  ## Constant-time check if zero
  result = CtTrue
  for fA in fields(a):
    result = result and fA.isZero()
 func isOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if one
  result = CtTrue
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      result = result and fA.isOne()
    else:
      result = result and fA.isZero()
 func isMinusOne*(a: ExtensionField): SecretBool =
  ## Constant-time check if -1
  result = CtTrue
  for fieldName, fA in fieldPairs(a):
    when fieldName == "c0":
      result = result and fA.isMinusOne()
    else:
      result = result and fA.isZero()
 # Copies
 # -------------------------------------------------------------------
 func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time conditional copy
  ## If ctl is true: b is copied into a
  ## if ctl is false: b is not copied and a is unmodified
  ## Time and memory accesses are the same whether a copy occurs or not
  for fA, fB in fields(a, b):
    ccopy(fA, fB, ctl)
 # Abelian group
 # -------------------------------------------------------------------
 func neg*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place negation
  for fR, fA in fields(r, a):
    fR.neg(fA)
 func neg*(a: var ExtensionField) =
  ## Field in-place negation
  for fA in fields(a):
    fA.neg()
 func `+=`*(a: var ExtensionField, b: ExtensionField) =
  ## Addition in the extension field
  for fA, fB in fields(a, b):
    fA += fB
 func `-=`*(a: var ExtensionField, b: ExtensionField) =
  ## Substraction in the extension field
  for fA, fB in fields(a, b):
    fA -= fB
 func double*(r: var ExtensionField, a: ExtensionField) =
  ## Field out-of-place doubling
  for fR, fA in fields(r, a):
    fR.double(fA)
 func double*(a: var ExtensionField) =
  ## Field in-place doubling
  for fA in fields(a):
    fA.double()
 func div2*(a: var ExtensionField) =
  ## Field in-place division by 2
  for fA in fields(a):
    fA.div2()
 func sum*(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Sum ``a`` and ``b`` into ``r``
  r.c0.sum(a.c0, b.c0)
  r.c1.sum(a.c1, b.c1)
 func sum*(r: var CubicExt, a, b: CubicExt) =
  ## Sum ``a`` and ``b`` into ``r``
  r.c0.sum(a.c0, b.c0)
  r.c1.sum(a.c1, b.c1)
  r.c2.sum(a.c2, b.c2)
 func diff*(r: var QuadraticExt, a, b: QuadraticExt) =
  ## Diff ``a`` and ``b`` into ``r``
  r.c0.diff(a.c0, b.c0)
  r.c1.diff(a.c1, b.c1)
 func diff*(r: var CubicExt, a, b: CubicExt) =
  ## Diff ``a`` and ``b`` into ``r``
  r.c0.diff(a.c0, b.c0)
  r.c1.diff(a.c1, b.c1)
  r.c2.diff(a.c2, b.c2)
 # Conditional arithmetic
 # -------------------------------------------------------------------
 func cneg*(a: var ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional negation
  ## Only negate if ctl is true
  for fA in fields(a):
    fA.cneg(ctl)
 func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional addition
  for fA, fB in fields(a, b):
    fA.cadd(fB, ctl)
 func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
  ## Constant-time in-place conditional substraction
  for fA, fB in fields(a, b):
    fA.csub(fB, ctl)
 # Multiplication by a small integer known at compile-time
 # -------------------------------------------------------------------
 func `*=`*(a: var ExtensionField, b: static int) {.inline.} =
  ## Multiplication by a small integer known at compile-time
  const negate = b < 0
  const b = if negate: -b
            else: b
  when negate:
    a.neg(a)
  when b == 0:
    a.setZero()
  elif b == 1:
    return
  elif b == 2:
    a.double()
  elif b == 3:
    let t1 = a
    a.double()
    a += t1
  elif b == 4:
    a.double()
    a.double()
  elif b == 5:
    let t1 = a
    a.double()
    a.double()
    a += t1
  elif b == 6:
    a.double()
    let t2 = a
    a.double() # 4
    a += t2
  elif b == 7:
    let t1 = a
    a.double()
    let t2 = a
    a.double() # 4
    a += t2
    a += t1
  elif b == 8:
    a.double()
    a.double()
    a.double()
  elif b == 9:
    let t1 = a
    a.double()
    a.double()
    a.double() # 8
    a += t1
  elif b == 10:
    a.double()
    let t2 = a
    a.double()
    a.double() # 8
    a += t2
  elif b == 11:
    let t1 = a
    a.double()
    let t2 = a
    a.double()
    a.double() # 8
    a += t2
    a += t1
  elif b == 12:
    a.double()
    a.double() # 4
    let t4 = a
    a.double() # 8
    a += t4
  else:
    {.error: "Multiplication by this small int not implemented".}
--- a/constantine/tower_field_extensions/tower_instantiation.nim
+++ b/constantine/tower_field_extensions/tower_instantiation.nim
@ -12,14 +12,32 @@
 import
  ../arithmetic,
-  ../config/curves,
+  ../config/[common, curves],
  ../io/io_fields,
-  ./tower_common,
+  ./extension_fields,
  ./quadratic_extensions,
  ./cubic_extensions,
  ./exponentiations
-export tower_common, quadratic_extensions, cubic_extensions, exponentiations
+export extension_fields, exponentiations
 # We assume that the sextic non-residues used to construct
 # the elliptic curve twists
 # match with the quadratic and cubic non-residues
 # chosen to construct the tower of extension fields.
 # 𝔽p
 # ----------------------------------------------------------------
 func `*=`*(a: var Fp, _: type NonResidue) {.inline.} =
  ## Multiply an element of 𝔽p by the quadratic non-residue
  ## chosen to construct 𝔽p2
  static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
  a *= Fp.C.getNonResidueFp()
 func prod*(r: var Fp, a: Fp, _: type NonResidue){.inline.} =
  ## Multiply an element of 𝔽p by the quadratic non-residue
  ## chosen to construct 𝔽p2
  static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
  r.prod(a, Fp.C.getNonResidueFp())
 # 𝔽p2
 # ----------------------------------------------------------------
@ -28,53 +46,58 @@ type
  Fp2*[C: static Curve] = object
    c0*, c1*: Fp[C]
  β = NonResidue
    # Quadratic or Cubic non-residue
  SexticNonResidue* = object
 template fromComplexExtension*[F](elem: F): static bool =
  ## Returns true if the input is a complex extension
  ## i.e. the irreducible polynomial chosen is
  ##   x² - µ with µ = -1
  ## and so 𝔽p2 = 𝔽p[x] / x² - µ = 𝔽p[𝑖]
-  when F is Fp2 and F.C.get_QNR_Fp() == -1:
+  when F is Fp2 and F.C.getNonResidueFp() == -1:
    true
  else:
    false
-func `*=`*(a: var Fp, _: typedesc[β]) {.inline.} =
+template mulCheckSparse*(a: var Fp2, b: Fp2) =
-  ## Multiply an element of 𝔽p by the quadratic non-residue
+  when b.isOne().bool:
-  ## chosen to construct 𝔽p2
+    discard
-  static: doAssert Fp.C.get_QNR_Fp() != -1, "𝔽p2 should be specialized for complex extension"
+  elif b.isMinusOne().bool:
-  a *= Fp.C.get_QNR_Fp()
+    a.neg()
  elif b.c0.isZero().bool and b.c1.isOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t.neg(a.c1)
      a.c1 = a.c0
      a.c0 = t
    else:
      t.prod(a.c1, NonResidue)
      a.c1 = a.c0
      a.c0 = t
  elif b.c0.isZero().bool and b.c1.isMinusOne().bool:
    var t {.noInit.}: type(a.c0)
    when fromComplexExtension(b):
      t = a.c1
      a.c1.neg(a.c0)
      a.c0 = t
    else:
      t.prod(a.c1, NonResidue)
      a.c1.neg(a.c0)
      a.c0.neg(t)
  elif b.c0.isZero().bool:
    a.mul_sparse_by_0y(b)
  elif b.c1.isZero().bool:
    a.mul_sparse_by_x0(b)
  else:
    a *= b
-func `*`*(_: typedesc[β], a: Fp): Fp {.inline, noInit.} =
+func prod*(r: var Fp2, a: Fp2, _: type NonResidue) {.inline.} =
-  ## Multiply an element of 𝔽p by the quadratic non-residue
+  ## Multiply an element of 𝔽p2 by the non-residue
-  ## chosen to construct 𝔽p2
+  ## chosen to construct the next extension or the twist:
-  result = a
+  ## - if quadratic non-residue: 𝔽p4
-  result *= β
+  ## - if cubic non-residue: 𝔽p6
-
+  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
 # TODO: rework the quad/cube/sextic non residue declaration
 func `*=`*(a: var Fp, _: typedesc[SexticNonResidue]) {.inline.} =
  ## Multiply an element of 𝔽p by the sextic non-residue
  ## chosen to construct 𝔽p6
  a *= Fp.C.get_QNR_Fp()
 func `*`*(_: typedesc[SexticNonResidue], a: Fp): Fp {.inline, noInit.} =
  ## Multiply an element of 𝔽p by the sextic non-residue
  ## chosen to construct 𝔽p6
  result = a
  result *= SexticNonResidue
 func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
  ## Multiply an element of 𝔽p2 by the sextic non-residue
  ## chosen to construct the sextic twist
  # Yet another const tuple unpacking bug
-  const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12
+  const u = Fp2.C.getNonResidueFp2()[0]
-  const v = Fp2.C.get_SNR_Fp2()[1]
+  const v = Fp2.C.getNonResidueFp2()[1]
-  const Beta {.used.} = Fp2.C.get_QNR_Fp()  # Quadratic non-residue to construct 𝔽p2
+  const Beta {.used.} = Fp2.C.getNonResidueFp()
  # ξ = u + v x
  # and x² = β
  #
@ -82,24 +105,49 @@ func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
  #                       => u c0 + β v c1 + (v c0 + u c1) x
  when a.fromComplexExtension() and u == 1 and v == 1:
    let t = a.c0
-    a.c0 -= a.c1
+    r.c0.diff(t, a.c1)
-    a.c1 += t
+    r.c1.sum(t, a.c1)
  else:
-    let a0 = a.c0
+    # Case:
-    let a1 = a.c1
+    # - BN254_Snarks, QNR_Fp: -1, SNR_Fp2: 9+1𝑖  (𝑖 = √-1)
-    when a.fromComplexExtension():
+    # - BLS12_377, QNR_Fp: -5, SNR_Fp2: 0+1j    (j = √-5)
-      a.c0.diff(u * a0, v * a1)
+    # - BW6_761, SNR_Fp: -4, CNR_Fp2: 0+1j      (j = √-4)
    when u == 0:
      # BLS12_377 and BW6_761, use small addition chain
      r.mul_sparse_by_0y(a, v)
    else:
-      a.c0.sum(u * a0, (Beta * v) * a1)
+      # BN254_Snarks, u = 9
-    a.c1.sum(v * a0, u * a1)
+      # Full 𝔽p2 multiplication is cheaper than addition chains
      # for u*c0 and u*c1
      static:
        doAssert u >= 0 and uint64(u) <= uint64(high(BaseType))
        doAssert v >= 0 and uint64(v) <= uint64(high(BaseType))
      # TODO: compile-time
      var NR {.noInit.}: Fp2
      NR.c0.fromUint(uint u)
      NR.c1.fromUint(uint v)
      r.prod(a, NR)
-func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
+func `*=`*(a: var Fp2, _: type NonResidue) {.inline.} =
-  ## Multiply an element of 𝔽p by the quadratic non-residue
+  ## Multiply an element of 𝔽p2 by the non-residue
-  ## chosen to construct sextic twist
+  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p4
  ## - if cubic non-residue: 𝔽p6
  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
  # Yet another const tuple unpacking bug
-  const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12
+  a.prod(a, NonResidue)
-  const v = Fp2.C.get_SNR_Fp2()[1]
+
-  const Beta = Fp2.C.get_QNR_Fp()  # Quadratic non-residue to construct 𝔽p2
+func `/=`*(a: var Fp2, _: type NonResidue) {.inline.} =
  ## Divide an element of 𝔽p by the non-residue
  ## chosen to construct the next extension or the twist:
  ## - if quadratic non-residue: 𝔽p4
  ## - if cubic non-residue: 𝔽p6
  ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
  # Yet another const tuple unpacking bug
  # Yet another const tuple unpacking bug
  const u = Fp2.C.getNonresidueFp2()[0] # Sextic non-residue to construct 𝔽p12
  const v = Fp2.C.getNonresidueFp2()[1]
  const Beta = Fp2.C.getNonResidueFp()  # Quadratic non-residue to construct 𝔽p2
  # ξ = u + v x
  # and x² = β
  #
@ -118,7 +166,7 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
    a.c1 -= t
    a.div2()
  else:
-    let a0 = a.c0
+    var a0 = a.c0
    let a1 = a.c1
    const u2v2 = u*u - Beta*v*v # (u² - βv²)
    # TODO can be precomputed (or precompute b/µ the twist coefficient)
@ -127,12 +175,18 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
    u2v2inv.fromUint(u2v2)
    u2v2inv.inv()
-    a.c0.diff(u * a0, (Beta * v) * a1)
+    a.c0 *= u
-    a.c1.diff(u * a1, v * a0)
+    a.c1 *= Beta * v
    a.c0 -= a.c1
    a.c1 = a1
    a.c1 *= u
    a0 *= v
    a.c1 -= a0
    a.c0 *= u2v2inv
    a.c1 *= u2v2inv
-# 𝔽p6
+# 𝔽p4 & 𝔽p6
 # ----------------------------------------------------------------
 type
@ -142,53 +196,58 @@ type
  Fp6*[C: static Curve] = object
    c0*, c1*, c2*: Fp2[C]
-  ξ* = NonResidue
+func prod*(r: var Fp4, a: Fp4, _: type NonResidue) =
-    # We call the non-residue ξ on 𝔽p4/𝔽p6 to avoid confusion
+  ## Multiply an element of 𝔽p4 by the non-residue
-    # between non-residue
+  ## chosen to construct the next extension or the twist:
-    # of different tower level
+  ## - if quadratic non-residue: 𝔽p8
  ## - if cubic non-residue: 𝔽p12
  ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
  ##
  ## Assumes that it is sqrt(NonResidue_Fp2)
  let t = a.c0
  r.c0.prod(a.c1, NonResidue)
  r.c1 = t
-func `*`*(_: typedesc[ξ], a: Fp2): Fp2 {.inline, noInit.} =
+func `*=`*(a: var Fp4, _: type NonResidue) {.inline.} =
-  ## Multiply an element of 𝔽p2 by the quadratic and cubic non-residue
+  ## Multiply an element of 𝔽p4 by the non-residue
-  ## chosen to construct 𝔽p4/𝔽p6
+  ## chosen to construct the next extension or the twist:
-  # Yet another const tuple unpacking bug
+  ## - if quadratic non-residue: 𝔽p8
-  const u = Fp2.C.get_CNR_Fp2()[0]         # Quadratic & Cubic non-residue to construct 𝔽p4/𝔽p6
+  ## - if cubic non-residue: 𝔽p12
-  const v = Fp2.C.get_CNR_Fp2()[1]
+  ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
-  const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
+  ##
-  # ξ = u + v x
+  ## Assumes that it is sqrt(NonResidue_Fp2)
-  # and x² = β
+  a.prod(a, NonResidue)
  #
  # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
  #                       => u c0 + β v c1 + (v c0 + u c1) x
-  # TODO: check code generated when ξ = 1 + 𝑖
+func prod*(r: var Fp6, a: Fp6, _: type NonResidue) {.inline.} =
-  #       The mul by constant are inline but
+  ## Multiply an element of 𝔽p4 by the non-residue
-  #       since we don't have __restrict tag
+  ## chosen to construct the next extension or the twist:
-  #       and we use arrays (which decay into pointer)
+  ## - if quadratic non-residue: 𝔽p12
-  #       the compiler might not elide the temporary
+  ## - if cubic non-residue: 𝔽p18
-  when a.fromComplexExtension():
+  ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
-    result.c0.diff(u * a.c0, v * a.c1)
+  ##
-  else:
+  ## Assumes that it is cube_root(NonResidue_Fp2)
-    result.c0.sum(u * a.c0, (Beta * v) * a.c1)
+  ##
-  result.c1.sum(v * a.c0, u * a.c1 )
+  ## For all curves γ = v with v the factor for 𝔽p6 coordinate
  ## and v³ = ξ
  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
  let t = a.c2
  r.c1 = a.c0
  r.c2 = a.c1
  t.c0.prod(t, NonResidue)
-func `*=`*(a: var Fp2, _: typedesc[ξ]) {.inline.} =
+func `*=`*(a: var Fp6, _: type NonResidue) {.inline.} =
-  ## Multiply an element of 𝔽p2 by the quadratic non-residue
+  ## Multiply an element of 𝔽p4 by the non-residue
-  ## chosen to construct 𝔽p6
+  ## chosen to construct the next extension or the twist:
-  # Yet another const tuple unpacking bug
+  ## - if quadratic non-residue: 𝔽p12
-  const u = Fp2.C.get_CNR_Fp2()[0] # Cubic non-residue to construct 𝔽p6
+  ## - if cubic non-residue: 𝔽p18
-  const v = Fp2.C.get_CNR_Fp2()[1]
+  ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
-  const Beta {.used.} = Fp2.C.get_QNR_Fp()  # Quadratic non-residue to construct 𝔽p2
+  ##
-  # ξ = u + v x
+  ## Assumes that it is cube_root(NonResidue_Fp2)
-  # and x² = β
+  ##
-  #
+  ## For all curves γ = v with v the factor for 𝔽p6 coordinate
-  # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
+  ## and v³ = ξ
-  #                       => u c0 + β v c1 + (v c0 + u c1) x
+  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
-  when a.fromComplexExtension() and u == 1 and v == 1:
+  a.prod(a, NonResidue)
    let t = a.c0
    a.c0 -= a.c1
    a.c1 += t
  else: # TODO: optim for inline
    a = ξ * a
 # 𝔽p12
 # ----------------------------------------------------------------
@ -198,32 +257,6 @@ type
    c0*, c1*, c2*: Fp4[C]
    # c0*, c1*: Fp6[C]
  γ = NonResidue
    # We call the non-residue γ (Gamma) on 𝔽p6 to avoid confusion between non-residue
    # of different tower level
 func `*`*(_: typedesc[γ], a: Fp4): Fp4 {.noInit, inline.} =
  ## Multiply an element of 𝔽p4 by the sextic non-residue
  ## chosen to construct 𝔽p12
  result.c0 = ξ * a.c1
  result.c1 = a.c0
 func `*=`*(a: var Fp4, _: typedesc[γ]) {.inline.} =
  a = γ * a
 func `*`*(_: typedesc[γ], a: Fp6): Fp6 {.noInit, inline.} =
  ## Multiply an element of 𝔽p6 by the cubic non-residue
  ## chosen to construct 𝔽p12
  ## For all curves γ = v with v the factor for 𝔽p6 coordinate
  ## and v³ = ξ
  ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
  result.c0 = ξ * a.c2
  result.c1 = a.c0
  result.c2 = a.c1
 func `*=`*(a: var Fp6, _: typedesc[γ]) {.inline.} =
  a = γ * a
 # Sparse functions
 # ----------------------------------------------------------------
@ -238,13 +271,6 @@ func mul_sparse_by_y0*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) =
  r.c0.prod(a.c0, b)
  r.c1.prod(a.c1, b)
 func mul_sparse_by_0y*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) =
  ## Sparse multiplication of an Fp4 element
  ## with coordinates (a₀, a₁) by (0, b₁)
  r.c0.prod(a.c1, b)
  r.c0 *= NonResidue
  r.c1.prod(a.c0, b)
 func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) =
  ## Sparse multiplication of an Fp6 element
  ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
@ -265,6 +291,6 @@ func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) =
  #    = a1 b1
  r.c0.prod(a.c2, b)
-  r.c0 *= ξ
+  r.c0 *= NonResidue
  r.c1.prod(a.c0, b)
  r.c2.prod(a.c1, b)
--- a/docs/optimizations.md
+++ b/docs/optimizations.md
@ -75,6 +75,9 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
  - [ ] x86: MULX, ADCX, ADOX instructions
  - [ ] no-carry optimization for CIOS (Coarsely Integrated Operand Scanning)
 - Addition chains
  - [ ] unreduced squarings/multiplications in addition chains
 - Exponentiation
  - [x] variable-time exponentiation
  - [x] fixed window optimization _(sliding windows are not constant-time)_
@ -158,19 +161,25 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
 - Line functions
  - [x] Homogeneous projective coordinates
    - [x] D-Twist
      - [x] Fused line add + elliptic curve add
      - [x] Fused line double + elliptic curve double
    - [x] M-Twist
-    - [x] Fused line add + elliptic curve add
+      - [x] Fused line add + elliptic curve add
-    - [x] Fused line double + elliptic curve double
+      - [x] Fused line double + elliptic curve double
  - [ ] Jacobian projective coordinates
    - [ ] D-Twist
      - [ ] Fused line add + elliptic curve add
      - [ ] Fused line double + elliptic curve double
    - [ ] M-Twist
-    - [ ] Fused line add + elliptic curve add
+      - [ ] Fused line add + elliptic curve add
-    - [ ] Fused line double + elliptic curve double
+      - [ ] Fused line double + elliptic curve double
  - [x] Sparse multiplication line * Gₜ element
    - [x] 6-way sparse
    - [ ] Pseudo 8-sparse
    - [x] D-Twist
      - [x] 6-way sparse
      - [ ] Pseudo 8-sparse
    - [x] M-Twist
      - [x] 6-way sparse
      - [ ] Pseudo 8-sparse
 - Miller Loop
  - [x] NAF recoding
--- a/helpers/prng_unsafe.nim
+++ b/helpers/prng_unsafe.nim
@ -15,7 +15,7 @@ import
    ec_shortweierstrass_projective,
    ec_shortweierstrass_jacobian],
  ../constantine/io/io_bigints,
-  ../constantine/tower_field_extensions/tower_common
+  ../constantine/tower_field_extensions/extension_fields
 # ############################################################
 #
--- a/tests/t_finite_fields.nim
+++ b/tests/t_finite_fields.nim
@ -25,7 +25,6 @@ proc main() =
        y.fromUint(10'u32)
        z.fromUint(90'u32)
        let u = x + y
        x += y
        var x_bytes: array[8, byte]
@ -34,7 +33,6 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          90'u64 == cast[uint64](x_bytes)
@ -45,7 +43,6 @@ proc main() =
        y.fromUint(21'u32)
        z.fromUint(0'u32)
        let u = x + y
        x += y
        var x_bytes: array[8, byte]
@ -54,7 +51,6 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          0'u64 == cast[uint64](x_bytes)
@ -65,7 +61,6 @@ proc main() =
        y.fromUint(22'u32)
        z.fromUint(1'u32)
        let u = x + y
        x += y
        var x_bytes: array[8, byte]
@ -74,7 +69,6 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          1'u64 == cast[uint64](x_bytes)
@ -86,7 +80,6 @@ proc main() =
        y.fromUint(10'u32)
        z.fromUint(70'u32)
        let u = x - y
        x -= y
        var x_bytes: array[8, byte]
@ -95,7 +88,6 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          70'u64 == cast[uint64](x_bytes)
@ -106,7 +98,6 @@ proc main() =
        y.fromUint(80'u32)
        z.fromUint(0'u32)
        let u = x - y
        x -= y
        var x_bytes: array[8, byte]
@ -115,7 +106,6 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          0'u64 == cast[uint64](x_bytes)
@ -126,7 +116,6 @@ proc main() =
        y.fromUint(81'u32)
        z.fromUint(100'u32)
        let u = x - y
        x -= y
        var x_bytes: array[8, byte]
@ -135,19 +124,18 @@ proc main() =
        check:
          # Check equality in the Montgomery domain
          bool(z == x)
          bool(z == u)
          # Check equality when converting back to natural domain
          100'u64 == cast[uint64](x_bytes)
    test "Multiplication mod 101":
      block:
-        var x, y, z: Fp[Fake101]
+        var x, y, z, r: Fp[Fake101]
        x.fromUint(10'u32)
        y.fromUint(10'u32)
        z.fromUint(100'u32)
-        let r = x * y
+        r.prod(x, y)
        var r_bytes: array[8, byte]
        r_bytes.exportRawUint(r, cpuEndian)
@ -159,13 +147,13 @@ proc main() =
          100'u64 == cast[uint64](r_bytes)
      block:
-        var x, y, z: Fp[Fake101]
+        var x, y, z, r: Fp[Fake101]
        x.fromUint(10'u32)
        y.fromUint(11'u32)
        z.fromUint(9'u32)
-        let r = x * y
+        r.prod(x, y)
        var r_bytes: array[8, byte]
        r_bytes.exportRawUint(r, cpuEndian)
@ -275,13 +263,13 @@ proc main() =
    test "Multiplication mod 2^61 - 1":
      block:
-        var x, y, z: Fp[Mersenne61]
+        var x, y, z, r: Fp[Mersenne61]
        x.fromUint(10'u32)
        y.fromUint(10'u32)
        z.fromUint(100'u32)
-        let r = x * y
+        r.prod(x, y)
        var r_bytes: array[8, byte]
        r_bytes.exportRawUint(r, cpuEndian)
@ -294,13 +282,13 @@ proc main() =
          cast[uint64](r_bytes) == 100'u64
      block:
-        var x, y, z: Fp[Mersenne61]
+        var x, y, z, r: Fp[Mersenne61]
        x.fromUint(1'u32 shl 31)
        y.fromUint(1'u32 shl 31)
        z.fromUint(2'u32)
-        let r = x * y
+        r.prod(x, y)
        var r_bytes: array[8, byte]
        r_bytes.exportRawUint(r, cpuEndian)