From 320ecbff1a51ef4c3980f33639537e0f5a99657c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mamy=20Andr=C3=A9-Ratsimbazafy?= <mamy_github@numforge.co>
Date: Tue, 25 Feb 2020 15:18:39 +0100
Subject: [PATCH] Change square signature and reorg finite fields to
 avoid/highlight proc that allocate a temporary

---
 constantine/arithmetic/bigints_checked.nim    |  2 +-
 constantine/arithmetic/finite_fields.nim      | 86 +++++++++++--------
 .../tower_field_extensions/fp2_complex.nim    | 15 ++--
 tests/test_finite_fields_vs_gmp.nim           |  3 +-
 4 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/constantine/arithmetic/bigints_checked.nim b/constantine/arithmetic/bigints_checked.nim
index 2525c18..4e09c85 100644
--- a/constantine/arithmetic/bigints_checked.nim
+++ b/constantine/arithmetic/bigints_checked.nim
@@ -90,7 +90,7 @@ debug:
 {.push raises: [].}
 {.push inline.}
 
-func setInternalBitLength*(a: var BigInt) {.inline.} =
+func setInternalBitLength*(a: var BigInt) =
   ## Derive the actual bitsize used internally of a BigInt
   ## from the announced BigInt bitsize
   ## and set the bitLength field of that BigInt
diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim
index 183c1f0..e9271a5 100644
--- a/constantine/arithmetic/finite_fields.nim
+++ b/constantine/arithmetic/finite_fields.nim
@@ -94,6 +94,8 @@ func setZero*(a: var Fp) =
 func setOne*(a: var Fp) =
   ## Set ``a`` to one
   # Note: we need 1 in Montgomery residue form
+  # TODO: Nim codegen is not optimal: it uses a temporary.
+  #       Check whether the compiler optimizes it away.
   a.mres = Fp.C.getMontyOne()
 
 func `+=`*(a: var Fp, b: Fp) =
@@ -133,46 +135,22 @@ func double*(r: var Fp, a: Fp) =
   overflowed = overflowed or not csub(r.mres, Fp.C.Mod.mres, CtFalse) # r >= P
   discard csub(r.mres, Fp.C.Mod.mres, overflowed)
 
-func `+`*(a, b: Fp): Fp {.noInit.} =
-  ## Addition modulo p
-  result.sum(a, b)
-
-func `-`*(a, b: Fp): Fp {.noInit.} =
-  ## Substraction modulo p
-  result.diff(a, b)
-
 func prod*(r: var Fp, a, b: Fp) =
   ## Store the product of ``a`` by ``b`` modulo p into ``r``
   ## ``r`` is initialized / overwritten
   r.mres.montyMul(a.mres, b.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord())
 
-func `*`*(a, b: Fp): Fp {.noInit.} =
-  ## Multiplication modulo p
-  ##
-  ## It is recommended to assign with {.noInit.}
-  ## as Fp elements are usually large and this
-  ## routine will zero init internally the result.
-  result.prod(a, b)
-
-func `*=`*(a: var Fp, b: Fp) =
-  ## Multiplication modulo p
-  ##
-  ## Implementation note:
-  ## - This requires a temporary field element
-  ##
-  ## Cost
-  ## Stack: 1 * ModulusBitSize
-  var tmp{.noInit.}: Fp
-  tmp.prod(a, b)
-  a = tmp
-
-func square*(a: Fp): Fp {.noInit.} =
+func square*(r: var Fp, a: Fp) =
   ## Squaring modulo p
-  ##
-  ## It is recommended to assign with {.noInit.}
-  ## as Fp elements are usually large and this
-  ## routine will zero init internally the result.
-  result.mres.montySquare(a.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord())
+  r.mres.montySquare(a.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord())
+
+# ############################################################
+#
+#         Field arithmetic exponentiation and inversion
+#
+# ############################################################
+#
+# Internally those procedures will allocate extra scratchspace on the stack
 
 func pow*(a: var Fp, exponent: BigInt) =
   ## Exponentiation modulo p
@@ -213,3 +191,43 @@ func inv*(a: var Fp) =
     Fp.C.Mod.mres, Fp.C.getMontyOne(),
     Fp.C.getNegInvModWord(), windowSize
   )
+
+# ############################################################
+#
+#            Field arithmetic ergonomic primitives
+#
+# ############################################################
+#
+# This implements extra primitives for ergonomics.
+# The in-place ones should be preferred as they avoid copies on assignment
+# Two kinds:
+# - Those that return a field element
+# - Those that internally allocate a temporary field element
+
+func `+`*(a, b: Fp): Fp {.noInit.} =
+  ## Addition modulo p: returns ``a + b``, computed by ``sum``
+  result.sum(a, b)
+
+func `-`*(a, b: Fp): Fp {.noInit.} =
+  ## Subtraction modulo p: returns ``a - b``, computed by ``diff``
+  result.diff(a, b)
+
+func `*`*(a, b: Fp): Fp {.noInit.} =
+  ## Multiplication modulo p: returns ``a * b``, computed by ``prod``
+  ##
+  ## It is recommended to assign with {.noInit.}
+  ## as Fp elements are usually large and this
+  ## routine will zero-initialize the result internally.
+  result.prod(a, b)
+
+func `*=`*(a: var Fp, b: Fp) =
+  ## In-place multiplication modulo p: ``a`` is replaced by ``a * b``
+  ##
+  ## Implementation note:
+  ## - This requires a temporary field element
+  ##
+  ## Cost
+  ## Stack: 1 * ModulusBitSize
+  var tmp{.noInit.}: Fp  # scratch element, fully written by ``prod`` below
+  tmp.prod(a, b)         # the product goes to ``tmp``, not directly into ``a``
+  a = tmp                # copy the product back into ``a``
diff --git a/constantine/tower_field_extensions/fp2_complex.nim b/constantine/tower_field_extensions/fp2_complex.nim
index 6eead05..5a95ddd 100644
--- a/constantine/tower_field_extensions/fp2_complex.nim
+++ b/constantine/tower_field_extensions/fp2_complex.nim
@@ -58,8 +58,9 @@ type
     ## be a square (mod p)
     c0*, c1*: Fp[C]
 
-func square*(a: Fp2): Fp2 {.noInit.} =
-  ## Return a^2 in 𝔽p2
+func square*(r: var Fp2, a: Fp2) =
+  ## Compute a^2 in 𝔽p2 and store it in ``r``
+  ## ``r`` is initialized/overwritten
   # (c0, c1)² => (c0 + c1𝑖)²
   #           => c0² + 2 c0 c1𝑖 + (c1𝑖)²
   #           => c0²-c1² + 2 c0 c1𝑖
@@ -85,8 +86,8 @@ func square*(a: Fp2): Fp2 {.noInit.} =
   # as multiplications require a (shared) internal temporary
 
   var c0mc1 {.noInit.}: typeof(a.c0)
-  c0mc1.diff(a.c0, a.c1)           # c0mc1 = c0 - c1                               [1 Sub]
-  result.c1.double(a.c1)           # result.c1 = 2 c1                              [1 Dbl, 1 Sub]
-  result.c0.sum(c0mc1, result.c1)  # result.c0 = c0 - c1 + 2 c1                    [1 Add, 1 Dbl, 1 Sub]
-  result.c0 *= c0mc1               # result.c0 = (c0 + c1)(c0 - c1) = c0² - c1²    [1 Mul, 1 Add, 1 Dbl, 1 Sub]
-  result.c1 *= a.c0                # result.c1 = 2 c1 c0                           [2 Mul, 1 Add, 1 Dbl, 1 Sub]
+  c0mc1.diff(a.c0, a.c1) # c0mc1 = c0 - c1                            [1 Sub]
+  r.c1.double(a.c1)      # r.c1 = 2 c1                                [1 Dbl, 1 Sub]
+  r.c0.sum(c0mc1, r.c1)  # r.c0 = c0 - c1 + 2 c1                      [1 Add, 1 Dbl, 1 Sub]
+  r.c0 *= c0mc1          # r.c0 = (c0 + c1)(c0 - c1) = c0² - c1²      [1 Mul, 1 Add, 1 Dbl, 1 Sub]
+  r.c1 *= a.c0           # r.c1 = 2 c1 c0                             [2 Mul, 1 Add, 1 Dbl, 1 Sub]
diff --git a/tests/test_finite_fields_vs_gmp.nim b/tests/test_finite_fields_vs_gmp.nim
index 6918d3b..aa21636 100644
--- a/tests/test_finite_fields_vs_gmp.nim
+++ b/tests/test_finite_fields_vs_gmp.nim
@@ -102,7 +102,8 @@ proc mainMul() =
     mpz_mul(r, a, b)
     mpz_mod(r, r, p)
 
-    let rTest = aTest * bTest
+    var rTest {.noInit.}: Fp[curve]
+    rTest.prod(aTest, bTest)
 
     #########################################################
     # Check