From 9770b3108c21810d10124d0d958dfa61cbf25efe Mon Sep 17 00:00:00 2001
From: Mamy Ratsimbazafy <mamy_github@numforge.co>
Date: Sun, 14 Aug 2022 09:48:10 +0200
Subject: [PATCH] Fp12 over fp6 (#201)

* introduce sumprod for direct fp6_mul

* change curves -> constants

* forgotten constants

* Full pairing using Fp2->Fp6->Fp12 towering
---
 benchmarks/bench_fields_template.nim          |  42 +-
 benchmarks/bench_fp.nim                       |   9 +-
 benchmarks/bench_fp2.nim                      |   5 +-
 benchmarks/bench_fp4.nim                      |   5 +-
 benchmarks/bench_fp6.nim                      |   5 +-
 benchmarks/bench_pairing_template.nim         |   4 +-
 benchmarks/bench_summary_template.nim         |   4 +-
 constantine.nimble                            |   2 +
 constantine/blssig_pop_on_bls12381_g2.nim     |   2 +-
 constantine/curves_primitives.nim             |   2 +-
 constantine/ethereum_evm_precompiles.nim      |   2 +-
 .../hash_to_curve/h2c_isogeny_maps.nim        |   2 +-
 .../hash_to_curve/h2c_map_to_isocurve_swu.nim |   2 +-
 constantine/hash_to_curve/hash_to_curve.nim   |   2 +-
 constantine/math/arithmetic/README.md         |   2 +-
 .../assembly/limbs_asm_mul_mont_x86.nim       | 201 ++++-
 .../limbs_asm_mul_mont_x86_adx_bmi2.nim       | 191 +++-
 .../math/arithmetic/bigints_montgomery.nim    |  14 +
 constantine/math/arithmetic/finite_fields.nim |  46 +-
 .../arithmetic/finite_fields_square_root.nim  |   2 +-
 constantine/math/arithmetic/limbs_exgcd.nim   |  23 +-
 .../math/arithmetic/limbs_montgomery.nim      |  88 +-
 .../math/arithmetic/limbs_unsaturated.nim     |   4 +-
 .../math/config/curves_prop_field_core.nim    |   4 +
 .../math/{curves => constants}/README.md      |   0
 .../bandersnatch_sqrt.nim                     |   0
 .../bls12_377_constants.nim                   |   0
 .../bls12_377_endomorphisms.nim               |   0
 .../bls12_377_frobenius.nim                   |   0
 .../bls12_377_pairing.nim                     |   0
 .../{curves => constants}/bls12_377_sqrt.nim  |   0
 .../bls12_377_subgroups.nim                   |   2 +-
 .../bls12_381_constants.nim                   |   0
 .../bls12_381_endomorphisms.nim               |   0
 .../bls12_381_frobenius.nim                   |   0
 .../bls12_381_generators.nim                  |   0
 .../bls12_381_hash_to_curve_g1.nim            |   0
 .../bls12_381_hash_to_curve_g2.nim            |   0
 .../bls12_381_pairing.nim                     |   0
 .../{curves => constants}/bls12_381_sqrt.nim  |   0
 .../bls12_381_sqrt_fp2.nim                    |   0
 .../bls12_381_subgroups.nim                   |   2 +-
 .../bn254_nogami_constants.nim                |   0
 .../bn254_nogami_endomorphisms.nim            |   0
 .../bn254_nogami_frobenius.nim                |   0
 .../bn254_nogami_pairing.nim                  |   0
 .../bn254_nogami_sqrt.nim                     |   0
 .../bn254_nogami_sqrt_fp2.nim                 |   0
 .../bn254_nogami_subgroups.nim                |   0
 .../bn254_snarks_constants.nim                |   0
 .../bn254_snarks_endomorphisms.nim            |   0
 .../bn254_snarks_frobenius.nim                |   0
 .../bn254_snarks_generators.nim               |   0
 .../bn254_snarks_hash_to_curve_g1.nim         |   0
 .../bn254_snarks_hash_to_curve_g2.nim         |   0
 .../bn254_snarks_pairing.nim                  |   0
 .../bn254_snarks_sqrt.nim                     |   0
 .../bn254_snarks_sqrt_fp2.nim                 |   0
 .../bn254_snarks_subgroups.nim                |   0
 .../bw6_761_constants.nim                     |   0
 .../bw6_761_endomorphisms.nim                 |   0
 .../bw6_761_frobenius.nim                     |   0
 .../{curves => constants}/bw6_761_pairing.nim |   0
 .../{curves => constants}/bw6_761_sqrt.nim    |   0
 .../bw6_761_subgroups.nim                     |   0
 .../{curves => constants}/curve25519_sqrt.nim |   0
 .../{curves => constants}/jubjub_sqrt.nim     |   0
 .../pallas_endomorphisms.nim                  |   0
 .../{curves => constants}/pallas_sqrt.nim     |   0
 .../pallas_subgroups.nim                      |   0
 .../vesta_endomorphisms.nim                   |   0
 .../math/{curves => constants}/vesta_sqrt.nim |   0
 .../{curves => constants}/vesta_subgroups.nim |   0
 .../{curves => constants}/zoo_constants.nim   |   0
 .../zoo_endomorphisms.nim                     |   0
 .../{curves => constants}/zoo_frobenius.nim   |   0
 .../{curves => constants}/zoo_generators.nim  |   0
 .../zoo_hash_to_curve.nim                     |   0
 .../{curves => constants}/zoo_pairings.nim    |   0
 .../zoo_square_roots.nim                      |   0
 .../zoo_square_roots_fp2.nim                  |   0
 .../{curves => constants}/zoo_subgroups.nim   |   0
 .../math/elliptic/ec_endomorphism_accel.nim   |   2 +-
 constantine/math/elliptic/ec_scalar_mul.nim   |   2 +-
 .../elliptic/ec_shortweierstrass_affine.nim   |   2 +-
 .../math/extension_fields/square_root_fp2.nim |   2 +-
 constantine/math/extension_fields/towers.nim  | 382 ++++++--
 constantine/math/io/io_extfields.nim          |   4 +-
 constantine/math/isogenies/frobenius.nim      |  23 +-
 .../math/pairing/cyclotomic_subgroup.nim      | 193 +++-
 constantine/math/pairing/lines_eval.nim       | 831 ++++++++++++++----
 constantine/math/pairing/pairing_bls12.nim    |   2 +-
 constantine/math/pairing/pairing_bn.nim       |   2 +-
 constantine/math/pairing/pairing_bw6_761.nim  |   2 +-
 .../platforms/isa/macro_assembler_x86.nim     |  36 +-
 constantine/signatures/bls_signatures.nim     |   2 +-
 metering/m_pairings.nim                       |   2 +-
 tests/math/t_ec_template.nim                  |   2 +-
 tests/math/t_finite_fields_mulsquare.nim      |  73 +-
 tests/math/t_fp12_bn254_nogami.nim            |  26 +
 tests/math/t_fp6_bn254_nogami.nim             |  26 +
 tests/math/t_fp_cubic_root.nim                |   2 +-
 tests/math/t_fp_tower_template.nim            |   8 +-
 tests/math/t_pairing_mul_fp12_by_lines.nim    | 277 +++++-
 tests/math/t_pairing_template.nim             |   2 +-
 tests/t_hash_to_curve_random.nim              |   2 +-
 106 files changed, 2141 insertions(+), 431 deletions(-)
 rename constantine/math/{curves => constants}/README.md (100%)
 rename constantine/math/{curves => constants}/bandersnatch_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_constants.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_pairing.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bls12_377_subgroups.nim (99%)
 rename constantine/math/{curves => constants}/bls12_381_constants.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_generators.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_hash_to_curve_g1.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_hash_to_curve_g2.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_pairing.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_sqrt_fp2.nim (100%)
 rename constantine/math/{curves => constants}/bls12_381_subgroups.nim (99%)
 rename constantine/math/{curves => constants}/bn254_nogami_constants.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_pairing.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_sqrt_fp2.nim (100%)
 rename constantine/math/{curves => constants}/bn254_nogami_subgroups.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_constants.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_generators.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_hash_to_curve_g1.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_hash_to_curve_g2.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_pairing.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_sqrt_fp2.nim (100%)
 rename constantine/math/{curves => constants}/bn254_snarks_subgroups.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_constants.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_pairing.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/bw6_761_subgroups.nim (100%)
 rename constantine/math/{curves => constants}/curve25519_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/jubjub_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/pallas_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/pallas_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/pallas_subgroups.nim (100%)
 rename constantine/math/{curves => constants}/vesta_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/vesta_sqrt.nim (100%)
 rename constantine/math/{curves => constants}/vesta_subgroups.nim (100%)
 rename constantine/math/{curves => constants}/zoo_constants.nim (100%)
 rename constantine/math/{curves => constants}/zoo_endomorphisms.nim (100%)
 rename constantine/math/{curves => constants}/zoo_frobenius.nim (100%)
 rename constantine/math/{curves => constants}/zoo_generators.nim (100%)
 rename constantine/math/{curves => constants}/zoo_hash_to_curve.nim (100%)
 rename constantine/math/{curves => constants}/zoo_pairings.nim (100%)
 rename constantine/math/{curves => constants}/zoo_square_roots.nim (100%)
 rename constantine/math/{curves => constants}/zoo_square_roots_fp2.nim (100%)
 rename constantine/math/{curves => constants}/zoo_subgroups.nim (100%)
 create mode 100644 tests/math/t_fp12_bn254_nogami.nim
 create mode 100644 tests/math/t_fp6_bn254_nogami.nim

diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim
index b06c083..e65c773 100644
--- a/benchmarks/bench_fields_template.nim
+++ b/benchmarks/bench_fields_template.nim
@@ -18,7 +18,7 @@ import
   ../constantine/math/config/curves,
   ../constantine/math/arithmetic,
   ../constantine/math/extension_fields,
-  ../constantine/math/curves/zoo_square_roots,
+  ../constantine/math/constants/zoo_square_roots,
   # Helpers
   ../helpers/prng_unsafe,
   ./bench_blueprint
@@ -47,6 +47,20 @@ template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
   measure(iters, startTime, stopTime, startClk, stopClk, body)
   report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters)
 
+func random_unsafe(rng: var RngState, a: var FpDbl) =
+  ## Initialize a standalone Double-Width field element
+  ## we don't reduce it modulo p², this is only used for benchmark
+  let aHi = rng.random_unsafe(Fp[FpDbl.C])
+  let aLo = rng.random_unsafe(Fp[FpDbl.C])
+  for i in 0 ..< aLo.mres.limbs.len:
+    a.limbs2x[i] = aLo.mres.limbs[i]
+  for i in 0 ..< aHi.mres.limbs.len:
+    a.limbs2x[aLo.mres.limbs.len+i] = aHi.mres.limbs[i]
+
+func random_unsafe(rng: var RngState, a: var ExtensionField2x) =
+  for i in 0 ..< a.coords.len:
+    rng.random_unsafe(a.coords[i])
+
 proc addBench*(T: typedesc, iters: int) =
   var x = rng.random_unsafe(T)
   let y = rng.random_unsafe(T)
@@ -92,21 +106,39 @@ proc sqrBench*(T: typedesc, iters: int) =
   bench("Squaring", T, iters):
     r.square(x)
 
-proc mulUnrBench*(T: typedesc, iters: int) =
+proc mul2xUnrBench*(T: typedesc, iters: int) =
   var r: doublePrec(T)
   let x = rng.random_unsafe(T)
   let y = rng.random_unsafe(T)
   preventOptimAway(r)
-  bench("Multiplication unreduced", T, iters):
+  bench("Multiplication 2x unreduced", T, iters):
     r.prod2x(x, y)
 
-proc sqrUnrBench*(T: typedesc, iters: int) =
+proc sqr2xUnrBench*(T: typedesc, iters: int) =
   var r: doublePrec(T)
   let x = rng.random_unsafe(T)
   preventOptimAway(r)
-  bench("Squaring unreduced", T, iters):
+  bench("Squaring 2x unreduced", T, iters):
     r.square2x(x)
 
+proc rdc2xBench*(T: typedesc, iters: int) =
+  var r: T
+  var t: doublePrec(T)
+  rng.random_unsafe(t)
+  preventOptimAway(r)
+  bench("Redc 2x", T, iters):
+    r.redc2x(t)
+
+proc sumprodBench*(T: typedesc, iters: int) =
+  var r: T
+  let a = rng.random_unsafe(T)
+  let b = rng.random_unsafe(T)
+  let u = rng.random_unsafe(T)
+  let v = rng.random_unsafe(T)
+  preventOptimAway(r)
+  bench("Linear combination", T, iters):
+    r.sumprod([a, b], [u, v])
+
 proc toBigBench*(T: typedesc, iters: int) =
   var r: matchingBigInt(T.C)
   let x = rng.random_unsafe(T)
diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim
index d2eefe3..8ef45d1 100644
--- a/benchmarks/bench_fp.nim
+++ b/benchmarks/bench_fp.nim
@@ -11,7 +11,7 @@ import
   ../constantine/math/config/curves,
   ../constantine/math/arithmetic,
   ../constantine/math/io/io_bigints,
-  ../constantine/math/curves/zoo_square_roots,
+  ../constantine/math/constants/zoo_square_roots,
   # Helpers
   ../helpers/static_for,
   ./bench_fields_template
@@ -52,8 +52,11 @@ proc main() =
     mulBench(Fp[curve], Iters)
     sqrBench(Fp[curve], Iters)
     smallSeparator()
-    mulUnrBench(Fp[curve], Iters)
-    sqrUnrBench(Fp[curve], Iters)
+    mul2xUnrBench(Fp[curve], Iters)
+    sqr2xUnrBench(Fp[curve], Iters)
+    rdc2xBench(Fp[curve], Iters)
+    smallSeparator()
+    sumprodBench(Fp[curve], Iters)
     smallSeparator()
     toBigBench(Fp[curve], Iters)
     toFieldBench(Fp[curve], Iters)
diff --git a/benchmarks/bench_fp2.nim b/benchmarks/bench_fp2.nim
index 0f0eb5f..e4503df 100644
--- a/benchmarks/bench_fp2.nim
+++ b/benchmarks/bench_fp2.nim
@@ -46,8 +46,9 @@ proc main() =
     mulBench(Fp2[curve], Iters)
     sqrBench(Fp2[curve], Iters)
     smallSeparator()
-    mulUnrBench(Fp2[curve], Iters)
-    sqrUnrBench(Fp2[curve], Iters)
+    mul2xUnrBench(Fp2[curve], Iters)
+    sqr2xUnrBench(Fp2[curve], Iters)
+    rdc2xBench(Fp2[curve], Iters)
     smallSeparator()
     invBench(Fp2[curve], InvIters)
     isSquareBench(Fp2[curve], InvIters)
diff --git a/benchmarks/bench_fp4.nim b/benchmarks/bench_fp4.nim
index 2250bb5..1f14c8b 100644
--- a/benchmarks/bench_fp4.nim
+++ b/benchmarks/bench_fp4.nim
@@ -46,8 +46,9 @@ proc main() =
     mulBench(Fp4[curve], Iters)
     sqrBench(Fp4[curve], Iters)
     smallSeparator()
-    mulUnrBench(Fp2[curve], Iters)
-    sqrUnrBench(Fp2[curve], Iters)
+    mul2xUnrBench(Fp4[curve], Iters)
+    sqr2xUnrBench(Fp4[curve], Iters)
+    rdc2xBench(Fp4[curve], Iters)
     smallSeparator()
     invBench(Fp4[curve], InvIters)
     separator()
diff --git a/benchmarks/bench_fp6.nim b/benchmarks/bench_fp6.nim
index 28327b5..b167121 100644
--- a/benchmarks/bench_fp6.nim
+++ b/benchmarks/bench_fp6.nim
@@ -44,8 +44,9 @@ proc main() =
     mulBench(Fp6[curve], Iters)
     sqrBench(Fp6[curve], Iters)
     smallSeparator()
-    mulUnrBench(Fp6[curve], Iters)
-    sqrUnrBench(Fp6[curve], Iters)
+    mul2xUnrBench(Fp6[curve], Iters)
+    sqr2xUnrBench(Fp6[curve], Iters)
+    rdc2xBench(Fp6[curve], Iters)
     smallSeparator()
     invBench(Fp6[curve], InvIters)
     separator()
diff --git a/benchmarks/bench_pairing_template.nim b/benchmarks/bench_pairing_template.nim
index dcfd01a..c1e55e5 100644
--- a/benchmarks/bench_pairing_template.nim
+++ b/benchmarks/bench_pairing_template.nim
@@ -19,14 +19,14 @@ import
   ../constantine/math/arithmetic,
   ../constantine/math/extension_fields,
   ../constantine/math/ec_shortweierstrass,
-  ../constantine/math/curves/zoo_subgroups,
+  ../constantine/math/constants/zoo_subgroups,
   ../constantine/math/pairing/[
     cyclotomic_subgroup,
     lines_eval,
     pairing_bls12,
     pairing_bn
   ],
-  ../constantine/math/curves/zoo_pairings,
+  ../constantine/math/constants/zoo_pairings,
   # Helpers
   ../helpers/prng_unsafe,
   ./bench_blueprint
diff --git a/benchmarks/bench_summary_template.nim b/benchmarks/bench_summary_template.nim
index b876ca5..32b6ec3 100644
--- a/benchmarks/bench_summary_template.nim
+++ b/benchmarks/bench_summary_template.nim
@@ -22,13 +22,13 @@ import
     ec_shortweierstrass_projective,
     ec_shortweierstrass_jacobian,
     ec_scalar_mul, ec_endomorphism_accel],
-  ../constantine/math/curves/zoo_subgroups,
+  ../constantine/math/constants/zoo_subgroups,
   ../constantine/math/pairing/[
     cyclotomic_subgroup,
     pairing_bls12,
     pairing_bn
   ],
-  ../constantine/math/curves/zoo_pairings,
+  ../constantine/math/constants/zoo_pairings,
   ../constantine/hashes,
   ../constantine/hash_to_curve/hash_to_curve,
   # Helpers
diff --git a/constantine.nimble b/constantine.nimble
index 56bd076..7b363c3 100644
--- a/constantine.nimble
+++ b/constantine.nimble
@@ -52,10 +52,12 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   ("tests/math/t_fp2.nim", false),
   ("tests/math/t_fp2_sqrt.nim", false),
   ("tests/math/t_fp4.nim", false),
+  ("tests/math/t_fp6_bn254_nogami.nim", false),
   ("tests/math/t_fp6_bn254_snarks.nim", false),
   ("tests/math/t_fp6_bls12_377.nim", false),
   ("tests/math/t_fp6_bls12_381.nim", false),
   ("tests/math/t_fp6_bw6_761.nim", false),
+  ("tests/math/t_fp12_bn254_nogami.nim", false),
   ("tests/math/t_fp12_bn254_snarks.nim", false),
   ("tests/math/t_fp12_bls12_377.nim", false),
   ("tests/math/t_fp12_bls12_381.nim", false),
diff --git a/constantine/blssig_pop_on_bls12381_g2.nim b/constantine/blssig_pop_on_bls12381_g2.nim
index a42fa5e..d9de9ad 100644
--- a/constantine/blssig_pop_on_bls12381_g2.nim
+++ b/constantine/blssig_pop_on_bls12381_g2.nim
@@ -13,7 +13,7 @@ import
       ec_shortweierstrass,
       extension_fields,
       arithmetic,
-      curves/zoo_subgroups
+      constants/zoo_subgroups
     ],
     ./math/io/[io_bigints, io_fields],
     hashes,
diff --git a/constantine/curves_primitives.nim b/constantine/curves_primitives.nim
index 35dbca5..9603980 100644
--- a/constantine/curves_primitives.nim
+++ b/constantine/curves_primitives.nim
@@ -23,7 +23,7 @@ import
       cyclotomic_subgroup,
       lines_eval
     ],
-    ./math/curves/zoo_pairings,
+    ./math/constants/zoo_pairings,
     ./hash_to_curve/hash_to_curve
 
 # ############################################################
diff --git a/constantine/ethereum_evm_precompiles.nim b/constantine/ethereum_evm_precompiles.nim
index 945ef5d..5c374fc 100644
--- a/constantine/ethereum_evm_precompiles.nim
+++ b/constantine/ethereum_evm_precompiles.nim
@@ -13,7 +13,7 @@ import
   ./math/arithmetic/limbs_montgomery,
   ./math/ec_shortweierstrass,
   ./math/pairing/[pairing_bn, miller_loops, cyclotomic_subgroup],
-  ./math/curves/zoo_subgroups,
+  ./math/constants/zoo_subgroups,
   ./math/io/[io_bigints, io_fields]
 
 # ############################################################
diff --git a/constantine/hash_to_curve/h2c_isogeny_maps.nim b/constantine/hash_to_curve/h2c_isogeny_maps.nim
index 3b083a0..edf7251 100644
--- a/constantine/hash_to_curve/h2c_isogeny_maps.nim
+++ b/constantine/hash_to_curve/h2c_isogeny_maps.nim
@@ -10,7 +10,7 @@ import
   # Internals
   ../platforms/abstractions,
   ../math/[arithmetic, extension_fields],
-  ../math/curves/zoo_hash_to_curve,
+  ../math/constants/zoo_hash_to_curve,
   ../math/elliptic/[
     ec_shortweierstrass_projective,
     ec_shortweierstrass_jacobian,
diff --git a/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim b/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim
index 0d3e987..00fbc96 100644
--- a/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim
+++ b/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim
@@ -11,7 +11,7 @@ import
   ../platforms/abstractions,
   ../math/config/curves,
   ../math/[arithmetic, extension_fields],
-  ../math/curves/zoo_hash_to_curve,
+  ../math/constants/zoo_hash_to_curve,
   ./h2c_utilities
 
 # ############################################################
diff --git a/constantine/hash_to_curve/hash_to_curve.nim b/constantine/hash_to_curve/hash_to_curve.nim
index 984c52e..ef45b49 100644
--- a/constantine/hash_to_curve/hash_to_curve.nim
+++ b/constantine/hash_to_curve/hash_to_curve.nim
@@ -11,7 +11,7 @@ import
   ../platforms/abstractions,
   ../math/config/curves,
   ../math/[arithmetic, extension_fields],
-  ../math/curves/[zoo_hash_to_curve, zoo_subgroups],
+  ../math/constants/[zoo_hash_to_curve, zoo_subgroups],
   ../math/ec_shortweierstrass,
   ./h2c_hash_to_field,
   ./h2c_map_to_isocurve_swu,
diff --git a/constantine/math/arithmetic/README.md b/constantine/math/arithmetic/README.md
index 57553d6..9392247 100644
--- a/constantine/math/arithmetic/README.md
+++ b/constantine/math/arithmetic/README.md
@@ -34,7 +34,7 @@ where code size is not an issue for example for multi-precision addition.
 
 - Faster big-integer modular multiplication for most moduli\
   Gautam Botrel, Gus Gutoski, and Thomas Piellard, 2020\
-  https://hackmd.io/@zkteam/modular_multiplication
+  https://hackmd.io/@gnark/modular_multiplication
 
 ### Square roots
 
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
index daa7d19..cb81e4d 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
@@ -45,8 +45,8 @@ macro mulMont_CIOS_sparebit_gen[N: static int](
   ## The multiplication and reduction are further merged in the same loop
   ##
   ## This requires the most significant word of the Modulus
-  ##   M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
-  ## https://hackmd.io/@zkteam/modular_multiplication
+  ##   M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
+  ## https://hackmd.io/@gnark/modular_multiplication
 
   # No register spilling handling
   doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
@@ -213,3 +213,200 @@ func squareMont_CIOS_asm*[N](
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_inline(a)
   r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
+
+# Montgomery Sum of Products
+# ------------------------------------------------------------
+
+macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
+        r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
+        M_PIR: Limbs[N], m0ninv_REG: BaseType,
+        skipFinalSub: static bool
+      ): untyped =
+  ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
+  ## using the CIOS method
+  ## 
+  ## This requires 2 spare bits in the most significant word
+  ## so that we can skip the intermediate reductions
+
+  # No register spilling handling
+  doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
+
+  doAssert K <= 8, "we cannot sum more than 8 products"
+  # Bounds:
+  # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
+  #    for all intent and purposes this is true since aᵢ.bᵢ is:
+  #    if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
+  #    if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
+  #    with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
+  # 2. We have a high-word tN to accumulate overflows.
+  #    with 2 unused bits in the last word,
+  #    the multiplication of two last words will leave 4 unused bits
+  #    enough for accumulating 8 additions and overflow.
+
+  result = newStmtList()
+
+  var ctx = init(Assembler_x86, BaseType)
+  let
+    scratchSlots = 6
+
+    # We could force M as immediate by specializing per moduli
+    M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
+    # If N is too big, we need to spill registers. TODO.
+    t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
+    # MultiPurpose Register slots
+    scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
+
+    # MUL requires RAX and RDX
+
+    m0ninv = Operand(
+               desc: OperandDesc(
+                 asmId: "[m0ninv]",
+                 nimSymbol: m0ninv_REG,
+                 rm: MemOffsettable,
+                 constraint: Input,
+                 cEmit: "&" & $m0ninv_REG
+               )
+             )
+
+    # We're really constrained by register and somehow setting as memory doesn't help
+    # So we store the result `r` in the scratch space and then reload it in RDX
+    # before the scratchspace is used in final substraction
+    a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
+    b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
+    tN = scratch[2]                                  # High part of extended precision multiplication
+    C = scratch[3]                                   # Carry during reduction step
+    r = scratch[4]                                   # Stores the `r` operand
+    S = scratch[5]                                   # Mul step: Stores the carry A 
+                                                     # Red step: Stores (t[0] * m0ninv) mod 2ʷ
+
+  # Registers used:
+  # - 1 for `M`
+  # - 6 for `t`     (at most)
+  # - 6 for `scratch`
+  # - 2 for RAX and RDX
+  # Total 15 out of 16
+  # We can save 1 by hardcoding M as immediate (and m0ninv)
+  # but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
+  # We might be able to save registers by having `r` and `M` be memory operand as well
+
+  let tsym = t.nimSymbol
+  let scratchSym = scratch.nimSymbol
+  result.add quote do:
+    static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
+
+    var `tsym`{.noInit, used.}: typeof(`r_PIR`)
+    # Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
+    var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
+    `scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
+    `scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
+    `scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
+
+  # Algorithm
+  # -----------------------------------------
+  # for i=0 to N-1
+  #   tN := 0
+  #   for k=0 to K-1
+  #     A := 0
+  #     for j=0 to N-1
+  # 		  (A,t[j])  := t[j] + a[k][j]*b[k][i] + A
+  #     tN += A
+  #   m := t[0]*m0ninv mod W
+  # 	C,_ := t[0] + m*M[0]
+  # 	for j=1 to N-1
+  # 		(C,t[j-1]) := t[j] + m*M[j] + C
+  #   t[N-1] = tN + C
+
+  # No register spilling handling
+  doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
+
+  for i in 0 ..< N:
+    # Multiplication step
+    ctx.comment "  Multiplication step"
+    ctx.comment "  tN = 0"
+    ctx.`xor` tN, tN
+    for k in 0 ..< K:
+      template A: untyped = S
+
+      ctx.comment "    A = 0"
+      ctx.`xor` A, A
+      ctx.comment "      (A,t[0])  := t[0] + a[k][0]*b[k][i] + A"
+      ctx.mov rax, a[k, 0]
+      ctx.mul rdx, rax, b[k, i], rax
+      if i == 0 and k == 0: # First accumulation, overwrite t[0]
+        ctx.mov t[0], rax
+      else:                 # Accumulate in t[0]
+        ctx.add t[0], rax
+        ctx.adc rdx, 0
+      ctx.mov A, rdx
+      
+      for j in 1 ..< N:
+        ctx.comment "        (A,t[j])  := t[j] + a[k][j]*b[k][i] + A"
+        ctx.mov rax, a[k, j]
+        ctx.mul rdx, rax, b[k, i], rax
+        if i == 0 and k == 0: # First accumulation, overwrite t[0]
+          ctx.mov t[j], A
+        else:                 # Accumulate in t[0]
+          ctx.add t[j], A
+          ctx.adc rdx, 0
+        ctx.`xor` A, A
+        ctx.add t[j], rax
+        ctx.adc A, rdx
+      
+      ctx.comment "    tN += A"
+      ctx.add tN, A
+
+    # Reduction step
+    ctx.comment "  Reduction step"
+    template m: untyped = S
+    ctx.comment "  m := t[0]*m0ninv mod 2ʷ"
+    ctx.mov rax, m0ninv
+    ctx.imul rax, t[0]
+    ctx.mov m, rax
+    ctx.comment "  C,_ := t[0] + m*M[0]"
+    ctx.`xor` C, C
+    ctx.mul rdx, rax, M[0], rax
+    ctx.add rax, t[0]
+    ctx.adc C, rdx
+
+    for j in 1 ..< N:
+      ctx.comment "    (C,t[j-1]) := t[j] + m*M[j] + C"
+      ctx.mov rax, M[j]
+      ctx.mul rdx, rax, m, rax
+      ctx.add C, t[j]
+      ctx.adc rdx, 0
+      ctx.add C, rax
+      ctx.adc rdx, 0
+      ctx.mov t[j-1], C
+      ctx.mov C, rdx
+
+    ctx.comment "t[N-1] = tN + C"
+    ctx.add tN, C
+    ctx.mov t[N-1], tN
+
+
+  ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
+  let r2 = rax.asArrayAddr(len = N)
+
+  if skipFinalSub:
+    ctx.comment "  Copy result"
+    for i in 0 ..< N:
+      ctx.mov r2[i], t[i]
+  else:
+    ctx.comment "  Final substraction"
+    ctx.finalSubNoCarryImpl(
+      r2, t, M,
+      scratch
+    )
+  result.add ctx.generate()
+
+func sumprodMont_CIOS_spare2bits_asm*[N, K: static int](
+        r: var Limbs[N], a, b: array[K, Limbs[N]],
+        M: Limbs[N], m0ninv: BaseType,
+        skipFinalSub: static bool) =
+  ## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
+  ## If "skipFinalSub" is set
+  ## the result is in the range [0, 2M)
+  ## otherwise the result is in the range [0, M)
+  ## 
+  ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
+  r.sumprodMont_CIOS_spare2bits_gen(a, b, M, m0ninv, skipFinalSub)
\ No newline at end of file
diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
index d6f94b5..98a1da4 100644
--- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
+++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
@@ -130,7 +130,8 @@ proc partialRedx(
        t: OperandArray,
        M: OperandArray,
        m0ninv: Operand,
-       lo, S: Operand
+       lo: Operand or Register,
+       S: Operand
      ) =
     ## Partial Montgomery reduction
     ## For CIOS method
@@ -182,8 +183,8 @@ macro mulMont_CIOS_sparebit_adx_gen[N: static int](
   ## Generate an optimized Montgomery Multiplication kernel
   ## using the CIOS method
   ## This requires the most significant word of the Modulus
-  ##   M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
-  ## https://hackmd.io/@zkteam/modular_multiplication
+  ##   M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
+  ## https://hackmd.io/@gnark/modular_multiplication
 
   # No register spilling handling
   doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
@@ -308,3 +309,187 @@ func squareMont_CIOS_asm_adx*[N](
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_adx_inline(a)
   r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
+
+# Montgomery Sum of Products
+# ------------------------------------------------------------
+
+macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
+        r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
+        M_PIR: Limbs[N], m0ninv_REG: BaseType,
+        skipFinalSub: static bool
+      ): untyped =
+  ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
+  ## using the CIOS method
+  ## 
+  ## This requires 2 spare bits in the most significant word
+  ## so that we can skip the intermediate reductions
+
+  # No register spilling handling
+  doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
+
+  doAssert K <= 8, "we cannot sum more than 8 products"
+  # Bounds:
+  # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
+  #    for all intent and purposes this is true since aᵢ.bᵢ is:
+  #    if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
+  #    if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
+  #    with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
+  # 2. We have a high-word tN to accumulate overflows.
+  #    with 2 unused bits in the last word,
+  #    the multiplication of two last words will leave 4 unused bits
+  #    enough for accumulating 8 additions and overflow.
+
+  result = newStmtList()
+
+  var ctx = init(Assembler_x86, BaseType)
+  let
+    scratchSlots = 6
+
+    # We could force M as immediate by specializing per moduli
+    M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
+    # If N is too big, we need to spill registers. TODO.
+    t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
+    # MultiPurpose Register slots
+    scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
+
+    # MULX requires RDX as well
+
+    m0ninv = Operand(
+               desc: OperandDesc(
+                 asmId: "[m0ninv]",
+                 nimSymbol: m0ninv_REG,
+                 rm: MemOffsettable,
+                 constraint: Input,
+                 cEmit: "&" & $m0ninv_REG
+               )
+             )
+
+    # We're really constrained by register and somehow setting as memory doesn't help
+    # So we store the result `r` in the scratch space and then reload it in RDX
+    # before the scratchspace is used in final substraction
+    a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
+    b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
+    tN = scratch[2]                                  # High part of extended precision multiplication
+    C = scratch[3]                                   # Carry during reduction step
+    r = scratch[4]                                   # Stores the `r` operand
+    S = scratch[5]                                   # Mul step: Stores the carry A 
+                                                     # Red step: Stores (t[0] * m0ninv) mod 2ʷ
+
+  # Registers used:
+  # - 1 for `M`
+  # - 6 for `t`     (at most)
+  # - 6 for `scratch`
+  # - 2 for RAX and RDX
+  # Total 15 out of 16
+  # We can save 1 by hardcoding M as immediate (and m0ninv)
+  # but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
+  # We might be able to save registers by having `r` and `M` be memory operand as well
+
+  let tsym = t.nimSymbol
+  let scratchSym = scratch.nimSymbol
+  result.add quote do:
+    static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
+
+    var `tsym`{.noInit, used.}: typeof(`r_PIR`)
+    # Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
+    var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
+    `scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
+    `scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
+    `scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
+
+  # Algorithm
+  # -----------------------------------------
+  # for i=0 to N-1
+  #   tN := 0
+  #   for k=0 to K-1
+  #     A := 0
+  #     for j=0 to N-1
+  # 		  (A,t[j])  := t[j] + a[k][j]*b[k][i] + A
+  #     tN += A
+  #   m := t[0]*m0ninv mod W
+  # 	C,_ := t[0] + m*M[0]
+  # 	for j=1 to N-1
+  # 		(C,t[j-1]) := t[j] + m*M[j] + C
+  #   t[N-1] = tN + C
+
+  # No register spilling handling
+  doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
+
+  for i in 0 ..< N:
+    # Multiplication step
+    ctx.comment "  Multiplication step"
+    ctx.comment "  tN = 0"
+    ctx.`xor` tN, tN
+    for k in 0 ..< K:
+      template A: untyped = S
+
+      ctx.comment "    A = 0"
+      ctx.`xor` A, A
+      ctx.comment "      (A,t[0])  := t[0] + a[k][0]*b[k][i] + A"
+      ctx.mov rdx, b[k, i]
+      if i == 0 and k == 0: # First accumulation, overwrite t[0]
+        ctx.mulx t[1], t[0], a[k, 0], rdx
+      else:                 # Accumulate in t[0]
+        ctx.mulx A, rax, a[k, 0], rdx
+        ctx.adcx t[0], rax
+        ctx.adox t[1], A
+      
+      for j in 1 ..< N-1:
+        ctx.comment "        (A,t[j])  := t[j] + a[k][j]*b[k][i] + A"
+        if i == 0 and k == 0:
+          ctx.mulx t[j+1], rax, a[k, j], rdx
+          if j == 1:
+            ctx.add t[j], rax
+          else:
+            ctx.adc t[j], rax
+        else:
+          ctx.mulx A, rax, a[k, j], rdx
+          ctx.adcx t[j], rax
+          ctx.adox t[j+1], A
+
+      # Last limb
+      ctx.mulx A, rax, a[k, N-1], rdx
+      if i == 0 and k == 0:  
+        ctx.adc t[N-1], rax
+        ctx.comment "    tN += A"
+        ctx.adc tN, A
+      else:
+        ctx.adcx t[N-1], rax
+        ctx.comment "    tN += A"
+        ctx.mov  rdx, 0 # Set to 0 without clearing flags
+        ctx.adox tN, A
+        ctx.adcx tN, rdx
+
+    # Reduction step
+    ctx.partialRedx(
+      tN, t,
+      M, m0ninv,
+      rax, C
+    )
+
+  ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
+  let r2 = rax.asArrayAddr(len = N)
+
+  if skipFinalSub:
+    ctx.comment "  Copy result"
+    for i in 0 ..< N:
+      ctx.mov r2[i], t[i]
+  else:
+    ctx.comment "  Final substraction"
+    ctx.finalSubNoCarryImpl(
+      r2, t, M,
+      scratch
+    )
+  result.add ctx.generate()
+
+func sumprodMont_CIOS_spare2bits_asm_adx*[N, K: static int](
+        r: var Limbs[N], a, b: array[K, Limbs[N]],
+        M: Limbs[N], m0ninv: BaseType,
+        skipFinalSub: static bool) =
+  ## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
+  ## If "skipFinalSub" is set
+  ## the result is in the range [0, 2M)
+  ## otherwise the result is in the range [0, M)
+  ## 
+  ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
+  r.sumprodMont_CIOS_spare2bits_adx_gen(a, b, M, m0ninv, skipFinalSub)
\ No newline at end of file
diff --git a/constantine/math/arithmetic/bigints_montgomery.nim b/constantine/math/arithmetic/bigints_montgomery.nim
index 52d7893..96faa83 100644
--- a/constantine/math/arithmetic/bigints_montgomery.nim
+++ b/constantine/math/arithmetic/bigints_montgomery.nim
@@ -66,6 +66,20 @@ func squareMont*(r: var BigInt, a, M: BigInt, negInvModWord: static BaseType,
   ## to avoid duplicating with Nim zero-init policy
   squareMont(r.limbs, a.limbs, M.limbs, negInvModWord, spareBits, skipFinalSub)
 
+func sumprodMont*[N: static int](
+      r: var BigInt,
+      a, b: array[N, BigInt],
+      M: BigInt, negInvModWord: static BaseType,
+      spareBits: static int, skipFinalSub: static bool = false) =
+  ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products) in the Montgomery domain
+  # We rely on BigInt and Limbs having the same repr to avoid array copies
+  sumprodMont(
+    r.limbs,
+    cast[ptr array[N, typeof(a[0].limbs)]](a.unsafeAddr)[],
+    cast[ptr array[N, typeof(b[0].limbs)]](b.unsafeAddr)[],
+    M.limbs, negInvModWord, spareBits, skipFinalSub 
+  )
+
 func powMont*[mBits: static int](
        a: var BigInt[mBits], exponent: openarray[byte],
        M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,
diff --git a/constantine/math/arithmetic/finite_fields.nim b/constantine/math/arithmetic/finite_fields.nim
index 1aae4a3..8b2f328 100644
--- a/constantine/math/arithmetic/finite_fields.nim
+++ b/constantine/math/arithmetic/finite_fields.nim
@@ -149,6 +149,26 @@ func setMinusOne*(a: var FF) =
   #       Check if the compiler optimizes it away
   a.mres = FF.getMontyPrimeMinus1()
 
+
+func neg*(r: var FF, a: FF) {.meter.} =
+  ## Negate modulo p
+  when UseASM_X86_64:
+    negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
+  else:
+    # If a = 0 we need r = 0 and not r = M
+    # as comparison operator assume unicity
+    # of the modular representation.
+    # Also make sure to handle aliasing where r.addr = a.addr
+    var t {.noInit.}: FF
+    let isZero = a.isZero()
+    discard t.mres.diff(FF.fieldMod(), a.mres)
+    t.mres.csetZero(isZero)
+    r = t
+
+func neg*(a: var FF) {.meter.} =
+  ## Negate modulo p
+  a.neg(a)
+
 func `+=`*(a: var FF, b: FF) {.meter.} =
   ## In-place addition modulo p
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@@ -223,24 +243,14 @@ func square*(r: var FF, a: FF, skipFinalSub: static bool = false) {.meter.} =
   ## Squaring modulo p
   r.mres.squareMont(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub)
 
-func neg*(r: var FF, a: FF) {.meter.} =
-  ## Negate modulo p
-  when UseASM_X86_64:
-    negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
-  else:
-    # If a = 0 we need r = 0 and not r = M
-    # as comparison operator assume unicity
-    # of the modular representation.
-    # Also make sure to handle aliasing where r.addr = a.addr
-    var t {.noInit.}: FF
-    let isZero = a.isZero()
-    discard t.mres.diff(FF.fieldMod(), a.mres)
-    t.mres.csetZero(isZero)
-    r = t
-
-func neg*(a: var FF) {.meter.} =
-  ## Negate modulo p
-  a.neg(a)
+func sumprod*[N: static int](r: var FF, a, b: array[N, FF], skipFinalSub: static bool = false) {.meter.} =
+  ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products)
+  # We rely on FF and Bigints having the same repr to avoid array copies
+  r.mres.sumprodMont(
+    cast[ptr array[N, typeof(a[0].mres)]](a.unsafeAddr)[],
+    cast[ptr array[N, typeof(b[0].mres)]](b.unsafeAddr)[],
+    FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub
+  )
 
 # ############################################################
 #
diff --git a/constantine/math/arithmetic/finite_fields_square_root.nim b/constantine/math/arithmetic/finite_fields_square_root.nim
index 615236c..d89885c 100644
--- a/constantine/math/arithmetic/finite_fields_square_root.nim
+++ b/constantine/math/arithmetic/finite_fields_square_root.nim
@@ -9,7 +9,7 @@
 import
   ../../platforms/abstractions,
   ../config/curves,
-  ../curves/zoo_square_roots,
+  ../constants/zoo_square_roots,
   ./bigints, ./finite_fields, ./limbs_exgcd
 
 # ############################################################
diff --git a/constantine/math/arithmetic/limbs_exgcd.nim b/constantine/math/arithmetic/limbs_exgcd.nim
index b65b378..f8f64d9 100644
--- a/constantine/math/arithmetic/limbs_exgcd.nim
+++ b/constantine/math/arithmetic/limbs_exgcd.nim
@@ -305,8 +305,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
 
   # First iteration of [u v] [d] 
   #                    [q r].[e]
-  cd.slincombAccNoCarry(u, d[0], v, e[0])
-  ce.slincombAccNoCarry(q, d[0], r, e[0])
+  cd.ssumprodAccNoCarry(u, d[0], v, e[0])
+  ce.ssumprodAccNoCarry(q, d[0], r, e[0])
 
   # Compute me and md, multiples of M
   # such as the bottom k bits if d and e are 0
@@ -330,8 +330,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
   ce.ashr(k)
 
   for i in 1 ..< N:
-    cd.slincombAccNoCarry(u, d[i], v, e[i])
-    ce.slincombAccNoCarry(q, d[i], r, e[i])
+    cd.ssumprodAccNoCarry(u, d[i], v, e[i])
+    ce.ssumprodAccNoCarry(q, d[i], r, e[i])
     cd.smulAccNoCarry(md, M[i])
     ce.smulAccNoCarry(me, M[i])
     d[i-1] = cd.lo and Max
@@ -366,8 +366,8 @@ func matVecMul_shr_k[N, E: static int](
   
   # First iteration of [u v] [f] 
   #                    [q r].[g]
-  cf.slincombAccNoCarry(u, f[0], v, g[0])
-  cg.slincombAccNoCarry(q, f[0], r, g[0])
+  cf.ssumprodAccNoCarry(u, f[0], v, g[0])
+  cg.ssumprodAccNoCarry(q, f[0], r, g[0])
   # bottom k bits are zero by construction
   debug:
     doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo)
@@ -377,8 +377,8 @@ func matVecMul_shr_k[N, E: static int](
   cg.ashr(k)
 
   for i in 1 ..< N:
-    cf.slincombAccNoCarry(u, f[i], v, g[i])
-    cg.slincombAccNoCarry(q, f[i], r, g[i])
+    cf.ssumprodAccNoCarry(u, f[i], v, g[i])
+    cg.ssumprodAccNoCarry(q, f[i], r, g[i])
     f[i-1] = cf.lo and Max
     g[i-1] = cg.lo and Max
     cf.ashr(k)
@@ -496,11 +496,6 @@ func invmod*(
 #
 # Those symbols can be computed either via exponentiation (Fermat's Little Theorem)
 # or using slight modifications to the Extended Euclidean Algorithm for GCD.
-#
-# See
-# - Algorithm II.7 in Blake, Seroussi, Smart: "Elliptic Curves in Cryptography"
-# - Algorithm 5.9.2 in Bach and Shallit: "Algorithmic Number Theory"
-# - Pornin: https://github.com/pornin/x25519-cm0/blob/75a53f2/src/x25519-cm0.S#L89-L155
 
 func batchedDivstepsSymbol(
        t: var TransitionMatrix,
@@ -631,7 +626,7 @@ func legendreImpl[N, E](
 
   accL = (accL + accL.isOdd()) and SignedSecretWord(3)
   accL = SignedSecretWord(1)-accL
-  accL.csetZero(f.isZeroMask()) # f = gcd = 1 as M is prime or f = 0 if a = 0
+  accL.csetZero(f.isZeroMask())
   return SecretWord(accL)
 
 func legendre*(a, M: Limbs, bits: static int): SecretWord =
diff --git a/constantine/math/arithmetic/limbs_montgomery.nim b/constantine/math/arithmetic/limbs_montgomery.nim
index 1f2c2dc..2c01f2e 100644
--- a/constantine/math/arithmetic/limbs_montgomery.nim
+++ b/constantine/math/arithmetic/limbs_montgomery.nim
@@ -306,11 +306,69 @@ func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub:
     discard z.csub(M, v.isNonZero() or not(z < M))
   r = z
 
+func sumprodMont_CIOS_spare2bits[K: static int](
+       r: var Limbs, a, b: array[K, Limbs],
+       M: Limbs, m0ninv: BaseType,
+       skipFinalSub: static bool = false) =
+  ## Compute r = ⅀aᵢ.bᵢ (mod M) (suim of products)
+  ## This requires 2 unused bits in the field element representation
+  ## 
+  ## This maps
+  ## - [0, 2p) -> [0, 2p) with skipFinalSub
+  ## - [0, 2p) -> [0, p) without
+  ## 
+  ## skipFinalSub skips the final substraction step.
+
+  # We want all the computation to be kept in registers
+  # hence we use a temporary `t`, hoping that the compiler does the right thing™.
+  var t: typeof(M)   # zero-init
+  const N = t.len
+  # Extra word to handle carries t[N] (not there are 2 unused spare bits)
+  var tN {.noInit.}: SecretWord
+
+  static: doAssert K <= 8, "we cannot sum more than 8 products"
+  # Bounds:
+  # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
+  #    for all intent and purposes this is true since aᵢ.bᵢ is:
+  #    if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
+  #    if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
+  #    with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
+  # 2. We have a high-word tN to accumulate overflows.
+  #    with 2 unused bits in the last word,
+  #    the multiplication of two last words will leave 4 unused bits
+  #    enough for accumulating 8 additions and overflow.
+
+  staticFor i, 0, N:
+    tN = Zero
+    staticFor k, 0, K:
+      var A = Zero
+      staticFor j, 0, N:
+        # (A, t[j]) <- a[k][j] * b[k][i] + t[j] + A
+        muladd2(A, t[j], a[k][j], b[k][i], t[j], A)
+      tN += A
+
+    # Reduction
+    #  m        <- (t[0] * m0ninv) mod 2ʷ
+    # (C, _)    <- m * M[0] + t[0]
+    var C, lo = Zero
+    let m = t[0] * SecretWord(m0ninv)
+    muladd1(C, lo, m, M[0], t[0])
+    staticFor j, 1, N:
+      # (C, t[j-1]) <- m*M[j] + t[j] + C
+      muladd2(C, t[j-1], m, M[j], t[j], C)
+    #  (_,t[N-1]) <- t[N] + C
+    t[N-1] = tN + C
+
+  when not skipFinalSub:
+    discard t.csub(M, not(t < M))
+  r = t
+
+
 # Montgomery Squaring
 # --------------------------------------------------------------------------------------------------------------------
 #
 # There are Montgomery squaring multiplications mentioned in the litterature
-# - https://hackmd.io/@zkteam/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
+# - https://hackmd.io/@gnark/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
 # - Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards
 #   Johann Großschädl, 2003
 #   https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf
@@ -482,9 +540,31 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
     r.redc2xMont(r2x, M, m0ninv, spareBits, skipFinalSub)
   else:
     mulMont(r, a, a, M, m0ninv, spareBits, skipFinalSub)
-    
+
+func sumprodMont*[N: static int](
+        r: var Limbs, a, b: array[N, Limbs],
+        M: Limbs, m0ninv: BaseType,
+        spareBits: static int,
+        skipFinalSub: static bool = false) {.inline.} =
+  when spareBits >= 2:
+    when UseASM_X86_64 and r.len in {2 .. 6}:
+      if ({.noSideEffect.}: hasAdx()):
+        r.sumprodMont_CIOS_spare2bits_asm_adx(a, b, M, m0ninv, skipFinalSub)
+      else:
+        r.sumprodMont_CIOS_spare2bits_asm(a, b, M, m0ninv, skipFinalSub)
+    else:  
+      r.sumprodMont_CIOS_spare2bits(a, b, M, m0ninv, skipFinalSub)
+  else:
+    r.mulMont(a[0], b[0], M, m0ninv, spareBits, skipFinalSub = false)
+    var ri {.noInit.}: Limbs
+    for i in 1 ..< N:
+      ri.mulMont(a[i], b[i], M, m0ninv, spareBits, skipFinalSub = false)
+      var overflowed = SecretBool r.add(ri)
+      overflowed = overflowed or not(r < M)
+      discard r.csub(M, overflowed)
+
 func fromMont*(r: var Limbs, a, M: Limbs,
-           m0ninv: BaseType, spareBits: static int) =
+           m0ninv: BaseType, spareBits: static int) {.inline.} =
   ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)
   ## to the regular natural representation (mod N)
   ##
@@ -512,7 +592,7 @@ func fromMont*(r: var Limbs, a, M: Limbs,
     fromMont_CIOS(r, a, M, m0ninv)
 
 func getMont*(r: var Limbs, a, M, r2modM: Limbs,
-                   m0ninv: BaseType, spareBits: static int) =
+                   m0ninv: BaseType, spareBits: static int) {.inline.} =
   ## Transform a bigint ``a`` from it's natural representation (mod N)
   ## to a the Montgomery n-residue representation
   ##
diff --git a/constantine/math/arithmetic/limbs_unsaturated.nim b/constantine/math/arithmetic/limbs_unsaturated.nim
index aff37e6..54e0d86 100644
--- a/constantine/math/arithmetic/limbs_unsaturated.nim
+++ b/constantine/math/arithmetic/limbs_unsaturated.nim
@@ -393,8 +393,8 @@ func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
   r.lo = SignedSecretWord UV[0]
   r.hi = SignedSecretWord UV[1]
 
-func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
-  ## Accumulated linear combination
+func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
+  ## Accumulated sum of products
   ## (_, hi, lo) += a*u + b*v
   ## This assumes no overflowing
   var carry: Carry
diff --git a/constantine/math/config/curves_prop_field_core.nim b/constantine/math/config/curves_prop_field_core.nim
index 668c278..d4ad635 100644
--- a/constantine/math/config/curves_prop_field_core.nim
+++ b/constantine/math/config/curves_prop_field_core.nim
@@ -45,6 +45,10 @@ func has_P_3mod4_primeModulus*(C: static Curve): static bool =
   ## Returns true iff p ≡ 3 (mod 4)
   (BaseType(C.Mod.limbs[0]) and 3) == 3
 
+func has_P_3mod8_primeModulus*(C: static Curve): static bool =
+  ## Returns true iff p ≡ 3 (mod 8)
+  (BaseType(C.Mod.limbs[0]) and 7) == 3
+
 func has_P_5mod8_primeModulus*(C: static Curve): static bool =
   ## Returns true iff p ≡ 5 (mod 8)
   (BaseType(C.Mod.limbs[0]) and 7) == 5
diff --git a/constantine/math/curves/README.md b/constantine/math/constants/README.md
similarity index 100%
rename from constantine/math/curves/README.md
rename to constantine/math/constants/README.md
diff --git a/constantine/math/curves/bandersnatch_sqrt.nim b/constantine/math/constants/bandersnatch_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bandersnatch_sqrt.nim
rename to constantine/math/constants/bandersnatch_sqrt.nim
diff --git a/constantine/math/curves/bls12_377_constants.nim b/constantine/math/constants/bls12_377_constants.nim
similarity index 100%
rename from constantine/math/curves/bls12_377_constants.nim
rename to constantine/math/constants/bls12_377_constants.nim
diff --git a/constantine/math/curves/bls12_377_endomorphisms.nim b/constantine/math/constants/bls12_377_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/bls12_377_endomorphisms.nim
rename to constantine/math/constants/bls12_377_endomorphisms.nim
diff --git a/constantine/math/curves/bls12_377_frobenius.nim b/constantine/math/constants/bls12_377_frobenius.nim
similarity index 100%
rename from constantine/math/curves/bls12_377_frobenius.nim
rename to constantine/math/constants/bls12_377_frobenius.nim
diff --git a/constantine/math/curves/bls12_377_pairing.nim b/constantine/math/constants/bls12_377_pairing.nim
similarity index 100%
rename from constantine/math/curves/bls12_377_pairing.nim
rename to constantine/math/constants/bls12_377_pairing.nim
diff --git a/constantine/math/curves/bls12_377_sqrt.nim b/constantine/math/constants/bls12_377_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bls12_377_sqrt.nim
rename to constantine/math/constants/bls12_377_sqrt.nim
diff --git a/constantine/math/curves/bls12_377_subgroups.nim b/constantine/math/constants/bls12_377_subgroups.nim
similarity index 99%
rename from constantine/math/curves/bls12_377_subgroups.nim
rename to constantine/math/constants/bls12_377_subgroups.nim
index cf34c6c..3d6dd1e 100644
--- a/constantine/math/curves/bls12_377_subgroups.nim
+++ b/constantine/math/constants/bls12_377_subgroups.nim
@@ -15,7 +15,7 @@ import
   ../ec_shortweierstrass,
   ../io/io_bigints,
   ../isogenies/frobenius,
-  ../curves/zoo_endomorphisms
+  ../constants/zoo_endomorphisms
 
 func pow_bls12_377_abs_x[ECP: ECP_ShortW[Fp[BLS12_377], G1] or
        ECP_ShortW[Fp2[BLS12_377], G2]](
diff --git a/constantine/math/curves/bls12_381_constants.nim b/constantine/math/constants/bls12_381_constants.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_constants.nim
rename to constantine/math/constants/bls12_381_constants.nim
diff --git a/constantine/math/curves/bls12_381_endomorphisms.nim b/constantine/math/constants/bls12_381_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_endomorphisms.nim
rename to constantine/math/constants/bls12_381_endomorphisms.nim
diff --git a/constantine/math/curves/bls12_381_frobenius.nim b/constantine/math/constants/bls12_381_frobenius.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_frobenius.nim
rename to constantine/math/constants/bls12_381_frobenius.nim
diff --git a/constantine/math/curves/bls12_381_generators.nim b/constantine/math/constants/bls12_381_generators.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_generators.nim
rename to constantine/math/constants/bls12_381_generators.nim
diff --git a/constantine/math/curves/bls12_381_hash_to_curve_g1.nim b/constantine/math/constants/bls12_381_hash_to_curve_g1.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_hash_to_curve_g1.nim
rename to constantine/math/constants/bls12_381_hash_to_curve_g1.nim
diff --git a/constantine/math/curves/bls12_381_hash_to_curve_g2.nim b/constantine/math/constants/bls12_381_hash_to_curve_g2.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_hash_to_curve_g2.nim
rename to constantine/math/constants/bls12_381_hash_to_curve_g2.nim
diff --git a/constantine/math/curves/bls12_381_pairing.nim b/constantine/math/constants/bls12_381_pairing.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_pairing.nim
rename to constantine/math/constants/bls12_381_pairing.nim
diff --git a/constantine/math/curves/bls12_381_sqrt.nim b/constantine/math/constants/bls12_381_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_sqrt.nim
rename to constantine/math/constants/bls12_381_sqrt.nim
diff --git a/constantine/math/curves/bls12_381_sqrt_fp2.nim b/constantine/math/constants/bls12_381_sqrt_fp2.nim
similarity index 100%
rename from constantine/math/curves/bls12_381_sqrt_fp2.nim
rename to constantine/math/constants/bls12_381_sqrt_fp2.nim
diff --git a/constantine/math/curves/bls12_381_subgroups.nim b/constantine/math/constants/bls12_381_subgroups.nim
similarity index 99%
rename from constantine/math/curves/bls12_381_subgroups.nim
rename to constantine/math/constants/bls12_381_subgroups.nim
index 8216f53..0e34848 100644
--- a/constantine/math/curves/bls12_381_subgroups.nim
+++ b/constantine/math/constants/bls12_381_subgroups.nim
@@ -15,7 +15,7 @@ import
   ../ec_shortweierstrass,
   ../io/io_bigints,
   ../isogenies/frobenius,
-  ../curves/zoo_endomorphisms
+  ../constants/zoo_endomorphisms
 
 func pow_bls12_381_abs_x[ECP: ECP_ShortW[Fp[BLS12_381], G1] or
        ECP_ShortW[Fp2[BLS12_381], G2]](
diff --git a/constantine/math/curves/bn254_nogami_constants.nim b/constantine/math/constants/bn254_nogami_constants.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_constants.nim
rename to constantine/math/constants/bn254_nogami_constants.nim
diff --git a/constantine/math/curves/bn254_nogami_endomorphisms.nim b/constantine/math/constants/bn254_nogami_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_endomorphisms.nim
rename to constantine/math/constants/bn254_nogami_endomorphisms.nim
diff --git a/constantine/math/curves/bn254_nogami_frobenius.nim b/constantine/math/constants/bn254_nogami_frobenius.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_frobenius.nim
rename to constantine/math/constants/bn254_nogami_frobenius.nim
diff --git a/constantine/math/curves/bn254_nogami_pairing.nim b/constantine/math/constants/bn254_nogami_pairing.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_pairing.nim
rename to constantine/math/constants/bn254_nogami_pairing.nim
diff --git a/constantine/math/curves/bn254_nogami_sqrt.nim b/constantine/math/constants/bn254_nogami_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_sqrt.nim
rename to constantine/math/constants/bn254_nogami_sqrt.nim
diff --git a/constantine/math/curves/bn254_nogami_sqrt_fp2.nim b/constantine/math/constants/bn254_nogami_sqrt_fp2.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_sqrt_fp2.nim
rename to constantine/math/constants/bn254_nogami_sqrt_fp2.nim
diff --git a/constantine/math/curves/bn254_nogami_subgroups.nim b/constantine/math/constants/bn254_nogami_subgroups.nim
similarity index 100%
rename from constantine/math/curves/bn254_nogami_subgroups.nim
rename to constantine/math/constants/bn254_nogami_subgroups.nim
diff --git a/constantine/math/curves/bn254_snarks_constants.nim b/constantine/math/constants/bn254_snarks_constants.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_constants.nim
rename to constantine/math/constants/bn254_snarks_constants.nim
diff --git a/constantine/math/curves/bn254_snarks_endomorphisms.nim b/constantine/math/constants/bn254_snarks_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_endomorphisms.nim
rename to constantine/math/constants/bn254_snarks_endomorphisms.nim
diff --git a/constantine/math/curves/bn254_snarks_frobenius.nim b/constantine/math/constants/bn254_snarks_frobenius.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_frobenius.nim
rename to constantine/math/constants/bn254_snarks_frobenius.nim
diff --git a/constantine/math/curves/bn254_snarks_generators.nim b/constantine/math/constants/bn254_snarks_generators.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_generators.nim
rename to constantine/math/constants/bn254_snarks_generators.nim
diff --git a/constantine/math/curves/bn254_snarks_hash_to_curve_g1.nim b/constantine/math/constants/bn254_snarks_hash_to_curve_g1.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_hash_to_curve_g1.nim
rename to constantine/math/constants/bn254_snarks_hash_to_curve_g1.nim
diff --git a/constantine/math/curves/bn254_snarks_hash_to_curve_g2.nim b/constantine/math/constants/bn254_snarks_hash_to_curve_g2.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_hash_to_curve_g2.nim
rename to constantine/math/constants/bn254_snarks_hash_to_curve_g2.nim
diff --git a/constantine/math/curves/bn254_snarks_pairing.nim b/constantine/math/constants/bn254_snarks_pairing.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_pairing.nim
rename to constantine/math/constants/bn254_snarks_pairing.nim
diff --git a/constantine/math/curves/bn254_snarks_sqrt.nim b/constantine/math/constants/bn254_snarks_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_sqrt.nim
rename to constantine/math/constants/bn254_snarks_sqrt.nim
diff --git a/constantine/math/curves/bn254_snarks_sqrt_fp2.nim b/constantine/math/constants/bn254_snarks_sqrt_fp2.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_sqrt_fp2.nim
rename to constantine/math/constants/bn254_snarks_sqrt_fp2.nim
diff --git a/constantine/math/curves/bn254_snarks_subgroups.nim b/constantine/math/constants/bn254_snarks_subgroups.nim
similarity index 100%
rename from constantine/math/curves/bn254_snarks_subgroups.nim
rename to constantine/math/constants/bn254_snarks_subgroups.nim
diff --git a/constantine/math/curves/bw6_761_constants.nim b/constantine/math/constants/bw6_761_constants.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_constants.nim
rename to constantine/math/constants/bw6_761_constants.nim
diff --git a/constantine/math/curves/bw6_761_endomorphisms.nim b/constantine/math/constants/bw6_761_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_endomorphisms.nim
rename to constantine/math/constants/bw6_761_endomorphisms.nim
diff --git a/constantine/math/curves/bw6_761_frobenius.nim b/constantine/math/constants/bw6_761_frobenius.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_frobenius.nim
rename to constantine/math/constants/bw6_761_frobenius.nim
diff --git a/constantine/math/curves/bw6_761_pairing.nim b/constantine/math/constants/bw6_761_pairing.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_pairing.nim
rename to constantine/math/constants/bw6_761_pairing.nim
diff --git a/constantine/math/curves/bw6_761_sqrt.nim b/constantine/math/constants/bw6_761_sqrt.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_sqrt.nim
rename to constantine/math/constants/bw6_761_sqrt.nim
diff --git a/constantine/math/curves/bw6_761_subgroups.nim b/constantine/math/constants/bw6_761_subgroups.nim
similarity index 100%
rename from constantine/math/curves/bw6_761_subgroups.nim
rename to constantine/math/constants/bw6_761_subgroups.nim
diff --git a/constantine/math/curves/curve25519_sqrt.nim b/constantine/math/constants/curve25519_sqrt.nim
similarity index 100%
rename from constantine/math/curves/curve25519_sqrt.nim
rename to constantine/math/constants/curve25519_sqrt.nim
diff --git a/constantine/math/curves/jubjub_sqrt.nim b/constantine/math/constants/jubjub_sqrt.nim
similarity index 100%
rename from constantine/math/curves/jubjub_sqrt.nim
rename to constantine/math/constants/jubjub_sqrt.nim
diff --git a/constantine/math/curves/pallas_endomorphisms.nim b/constantine/math/constants/pallas_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/pallas_endomorphisms.nim
rename to constantine/math/constants/pallas_endomorphisms.nim
diff --git a/constantine/math/curves/pallas_sqrt.nim b/constantine/math/constants/pallas_sqrt.nim
similarity index 100%
rename from constantine/math/curves/pallas_sqrt.nim
rename to constantine/math/constants/pallas_sqrt.nim
diff --git a/constantine/math/curves/pallas_subgroups.nim b/constantine/math/constants/pallas_subgroups.nim
similarity index 100%
rename from constantine/math/curves/pallas_subgroups.nim
rename to constantine/math/constants/pallas_subgroups.nim
diff --git a/constantine/math/curves/vesta_endomorphisms.nim b/constantine/math/constants/vesta_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/vesta_endomorphisms.nim
rename to constantine/math/constants/vesta_endomorphisms.nim
diff --git a/constantine/math/curves/vesta_sqrt.nim b/constantine/math/constants/vesta_sqrt.nim
similarity index 100%
rename from constantine/math/curves/vesta_sqrt.nim
rename to constantine/math/constants/vesta_sqrt.nim
diff --git a/constantine/math/curves/vesta_subgroups.nim b/constantine/math/constants/vesta_subgroups.nim
similarity index 100%
rename from constantine/math/curves/vesta_subgroups.nim
rename to constantine/math/constants/vesta_subgroups.nim
diff --git a/constantine/math/curves/zoo_constants.nim b/constantine/math/constants/zoo_constants.nim
similarity index 100%
rename from constantine/math/curves/zoo_constants.nim
rename to constantine/math/constants/zoo_constants.nim
diff --git a/constantine/math/curves/zoo_endomorphisms.nim b/constantine/math/constants/zoo_endomorphisms.nim
similarity index 100%
rename from constantine/math/curves/zoo_endomorphisms.nim
rename to constantine/math/constants/zoo_endomorphisms.nim
diff --git a/constantine/math/curves/zoo_frobenius.nim b/constantine/math/constants/zoo_frobenius.nim
similarity index 100%
rename from constantine/math/curves/zoo_frobenius.nim
rename to constantine/math/constants/zoo_frobenius.nim
diff --git a/constantine/math/curves/zoo_generators.nim b/constantine/math/constants/zoo_generators.nim
similarity index 100%
rename from constantine/math/curves/zoo_generators.nim
rename to constantine/math/constants/zoo_generators.nim
diff --git a/constantine/math/curves/zoo_hash_to_curve.nim b/constantine/math/constants/zoo_hash_to_curve.nim
similarity index 100%
rename from constantine/math/curves/zoo_hash_to_curve.nim
rename to constantine/math/constants/zoo_hash_to_curve.nim
diff --git a/constantine/math/curves/zoo_pairings.nim b/constantine/math/constants/zoo_pairings.nim
similarity index 100%
rename from constantine/math/curves/zoo_pairings.nim
rename to constantine/math/constants/zoo_pairings.nim
diff --git a/constantine/math/curves/zoo_square_roots.nim b/constantine/math/constants/zoo_square_roots.nim
similarity index 100%
rename from constantine/math/curves/zoo_square_roots.nim
rename to constantine/math/constants/zoo_square_roots.nim
diff --git a/constantine/math/curves/zoo_square_roots_fp2.nim b/constantine/math/constants/zoo_square_roots_fp2.nim
similarity index 100%
rename from constantine/math/curves/zoo_square_roots_fp2.nim
rename to constantine/math/constants/zoo_square_roots_fp2.nim
diff --git a/constantine/math/curves/zoo_subgroups.nim b/constantine/math/constants/zoo_subgroups.nim
similarity index 100%
rename from constantine/math/curves/zoo_subgroups.nim
rename to constantine/math/constants/zoo_subgroups.nim
diff --git a/constantine/math/elliptic/ec_endomorphism_accel.nim b/constantine/math/elliptic/ec_endomorphism_accel.nim
index 170c191..3115258 100644
--- a/constantine/math/elliptic/ec_endomorphism_accel.nim
+++ b/constantine/math/elliptic/ec_endomorphism_accel.nim
@@ -12,7 +12,7 @@ import
   # Internal
   ../../platforms/abstractions,
   ../config/[curves, type_bigint],
-  ../curves/zoo_endomorphisms,
+  ../constants/zoo_endomorphisms,
   ../arithmetic,
   ../extension_fields,
   ../isogenies/frobenius,
diff --git a/constantine/math/elliptic/ec_scalar_mul.nim b/constantine/math/elliptic/ec_scalar_mul.nim
index 6126149..dba6433 100644
--- a/constantine/math/elliptic/ec_scalar_mul.nim
+++ b/constantine/math/elliptic/ec_scalar_mul.nim
@@ -12,7 +12,7 @@ import
   ../arithmetic,
   ../extension_fields,
   ../io/io_bigints,
-  ../curves/zoo_endomorphisms,
+  ../constants/zoo_endomorphisms,
   ./ec_endomorphism_accel
 
 # ############################################################
diff --git a/constantine/math/elliptic/ec_shortweierstrass_affine.nim b/constantine/math/elliptic/ec_shortweierstrass_affine.nim
index 8518970..e06afa7 100644
--- a/constantine/math/elliptic/ec_shortweierstrass_affine.nim
+++ b/constantine/math/elliptic/ec_shortweierstrass_affine.nim
@@ -12,7 +12,7 @@ import
   ../arithmetic,
   ../extension_fields,
   ../io/[io_fields, io_extfields],
-  ../curves/zoo_constants
+  ../constants/zoo_constants
 
 # ############################################################
 #
diff --git a/constantine/math/extension_fields/square_root_fp2.nim b/constantine/math/extension_fields/square_root_fp2.nim
index b721376..b6c2cf5 100644
--- a/constantine/math/extension_fields/square_root_fp2.nim
+++ b/constantine/math/extension_fields/square_root_fp2.nim
@@ -11,7 +11,7 @@ import
   ./towers,
   ../arithmetic,
   ../config/curves,
-  ../curves/zoo_square_roots_fp2
+  ../constants/zoo_square_roots_fp2
 
 # Square root
 # -----------------------------------------------------------
diff --git a/constantine/math/extension_fields/towers.nim b/constantine/math/extension_fields/towers.nim
index 3129984..5e13c20 100644
--- a/constantine/math/extension_fields/towers.nim
+++ b/constantine/math/extension_fields/towers.nim
@@ -57,8 +57,8 @@ type
     CubicExt[Fp2[C]]
 
   Fp12*[C: static Curve] =
-    CubicExt[Fp4[C]]
-    # QuadraticExt[Fp6[C]]
+    # CubicExt[Fp4[C]]
+    QuadraticExt[Fp6[C]]
 
 template c0*(a: ExtensionField): auto =
   a.coords[0]
@@ -662,8 +662,8 @@ func prod*(r: var CubicExt, a: CubicExt, _: type NonResidue) =
   ## and v³ = ξ
   ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
   let t {.noInit.} = a.c2
-  r.c1 = a.c0
   r.c2 = a.c1
+  r.c1 = a.c0
   r.c0.prod(t, NonResidue)
 
 func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} =
@@ -689,8 +689,8 @@ func prod2x*(
   ## and v³ = ξ
   ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
   let t {.noInit.} = a.c2
-  r.c1 = a.c0
   r.c2 = a.c1
+  r.c1 = a.c0
   r.c0.prod2x(t, NonResidue)
 
 {.pop.} # inline
@@ -811,7 +811,7 @@ func square2x_complex(r: var QuadraticExt2x, a: Fp2) =
 # Complex multiplications
 # ----------------------------------------------------------------------
 
-func prod_complex(r: var Fp2, a, b: Fp2) =
+func prod_complex(r: var Fp2, a, b: Fp2) {.used.} =
   ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
   ## ``r`` is initialized/overwritten
   ##
@@ -823,10 +823,7 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
   # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
   # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
   #
-  # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
-  # where addition/substraction are significantly cheaper than multiplication
-  #
-  # So we always reframe the imaginary part using Karatsuba approach to save a multiplication
+  # Hence we can consider using q Karatsuba approach to save a multiplication
   # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
   #
   # Costs (naive implementation)
@@ -841,19 +838,12 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
   # - 2 Addition 𝔽p
   # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
   static: doAssert r.fromComplexExtension()
-
-  var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
-  a0b0.prod(a.c0, b.c0)                                         # [1 Mul]
-  a1b1.prod(a.c1, b.c1)                                         # [2 Mul]
-
-  r.c0.sum(a.c0, a.c1)  # r0 = (a0 + a1)                        # [2 Mul, 1 Add]
-  r.c1.sum(b.c0, b.c1)  # r1 = (b0 + b1)                        # [2 Mul, 2 Add]
-  # aliasing: a and b unneeded now
-  r.c1 *= r.c0          # r1 = (b0 + b1)(a0 + a1)               # [3 Mul, 2 Add] - 𝔽p temporary
-
-  r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1                    # [3 Mul, 2 Add, 1 Sub]
-  r.c1 -= a0b0          # r1 = (b0 + b1)(a0 + a1) - a0b0        # [3 Mul, 2 Add, 2 Sub]
-  r.c1 -= a1b1          # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
+  var t {.noInit.}: typeof(r)
+  var na1 {.noInit.}: typeof(r.c0)
+  na1.neg(a.c1)
+  t.c0.sumprod([a.c0, na1], [b.c0, b.c1])
+  t.c1.sumprod([a.c0, a.c1], [b.c1, b.c0])
+  r = t
 
 func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) =
   ## Double-precision unreduced complex multiplication
@@ -983,7 +973,37 @@ func square2x_disjoint*[Fdbl, F](
   r.c1.diff2xMod(r.c1, V0)
   r.c1.diff2xMod(r.c1, V1)
 
-# Generic multiplications
+# Multiplications (specializations)
+# -------------------------------------------------------------------
+
+func prodImpl_fp4o2_p3mod8[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) =
+  ## Returns r = a * b 
+  ## For 𝔽p4/𝔽p2 with p ≡ 3 (mod 8),
+  ##   hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
+  ##   and 𝔽p SNR is (1 + i)
+  static: doAssert C.has_P_3mod8_primeModulus()
+  var
+    b10_m_b11{.noInit.}, b10_p_b11{.noInit.}: Fp[C]
+    n_a01{.noInit.}, n_a11{.noInit.}: Fp[C]
+
+    t{.noInit.}: Fp4[C]
+  
+  b10_m_b11.diff(b.c1.c0, b.c1.c1)
+  b10_p_b11.sum(b.c1.c0, b.c1.c1)
+  n_a01.neg(a.c0.c1)
+  n_a11.neg(a.c1.c1)
+
+  t.c0.c0.sumprod([a.c0.c0,   n_a01,   a.c1.c0,     n_a11],
+                  [b.c0.c0, b.c0.c1, b10_m_b11, b10_p_b11])
+  t.c0.c1.sumprod([a.c0.c0, a.c0.c1,   a.c1.c0,   a.c1.c1],
+                  [b.c0.c1, b.c0.c0, b10_p_b11, b10_m_b11])
+  t.c1.c0.sumprod([a.c0.c0,   n_a01,   a.c1.c0,     n_a11],
+                  [b.c1.c0, b.c1.c1,   b.c0.c0,   b.c0.c1])
+  t.c1.c1.sumprod([a.c0.c0, a.c0.c1,   a.c1.c0,   a.c1.c1],
+                  [b.c1.c1, b.c1.c0,   b.c0.c1,   b.c0.c0])
+  r = t
+
+# Multiplications (generic)
 # ----------------------------------------------------------------------
 
 func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
@@ -1264,7 +1284,7 @@ func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
 
   # [1 Inv, 2 Sqr, 1 Add]
   t.redc2x(V0)
-  t.inv()                  # v1 = 1 / (a0² - β a1²)
+  t.inv()                 # v1 = 1 / (a0² - β a1²)
 
   # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
   r.c0.prod(a.c0, t)      # r0 = a0 / (a0² - β a1²)
@@ -1288,35 +1308,37 @@ func square2x*(r: var QuadraticExt2x, a: QuadraticExt) =
   else:
     r.square2x_disjoint(a.c0, a.c1)
 
+func square_disjoint*[F](r: var QuadraticExt[F], a0, a1: F) =
+  # TODO understand why Fp4[BLS12_377]
+  # is so slow in the branch
+  # TODO:
+  # - On Fp4, we can have a.c0.c0 off by p
+  #   a reduction is missing
+  static: doAssert not r.fromComplexExtension(), "Faster specialization not implemented"
+
+  var d {.noInit.}: doublePrec(typeof(r))
+  d.square2x_disjoint(a0, a1)
+  r.c0.redc2x(d.c0)
+  r.c1.redc2x(d.c1)
+
 func square*(r: var QuadraticExt, a: QuadraticExt) =
   when r.fromComplexExtension():
-    when true:
-      when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
-        if ({.noSideEffect.}: hasAdx()):
-          r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
-        else:
-          r.square_complex(a)
+    when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
+      if ({.noSideEffect.}: hasAdx()):
+        r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
       else:
         r.square_complex(a)
-    else: # slower
-      var d {.noInit.}: doublePrec(typeof(r))
-      d.square2x(a)
-      r.c0.redc2x(d.c0)
-      r.c1.redc2x(d.c1)
+    else:
+      r.square_complex(a)
   else:
-    when true: # r.typeof.F.C in {BLS12_377, BW6_761}:
+    when QuadraticExt.C.has_large_field_elem():
       # BW6-761 requires too many registers for Dbl width path
       r.square_generic(a)
-    else:
-      # TODO understand why Fp4[BLS12_377]
-      # is so slow in the branch
-      # TODO:
-      # - On Fp4, we can have a.c0.c0 off by p
-      #   a reduction is missing
-      var d {.noInit.}: doublePrec(typeof(r))
-      d.square2x_disjoint(a.c0, a.c1)
-      r.c0.redc2x(d.c0)
-      r.c1.redc2x(d.c1)
+    elif QuadraticExt is Fp4[BLS12_377]:
+      # TODO BLS12-377 slowness to fix
+      r.square_generic(a)
+    else: 
+      r.square_disjoint(a.c0, a.c1)
 
 func square*(a: var QuadraticExt) =
   ## In-place squaring
@@ -1326,26 +1348,25 @@ func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
   ## Multiplication r <- a*b
 
   when r.fromComplexExtension():
-    when false:
-      r.prod_complex(a, b)
-    else: # faster
-      when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
-        if ({.noSideEffect.}: hasAdx()):
-          r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
-        else:
-          var d {.noInit.}: doublePrec(typeof(r))
-          d.prod2x_complex(a, b)
-          r.c0.redc2x(d.c0)
-          r.c1.redc2x(d.c1)
+    when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
+      if ({.noSideEffect.}: hasAdx()):
+        r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
       else:
         var d {.noInit.}: doublePrec(typeof(r))
         d.prod2x_complex(a, b)
         r.c0.redc2x(d.c0)
         r.c1.redc2x(d.c1)
+    else:
+      var d {.noInit.}: doublePrec(typeof(r))
+      d.prod2x_complex(a, b)
+      r.c0.redc2x(d.c0)
+      r.c1.redc2x(d.c1)
   else:
-    when r.typeof.F.C.has_large_field_elem():
+    when QuadraticExt is Fp12 or r.typeof.F.C.has_large_field_elem():
       # BW6-761 requires too many registers for Dbl width path
       r.prod_generic(a, b)
+    elif QuadraticExt is Fp4 and QuadraticExt.C.has_P_3mod8_primeModulus():
+      r.prodImpl_fp4o2_p3mod8(a, b)
     else:
       var d {.noInit.}: doublePrec(typeof(r))
       d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)
@@ -1595,7 +1616,49 @@ func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
   r.c0.prod(m12, NonResidue)
   r.c0 += s0
 
-# Multiplications
+# Multiplications (specializations)
+# -------------------------------------------------------------------
+
+func prodImpl_fp6o2_p3mod8[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) =
+  ## Returns r = a * b 
+  ## For 𝔽p6/𝔽p2 with p ≡ 3 (mod 8),
+  ##   hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
+  ##   and 𝔽p SNR is (1 + i)
+  # https://eprint.iacr.org/2022/367 - Equation 8
+  static: doAssert C.has_P_3mod8_primeModulus()
+  var
+    b10_p_b11{.noInit.}, b10_m_b11{.noInit.}: Fp[C]
+    b20_p_b21{.noInit.}, b20_m_b21{.noInit.}: Fp[C]
+
+    n_a01{.noInit.}, n_a11{.noInit.}, n_a21{.noInit.}: Fp[C]
+
+    t{.noInit.}: Fp6[C]
+
+  b10_p_b11.sum(b.c1.c0, b.c1.c1)
+  b10_m_b11.diff(b.c1.c0, b.c1.c1)
+  b20_p_b21.sum(b.c2.c0, b.c2.c1)
+  b20_m_b21.diff(b.c2.c0, b.c2.c1)
+
+  n_a01.neg(a.c0.c1)
+  n_a11.neg(a.c1.c1)
+  n_a21.neg(a.c2.c1)
+
+  t.c0.c0.sumprod([a.c0.c0,   n_a01,   a.c1.c0,     n_a11,   a.c2.c0,     n_a21],
+                  [b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11])
+  t.c0.c1.sumprod([a.c0.c0, a.c0.c1,   a.c1.c0,   a.c1.c1,   a.c2.c0,   a.c2.c1],
+                  [b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11])
+  t.c1.c0.sumprod([a.c0.c0,   n_a01,   a.c1.c0,     n_a11,   a.c2.c0,     n_a21],
+                  [b.c1.c0, b.c1.c1,   b.c0.c0,   b.c0.c1, b20_m_b21, b20_p_b21])
+  t.c1.c1.sumprod([a.c0.c0, a.c0.c1,   a.c1.c0,   a.c1.c1,   a.c2.c0,   a.c2.c1],
+                  [b.c1.c1, b.c1.c0,   b.c0.c1,   b.c0.c0, b20_p_b21, b20_m_b21])
+  t.c2.c0.sumprod([a.c0.c0,   n_a01,   a.c1.c0,     n_a11,   a.c2.c0,     n_a21],
+                  [b.c2.c0, b.c2.c1,   b.c1.c0,   b.c1.c1,   b.c0.c0,   b.c0.c1])
+  t.c2.c1.sumprod([a.c0.c0, a.c0.c1,   a.c1.c0,   a.c1.c1,   a.c2.c0,   a.c2.c1],
+                  [b.c2.c1, b.c2.c0,   b.c1.c1,   b.c1.c0,   b.c0.c1,   b.c0.c0])
+
+  r = t
+
+# Multiplications (generic)
 # -------------------------------------------------------------------
 
 func prodImpl(r: var CubicExt, a, b: CubicExt) =
@@ -1686,6 +1749,40 @@ func prod2xImpl(r: var CubicExt2x, a, b: CubicExt) =
 # Sparse multiplication
 # ----------------------------------------------------------------------
 
+func mul_sparse_by_x00*(r: var CubicExt, a: CubicExt, sparseB: auto) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
+
+  when typeof(sparseB) is typeof(a):
+    template b(): untyped = sparseB.c0
+  elif typeof(sparseB) is typeof(a.c0):
+    template b(): untyped = sparseB
+  else:
+    {.error: "sparseB type is " & $typeof(sparseB) &
+      " which does not match with either a (" & $typeof(a) &
+      ") or a.c0 (" & $typeof(a.c0) & ")".}
+
+  r.c0.prod(a.c0, b)
+  r.c1.prod(a.c1, b)
+  r.c2.prod(a.c2, b)
+
+func mul2x_sparse_by_x00*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
+
+  when typeof(sparseB) is typeof(a):
+    template b(): untyped = sparseB.c0
+  elif typeof(sparseB) is typeof(a.c0):
+    template b(): untyped = sparseB
+  else:
+    {.error: "sparseB type is " & $typeof(sparseB) &
+      " which does not match with either a (" & $typeof(a) &
+      ") or a.c0 (" & $typeof(a.c0) & ")".}
+
+  r.c0.prod2x(a.c0, b)
+  r.c1.prod2x(a.c1, b)
+  r.c2.prod2x(a.c2, b)
+
 func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
   ## Sparse multiplication of a cubic extenion element
   ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
@@ -1718,6 +1815,171 @@ func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
   r.c1.prod(a.c0, b)
   r.c2.prod(a.c1, b)
 
+func mul2x_sparse_by_0y0*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
+  ## Sparse multiplication of a cubic extenion element
+  ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
+
+  when typeof(sparseB) is typeof(a):
+    template b(): untyped = sparseB.c1
+  elif typeof(sparseB) is typeof(a.c0):
+    template b(): untyped = sparseB
+  else:
+    {.error: "sparseB type is " & $typeof(sparseB) &
+      " which does not match with either a (" & $typeof(a) &
+      ") or a.c0 (" & $typeof(a.c0) & ")".}
+
+  r.c0.prod2x(a.c2, b)
+  r.c0.prod2x(r.c0, NonResidue)
+  r.c1.prod2x(a.c0, b)
+  r.c2.prod2x(a.c1, b)
+
+
+func mul_sparse_by_xy0*[Fpkdiv3](r: var CubicExt, a: CubicExt,
+                                 x, y: Fpkdiv3) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
+  ## 
+  ## r and a must not alias
+  
+  # v0 = a0 b0
+  # v1 = a1 b1
+  # v2 = a2 b2 = 0
+  #
+  # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
+  #    = ξ (a1 b1 + a2 b1 - v1) + v0
+  #    = ξa2 b1 + v0
+  # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
+  #    = (a0 + a1) * (b0 + b1) - v0 - v1
+  # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
+  #    = a0 b0 + a2 b0 - v0 + v1
+  #    = a2 b0 + v1
+
+  static: doAssert a.c0 is Fpkdiv3
+
+  var
+    v0 {.noInit.}: Fpkdiv3
+    v1 {.noInit.}: Fpkdiv3
+
+  v0.prod(a.c0, x)
+  v1.prod(a.c1, y)
+
+  r.c0.prod(a.c2, y)
+  r.c0 *= NonResidue
+  r.c0 += v0
+
+  r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
+  r.c2.sum(x, y)
+  r.c1 *= r.c2
+  r.c1 -= v0
+  r.c1 -= v1
+
+  r.c2.prod(a.c2, x)
+  r.c2 += v1
+
+
+func mul2x_sparse_by_xy0*[Fpkdiv3](r: var CubicExt2x, a: CubicExt,
+                                 x, y: Fpkdiv3) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
+  ## 
+  ## r and a must not alias
+
+  static: doAssert a.c0 is Fpkdiv3
+
+  var
+    V0 {.noInit.}: doubleprec(Fpkdiv3)
+    V1 {.noInit.}: doubleprec(Fpkdiv3)
+    t0{.noInit.}: Fpkdiv3
+    t1{.noInit.}: Fpkdiv3
+
+  V0.prod2x(a.c0, x)
+  V1.prod2x(a.c1, y)
+
+  r.c0.prod2x(a.c2, y)
+  r.c0.prod2x(r.c0, NonResidue)
+  r.c0.sum2xMod(r.c0, V0)
+
+  t0.sum(a.c0, a.c1)
+  t1.sum(x, y)
+  r.c1.prod2x(t0, t1)
+  r.c1.diff2xMod(r.c1, V0)
+  r.c1.diff2xMod(r.c1, V1)
+
+  r.c2.prod2x(a.c2, x)
+  r.c2.sum2xMod(r.c2, V1)
+
+func mul_sparse_by_0yz*[Fpkdiv3](r: var CubicExt, a: CubicExt, y, z: Fpkdiv3) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
+  ## 
+  ## r and a must not alias
+
+  # v0 = a0 b0 = 0
+  # v1 = a1 b1
+  # v2 = a2 b2
+  #
+  # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
+  #    = ξ ((a1 + a2) * (b1 + b2) - v1 - v2)
+  # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
+  #    = a0 b1 + a1 b1 - v1 + ξ v2
+  #    = a0 b1 + ξ v2
+  # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
+  #    = a0 b2 + a2 b2 - v0 - v2 + v1
+  #    = a0 b2 + v1
+
+  static: doAssert a.c0 is Fpkdiv3
+
+  var
+    v1 {.noInit.}: Fpkdiv3
+    v2 {.noInit.}: Fpkdiv3
+  
+  v1.prod(a.c1, y)
+  v2.prod(a.c2, z)
+
+  r.c1.sum(a.c1, a.c2)
+  r.c2.sum(   y,    z)
+  r.c0.prod(r.c1, r.c2)
+  r.c0 -= v1
+  r.c0 -= v2
+  r.c0 *= NonResidue
+
+  r.c1.prod(a.c0, y)
+  v2 *= NonResidue
+  r.c1 += v2
+
+  r.c2.prod(a.c0, z)
+  r.c2 += v1
+
+func mul2x_sparse_by_0yz*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, y, z: Fpkdiv3) =
+  ## Sparse multiplication of a cubic extension element
+  ## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
+  ## 
+  ## r and a must not alias
+  static: doAssert a.c0 is Fpkdiv3
+
+  var
+    V1 {.noInit.}: doubleprec(Fpkdiv3)
+    V2 {.noInit.}: doubleprec(Fpkdiv3)
+    t1 {.noInit.}: Fpkdiv3
+    t2 {.noInit.}: Fpkdiv3
+  
+  V1.prod2x(a.c1, y)
+  V2.prod2x(a.c2, z)
+
+  t1.sum(a.c1, a.c2)
+  t2.sum(   y,    z)
+  r.c0.prod2x(t1, t2)
+  r.c0.diff2xMod(r.c0, V1)
+  r.c0.diff2xMod(r.c0, V2)
+  r.c0.prod2x(r.c0, NonResidue)
+
+  r.c1.prod2x(a.c0, y)
+  V2.prod2x(V2, NonResidue)
+  r.c1.sum2xMod(r.c1, V2)
+
+  r.c2.prod2x(a.c0, z)
+  r.c2.sum2xMod(r.c2, V1)
+
 # Inversion
 # ----------------------------------------------------------------------
 
@@ -1856,6 +2118,8 @@ func prod*(r: var CubicExt, a, b: CubicExt) =
   ## Out-of-place multiplication
   when CubicExt.C.has_large_field_elem():
     r.prodImpl(a, b)
+  elif r is Fp6 and CubicExt.C.has_P_3mod8_primeModulus():
+    r.prodImpl_fp6o2_p3mod8(a, b)
   else:
     var d {.noInit.}: doublePrec(typeof(r))
     d.prod2x(a, b)
diff --git a/constantine/math/io/io_extfields.nim b/constantine/math/io/io_extfields.nim
index f7c3f62..16ea485 100644
--- a/constantine/math/io/io_extfields.nim
+++ b/constantine/math/io/io_extfields.nim
@@ -33,7 +33,7 @@ proc spaces*(num: int): string =
   for i in 0 ..< num:
     result[i] = ' '
 
-func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian) =
+func appendHex*(accum: var string, f: ExtensionField, indent = 0, order: static Endianness = bigEndian) =
   ## Hex accumulator
   accum.add static($f.typeof.genericHead() & '(')
   staticFor i, 0, f.coords.len:
@@ -46,7 +46,7 @@ func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, ord
       accum.appendHex(f.coords[i], indent+2, order)
   accum.add ")"
 
-func toHex*(f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian): string =
+func toHex*(f: ExtensionField, indent = 0, order: static Endianness = bigEndian): string =
   ## Stringify a tower field element to hex.
   ## Note. Leading zeros are not removed.
   ## Result is prefixed with 0x
diff --git a/constantine/math/isogenies/frobenius.nim b/constantine/math/isogenies/frobenius.nim
index 6b932f6..5440519 100644
--- a/constantine/math/isogenies/frobenius.nim
+++ b/constantine/math/isogenies/frobenius.nim
@@ -11,7 +11,7 @@ import
   ../../platforms/primitives,
   ../arithmetic,
   ../extension_fields,
-  ../curves/zoo_frobenius
+  ../constants/zoo_frobenius
 
 # Frobenius Map
 # ------------------------------------------------------------
@@ -79,17 +79,24 @@ func frobenius_map*[C](r: var Fp6[C], a: Fp6[C], k: static int = 1) {.inline.} =
 func frobenius_map*[C](r: var Fp12[C], a: Fp12[C], k: static int = 1) {.inline.} =
   ## Computes a^(pᵏ)
   ## The p-power frobenius automorphism on 𝔽p12
-  static: doAssert r.c0 is Fp4
   staticFor i, 0, r.coords.len:
     staticFor j, 0, r.coords[0].coords.len:
       r.coords[i].coords[j].frobenius_map(a.coords[i].coords[j], k)
 
-  r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
-  r.c0.c1.mulCheckSparse frobMapConst(C, 3, k)
-  r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
-  r.c1.c1.mulCheckSparse frobMapConst(C, 4, k)
-  r.c2.c0.mulCheckSparse frobMapConst(C, 2, k)
-  r.c2.c1.mulCheckSparse frobMapConst(C, 5, k)
+  when r.c0 is Fp4:
+    r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
+    r.c0.c1.mulCheckSparse frobMapConst(C, 3, k)
+    r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
+    r.c1.c1.mulCheckSparse frobMapConst(C, 4, k)
+    r.c2.c0.mulCheckSparse frobMapConst(C, 2, k)
+    r.c2.c1.mulCheckSparse frobMapConst(C, 5, k)
+  else:
+    r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
+    r.c0.c1.mulCheckSparse frobMapConst(C, 2, k)
+    r.c0.c2.mulCheckSparse frobMapConst(C, 4, k)
+    r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
+    r.c1.c1.mulCheckSparse frobMapConst(C, 3, k)
+    r.c1.c2.mulCheckSparse frobMapConst(C, 5, k)
 
 # ψ (Psi) - Untwist-Frobenius-Twist Endomorphisms on twisted curves
 # -----------------------------------------------------------------
diff --git a/constantine/math/pairing/cyclotomic_subgroup.nim b/constantine/math/pairing/cyclotomic_subgroup.nim
index 41a49f5..179d641 100644
--- a/constantine/math/pairing/cyclotomic_subgroup.nim
+++ b/constantine/math/pairing/cyclotomic_subgroup.nim
@@ -215,7 +215,7 @@ func cyclotomic_inv*[FT](r: var FT, a: FT) {.meter.} =
   ## consequently `a` MUST be unitary
   r.conj(a)
 
-func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} =
+func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) =
   ## Square `a` into `r`
   ## `a` MUST be in the cyclotomic subgroup
   ## consequently `a` MUST be unitary
@@ -224,47 +224,147 @@ func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} =
   # Granger, Scott, 2009
   # https://eprint.iacr.org/2009/565.pdf
 
+  # Cubic extension field
+  # A = 3a² − 2 ̄a
+  # B = 3 √i c² + 2 ̄b
+  # C = 3b² − 2 ̄c
+  var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0)
+
+  template a0: untyped = a.c0.c0
+  template a1: untyped = a.c0.c1
+  template a2: untyped = a.c1.c0
+  template a3: untyped = a.c1.c1
+  template a4: untyped = a.c2.c0
+  template a5: untyped = a.c2.c1
+
+  v0.square(a.c0)
+  v1.square(a.c1)
+  v2.square(a.c2)
+
+  # From here on, r aliasing with a is only for the first operation
+  # and only read/write the exact same coordinates
+  
+  # 3v₀₀ - 2a₀
+  r.c0.c0.diff(v0.c0, a0)
+  r.c0.c0.double()
+  r.c0.c0 += v0.c0
+  # 3v₀₁ + 2a₁
+  r.c0.c1.sum(v0.c1, a1)
+  r.c0.c1.double()
+  r.c0.c1 += v0.c1
+  # 3v₁₀ - 2a₄
+  r.c2.c0.diff(v1.c0, a4)
+  r.c2.c0.double()
+  r.c2.c0 += v1.c0
+  # 3v₁₁ + 2b₅
+  r.c2.c1.sum(v1.c1, a5)
+  r.c2.c1.double()
+  r.c2.c1 += v1.c1
+
+  # Now B = 3 √i c² + 2 ̄b
+  # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
+
+  # 3 (√i c²)₀ + 2a₂
+  v2.c1 *= NonResidue
+  r.c1.c0.sum(v2.c1, a2)
+  r.c1.c0.double()
+  r.c1.c0 += v2.c1
+
+  # 3 (√i c²)₁ - 2a₃
+  r.c1.c1.diff(v2.c0, a3)
+  r.c1.c1.double()
+  r.c1.c1 += v2.c0
+
+func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt[F]) =
+  ## Square `a` into `r`
+  ## `a` MUST be in the cyclotomic subgroup
+  ## consequently `a` MUST be unitary
+  # Mapping between towering schemes
+  # --------------------------------
+  #
+  # canonical <=> cubic over quadratic <=> quadratic over cubic
+  #    c₀     <=>        a₀            <=>            b₀
+  #    c₁     <=>        a₂            <=>            b₃
+  #    c₂     <=>        a₄            <=>            b₁
+  #    c₃     <=>        a₁            <=>            b₄
+  #    c₄     <=>        a₃            <=>            b₂
+  #    c₅     <=>        a₅            <=>            b₅
+  #
+  # Hence, this formula for a cubic extension field
+  #   A = 3a² − 2 ̄a
+  #   B = 3 √i c² + 2 ̄b
+  #   C = 3b² − 2 ̄c
+  #
+  # becomes
+  #   A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄)
+  #   B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
+  #   C = (b₁, b₅) = 3(b₃, b₂)² - 2(b₁, -b₅)
+  #
+  # with
+  #   v₀ = (b₀, b₄) = (a.c0.c0, a.c1.c1)
+  #   v₁ = (b₃, b₂) = (a.c1.c0, a.c0.c2)
+  #   v₂ = (b₁, b₅) = (a.c0.c1, a.c1.c2)
+  var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: QuadraticExt[typeof(r.c0.c0)]
+  
+  template b0: untyped = a.c0.c0
+  template b1: untyped = a.c0.c1
+  template b2: untyped = a.c0.c2
+  template b3: untyped = a.c1.c0
+  template b4: untyped = a.c1.c1
+  template b5: untyped = a.c1.c2
+  
+  v0.square_disjoint(b0, b4)
+  v1.square_disjoint(b3, b2)
+  v2.square_disjoint(b1, b5)
+  
+  # From here on, r aliasing with a is only for the first operation
+  # and only read/write the exact same coordinates
+  
+  # 3v₀₀ - 2b₀
+  r.c0.c0.diff(v0.c0, b0)
+  r.c0.c0.double()
+  r.c0.c0 += v0.c0
+  # 3v₁₀ - 2b₁
+  r.c0.c1.diff(v1.c0, b1)
+  r.c0.c1.double()
+  r.c0.c1 += v1.c0
+  # 3v₀₁ + 2b₄
+  r.c1.c1.sum(v0.c1, b4)
+  r.c1.c1.double()
+  r.c1.c1 += v0.c1
+  # 3v₁₁ + 2b₅
+  r.c1.c2.sum(v1.c1, b5)
+  r.c1.c2.double()
+  r.c1.c2 += v1.c1
+
+  # Now B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
+  # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
+
+  # 3 (√i (b₁, b₅)²)₀ + 2b₃
+  v2.c1 *= NonResidue
+  r.c1.c0.sum(v2.c1, b3)
+  r.c1.c0.double()
+  r.c1.c0 += v2.c1
+
+  # 3 (√i (b₁, b₅)²)₁ - 2b₃
+  r.c0.c2.diff(v2.c0, b2)
+  r.c0.c2.double()
+  r.c0.c2 += v2.c0
+
+func cyclotomic_square*[FT](r: var FT, a: FT) {.inline, meter.} =
+  ## Square `a` into `r`
+  ## `a` MUST be in the cyclotomic subgroup
+  ## consequently `a` MUST be unitary
+  #
+  # Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions
+  # Granger, Scott, 2009
+  # https://eprint.iacr.org/2009/565.pdf
   when a is CubicExt:
-    # Cubic extension field
-    # A = 3a² − 2 ̄a
-    # B = 3 √i c² + 2 ̄b
-    # C = 3b² − 2 ̄c
-    var t0{.noinit.}, t1{.noinit.}: typeof(a.c0)
-
-    t0.square(a.c0)     # t0 = a²
-    t1.double(t0)       # t1 = 2a²
-    t1 += t0            # t1 = 3a²
-
-    t0.conj(a.c0)       # t0 =  ̄a
-    t0.double()         # t0 =  2 ̄a
-    r.c0.diff(t1, t0)   # r0 = 3a² − 2 ̄a
-
-    # Aliasing: a.c0 unused
-
-    t0.square(a.c2)     # t0 = c²
-    t0 *= NonResidue    # t0 = √i c²
-    t1.double(t0)       # t1 = 2 √i c²
-    t0 += t1            # t0 = 3 √i c²
-
-    t1.square(a.c1)     # t1 = b²
-
-    r.c1.conj(a.c1)     # r1 = ̄b
-    r.c1.double()       # r1 = 2 ̄b
-    r.c1 += t0          # r1 = 3 √i c² + 2 ̄b
-
-    # Aliasing: a.c1 unused
-
-    t0.double(t1)       # t0 = 2b²
-    t0 += t1            # t0 = 3b²
-
-    t1.conj(a.c2)       # r2 =  ̄c
-    t1.double()         # r2 =  2 ̄c
-    r.c2.diff(t0, t1)   # r2 = 3b² - 2 ̄c
-
+    r.cyclotomic_square_cube_over_quad(a)
   else:
-    {.error: "Not implemented".}
+    r.cyclotomic_square_quad_over_cube(a)
 
-func cyclotomic_square*[FT](a: var FT) {.meter.} =
+func cyclotomic_square*[FT](a: var FT) {.inline.} =
   ## Square `a` into `r`
   ## `a` MUST be in the cyclotomic subgroup
   ## consequently `a` MUST be unitary
@@ -560,7 +660,10 @@ func fromFpk*[Fpkdiv6, Fpk](
       {.error: "a must be a sextic extension field".}
   elif a is QuadraticExt:
     when a.c0 is CubicExt:
-      {.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".}
+      g.g2 = a.c1.c0
+      g.g3 = a.c0.c2
+      g.g4 = a.c0.c1
+      g.g5 = a.c1.c2
     else:
       {.error: "a must be a sextic extension field".}
   else:
@@ -584,7 +687,12 @@ func asFpk*[Fpkdiv6, Fpk](
       {.error: "a must be a sextic extension field".}
   elif a is QuadraticExt:
     when a.c0 is CubicExt:
-      {.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".}
+      a.c0.c0 = g0
+      a.c0.c1 = g.g4
+      a.c0.c2 = g.g3
+      a.c1.c0 = g.g2
+      a.c1.c1 = g1
+      a.c1.c2 = g.g5
     else:
       {.error: "a must be a sextic extension field".}
   else:
@@ -598,9 +706,6 @@ func cyclotomic_exp_compressed*[N: static int, Fpk](
   ## Exponentiation is done least-signigicant bits first
   ## `squarings` represents the number of squarings
   ## to do before the next multiplication.
-  ## 
-  ## `accumSquarings` stores the accumulated squarings so far
-  ## iff N != 1
   
   type Fpkdiv6 = typeof(a.c0.c0)
 
diff --git a/constantine/math/pairing/lines_eval.nim b/constantine/math/pairing/lines_eval.nim
index 8c11535..367582f 100644
--- a/constantine/math/pairing/lines_eval.nim
+++ b/constantine/math/pairing/lines_eval.nim
@@ -17,7 +17,7 @@ import
     ec_shortweierstrass_projective
   ],
   ../io/io_extfields,
-  ../curves/zoo_constants
+  ../constants/zoo_constants
 
 # No exceptions allowed
 {.push raises: [].}
@@ -390,44 +390,19 @@ func line_add*[F1, F2](
 # D-Twist
 # ------------------------------------------------------------
 
-func mul_by_line_xy0*[Fpkdiv2, Fpkdiv6](
-       r: var Fpkdiv2,
-       a: Fpkdiv2,
-       x, y: Fpkdiv6) =
-  ## Sparse multiplication of an 𝔽pᵏᐟ² element
-  ## with coordinates (a₀, a₁, a₂) by a line (x, y, 0)
-  ## The z coordinates in the line will be ignored.
-  ## `r` and `a` must not alias
-  
-  static:
-    doAssert a is CubicExt
-    doAssert a.c0 is Fpkdiv6
-
-  var
-    v0 {.noInit.}: Fpkdiv6
-    v1 {.noInit.}: Fpkdiv6
-
-  v0.prod(a.c0, x)
-  v1.prod(a.c1, y)
-  r.c0.prod(a.c2, y)
-  r.c0 *= SexticNonResidue
-  r.c0 += v0
-
-  r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
-  r.c2.sum(x, y)
-  r.c1 *= r.c2
-  r.c1 -= v0
-  r.c1 -= v1
-
-  r.c2.prod(a.c2, x)
-  r.c2 += v1
-
-func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
+func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
   ## Sparse multiplication of an 𝔽pᵏ element
   ## by a sparse 𝔽pᵏ element coming from an D-Twist line function
-  ## With a quadratic over cubic towering (Fp2 -> Fp6 -> Fp12)
+  ## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
   ## The sparse element is represented by a packed Line type
-  ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates ab00c0
+  ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates a00bc0
+
+  # a0b0 = (a00, a01, a02).( x, 0, 0)
+  # a1b1 = (a10, a11, a12).( y, z, 0)
+  #
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = a0 b1 + a1 b0                       (Schoolbook)
+  #    = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
 
   static:
     doAssert Fpk.C.getSexticTwist() == D_Twist
@@ -436,27 +411,290 @@ func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
 
   type Fpkdiv2 = typeof(f.c0)
 
-  var
-    v0 {.noInit.}: Fpkdiv2
-    v1 {.noInit.}: Fpkdiv2
-    v2 {.noInit.}: Line[Fpkdiv6]
-    v3 {.noInit.}: Fpkdiv2
+  when Fpk.C.has_large_field_elem():
+    var
+      v0 {.noInit.}: Fpkdiv2
+      v1 {.noInit.}: Fpkdiv2
+      t  {.noInit.}: Fpkdiv6
 
-  v0.mul_by_line_xy0(f.c0, l.a, l.b)
-  v1.mul_sparse_by_0y0(f.c1, l.c)
+    v1.sum(f.c0, f.c1)
+    t.sum(l.a, l.b)
+    v0.mul_sparse_by_xy0(v1, t, l.c)
 
-  v2.x = l.a
-  v2.y.sum(l.b, l.c)
-  f.c1 += f.c0
-  v3.mul_by_line_xy0(f.c1, v2.x, v2.y)
-  v3 -= v0
-  f.c1.diff(v3, v1)
+    v1.mul_sparse_by_xy0(f.c1, l.b, l.c)
 
-  v3.c0.prod(v1.c2, SexticNonResidue)
-  v3.c0 += v0.c0
-  v3.c1.sum(v0.c1, v1.c0)
-  v3.c2.sum(v0.c2, v1.c1)
-  f.c0 = v3
+    f.c1.diff(v0, v1)
+
+    v0.mul_sparse_by_x00(f.c0, l.a)
+    f.c1 -= v0
+
+    v1 *= NonResidue
+    f.c0.sum(v0, v1)
+  
+  else: # Lazy reduction
+    var
+      V0 {.noInit.}: doubleprec(Fpkdiv2)
+      V1 {.noInit.}: doubleprec(Fpkdiv2)
+      V  {.noInit.}: doubleprec(Fpkdiv2)
+      t  {.noInit.}: Fpkdiv6
+      u  {.noInit.}: Fpkdiv2
+  
+    u.sum(f.c0, f.c1)
+    t.sum(l.a, l.b)
+    V.mul2x_sparse_by_xy0(u, t, l.c)
+    V1.mul2x_sparse_by_xy0(f.c1, l.b, l.c)
+    V0.mul2x_sparse_by_x00(f.c0, l.a)
+    
+    V.diff2xMod(V, V1)
+    V.diff2xMod(V, V0)
+    f.c1.redc2x(V)
+
+    V1.prod2x(V1, NonResidue)
+    V0.sum2xMod(V0, V1)
+    f.c0.redc2x(V0)
+
+func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
+  ## Multiply 2 lines together
+  ## The result is sparse in f.
+  # Preambule
+  #
+  # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
+  #   (a0', a1', 0).(b0', b1', 0)
+  #  
+  #   r0' = a0'b0'
+  #   r1' = a0'b1 + a1'b0'
+  #     or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
+  #   r2' = a1'b1
+  #
+  # Note¹ that the Karatsuba does not reuse terms!
+  #
+  # In the following equations (taken from quad extension implementation)
+  # a0 = ( x , 0 , 0)
+  # a1 = ( y , z , 0)
+  # b0 = ( x', 0, 0)
+  # b1 = ( y', z', 0)
+  #
+  # a0b0 = ( x, 0, 0).( x', 0 , 0) = (xx',       0,  0 )
+  # a1b1 = ( y, z, 0).( y', z', 0) = (yy', yz'+zy', zz')
+  
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = a0 b1 + a1 b0                       (Schoolbook)
+  #    = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
+  #
+  # Note² that Karatsuba doesn't help as a0 and b0 are actually very cheap (x, 0, 0)
+  #
+  # Replacing by the coordinates in the lower tower we get
+  #
+  # r0 = (xx'+ξzz', yy', yz'+zy')
+  # r1 = ( x, 0, 0)( y', z', 0) + ( y , z , 0)( x', 0, 0)
+  #    = (xy'+yx', xz'+zx', 0)
+  #
+  # Now we can apply Karasuba¹² for a total of 6 multiplications
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == D_Twist
+    doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
+    doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
+
+  var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
+
+  XX.prod2x(l0.a, l1.a)
+  YY.prod2x(l0.b, l1.b)
+  ZZ.prod2x(l0.c, l1.c)
+
+  var V{.noInit.}: doublePrec(Fpkdiv6)  
+  var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
+
+  # r0 = (xx'+ξzz', yy', yz'+zy')
+  # r00 = xx'+ξzz'' 
+  # r01 = yy'
+  # r02 = yz'+zy' = (y+z)(y'+z') - yy' - zz'
+  V.prod2x(ZZ, NonResidue)
+  V.sum2xMod(V,XX)
+  f.c0.c0.redc2x(V)
+
+  f.c0.c1.redc2x(YY)
+
+  t0.sum(l0.b, l0.c)
+  t1.sum(l1.b, l1.c)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, YY)
+  V.diff2xMod(V, ZZ)
+  f.c0.c2.redc2x(V)
+
+  # r1 = (xy'+yx', xz'+zx', 0)
+  # r10 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
+  # r11 = xz'+zx' = (x+z)(x'+z') - xx' - zz'
+  # r12 = 0
+  t0.sum(l0.a, l0.b)
+  t1.sum(l1.a, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, XX)
+  V.diff2xMod(V, YY)
+  f.c1.c0.redc2x(V)
+
+  t0.sum(l0.a, l0.c)
+  t1.sum(l1.a, l1.c)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, XX)
+  V.diff2xMod(V, ZZ)
+  f.c1.c1.redc2x(V)
+
+  f.c1.c2.setZero()
+
+# M-Twist
+# ------------------------------------------------------------
+
+func mul_sparse_by_line_cb00a0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
+  ## Sparse multiplication of an 𝔽pᵏ element
+  ## by a sparse 𝔽pᵏ element coming from an M-Twist line function
+  ## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
+  ## The sparse element is represented by a packed Line type
+  ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates cb00a0
+
+  # a0b0 = (a00, a01, a02).( z, y, 0)
+  # a1b1 = (a10, a11, a12).( 0, x, 0)
+  #
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = a0 b1 + a1 b0                       (Schoolbook)
+  #    = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == M_Twist
+    doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
+    doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
+
+  type Fpkdiv2 = typeof(f.c0)
+
+  when Fpk.C.has_large_field_elem():
+    var
+      v0 {.noInit.}: Fpkdiv2
+      v1 {.noInit.}: Fpkdiv2
+      t  {.noInit.}: Fpkdiv6
+
+    v1.sum(f.c0, f.c1)
+    t.sum(l.b, l.a)
+    v0.mul_sparse_by_xy0(v1, l.c, t)
+
+    v1.mul_sparse_by_0y0(f.c1, l.a)
+
+    f.c1.diff(v0, v1)
+
+    v0.mul_sparse_by_xy0(f.c0, l.c, l.b)
+    f.c1 -= v0
+
+    v1 *= NonResidue
+    f.c0.sum(v0, v1)
+  else: # Lazy reduction
+    var
+      V0 {.noInit.}: doubleprec(Fpkdiv2)
+      V1 {.noInit.}: doubleprec(Fpkdiv2)
+      V  {.noInit.}: doubleprec(Fpkdiv2)
+      t  {.noInit.}: Fpkdiv6
+      u  {.noInit.}: Fpkdiv2
+  
+    u.sum(f.c0, f.c1)
+    t.sum(l.b, l.a)
+    V.mul2x_sparse_by_xy0(u, l.c, t)
+    V1.mul2x_sparse_by_0y0(f.c1, l.a)
+    V0.mul2x_sparse_by_xy0(f.c0, l.c, l.b)
+    
+    V.diff2xMod(V, V1)
+    V.diff2xMod(V, V0)
+    f.c1.redc2x(V)
+
+    V1.prod2x(V1, NonResidue)
+    V0.sum2xMod(V0, V1)
+    f.c0.redc2x(V0)
+
+func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
+  ## Multiply 2 lines together
+  ## The result is sparse in f.c1.c0
+  # Preambule
+  #
+  # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
+  #   (a0', a1', 0).(b0', b1', 0)
+  #  
+  #   r0' = a0'b0'
+  #   r1' = a0'b1 + a1'b0'
+  #     or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
+  #   r2' = a1'b1
+  #
+  # Note¹ that the Karatsuba does not reuse terms!
+  #
+  # In the following equations (taken from quad extension implementation)
+  # a0 = ( z , y , 0)
+  # a1 = ( 0 , x , 0)
+  # b0 = ( z', y', 0)
+  # b1 = ( 0 , x', 0)
+  #
+  # a0b0 = ( z, y, 0).(z', y', 0) = (zz', zy'+yz', yy')
+  # a1b1 = ( 0, x, 0).( 0, x', 0) = (  0,       0, xx')
+  
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = a0 b1 + a1 b0                       (Schoolbook)
+  #    = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
+  #
+  # Note² that Karatsuba doesn't help as a1 and b1 are actually very cheap (0, x, 0)
+  #
+  # Replacing by the coordinates in the lower tower we get
+  #
+  # r0 = (zz'+ξxx', zy'+z'y, yy')
+  # r1 = (z, y, 0)(0, x', 0) + (z', y', 0)(0, x, 0)
+  #    = (0, zx'+xz', yx'+xy')
+  #
+  # Now we can apply Karasuba¹² for a total of 6 multiplications
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == M_Twist
+    doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
+    doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
+
+  var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
+
+  XX.prod2x(l0.a, l1.a)
+  YY.prod2x(l0.b, l1.b)
+  ZZ.prod2x(l0.c, l1.c)
+
+  var V{.noInit.}: doublePrec(Fpkdiv6)  
+  var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
+
+  # r0 = (zz'+ξxx', zy'+z'y, yy')
+  # r00 = zz'+ξxx'
+  # r01 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
+  # r02 = yy'
+  V.prod2x(XX, NonResidue)
+  V.sum2xMod(V, ZZ)
+  f.c0.c0.redc2x(V)
+
+  t0.sum(l0.c, l0.b)
+  t1.sum(l1.c, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, YY)
+  f.c0.c1.redc2x(V)
+
+  f.c0.c2.redc2x(YY)
+
+  # r1 = (0, zx'+xz', yx'+xy')
+  # r10 = 0
+  # r11 = zx'+xz' = (z+x)(z'+x') - zz' - xx'
+  # r12 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
+  f.c1.c0.setZero()
+
+  t0.sum(l0.c, l0.a)
+  t1.sum(l1.c, l1.a)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, XX)
+  f.c1.c1.redc2x(V)
+
+  t0.sum(l0.a, l0.b)
+  t1.sum(l1.a, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, XX)
+  V.diff2xMod(V, YY)
+  f.c1.c2.redc2x(V)
 
 # ############################################################
 #
@@ -556,16 +794,16 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
   ## Multiply 2 lines together
   ## The result is sparse in f.c1.c1
   # In the following equations (taken from cubic extension implementation)
-  # a0 = (x0, z0)
-  # a1 = (y0,  0)
-  # a2 = ( 0,  0)
-  # b0 = (x1, z1)
-  # b1 = (y1,  0)
-  # b2 = ( 0,  0)
+  # a0 = ( x, z)
+  # a1 = ( y, 0)
+  # a2 = ( 0, 0)
+  # b0 = (x', z')
+  # b1 = (y', 0)
+  # b2 = ( 0, 0)
   #
-  # v0 = a0 b0 = (x0, z0).(x1, z1)
-  # v1 = a1 b1 = (y0,  0).(y1,  0)
-  # v2 = a2 b2 = ( 0,  0).( 0,  0)
+  # v0 = a0 b0 = (x, z).(x', z')
+  # v1 = a1 b1 = (y, 0).(y',  0)
+  # v2 = a2 b2 = (0, 0).( 0,  0)
   #
   # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
   #    = ξ (a1 b1 + a2 b1 - v1) + v0
@@ -575,106 +813,65 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
   # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
   #    = a0 b0 - v0 + v1
   #    = v1
+  #
+  # r1 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
+  # 
+  # If we dive deeper:
+  # r1 = a0b1 + a1b0 = (xy'+yx', zy'+yz')      (Schoolbook - 4 muls)
+  # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
+  # r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
+  #
+  # The substraction are all computed as part of v0 and v1
+  # hence r1 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
 
   static:
     doAssert Fpk.C.getSexticTwist() == D_Twist
     doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
     doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
 
-  type Fpkdiv3 = typeof(f.c0)
+  var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
 
-  var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3)
-  var V1{.noInit.}: doublePrec(Fpkdiv6)
+  XX.prod2x(l0.a, l1.a)
+  YY.prod2x(l0.b, l1.b)
+  ZZ.prod2x(l0.c, l1.c)
 
-  V0.prod2x_disjoint(l0.a, l0.c, l1.a, l1.c) # a0 b0 = (x0, z0).(x1, z1)
-  V1.prod2x(l0.b, l1.b)                      # a1 b1 = (y0,  0).(y1,  0)
+  var V{.noInit.}: doublePrec(Fpkdiv6)  
+  var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
 
-  # r1 = (a0 + a1) * (b0 + b1) - v0 - v1
-  f.c1.c0.sum(l0.a, l0.b)                           # x0 + y0
-  f.c1.c1.sum(l1.a, l1.b)                           # x1 + y1
-  f2x.prod2x_disjoint(f.c1.c0, l0.c, f.c1.c1, l1.c) # (x0 + y0, z0)(x1 + y1, z1) = (a0 + a1) * (b0 + b1)
-  f2x.diff2xMod(f2x, V0)
-  f2x.c0.diff2xMod(f2x.c0, V1)
-  f.c1.redc2x(f2x)
+  # r0 = a0 b0 = (x, z).(x', z')
+  # r00 = xx' + βzz'
+  # r01 = (x+z)(x'+z') - zz' - xx'
+  V.prod2x(ZZ, NonResidue)
+  V.sum2xMod(V, XX)
+  f.c0.c0.redc2x(V)
+  
+  t0.sum(l0.a, l0.c)
+  t1.sum(l1.a, l1.c)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, XX)
+  f.c0.c1.redc2x(V)
 
-  # r0 = v0
-  f.c0.redc2x(V0)
+  # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
+  # r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
+  t0.sum(l0.a, l0.b)
+  t1.sum(l1.a, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, XX)
+  V.diff2xMod(V, YY)
+  f.c1.c0.redc2x(V)
 
-  # r2 = v1
-  f.c2.c0.redc2x(V1)
+  t0.sum(l0.c, l0.b)
+  t1.sum(l1.c, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, YY)
+  f.c1.c1.redc2x(V)
+
+  # r2 = v2 = (y, 0).(y', 0) = (yy', 0)
+  f.c2.c0.redc2x(YY)
   f.c2.c1.setZero()
 
-func mul_sparse_by_abcdefghij00*[Fpk](
-       a: var Fpk, b: Fpk) =
-  ## Sparse multiplication of an 𝔽pᵏ element
-  ## by a sparse 𝔽pᵏ element abcdefghij00
-  ## with each representing 𝔽pᵏᐟ⁶ coordinate
-
-  static:
-    doAssert Fpk.C.getSexticTwist() == D_Twist
-    doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
-    doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
-
-  type Fpkdiv3 = typeof(a.c0)
-
-  # In the following equations (taken from cubic extension implementation)
-  # b0 = (b00, b01)
-  # b1 = (b10, b11)
-  # b2 = (b20,   0)
-  #
-  # v0 = a0 b0 = (f00, f01).(b00, b01)
-  # v1 = a1 b1 = (f10, f11).(b10, b11)
-  # v2 = a2 b2 = (f20, f21).(b20,   0)
-  #
-  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
-  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
-  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
-
-  var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
-  var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
-  var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
-
-  V0.prod2x(a.c0, b.c0)
-  V1.prod2x(a.c1, b.c1)
-  V2.mul2x_sparse_by_x0(a.c2, b.c2)
-
-  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
-  t0.sum(a.c1, a.c2)
-  t1.c0.sum(b.c1.c0, b.c2.c0)             # b₂ = (b20,   0)
-  f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
-  f2x.diff2xMod(f2x, V1)
-  f2x.diff2xMod(f2x, V2)
-  f2x.prod2x(f2x, NonResidue)
-  f2x.sum2xMod(f2x, V0)
-
-  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
-  t0.sum(a.c0, a.c1)
-  t1.sum(b.c0, b.c1)
-  g2x.prod2x(t0, t1)
-  g2x.diff2xMod(g2x, V0)
-  g2x.diff2xMod(g2x, V1)
-
-  # r₂ = (a₀ + a₂) and (b₀ + b₂)
-  t0.sum(a.c0, a.c2)
-  t1.c0.sum(b.c0.c0, b.c2.c0)             # b₂ = (b20,   0)
-
-  # Now we are aliasing free
-
-  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
-  a.c0.redc2x(f2x)
-
-  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
-  f2x.prod2x(V2, NonResidue)
-  g2x.sum2xMod(g2x, f2x)
-  a.c1.redc2x(g2x)
-
-  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
-  f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
-  f2x.diff2xMod(f2x, V0)
-  f2x.diff2xMod(f2x, V2)
-  f2x.sum2xMod(f2x, V1)
-  a.c2.redc2x(f2x)
-
 # M-Twist
 # ------------------------------------------------------------
 
@@ -769,16 +966,16 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
   ## Multiply 2 lines together
   ## The result is sparse in f.c1.c1
   # In the following equations (taken from cubic extension implementation)
-  # a0 = (z0, x0)
+  # a0 = ( z,  x)
   # a1 = ( 0,  0)
-  # a2 = (y0,  0)
-  # b0 = (z1, x1)
+  # a2 = ( y,  0)
+  # b0 = (z', x')
   # b1 = ( 0,  0)
-  # b2 = (y0,  0)
+  # b2 = (y',  0)
   #
-  # v0 = a0 b0 = (z0, x0).(z1, x1)
-  # v1 = a1 b1 = ( 0,  0).( 0,  0)
-  # v2 = a2 b2 = (y0,  0).(y1,  0)
+  # v0 = a0 b0 = (z, x).(z', x')
+  # v1 = a1 b1 = (0, 0).(0 , 0)
+  # v2 = a2 b2 = (y, 0).(y', 0)
   #
   # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
   #    = ξ (a1 b2 + a2 b2 - v2) + v0
@@ -788,36 +985,221 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
   #    = ξ v2
   # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
   #    = (a0 + a2) * (b0 + b2) - v0 - v2
+  #
+  # r2 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
+  # 
+  # If we dive deeper:
+  # r2 = a0b2 + a2b0 = (zy'+yz', xy'+x'y)      (Schoolbook - 4 muls)
+  # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
+  # r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
+  #
+  # The substraction are all computed as part of v0 and v2
+  # hence r2 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
 
   static:
     doAssert Fpk.C.getSexticTwist() == M_Twist
     doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
     doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
 
-  type Fpkdiv3 = typeof(f.c0)
+  var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
 
-  var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3)
-  var V2{.noInit.}: doublePrec(Fpkdiv6)
+  XX.prod2x(l0.a, l1.a)
+  YY.prod2x(l0.b, l1.b)
+  ZZ.prod2x(l0.c, l1.c)
 
-  V0.prod2x_disjoint(l0.c, l0.a, l1.c, l1.a) # a0 b0 = (z0, x0).(z1, x1)
-  V2.prod2x(l0.b, l1.b)                      # a2 b2 = (y0,  0).(y1,  0)
+  var V{.noInit.}: doublePrec(Fpkdiv6)  
+  var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
 
-  # r2 = (a0 + a2) * (b0 + b2) - v0 - v2
-  f.c2.c0.sum(l0.b, l0.c)                           # y0 + z0
-  f.c2.c1.sum(l1.b, l1.c)                           # y1 + z1
-  f2x.prod2x_disjoint(f.c2.c0, l0.a, f.c2.c1, l1.a) # (z0 + y0, x0).(z1 + y1, x1) = (a0 + a2) * (b0 + b2)
-  f2x.diff2xMod(f2x, V0)                            # (a0 + a2) * (b0 + b2) - v0
-  f2x.c0.diff2xMod(f2x.c0, V2)                      # (a0 + a2) * (b0 + b2) - v0 - v2
-  f.c2.redc2x(f2x)
+  # r0 = a0 b0 = (z, x).(z', x')
+  # r00 = zz' + βxx'
+  # r01 = (z+x)(z'+x') - zz' - xx'
+  V.prod2x(XX, NonResidue)
+  V.sum2xMod(V, ZZ)
+  f.c0.c0.redc2x(V)
+  
+  t0.sum(l0.c, l0.a)
+  t1.sum(l1.c, l1.a)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, XX)
+  f.c0.c1.redc2x(V)
 
-  # r1 = ξ v2
-  f.c1.c1.redc2x(V2)
+  # r1 = ξ v2 = ξ(y, 0).(y', 0) = ξ(yy', 0) = (0, yy')
   f.c1.c0.setZero()
+  f.c1.c1.redc2x(YY)
 
-  # r0 = v0
-  f.c0.redc2x(V0)
+  # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
+  # r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
+  t0.sum(l0.c, l0.b)
+  t1.sum(l1.c, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, ZZ)
+  V.diff2xMod(V, YY)
+  f.c2.c0.redc2x(V)
 
-func mul_sparse_by_abcd00efghij*[Fpk](
+  t0.sum(l0.a, l0.b)
+  t1.sum(l1.a, l1.b)
+  V.prod2x(t0, t1)
+  V.diff2xMod(V, XX)
+  V.diff2xMod(V, YY)
+  f.c2.c1.redc2x(V)
+
+# ############################################################
+#
+#                    Sparse multiplication         
+#
+# ############################################################
+
+func mul_sparse_by_abcdefghij00_quad_over_cube*[Fpk](
+       a: var Fpk, b: Fpk) =
+  ## Sparse multiplication of an 𝔽pᵏ element
+  ## by a sparse 𝔽pᵏ element abcdefghij00
+  ## with each representing 𝔽pᵏᐟ⁶ coordinate
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == D_Twist
+    doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
+    doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
+
+  type Fpkdiv2 = typeof(a.c0)
+
+  # In the following equations (taken from quadratic extension implementation)
+  # b0 = (b00, b01, b02)
+  # b1 = (b10, b11,   0)
+  #
+  # v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
+  # v1 = a1 b1 = (a10, a11, a12).(b10, b11,   0)
+  #
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
+
+  when Fpk.C.has_large_field_elem():
+    var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
+
+    # v2 <- (a0 + a1)(b0 + b1)
+    v0.sum(a.c0, a.c1)
+    v1.c0.sum(b.c0.c0, b.c1.c0)
+    v1.c1.sum(b.c0.c1, b.c1.c1)
+    v1.c2 = b.c0.c2
+    v2.prod(v0, v1)
+
+    # v0 <- a0 b0
+    # v1 <- a1 b1
+    v0.prod(a.c0, b.c0)
+    v1.mul_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
+
+    # aliasing: a and b unneeded now
+
+    # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
+    a.c1.diff(v2, v0)
+    a.c1 -= v1
+
+    # r0 <- a0 b0 + β a1 b1
+    v1 *= NonResidue
+    a.c0.sum(v0, v1)
+
+  else: # Lazy reduction
+    var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
+    var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
+
+    # v2 <- (a0 + a1)(b0 + b1)
+    t0.sum(a.c0, a.c1)
+    t1.c0.sum(b.c0.c0, b.c1.c0)
+    t1.c1.sum(b.c0.c1, b.c1.c1)
+    t1.c2 = b.c0.c2
+    V2.prod2x(t0, t1)
+
+    # v0 <- a0 b0
+    # v1 <- a1 b1
+    V0.prod2x(a.c0, b.c0)
+    V1.mul2x_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
+
+    # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
+    V2.diff2xMod(V2, V0)
+    V2.diff2xMod(V2, V1)
+    a.c1.redc2x(V2)
+
+    # r0 <- a0 b0 + β a1 b1
+    V1.prod2x(V1, NonResidue)
+    V2.sum2xMod(V0, V1)
+    a.c0.redc2x(V2)
+
+func mul_sparse_by_abcdef00ghij_quad_over_cube*[Fpk](
+       a: var Fpk, b: Fpk) =
+  ## Sparse multiplication of an 𝔽pᵏ element
+  ## by a sparse 𝔽pᵏ element abcdef00ghij
+  ## with each representing 𝔽pᵏᐟ⁶ coordinate
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == M_Twist
+    doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
+    doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
+
+  type Fpkdiv2 = typeof(a.c0)
+
+  # In the following equations (taken from quadratic extension implementation)
+  # b0 = (b00, b01, b02)
+  # b1 = (  0, b11, b12)
+  #
+  # v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
+  # v1 = a1 b1 = (a10, a11, a12).(  0, b11, b12)
+  #
+  # r0 = a0 b0 + ξ a1 b1
+  # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
+
+  when Fpk.C.has_large_field_elem():
+    var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
+
+    # v2 <- (a0 + a1)(b0 + b1)
+    v0.sum(a.c0, a.c1)
+    v1.c0 = b.c0.c0
+    v1.c1.sum(b.c0.c1, b.c1.c1)
+    v1.c2.sum(b.c0.c2, b.c1.c2)
+    v2.prod(v0, v1)
+
+    # v0 <- a0 b0
+    # v1 <- a1 b1
+    v0.prod(a.c0, b.c0)
+    v1.mul_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
+
+    # aliasing: a and b unneeded now
+
+    # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
+    a.c1.diff(v2, v0)
+    a.c1 -= v1
+
+    # r0 <- a0 b0 + β a1 b1
+    v1 *= NonResidue
+    a.c0.sum(v0, v1)
+
+  else: # Lazy reduction
+    var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
+    var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
+
+    # v2 <- (a0 + a1)(b0 + b1)
+    t0.sum(a.c0, a.c1)
+    t1.c0 = b.c0.c0
+    t1.c1.sum(b.c0.c1, b.c1.c1)
+    t1.c2.sum(b.c0.c2, b.c1.c2)
+    V2.prod2x(t0, t1)
+
+    # v0 <- a0 b0
+    # v1 <- a1 b1
+    V0.prod2x(a.c0, b.c0)
+    V1.mul2x_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
+
+    # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
+    V2.diff2xMod(V2, V0)
+    V2.diff2xMod(V2, V1)
+    a.c1.redc2x(V2)
+
+    # r0 <- a0 b0 + β a1 b1
+    V1.prod2x(V1, NonResidue)
+    V2.sum2xMod(V0, V1)
+    a.c0.redc2x(V2)
+
+
+func mul_sparse_by_abcd00efghij_cube_over_quad*[Fpk](
        a: var Fpk, b: Fpk) =
   ## Sparse multiplication of an 𝔽pᵏ element
   ## by a sparse 𝔽pᵏ element abcd00efghij
@@ -888,15 +1270,92 @@ func mul_sparse_by_abcd00efghij*[Fpk](
   f2x.sum2xMod(f2x, V1)
   a.c2.redc2x(f2x)
 
+func mul_sparse_by_abcdefghij00_cube_over_quad*[Fpk](
+       a: var Fpk, b: Fpk) =
+  ## Sparse multiplication of an 𝔽pᵏ element
+  ## by a sparse 𝔽pᵏ element abcdefghij00
+  ## with each representing 𝔽pᵏᐟ⁶ coordinate
+
+  static:
+    doAssert Fpk.C.getSexticTwist() == D_Twist
+    doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
+    doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
+
+  type Fpkdiv3 = typeof(a.c0)
+
+  # In the following equations (taken from cubic extension implementation)
+  # b0 = (b00, b01)
+  # b1 = (b10, b11)
+  # b2 = (b20,   0)
+  #
+  # v0 = a0 b0 = (f00, f01).(b00, b01)
+  # v1 = a1 b1 = (f10, f11).(b10, b11)
+  # v2 = a2 b2 = (f20, f21).(b20,   0)
+  #
+  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
+  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
+  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
+
+  var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
+  var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
+  var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
+
+  V0.prod2x(a.c0, b.c0)
+  V1.prod2x(a.c1, b.c1)
+  V2.mul2x_sparse_by_x0(a.c2, b.c2)
+
+  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
+  t0.sum(a.c1, a.c2)
+  t1.c0.sum(b.c1.c0, b.c2.c0)             # b₂ = (b20,   0)
+  f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
+  f2x.diff2xMod(f2x, V1)
+  f2x.diff2xMod(f2x, V2)
+  f2x.prod2x(f2x, NonResidue)
+  f2x.sum2xMod(f2x, V0)
+
+  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
+  t0.sum(a.c0, a.c1)
+  t1.sum(b.c0, b.c1)
+  g2x.prod2x(t0, t1)
+  g2x.diff2xMod(g2x, V0)
+  g2x.diff2xMod(g2x, V1)
+
+  # r₂ = (a₀ + a₂) and (b₀ + b₂)
+  t0.sum(a.c0, a.c2)
+  t1.c0.sum(b.c0.c0, b.c2.c0)             # b₂ = (b20,   0)
+
+  # Now we are aliasing free
+
+  # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
+  a.c0.redc2x(f2x)
+
+  # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
+  f2x.prod2x(V2, NonResidue)
+  g2x.sum2xMod(g2x, f2x)
+  a.c1.redc2x(g2x)
+
+  # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
+  f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
+  f2x.diff2xMod(f2x, V0)
+  f2x.diff2xMod(f2x, V2)
+  f2x.sum2xMod(f2x, V1)
+  a.c2.redc2x(f2x)
+
 # Dispatch
 # ------------------------------------------------------------
 
 func mul_by_line*[Fpk, Fpkdiv6](f: var Fpk, line: Line[Fpkdiv6]) {.inline.} =
   ## Multiply an element of Fp12 by a sparse line function
   when Fpk.C.getSexticTwist() == D_Twist:
-    f.mul_sparse_by_line_acb000(line)
+    when f is CubicExt:
+      f.mul_sparse_by_line_acb000(line)
+    else:
+      f.mul_sparse_by_line_a00bc0(line)
   elif Fpk.C.getSexticTwist() == M_Twist:
-    f.mul_sparse_by_line_ca00b0(line)
+    when f is CubicExt:
+      f.mul_sparse_by_line_ca00b0(line)
+    else:
+      f.mul_sparse_by_line_cb00a0(line)
   else:
     {.error: "A line function assumes that the curve has a twist".}
 
@@ -905,18 +1364,30 @@ func prod_from_2_lines*[Fpk, Fpkdiv6](f: var Fpk, line0, line1: Line[Fpkdiv6]) {
   ## and store the result in f
   ## f is overwritten
   when Fpk.C.getSexticTwist() == D_Twist:
-    f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1)
+    when f is CubicExt:
+      f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1)
+    else:
+      f.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1)
   elif Fpk.C.getSexticTwist() == M_Twist:
-    f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1)
+    when f is CubicExt:
+      f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1)
+    else:
+      f.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1)
   else:
     {.error: "A line function assumes that the curve has a twist".}
 
 func mul_by_prod_of_2_lines*[Fpk](f: var Fpk, g: Fpk) {.inline.} =
   ## Multiply f by the somewhat sparse product of 2 lines
   when Fpk.C.getSexticTwist() == D_Twist:
-    f.mul_sparse_by_abcdefghij00(g)
+    when f is CubicExt:
+      f.mul_sparse_by_abcdefghij00_cube_over_quad(g)
+    else:
+      f.mul_sparse_by_abcdefghij00_quad_over_cube(g)
   elif Fpk.C.getSexticTwist() == M_Twist:
-    f.mul_sparse_by_abcd00efghij(g)
+    when f is CubicExt:
+      f.mul_sparse_by_abcd00efghij_cube_over_quad(g)
+    else:
+      f.mul_sparse_by_abcdef00ghij_quad_over_cube(g)
   else:
     {.error: "A line function assumes that the curve has a twist".}
 
diff --git a/constantine/math/pairing/pairing_bls12.nim b/constantine/math/pairing/pairing_bls12.nim
index 63e903a..bbd3f67 100644
--- a/constantine/math/pairing/pairing_bls12.nim
+++ b/constantine/math/pairing/pairing_bls12.nim
@@ -15,7 +15,7 @@ import
     ec_shortweierstrass_projective
   ],
   ../isogenies/frobenius,
-  ../curves/zoo_pairings,
+  ../constants/zoo_pairings,
   ../arithmetic,
   ./cyclotomic_subgroup,
   ./lines_eval,
diff --git a/constantine/math/pairing/pairing_bn.nim b/constantine/math/pairing/pairing_bn.nim
index 4cfcee7..128ef4e 100644
--- a/constantine/math/pairing/pairing_bn.nim
+++ b/constantine/math/pairing/pairing_bn.nim
@@ -15,7 +15,7 @@ import
     ec_shortweierstrass_projective
   ],
   ../isogenies/frobenius,
-  ../curves/zoo_pairings,
+  ../constants/zoo_pairings,
   ./lines_eval,
   ./cyclotomic_subgroup,
   ./miller_loops
diff --git a/constantine/math/pairing/pairing_bw6_761.nim b/constantine/math/pairing/pairing_bw6_761.nim
index d5b1a05..7ee4a55 100644
--- a/constantine/math/pairing/pairing_bw6_761.nim
+++ b/constantine/math/pairing/pairing_bw6_761.nim
@@ -15,7 +15,7 @@ import
     ec_shortweierstrass_projective
   ],
   ../isogenies/frobenius,
-  ../curves/zoo_pairings,
+  ../constants/zoo_pairings,
   ./lines_eval,
   ./miller_loops
 
diff --git a/constantine/platforms/isa/macro_assembler_x86.nim b/constantine/platforms/isa/macro_assembler_x86.nim
index 8552d23..d9061fc 100644
--- a/constantine/platforms/isa/macro_assembler_x86.nim
+++ b/constantine/platforms/isa/macro_assembler_x86.nim
@@ -74,6 +74,7 @@ type
     kRegister
     kFromArray
     kArrayAddr
+    k2dArrayAddr
 
   Operand* = object
     desc*: OperandDesc
@@ -84,6 +85,9 @@ type
       offset: int
     of kArrayAddr:
       buf: seq[Operand]
+    of k2dArrayAddr:
+      dims: array[2, int]
+      buf2d: seq[Operand]
 
   OperandDesc* = ref object
     asmId*: string          # [a] - ASM id
@@ -141,11 +145,17 @@ proc `[]`*(opArray: OperandArray, index: int): Operand =
 func `[]`*(opArray: var OperandArray, index: int): var Operand =
   opArray.buf[index]
 
-func `[]`*(arrayAddr: Operand, index: int): Operand =
-  arrayAddr.buf[index]
+func `[]`*(arrAddr: Operand, index: int): Operand =
+  arrAddr.buf[index]
 
-func `[]`*(arrayAddr: var Operand, index: int): var Operand =
-  arrayAddr.buf[index]
+func `[]`*(arrAddr: var Operand, index: int): var Operand =
+  arrAddr.buf[index]
+
+func `[]`*(arr2dAddr: Operand, i, j: int): Operand =
+  arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
+
+func `[]`*(arr2dAddr: var Operand, i, j: int): var Operand =
+  arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
 
 func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x86 =
   result.wordSize = sizeof(Word)
@@ -236,6 +246,22 @@ func asArrayAddr*(op: Register, len: int): Operand =
       offset: i
     )
 
+func as2dArrayAddr*(op: Operand, rows, cols: int): Operand =
+  ## Use the value stored in an operand as an array address
+  doAssert op.desc.rm in {Reg, PointerInReg, ElemsInReg}+SpecificRegisters
+  result = Operand(
+    kind: k2dArrayAddr,
+    desc: nil,
+    dims: [rows, cols],
+    buf2d: newSeq[Operand](rows*cols)
+  )
+  for i in 0 ..< rows*cols:
+    result.buf2d[i] = Operand(
+      desc: op.desc,
+      kind: kFromArray,
+      offset: i
+    )
+
 # Code generation
 # ------------------------------------------------------------------------------------------------------------
 
@@ -344,7 +370,7 @@ func generate*(a: Assembler_x86): NimNode =
 
 func getStrOffset(a: Assembler_x86, op: Operand): string =
   if op.kind != kFromArray:
-    if op.kind == kArrayAddr:
+    if op.kind in {kArrayAddr, k2dArrayAddr}:
       # We are operating on an array pointer
       # instead of array elements
       if op.buf[0].desc.constraint == ClobberedRegister:
diff --git a/constantine/signatures/bls_signatures.nim b/constantine/signatures/bls_signatures.nim
index 3db0061..bd374dc 100644
--- a/constantine/signatures/bls_signatures.nim
+++ b/constantine/signatures/bls_signatures.nim
@@ -8,7 +8,7 @@
 
 import
     ../math/[ec_shortweierstrass, pairings],
-    ../math/curves/zoo_generators,
+    ../math/constants/zoo_generators,
     ../hash_to_curve/hash_to_curve,
     ../hashes
 
diff --git a/metering/m_pairings.nim b/metering/m_pairings.nim
index 86c1a7c..e999261 100644
--- a/metering/m_pairings.nim
+++ b/metering/m_pairings.nim
@@ -12,7 +12,7 @@ import
   ../constantine/math/config/[common, curves],
   ../constantine/math/[arithmetic, extension_fields],
   ../constantine/math/elliptic/ec_shortweierstrass_projective,
-  ../constantine/math/curves/zoo_subgroups,
+  ../constantine/math/constants/zoo_subgroups,
   ../constantine/math/pairing/pairing_bls12,
   # Helpers
   ../helpers/prng_unsafe
diff --git a/tests/math/t_ec_template.nim b/tests/math/t_ec_template.nim
index 5e15dab..fcd3479 100644
--- a/tests/math/t_ec_template.nim
+++ b/tests/math/t_ec_template.nim
@@ -27,7 +27,7 @@ import
     ec_twistededwards_projective,
     ec_scalar_mul],
   ../../constantine/math/io/[io_bigints, io_fields, io_ec],
-  ../../constantine/math/curves/zoo_subgroups,
+  ../../constantine/math/constants/zoo_subgroups,
   # Test utilities
   ../../helpers/prng_unsafe,
   ./support/ec_reference_scalar_mult
diff --git a/tests/math/t_finite_fields_mulsquare.nim b/tests/math/t_finite_fields_mulsquare.nim
index b85447d..52b473f 100644
--- a/tests/math/t_finite_fields_mulsquare.nim
+++ b/tests/math/t_finite_fields_mulsquare.nim
@@ -28,7 +28,7 @@ echo "test_finite_fields_mulsquare xoshiro512** seed: ", seed
 static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option"
 
 proc sanity(C: static Curve) =
-  test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]":
+  test "Squaring 0,1,2 with " & $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]":
         block: # 0² mod
           var n: Fp[C]
 
@@ -310,3 +310,74 @@ suite "Modular squaring - bugs highlighted by property-based testing":
     check:
       bool(a2mul == expected)
       bool(a2sqr == expected)
+
+
+proc random_sumprod(C: static Curve, N: static int) =
+  template sumprod_test(random_instancer: untyped) =
+    block:
+      var a: array[N, Fp[C]]
+      var b: array[N, Fp[C]]
+
+      for i in 0 ..< N:
+        a[i] = rng.random_instancer(Fp[C])
+        b[i] = rng.random_instancer(Fp[C])
+
+      var r, r_ref, t: Fp[C]
+
+      r_ref.prod(a[0], b[0])
+      for i in 1 ..< N:
+        t.prod(a[i], b[i])
+        r_ref += t
+
+      r.sumprod(a, b)
+
+      doAssert bool(r == r_ref)
+
+  template sumProdMax() =
+    block:
+      var a: array[N, Fp[C]]
+      var b: array[N, Fp[C]]
+
+      for i in 0 ..< N:
+        a[i].setMinusOne()
+        b[i].setMinusOne()
+  
+      var r, r_ref, t: Fp[C]
+
+      r_ref.prod(a[0], b[0])
+      for i in 1 ..< N:
+        t.prod(a[i], b[i])
+        r_ref += t
+
+      r.sumprod(a, b)
+
+      doAssert bool(r == r_ref)
+
+  sumprod_test(random_unsafe)
+  sumprod_test(randomHighHammingWeight)
+  sumprod_test(random_long01Seq)
+  sumProdMax()
+
+suite "Random sum products is consistent with naive " & " [" & $WordBitwidth & "-bit mode]":
+  
+  const MaxLength = 8
+  test "Random sum products mod P-224]":
+    for _ in 0 ..< Iters:
+      staticFor N, 2, MaxLength:
+        random_sumprod(P224, N)
+  test "Random sum products mod BN254_Nogami]":
+    for _ in 0 ..< Iters:
+      staticFor N, 2, MaxLength:
+        random_sumprod(BN254_Nogami, N)
+  test "Random sum products mod BN254_Snarks]":
+    for _ in 0 ..< Iters:
+      staticFor N, 2, MaxLength:
+        random_sumprod(BN254_Snarks, N)
+  test "Random sum products mod BLS12_377]":
+    for _ in 0 ..< Iters:
+      staticFor N, 2, MaxLength:
+        random_sumprod(BLS12_377, N)
+  test "Random sum products mod BLS12_381]":
+    for _ in 0 ..< Iters:
+      staticFor N, 2, MaxLength:
+        random_sumprod(BLS12_381, N)
\ No newline at end of file
diff --git a/tests/math/t_fp12_bn254_nogami.nim b/tests/math/t_fp12_bn254_nogami.nim
new file mode 100644
index 0000000..89a7fe6
--- /dev/null
+++ b/tests/math/t_fp12_bn254_nogami.nim
@@ -0,0 +1,26 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import
+  # Internals
+  ../../constantine/math/extension_fields,
+  ../../constantine/math/config/curves,
+  # Test utilities
+  ./t_fp_tower_template
+
+const TestCurves = [
+    BN254_Nogami,
+  ]
+
+runTowerTests(
+  ExtDegree = 12,
+  Iters = 12,
+  TestCurves = TestCurves,
+  moduleName = "test_fp12_" & $BN254_Nogami,
+  testSuiteDesc = "𝔽p12 = 𝔽p6[w] " & $BN254_Nogami
+)
diff --git a/tests/math/t_fp6_bn254_nogami.nim b/tests/math/t_fp6_bn254_nogami.nim
new file mode 100644
index 0000000..11fc55c
--- /dev/null
+++ b/tests/math/t_fp6_bn254_nogami.nim
@@ -0,0 +1,26 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import
+  # Internals
+  ../../constantine/math/extension_fields,
+  ../../constantine/math/config/curves,
+  # Test utilities
+  ./t_fp_tower_template
+
+const TestCurves = [
+    BN254_Nogami,
+  ]
+
+runTowerTests(
+  ExtDegree = 6,
+  Iters = 12,
+  TestCurves = TestCurves,
+  moduleName = "test_fp6_" & $BN254_Nogami,
+  testSuiteDesc = "𝔽p6 = 𝔽p2[w] " & $BN254_Nogami
+)
diff --git a/tests/math/t_fp_cubic_root.nim b/tests/math/t_fp_cubic_root.nim
index ce817b3..0948378 100644
--- a/tests/math/t_fp_cubic_root.nim
+++ b/tests/math/t_fp_cubic_root.nim
@@ -10,7 +10,7 @@ import  std/unittest,
         ../../constantine/platforms/abstractions,
         ../../constantine/math/arithmetic,
         ../../constantine/math/config/curves,
-        ../../constantine/math/curves/zoo_endomorphisms
+        ../../constantine/math/constants/zoo_endomorphisms
 
 echo "\n------------------------------------------------------\n"
 
diff --git a/tests/math/t_fp_tower_template.nim b/tests/math/t_fp_tower_template.nim
index 1cf48c3..9bd6143 100644
--- a/tests/math/t_fp_tower_template.nim
+++ b/tests/math/t_fp_tower_template.nim
@@ -260,11 +260,11 @@ proc runTowerTests*[N](
         test(ExtField(ExtDegree, curve)):
           r.prod(x, Z)
           doAssert bool(r == Z),
-            "\nExpected zero but got (" & $ExtField(ExtDegree, curve) & "): " & x.toHex()
+            "\nExpected zero but got \n(" & $ExtField(ExtDegree, curve) & "): " & x.toHex()
         test(ExtField(ExtDegree, curve)):
           r.prod(Z, x)
           doAssert bool(r == Z),
-            "\nExpected zero but got (" & $ExtField(ExtDegree, curve) & "): " & x.toHex()
+            "\nExpected zero but got \n(" & $ExtField(ExtDegree, curve) & "): " & x.toHex()
         test(ExtField(ExtDegree, curve)):
           r.prod(x, O)
           doAssert bool(r == x),
@@ -285,7 +285,7 @@ proc runTowerTests*[N](
           rMul.prod(a, a)
           rSqr.square(a)
 
-          doAssert bool(rMul == rSqr), "Failure with a (" & $Field & "): " & a.toHex() & "\n" &
+          doAssert bool(rMul == rSqr), "Failure with a (" & $Field & "): \nInput:" & a.toHex() & "\n" &
             "Mul: " & rMul.toHex() & "\n" &
             "Sqr: " & rSqr.toHex() & "\n"
 
@@ -306,7 +306,7 @@ proc runTowerTests*[N](
           rSqr.square(a)
           rNegSqr.square(na)
 
-          doAssert bool(rSqr == rNegSqr), "Failure with a (" & $Field & "): " & a.toHex() & "\n" &
+          doAssert bool(rSqr == rNegSqr), "Failure with a \n(" & $Field & "): " & a.toHex() & "\n" &
             "Sqr:    " & rSqr.toHex() & "\n" &
             "SqrNeg: " & rNegSqr.toHex() & "\n"
 
diff --git a/tests/math/t_pairing_mul_fp12_by_lines.nim b/tests/math/t_pairing_mul_fp12_by_lines.nim
index 61eec8d..1593b06 100644
--- a/tests/math/t_pairing_mul_fp12_by_lines.nim
+++ b/tests/math/t_pairing_mul_fp12_by_lines.nim
@@ -56,7 +56,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
         let y = rng.random_elem(Fp2[C], gen)
         let b = Fp4[C](coords: [Fp2[C](), y])
 
-        var r {.noInit.}, r2 {.noInit.}: Fp4[C]
+        var r, r2: Fp4[C]
 
         r.prod(a, b)
         r2.mul_sparse_by_0y(a, y)
@@ -75,7 +75,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
         let y = rng.random_elem(Fp2[C], gen)
         let b = Fp6[C](coords: [Fp2[C](), y, Fp2[C]()])
 
-        var r {.noInit.}, r2 {.noInit.}: Fp6[C]
+        var r, r2: Fp6[C]
 
         r.prod(a, b)
         r2.mul_sparse_by_0y0(a, y)
@@ -95,10 +95,10 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
         let y = rng.random_elem(Fp2[C], gen)
         let b = Fp6[C](coords: [x, y, Fp2[C]()])
 
-        var r {.noInit.}, r2 {.noInit.}: Fp6[C]
+        var r, r2: Fp6[C]
 
         r.prod(a, b)
-        r2.mul_by_line_xy0(a, x, y)
+        r2.mul_sparse_by_xy0(a, x, y)
 
         check: bool(r == r2)
 
@@ -107,58 +107,247 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
       test_fp6_xy0(curve, gen = HighHammingWeight)
       test_fp6_xy0(curve, gen = Long01Sequence)
 
+  test "Dense 𝔽p6 by Sparse 0yz":
+    proc test_fp6_0yz(C: static Curve, gen: static RandomGen) =
+      for _ in 0 ..< Iters:
+        let a = rng.random_elem(Fp6[C], gen)
+        let y = rng.random_elem(Fp2[C], gen)
+        let z = rng.random_elem(Fp2[C], gen)
+        let b = Fp6[C](coords: [Fp2[C](), y, z])
+
+        var r, r2: Fp6[C]
+
+        r.prod(a, b)
+        r2.mul_sparse_by_0yz(a, y, z)
+
+        check: bool(r == r2)
+
+    staticFor(curve, TestCurves):
+      test_fp6_0yz(curve, gen = Uniform)
+      test_fp6_0yz(curve, gen = HighHammingWeight)
+      test_fp6_0yz(curve, gen = Long01Sequence)
+
   when Fp12[BN254_Snarks]().c0.typeof is Fp6:
-    test "Sparse 𝔽p12/𝔽p6 resulting from ab00c0 line function":
-      proc test_fp12_ab00c0(C: static Curve, gen: static RandomGen) =
-        for _ in 0 ..< Iters:
-          var a = rng.random_elem(Fp12[C], gen)
-          var a2 = a
+    # =========== Towering 𝔽p12/𝔽p6 ======================================
 
-          var x = rng.random_elem(Fp2[C], gen)
-          var y = rng.random_elem(Fp2[C], gen)
-          var z = rng.random_elem(Fp2[C], gen)
+    test "Sparse 𝔽p12/𝔽p6 resulting from a00bc0 line function":
+      proc test_fp12_a00bc0(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == D_Twist:
+          for _ in 0 ..< Iters:
+            var a = rng.random_elem(Fp12[C], gen)
+            var a2 = a
 
-          let line = Line[Fp2[C]](a: x, b: y, c: z)
-          let b = Fp12[C](
-            c0: Fp6[C](coords: [       x, y, Fp2[C]()]),
-            c1: Fp6[C](coords: [Fp2[C](), z, Fp2[C]()])
-          )
+            var x = rng.random_elem(Fp2[C], gen)
+            var y = rng.random_elem(Fp2[C], gen)
+            var z = rng.random_elem(Fp2[C], gen)
 
-          a *= b
-          a2.mul_sparse_by_line_ab00c0(line)
+            let line = Line[Fp2[C]](a: x, b: y, c: z)
+            let b = Fp12[C]( coords: [
+              Fp6[C](coords: [ x, Fp2[C](), Fp2[C]()]),
+              Fp6[C](coords: [ y,        z, Fp2[C]()])
+            ])
 
-          check: bool(a == a2)
+            a *= b
+            a2.mul_sparse_by_line_a00bc0(line)
+
+            check: bool(a == a2)
+
+        staticFor(curve, TestCurves):
+          test_fp12_a00bc0(curve, gen = Uniform)
+          test_fp12_a00bc0(curve, gen = HighHammingWeight)
+          test_fp12_a00bc0(curve, gen = Long01Sequence)
+
+    test "Sparse 𝔽p12/𝔽p6 resulting from cb00a0 line function":
+      proc test_fp12_cb00a0(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == M_Twist:
+          for _ in 0 ..< Iters:
+            var a = rng.random_elem(Fp12[C], gen)
+            var a2 = a
+
+            var x = rng.random_elem(Fp2[C], gen)
+            var y = rng.random_elem(Fp2[C], gen)
+            var z = rng.random_elem(Fp2[C], gen)
+
+            let line = Line[Fp2[C]](a: x, b: y, c: z)
+            let b = Fp12[C](coords: [
+              Fp6[C](coords: [       z, y, Fp2[C]()]),
+              Fp6[C](coords: [Fp2[C](), x, Fp2[C]()])
+            ])
+
+            a *= b
+            a2.mul_sparse_by_line_cb00a0(line)
+
+            check: bool(a == a2)
+
+        staticFor(curve, TestCurves):
+          test_fp12_cb00a0(curve, gen = Uniform)
+          test_fp12_cb00a0(curve, gen = HighHammingWeight)
+          test_fp12_cb00a0(curve, gen = Long01Sequence)
+
+    test "Somewhat-sparse 𝔽p12/𝔽p6 resulting from a00bc0*a00bc0 line functions (D-twist only)":
+      proc test_fp12_a00bc0_a00bc0(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == D_Twist:
+          for _ in 0 ..< Iters:
+            var x0 = rng.random_elem(Fp2[C], gen)
+            var y0 = rng.random_elem(Fp2[C], gen)
+            var z0 = rng.random_elem(Fp2[C], gen)
+
+            let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0)
+            let f0 = Fp12[C]( coords: [
+              Fp6[C](coords: [ x0, Fp2[C](), Fp2[C]()]),
+              Fp6[C](coords: [ y0,       z0, Fp2[C]()])
+            ])
+
+
+            var x1 = rng.random_elem(Fp2[C], gen)
+            var y1 = rng.random_elem(Fp2[C], gen)
+            var z1 = rng.random_elem(Fp2[C], gen)
+
+            let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1)
+            let f1 = Fp12[C]( coords: [
+              Fp6[C](coords: [ x1, Fp2[C](), Fp2[C]()]),
+              Fp6[C](coords: [ y1,       z1, Fp2[C]()])
+            ])
+
+            var r: Fp12[C]
+            r.prod(f0, f1)
+
+            var rl: Fp12[C]
+            rl.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1)
+
+            check: bool(r == rl)
 
       staticFor(curve, TestCurves):
-        test_fp12_xy00z0(curve, gen = Uniform)
-        test_fp12_xy00z0(curve, gen = HighHammingWeight)
-        test_fp12_xy00z0(curve, gen = Long01Sequence)
+        test_fp12_a00bc0_a00bc0(curve, gen = Uniform)
+        test_fp12_a00bc0_a00bc0(curve, gen = HighHammingWeight)
+        test_fp12_a00bc0_a00bc0(curve, gen = Long01Sequence)
 
-    test "Sparse 𝔽p12/𝔽p6 resulting from acb000 line function":
-      proc test_fp12_acb000(C: static Curve, gen: static RandomGen) =
-        for _ in 0 ..< Iters:
-          var a = rng.random_elem(Fp12[C], gen)
-          var a2 = a
+    test "Somewhat-sparse 𝔽p12/𝔽p6 resulting from cb00a0*cb00a0 line functions (M-twist only)":
+      proc test_fp12_cb00a0_cb00a0(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == M_Twist:
+          for _ in 0 ..< Iters:
+            var x0 = rng.random_elem(Fp2[C], gen)
+            var y0 = rng.random_elem(Fp2[C], gen)
+            var z0 = rng.random_elem(Fp2[C], gen)
 
-          var x = rng.random_elem(Fp2[C], gen)
-          var y = rng.random_elem(Fp2[C], gen)
-          var z = rng.random_elem(Fp2[C], gen)
+            let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0)
+            let f0 = Fp12[C](coords: [
+              Fp6[C](coords: [      z0, y0, Fp2[C]()]),
+              Fp6[C](coords: [Fp2[C](), x0, Fp2[C]()])
+            ])
 
-          let line = Line[Fp2[C]](a: x, b: y, c: z)
-          let b = Fp12[C](
-            c0: Fp6[C](coords: [x, z, y])
-          )
+            var x1 = rng.random_elem(Fp2[C], gen)
+            var y1 = rng.random_elem(Fp2[C], gen)
+            var z1 = rng.random_elem(Fp2[C], gen)
 
-          a *= b
-          a2.mul_sparse_by_line_xyz000(line)
+            let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1)
+            let f1 = Fp12[C](coords: [
+              Fp6[C](coords: [      z1, y1, Fp2[C]()]),
+              Fp6[C](coords: [Fp2[C](), x1, Fp2[C]()])
+            ])
 
-          check: bool(a == a2)
+            var r: Fp12[C]
+            r.prod(f0, f1)
+
+            var rl: Fp12[C]
+            rl.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1)
+
+            check: bool(r == rl)
 
       staticFor(curve, TestCurves):
-        test_fp12_acb000(curve, gen = Uniform)
-        test_fp12_acb000(curve, gen = HighHammingWeight)
-        test_fp12_acb000(curve, gen = Long01Sequence)
-  else:
+        test_fp12_cb00a0_cb00a0(curve, gen = Uniform)
+        test_fp12_cb00a0_cb00a0(curve, gen = HighHammingWeight)
+        test_fp12_cb00a0_cb00a0(curve, gen = Long01Sequence)
+
+    test "Somewhat-sparse 𝔽p12/𝔽p6 mul by the product a00bc0*a00bc0 of line functions (D-twist only)":
+      proc test_fp12_abcdefghij00(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == D_Twist:
+          for _ in 0 ..< Iters:
+            var x0 = rng.random_elem(Fp2[C], gen)
+            var y0 = rng.random_elem(Fp2[C], gen)
+            var z0 = rng.random_elem(Fp2[C], gen)
+
+            let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0)
+            let f0 = Fp12[C]( coords: [
+              Fp6[C](coords: [ x0, Fp2[C](), Fp2[C]()]),
+              Fp6[C](coords: [ y0,       z0, Fp2[C]()])
+            ])
+
+
+            var x1 = rng.random_elem(Fp2[C], gen)
+            var y1 = rng.random_elem(Fp2[C], gen)
+            var z1 = rng.random_elem(Fp2[C], gen)
+
+            let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1)
+            let f1 = Fp12[C]( coords: [
+              Fp6[C](coords: [ x1, Fp2[C](), Fp2[C]()]),
+              Fp6[C](coords: [ y1,       z1, Fp2[C]()])
+            ])
+
+
+            var r: Fp12[C]
+            r.prod(f0, f1)
+
+            var rl: Fp12[C]
+            rl.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1)
+
+            var f = rng.random_elem(Fp12[C], gen)
+            var f2 = f
+
+            f *= rl
+            f2.mul_sparse_by_abcdefghij00_quad_over_cube(rl)
+
+            check: bool(f == f2)
+
+      staticFor(curve, TestCurves):
+        test_fp12_abcdefghij00(curve, gen = Uniform)
+        test_fp12_abcdefghij00(curve, gen = HighHammingWeight)
+        test_fp12_abcdefghij00(curve, gen = Long01Sequence)
+
+    test "Somewhat-sparse 𝔽p12/𝔽p6 mul by the product (cb00a0*cb00a0) of line functions (M-twist only)":
+      proc test_fp12_abcdef00ghij(C: static Curve, gen: static RandomGen) =
+        when C.getSexticTwist() == M_Twist:
+          for _ in 0 ..< Iters:
+            var x0 = rng.random_elem(Fp2[C], gen)
+            var y0 = rng.random_elem(Fp2[C], gen)
+            var z0 = rng.random_elem(Fp2[C], gen)
+
+            let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0)
+            let f0 = Fp12[C](coords: [
+              Fp6[C](coords: [      z0, y0, Fp2[C]()]),
+              Fp6[C](coords: [Fp2[C](), x0, Fp2[C]()])
+            ])
+
+            var x1 = rng.random_elem(Fp2[C], gen)
+            var y1 = rng.random_elem(Fp2[C], gen)
+            var z1 = rng.random_elem(Fp2[C], gen)
+
+            let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1)
+            let f1 = Fp12[C](coords: [
+              Fp6[C](coords: [      z1, y1, Fp2[C]()]),
+              Fp6[C](coords: [Fp2[C](), x1, Fp2[C]()])
+            ])
+
+            var r: Fp12[C]
+            r.prod(f0, f1)
+
+            var rl: Fp12[C]
+            rl.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1)
+
+            var f = rng.random_elem(Fp12[C], gen)
+            var f2 = f
+
+            f *= rl
+            f2.mul_sparse_by_abcdef00ghij_quad_over_cube(rl)
+
+            check: bool(f == f2)
+
+      staticFor(curve, TestCurves):
+        test_fp12_abcdef00ghij(curve, gen = Uniform)
+        test_fp12_abcdef00ghij(curve, gen = HighHammingWeight)
+        test_fp12_abcdef00ghij(curve, gen = Long01Sequence)
+
+  else: # =========== Towering 𝔽p12/𝔽p4 ======================================
     static: doAssert Fp12[BN254_Snarks]().c0.typeof is Fp4
 
     test "Sparse 𝔽p12/𝔽p4 resulting from ca00b0 line function (M-twist only)":
@@ -347,7 +536,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             var f2 = f
 
             f *= rl
-            f2.mul_sparse_by_abcdefghij00(rl)
+            f2.mul_sparse_by_abcdefghij00_cube_over_quad(rl)
 
             check: bool(f == f2)
 
@@ -396,7 +585,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             var f2 = f
 
             f *= rl
-            f2.mul_sparse_by_abcd00efghij(rl)
+            f2.mul_sparse_by_abcd00efghij_cube_over_quad(rl)
 
             check: bool(f == f2)
 
diff --git a/tests/math/t_pairing_template.nim b/tests/math/t_pairing_template.nim
index 2ca9848..cdc1816 100644
--- a/tests/math/t_pairing_template.nim
+++ b/tests/math/t_pairing_template.nim
@@ -15,7 +15,7 @@ import
   ../../constantine/math/extension_fields,
   ../../constantine/math/config/curves,
   ../../constantine/math/elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective],
-  ../../constantine/math/curves/[zoo_subgroups, zoo_pairings],
+  ../../constantine/math/constants/[zoo_subgroups, zoo_pairings],
   ../../constantine/math/pairing/cyclotomic_subgroup,
   ../../constantine/math/io/io_extfields,
 
diff --git a/tests/t_hash_to_curve_random.nim b/tests/t_hash_to_curve_random.nim
index b529120..cc13bb1 100644
--- a/tests/t_hash_to_curve_random.nim
+++ b/tests/t_hash_to_curve_random.nim
@@ -15,7 +15,7 @@ import
   ../constantine/math/ec_shortweierstrass,
   ../constantine/hash_to_curve/hash_to_curve,
   ../constantine/hashes,
-  ../constantine/math/curves/zoo_subgroups,
+  ../constantine/math/constants/zoo_subgroups,
   # Test utilities
   ../helpers/prng_unsafe