From f5c0b6245d8a7eeda8a2190b085819f7c2270754 Mon Sep 17 00:00:00 2001
From: Mamy Ratsimbazafy <mamy_github@numforge.co>
Date: Mon, 16 Aug 2021 22:22:51 +0200
Subject: [PATCH] Multipairing (#165)

* Productionize multipairings for BLS12-381

* typo

* arg order + benchmark

* Introduce mul_3way_sparse_sparse

* cleanup MultiMiller loop

* fix init sparse optimization in multimiller loop [skip ci]
---
 README.md                                    |   4 +-
 benchmarks/bench_pairing_bls12_381.nim       |   4 +
 benchmarks/bench_pairing_template.nim        |  45 ++-
 constantine.nimble                           |   1 +
 constantine/pairing/cyclotomic_fp12.nim      |   3 +
 constantine/pairing/lines_common.nim         |   4 +-
 constantine/pairing/lines_projective.nim     |   3 +
 constantine/pairing/miller_loops.nim         | 256 +++++++-------
 constantine/pairing/mul_fp12_by_lines.nim    |  29 +-
 constantine/pairing/mul_fp6_by_lines.nim     |   2 +
 constantine/pairing/multi_pairing.md         | 127 +++++++
 constantine/pairing/pairing_bls12.nim        |  16 +
 constantine/pairing/pairing_bn.nim           |   3 +
 research/multi_pairing/pairing_bls12_381.nim | 338 -------------------
 tests/t_pairing_bls12_381_multi.nim          |  65 ++++
 tests/t_pairing_mul_fp12_by_lines.nim        |   8 +-
 tests/t_sig_bls_lowlevel.nim                 |  38 ++-
 17 files changed, 473 insertions(+), 473 deletions(-)
 create mode 100644 constantine/pairing/multi_pairing.md
 delete mode 100644 research/multi_pairing/pairing_bls12_381.nim
 create mode 100644 tests/t_pairing_bls12_381_multi.nim

diff --git a/README.md b/README.md
index c23f908..ded8f52 100644
--- a/README.md
+++ b/README.md
@@ -77,8 +77,8 @@ Supports:
 - [x] Field arithmetics
 - [x] Curve arithmetic
 - [x] Pairing
-- [ ] Multi-Pairing
-- [ ] Hash-To-Curve
+- [x] Multi-Pairing
+- [x] Hash-To-Curve
 
 Families:
 - BN: Barreto-Naehrig
diff --git a/benchmarks/bench_pairing_bls12_381.nim b/benchmarks/bench_pairing_bls12_381.nim
index 0972407..cdf19eb 100644
--- a/benchmarks/bench_pairing_bls12_381.nim
+++ b/benchmarks/bench_pairing_bls12_381.nim
@@ -49,7 +49,11 @@ proc main() =
     finalExpBLS12Bench(curve, Iters)
     separator()
     pairingBLS12Bench(curve, Iters)
+    pairing_multipairing_BLS12Bench(curve, 1, Iters)
     separator()
+    staticFor j, 2, 17:
+      pairing_multisingle_BLS12Bench(curve, j, Iters div j)
+      pairing_multipairing_BLS12Bench(curve, j, Iters div j)
 
 main()
 notes()
diff --git a/benchmarks/bench_pairing_template.nim b/benchmarks/bench_pairing_template.nim
index 4369aa4..65c6715 100644
--- a/benchmarks/bench_pairing_template.nim
+++ b/benchmarks/bench_pairing_template.nim
@@ -105,7 +105,7 @@ proc mulLinebyLine_xyz000_Bench*(C: static Curve, iters: int) =
   var f = rng.random_unsafe(Fp12[C])
 
   bench("Mul line xyz000 by line xyz000", C, iters):
-    f.mul_xyz000_xyz000_into_abcdefghij00(l0, l1)
+    f.prod_xyz000_xyz000_into_abcdefghij00(l0, l1)
 
 proc mulLinebyLine_xy000z_Bench*(C: static Curve, iters: int) =
   var l0, l1: Line[Fp2[C]]
@@ -116,7 +116,7 @@ proc mulLinebyLine_xy000z_Bench*(C: static Curve, iters: int) =
   var f = rng.random_unsafe(Fp12[C])
 
   bench("Mul line xy000z by line xy000z", C, iters):
-    f.mul_xy000z_xy000z_into_abcd00efghij(l0, l1)
+    f.prod_xy000z_xy000z_into_abcd00efghij(l0, l1)
 
 proc mulFp12by_abcdefghij00_Bench*(C: static Curve, iters: int) =
   var f = rng.random_unsafe(Fp12[C])
@@ -154,7 +154,7 @@ proc mulFp12_by_2lines_v2_xyz000_Bench*(C: static Curve, iters: int) =
 
   bench("mulFp12 by 2 lines v2", C, iters):
     var f2 {.noInit.}: Fp12[C]
-    f2.mul_xyz000_xyz000_into_abcdefghij00(l0, l1)
+    f2.prod_xyz000_xyz000_into_abcdefghij00(l0, l1)
     f.mul_sparse_by_abcdefghij00(f2)
 
 proc mulFp12_by_2lines_v1_xy000z_Bench*(C: static Curve, iters: int) =
@@ -179,7 +179,7 @@ proc mulFp12_by_2lines_v2_xy000z_Bench*(C: static Curve, iters: int) =
 
   bench("mulFp12 by 2 lines v2", C, iters):
     var f2 {.noInit.}: Fp12[C]
-    f2.mul_xy000z_xy000z_into_abcd00efghij(l0, l1)
+    f2.prod_xy000z_xy000z_into_abcd00efghij(l0, l1)
     f.mul_sparse_by_abcd00efghij(f2)
 
 proc millerLoopBLS12Bench*(C: static Curve, iters: int) =
@@ -238,6 +238,43 @@ proc pairingBLS12Bench*(C: static Curve, iters: int) =
   bench("Pairing BLS12", C, iters):
     f.pairing_bls12(P, Q)
 
+proc pairing_multisingle_BLS12Bench*(C: static Curve, N: static int, iters: int) =
+  ## Benchmark N single pairings computed one by one,
+  ## then folded together with N-1 multiplications in Fp12.
+  ## Inputs are drawn from the module `rng`; `iters` is forwarded to `bench`.
+
+  var
+    Ps {.noInit.}: array[N, ECP_ShortW_Aff[Fp[C], NotOnTwist]]
+    Qs {.noInit.}: array[N, ECP_ShortW_Aff[Fp2[C], OnTwist]]
+
+    GTs {.noInit.}: array[N, Fp12[C]]
+
+  for i in 0 ..< N:
+    Ps[i] = rng.random_unsafe(typeof(Ps[0]))
+    Qs[i] = rng.random_unsafe(typeof(Qs[0]))
+
+  var f: Fp12[C]
+  bench("Pairing BLS12 multi-single " & $N & " pairings", C, iters):
+    for i in 0 ..< N:
+      GTs[i].pairing_bls12(Ps[i], Qs[i])
+
+    f = GTs[0]
+    for i in 1 ..< N:
+      f *= GTs[i]
+
+proc pairing_multipairing_BLS12Bench*(C: static Curve, N: static int, iters: int) =
+  var
+    Ps {.noInit.}: array[N, ECP_ShortW_Aff[Fp[C], NotOnTwist]]
+    Qs {.noInit.}: array[N, ECP_ShortW_Aff[Fp2[C], OnTwist]]
+
+  for i in 0 ..< N: # every slot is written here before the noInit arrays are read
+    Ps[i] = rng.random_unsafe(typeof(Ps[0]))
+    Qs[i] = rng.random_unsafe(typeof(Qs[0]))
+
+  var f: Fp12[C] # output of the array overload of pairing_bls12
+  bench("Pairing BLS12 multipairing " & $N & " pairings", C, iters):
+    f.pairing_bls12(Ps, Qs)
+
 proc pairingBNBench*(C: static Curve, iters: int) =
   let
     P = rng.random_point(ECP_ShortW_Aff[Fp[C], NotOnTwist])
diff --git a/constantine.nimble b/constantine.nimble
index 1f60d0c..29e2d7f 100644
--- a/constantine.nimble
+++ b/constantine.nimble
@@ -148,6 +148,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   ("tests/t_pairing_bn254_snarks_optate.nim", false),
   ("tests/t_pairing_bls12_377_optate.nim", false),
   ("tests/t_pairing_bls12_381_optate.nim", false),
+  ("tests/t_pairing_bls12_381_multi.nim", false),
 
   # Hashing vs OpenSSL
   # ----------------------------------------------------------
diff --git a/constantine/pairing/cyclotomic_fp12.nim b/constantine/pairing/cyclotomic_fp12.nim
index d9fed0f..6be21ce 100644
--- a/constantine/pairing/cyclotomic_fp12.nim
+++ b/constantine/pairing/cyclotomic_fp12.nim
@@ -13,6 +13,9 @@ import
   ../towers,
   ../isogeny/frobenius
 
+# No exceptions allowed
+{.push raises: [].}
+
 # ############################################################
 #
 #               Gϕ₁₂, Cyclotomic subgroup of Fp12
diff --git a/constantine/pairing/lines_common.nim b/constantine/pairing/lines_common.nim
index fcac103..1ad4fdc 100644
--- a/constantine/pairing/lines_common.nim
+++ b/constantine/pairing/lines_common.nim
@@ -9,12 +9,14 @@
 import
   std/typetraits,
   ../primitives,
-  ../config/curves,
   ../arithmetic,
   ../towers,
   ../elliptic/ec_shortweierstrass_affine,
   ../io/io_towers
 
+# No exceptions allowed
+{.push raises: [].}
+
 type
   Line*[F] = object
     ## Packed line representation over a E'(Fp^k/d)
diff --git a/constantine/pairing/lines_projective.nim b/constantine/pairing/lines_projective.nim
index 1ba2646..ee8340e 100644
--- a/constantine/pairing/lines_projective.nim
+++ b/constantine/pairing/lines_projective.nim
@@ -19,6 +19,9 @@ import
 
 export lines_common
 
+# No exceptions allowed
+{.push raises: [].}
+
 # ############################################################
 #
 #            Miller Loop's Line Evaluation
diff --git a/constantine/pairing/miller_loops.nim b/constantine/pairing/miller_loops.nim
index 821a26c..12f6372 100644
--- a/constantine/pairing/miller_loops.nim
+++ b/constantine/pairing/miller_loops.nim
@@ -15,6 +15,9 @@ import
   ./lines_projective,
   ./mul_fp6_by_lines, ./mul_fp12_by_lines
 
+# No exceptions allowed
+{.push raises: [].}
+
 # ############################################################
 #                                                            #
 #                 Basic Miller Loop                          #
@@ -111,118 +114,6 @@ func millerCorrectionBN*[FT, F1, F2](
 # we hardcode unrolled addition chains.
 # This should also contribute to performance.
 #
-# Multi-pairing discussion:
-# Aranha & Scott proposes 2 different approaches for multi-pairing.
-#
-# -----
-# Scott
-#
-# Algorithm 2: Calculate and store line functions for BLS12 curve
-# Input: Q ∈ G2, P ∈ G1 , curve parameter u
-# Output: An array g of blog2(u)c line functions ∈ Fp12
-#   1 T ← Q
-#   2 for i ← ceil(log2(u)) − 1 to 0 do
-#   3   g[i] ← lT,T(P), T ← 2T
-#   4   if ui = 1 then
-#   5     g[i] ← g[i].lT,Q(P), T ← T + Q
-#   6 return g
-#
-# And to accumulate lines from a new (P, Q) tuple of points
-#
-# Algorithm 4: Accumulate another set of line functions into g
-# Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
-# Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
-#   1 T ← Qj
-#   2 for i ← blog2 (u)c − 1 to 0 do
-#   3   t ← lT,T (Pj), T ← 2T
-#   4   if ui = 1 then
-#   5     t ← t.lT,Qj (Pj), T ← T + Qj
-#   6   g[i] ← g[i].t
-#   7 return g
-#
-# ------
-# Aranha
-#
-# Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
-# (we extract the Miller Loop part only)
-# Input : P1 , P2 , . . . Pn ∈ G1 ,
-#         Q1 , Q2, . . . Qn ∈ G2
-# Output: (we focus on the Miller Loop)
-#
-# Write l in binary form, l = sum(0 ..< m-1)
-# f ← 1, l ← abs(AteParam)
-# for j ← 1 to n do
-#   Tj ← Qj
-# end
-#
-# for i = m-2 down to 0 do
-#   f ← f²
-#   for j ← 1 to n do
-#     f ← f gTj,Tj(Pj), Tj ← [2]Tj
-#     if li = 1 then
-#       f ← f gTj,Qj(Pj), Tj ← Tj + Qj
-#     end
-#   end
-# end
-#
-# -----
-# Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
-# and I operations to do in our Miller loop:
-# - I = HammingWeight(AteParam) + Bitwidth(AteParam)
-#   - HammingWeight(AteParam) corresponds to line additions
-#   - Bitwidth(AteParam) corresponds to line doublings
-#
-# Scott approach is to have:
-# - I Fp12 accumulators `g`
-# - 1 G2 accumulator `T`
-# and then accumulating each (Pj, Qj) into their corresponding `g` accumulator.
-#
-# Aranha approach is to have:
-# - 1 Fp12 accumulator `f`
-# - N G2 accumulators  `T`
-# and accumulate N points per I.
-#
-# Scott approach is fully "online"/"streaming",
-# while Aranha's saves space.
-# For BLS12_381,
-# I = 68 hence we would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes)
-# G2 has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
-# and we choose N (which can be 1 for single pairing or reverting to Scott approach).
-#
-# In actual use, "streaming pairings" are not used, pairings to compute are receive
-# by batch, for example for blockchain you receive a batch of N blocks to verify from one peer.
-# Furthermore, 39kB would be over L1 cache size and incurs cache misses.
-# Additionally Aranha approach would make it easier to batch inversions
-# using Montgomery's simultaneous inversion technique.
-# Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
-# for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
-#
-# Regarding optimizations, as the Fp12 accumulator is dense
-# and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
-# - squaring                 is 11m
-# - Dense-sparse             is 13m
-# - sparse-sparse            is 6m
-# - Dense-(somewhat sparse)  is 17m
-# Hence when accumulating lines from multiple points:
-# - 2x Dense-sparse is 26m
-# - sparse-sparse then Dense-(somewhat sparse) is 23m
-# a 11.5% speedup
-#
-# We can use Aranha approach but process lines function 2-by-2 merging them
-# before merging them to the dense Fp12 accumulator.
-#
-# In benchmarks though, the speedup doesn't work for BN curves but does for BLS curves.
-#
-# For single pairings
-# Unfortunately, it's BN254_Snarks which requires a lot of addition in the Miller loop.
-# BLS12-377 and BLS12-381 require 6 and 7 line addition in their Miller loop,
-# the saving is about 150 cycles per addition for about 1000 cycles saved.
-# A full pairing is ~2M cycles so this is only 0.5% for significantly
-# more maintenance and bounds analysis complexity.
-#
-# For multipairing it is interesting since for a BLS signature verification (double pairing)
-# we would save 1000 cycles per Ate iteration so ~70000 cycles, while a Miller loop is ~800000 cycles.
-
 # Miller Loop - single pairing
 # ----------------------------------------------------------------------------
 
@@ -263,7 +154,7 @@ func miller_init_double_then_add*[FT, F1, F2](
   # - The first line is squared (sparse * sparse)
   # - The second is (somewhat-sparse * sparse)
   when numDoublings >= 2:
-    f.mul_sparse_sparse(line, line)
+    f.prod_sparse_sparse(line, line)
     line.line_double(T, P)
     f.mul(line)
     for _ in 2 ..< numDoublings:
@@ -278,13 +169,10 @@ func miller_init_double_then_add*[FT, F1, F2](
   # we special case the addition as
   # - The first line and second are sparse (sparse * sparse)
   when numDoublings == 1:
-    # TODO: sparse * sparse
     # f *= line <=> f = line for the first iteration
-    # With Fp2 -> Fp4 -> Fp12 towering and a M-Twist
-    # The line corresponds to a sparse xy000z Fp12
     var line2 {.noInit.}: Line[F2]
     line2.line_add(T, Q, P)
-    f.mul_sparse_sparse(line, line2)
+    f.prod_sparse_sparse(line, line2)
   else:
     line.line_add(T, Q, P)
     f.mul(line)
@@ -324,3 +212,137 @@ func miller_accum_double_then_add*[FT, F1, F2](
 
 # Miller Loop - multi-pairing
 # ----------------------------------------------------------------------------
+#
+# Multi-pairing discussion:
+# Aranha & Scott propose 2 different approaches for multi-pairing.
+# See `multi_pairing.md`
+# We implement Aranha approach
+
+func double_jToN[N: static int, FT, F1, F2](
+       f: var FT,
+       j: static int,
+       line0, line1: var Line[F2],
+       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
+       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]]) =
+  ## Doubling steps for pairings j ..< N, the lines are accumulated into f
+
+  {.push checks: off.} # No OverflowError or IndexError allowed
+  # Sparse merge 2 by 2, starting from j
+  for i in countup(j, N-1, 2):
+    if i+1 >= N:
+      break
+
+    line0.line_double(Ts[i], Ps[i])
+    line1.line_double(Ts[i+1], Ps[i+1])
+    f.mul_3way_sparse_sparse(line0, line1)
+
+  when (N and 1) == 1: # N is odd (N == 1 included): the last pairing has no partner
+    line0.line_double(Ts[N-1], Ps[N-1])
+    f.mul(line0)
+
+  {.pop.}
+
+func add_jToN[N: static int, FT, F1, F2](
+       f: var FT,
+       j: static int,
+       line0, line1: var Line[F2],
+       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
+       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
+       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]])=
+  ## Addition steps for pairings j ..< N, the lines are accumulated into f
+
+  {.push checks: off.} # No OverflowError or IndexError allowed
+  # Sparse merge 2 by 2, starting from j
+  for i in countup(j, N-1, 2):
+    if i+1 >= N:
+      break
+
+    line0.line_add(Ts[i], Qs[i], Ps[i])
+    line1.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
+    f.mul_3way_sparse_sparse(line0, line1)
+
+  when (N and 1) == 1: # N is odd (N == 1 included): the last pairing has no partner
+    line0.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
+    f.mul(line0)
+
+  {.pop.}
+
+func miller_init_double_then_add*[N: static int, FT, F1, F2](
+       f: var FT,
+       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
+       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
+       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]],
+       numDoublings: static int
+     ) =
+  ## Start a Miller Loop for N pairings sharing the accumulator f
+  ## This means
+  ## - `numDoublings` doublings
+  ## - 1 add
+  ##
+  ## f is overwritten
+  ## Ts are initialized from Qs, then updated by each doubling/addition
+  static:
+    doAssert f.c0 is Fp4
+    doAssert FT.C == F1.C
+    doAssert FT.C == F2.C
+
+  {.push checks: off.} # No OverflowError or IndexError allowed
+  var line0 {.noInit.}, line1 {.noInit.}: Line[F2]
+
+  # First step: T <- Q, f = 1 (mod p¹²), f *= line
+  # ----------------------------------------------
+  for i in 0 ..< N:
+    Ts[i].projectiveFromAffine(Qs[i])
+
+  line0.line_double(Ts[0], Ps[0])
+  when N >= 2:
+    line1.line_double(Ts[1], Ps[1])
+    f.prod_sparse_sparse(line0, line1)
+    f.double_jToN(j=2, line0, line1, Ts, Ps)
+
+  # Doubling steps: 0b10...00
+  # ------------------------------------------------
+  when numDoublings > 1: # Already did the MSB doubling
+    when N == 1:         # f = line0
+      f.prod_sparse_sparse(line0, line0) # f.square()
+      line0.line_double(Ts[0], Ps[0]) # N == 1: index 0 is the only valid one
+      f.mul(line0)
+      for _ in 2 ..< numDoublings:
+        f.square()
+        f.double_jtoN(j=0, line0, line1, Ts, Ps)
+    else:
+      for _ in 1 ..< numDoublings: # the MSB doubling is already merged in f
+        f.square()
+        f.double_jtoN(j=0, line0, line1, Ts, Ps)
+
+  # Addition step: 0b10...01
+  # ------------------------------------------------
+
+  when numDoublings == 1 and N == 1: # f = line0
+    line1.line_add(Ts[0], Qs[0], Ps[0])
+    f.prod_sparse_sparse(line0, line1)
+  else:
+    f.add_jToN(j=0,line0, line1, Ts, Qs, Ps)
+
+  {.pop.} # No OverflowError or IndexError allowed
+
+func miller_accum_double_then_add*[N: static int, FT, F1, F2](
+       f: var FT,
+       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
+       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
+       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]],
+       numDoublings: int,
+       add = true
+     ) =
+  ## Continue a Miller Loop for N pairings with
+  ## - `numDoublings` doublings (f is squared before each one)
+  ## - 1 add, skipped when `add` is false
+  ##
+  ## f and Ts are updated
+  var line0{.noInit.}, line1{.noinit.}: Line[F2]
+  for _ in 0 ..< numDoublings:
+    f.square()
+    f.double_jtoN(j=0, line0, line1, Ts, Ps)
+
+  if add:
+    f.add_jToN(j=0, line0, line1, Ts, Qs, Ps)
diff --git a/constantine/pairing/mul_fp12_by_lines.nim b/constantine/pairing/mul_fp12_by_lines.nim
index b8870ee..666e9ac 100644
--- a/constantine/pairing/mul_fp12_by_lines.nim
+++ b/constantine/pairing/mul_fp12_by_lines.nim
@@ -13,6 +13,8 @@ import
   ../towers,
   ./lines_projective
 
+# No exceptions allowed
+{.push raises: [].}
 
 # ############################################################
 #
@@ -203,7 +205,7 @@ func mul_sparse_by_line_xyz000*[C: static Curve](
     f2x.sum2xMod(f2x, V1)
     f.c2.redc2x(f2x)
 
-func mul_xyz000_xyz000_into_abcdefghij00*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
+func prod_xyz000_xyz000_into_abcdefghij00*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
   ## Multiply 2 lines together
   ## The result is sparse in f.c1.c1
   # In the following equations (taken from cubic extension implementation)
@@ -407,7 +409,7 @@ func mul_sparse_by_line_xy000z*[C: static Curve](
     f2x.sum2xMod(f2x, V2)
     f.c1.redc2x(f2x)
 
-func mul_xy000z_xy000z_into_abcd00efghij*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
+func prod_xy000z_xy000z_into_abcd00efghij*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
   ## Multiply 2 lines together
   ## The result is sparse in f.c1.c0
   # In the following equations (taken from cubic extension implementation)
@@ -529,6 +531,7 @@ func mul_sparse_by_abcd00efghij*[C: static Curve](
 # ------------------------------------------------------------
 
 func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =
+  ## Multiply an element of Fp12 by a sparse line function (xyz000 or xy000z)
   when C.getSexticTwist() == D_Twist:
     f.mul_sparse_by_line_xyz000(line)
   elif C.getSexticTwist() == M_Twist:
@@ -536,10 +539,26 @@ func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =
   else:
     {.error: "A line function assumes that the curve has a twist".}
 
-func mul_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
+func prod_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
+  ## Multiply 2 line functions (xyz000 or xy000z)
+  ## and store the result in f
+  ## f is overwritten, its previous value is not read
   when C.getSexticTwist() == D_Twist:
-    f.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
+    f.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
   elif C.getSexticTwist() == M_Twist:
-    f.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
+    f.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
+  else:
+    {.error: "A line function assumes that the curve has a twist".}
+
+func mul_3way_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
+  ## Compute f <- f*line0*line1 with sparse lines (xyz000 or xy000z)
+  ## The 2 lines are multiplied together first, then f is multiplied by their product
+  var t{.noInit.}: typeof(f)
+  when C.getSexticTwist() == D_Twist:
+    t.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
+    f.mul_sparse_by_abcdefghij00(t)
+  elif C.getSexticTwist() == M_Twist:
+    t.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
+    f.mul_sparse_by_abcd00efghij(t)
   else:
     {.error: "A line function assumes that the curve has a twist".}
diff --git a/constantine/pairing/mul_fp6_by_lines.nim b/constantine/pairing/mul_fp6_by_lines.nim
index 2d62707..6474a6b 100644
--- a/constantine/pairing/mul_fp6_by_lines.nim
+++ b/constantine/pairing/mul_fp6_by_lines.nim
@@ -13,6 +13,8 @@ import
   ../towers,
   ./lines_projective
 
+# No exceptions allowed
+{.push raises: [].}
 
 # ############################################################
 #
diff --git a/constantine/pairing/multi_pairing.md b/constantine/pairing/multi_pairing.md
new file mode 100644
index 0000000..e0a726c
--- /dev/null
+++ b/constantine/pairing/multi_pairing.md
@@ -0,0 +1,127 @@
+# Multi-pairing discussion:
+
+Aranha & Scott propose 2 different approaches for multi-pairing.
+
+- Software Implementation, Algorithm 11.2 & 11.3\
+  Aranha, Dominguez Perez, A. Mrabet, Schwabe,\
+  Guide to Pairing-Based Cryptography, 2015
+- Pairing Implementation Revisited\
+  Mike Scott, 2019\
+  https://eprint.iacr.org/2019/077.pdf
+
+## Scott approach
+
+```
+Algorithm 2: Calculate and store line functions for BLS12 curve
+Input: Q ∈ G2, P ∈ G1 , curve parameter u
+Output: An array g of ceil(log2(u)) line functions ∈ Fp12
+  1 T ← Q
+  2 for i ← ceil(log2(u)) − 1 to 0 do
+  3   g[i] ← lT,T(P), T ← 2T
+  4   if ui = 1 then
+  5     g[i] ← g[i].lT,Q(P), T ← T + Q
+  6 return g
+```
+
+And to accumulate lines from a new (P, Q) tuple of points
+
+```
+Algorithm 4: Accumulate another set of line functions into g
+Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
+Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
+  1 T ← Qj
+  2 for i ← ceil(log2(u)) − 1 to 0 do
+  3   t ← lT,T (Pj), T ← 2T
+  4   if ui = 1 then
+  5     t ← t.lT,Qj (Pj), T ← T + Qj
+  6   g[i] ← g[i].t
+  7 return g
+```
+
+## Aranha approach
+
+```
+Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
+(we extract the Miller Loop part only)
+Input : P1 , P2 , . . . Pn ∈ G1 ,
+        Q1 , Q2, . . . Qn ∈ G2
+Output: (we focus on the Miller Loop)
+
+Write l in binary form, l = sum(lᵢ.2ⁱ for i in 0 ..< m)
+f ← 1, l ← abs(AteParam)
+for j ← 1 to n do
+  Tj ← Qj
+end
+
+for i = m-2 down to 0 do
+  f ← f²
+  for j ← 1 to n do
+    f ← f.gTj,Tj(Pj), Tj ← [2]Tj
+    if li = 1 then
+      f ← f.gTj,Qj(Pj), Tj ← Tj + Qj
+    end
+  end
+end
+```
+
+## Analysis
+
+Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
+and M operations to do in our Miller loop:
+- M = HammingWeight(AteParam) + Bitwidth(AteParam)
+  - HammingWeight(AteParam) corresponds to line additions
+  - Bitwidth(AteParam) corresponds to line doublings
+
+Scott approach is to have:
+- M Fp12 line accumulators `g`
+- 1 G2 accumulator `T`
+and then accumulating each (Pj, Qj) lines into their corresponding `g` accumulator.
+Then those precomputed lines are merged into the final GT result.
+
+Aranha approach is to have:
+- 1 Fp12 accumulator `f`
+- N G2 accumulators  `T`
+and then pairings of each tuple are directly merged on GT.
+
+Scott approach is fully "online"/"streaming",
+while Aranha's saves space.
+For BLS12_381,
+M = 68 hence Scott approach would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes),
+while for Aranha approach each G2 accumulator has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
+and we can choose N to be anything (it can be 1 for single pairing or reverting to Scott approach).
+
+In practice, "streaming pairings" are not used: the pairings to compute are received
+by batch, for example for blockchain you receive a batch of N blocks to verify from one peer.
+Furthermore, 39kB would be over L1 cache size and incurs cache misses.
+Additionally Aranha approach would make it easier to batch inversions
+using Montgomery's simultaneous inversion technique.
+Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
+for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
+
+## Further optimizations
+
+Regarding optimizations, as the Fp12 accumulator is dense
+and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
+- squaring                 is 11m
+- Dense-sparse             is 13m
+- sparse-sparse            is 6m
+- Dense-(somewhat sparse)  is 17m
+Hence when accumulating lines from multiple points:
+- 2x Dense-sparse is 26m
+- sparse-sparse then Dense-(somewhat sparse) is 23m
+an 11.5% speedup
+
+We can use Aranha approach but process line functions 2-by-2, merging each pair
+before merging it into the dense Fp12 accumulator.
+
+In benchmarks though, the speedup doesn't work for BN curves but does for BLS curves.
+
+For single pairings:
+unfortunately, it's BN254_Snarks which requires a lot of additions in the Miller loop.
+BLS12-377 and BLS12-381 require 6 and 7 line additions in their Miller loop,
+the saving is about 150 cycles per addition for about 1000 cycles saved.
+A full pairing is ~2M cycles so this is only 0.5% for significantly
+more maintenance and bounds analysis complexity.
+
+For multipairing it is interesting since for a BLS signature verification (double pairing)
+we would save 1000 cycles per Ate iteration so ~70000 cycles, while a Miller loop is ~800000 cycles.
diff --git a/constantine/pairing/pairing_bls12.nim b/constantine/pairing/pairing_bls12.nim
index ec183db..f08704a 100644
--- a/constantine/pairing/pairing_bls12.nim
+++ b/constantine/pairing/pairing_bls12.nim
@@ -21,6 +21,9 @@ import
 
 export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11225
 
+# No exceptions allowed
+{.push raises: [].}
+
 # ############################################################
 #
 #                 Optimal ATE pairing for
@@ -154,3 +157,16 @@ func pairing_bls12*[C](
   gt.millerLoopAddchain(Q, P)
   gt.finalExpEasy()
   gt.finalExpHard_BLS12()
+
+func pairing_bls12*[N: static int, C](
+       gt: var Fp12[C],
+       Ps: array[N, ECP_ShortW_Aff[Fp[C], NotOnTwist]],
+       Qs: array[N, ECP_ShortW_Aff[Fp2[C], OnTwist]]) {.meter.} =
+  ## Compute the optimal Ate Pairing for BLS12 curves
+  ## Input: an array of N points Ps ∈ G1 and an array of N points Qs ∈ G2
+  ## Output:
+  ##   The product of pairings
+  ##   e(P₀, Q₀) * e(P₁, Q₁) * e(P₂, Q₂) * ... * e(Pₙ₋₁, Qₙ₋₁) ∈ Gt
+  gt.millerLoopAddchain(Qs, Ps)
+  gt.finalExpEasy()
+  gt.finalExpHard_BLS12()
diff --git a/constantine/pairing/pairing_bn.nim b/constantine/pairing/pairing_bn.nim
index ad9b891..233fd41 100644
--- a/constantine/pairing/pairing_bn.nim
+++ b/constantine/pairing/pairing_bn.nim
@@ -22,6 +22,9 @@ import
 
 export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11225
 
+# No exceptions allowed
+{.push raises: [].}
+
 # ############################################################
 #
 #                 Optimal ATE pairing for
diff --git a/research/multi_pairing/pairing_bls12_381.nim b/research/multi_pairing/pairing_bls12_381.nim
deleted file mode 100644
index 96476cf..0000000
--- a/research/multi_pairing/pairing_bls12_381.nim
+++ /dev/null
@@ -1,338 +0,0 @@
-# Constantine
-# Copyright (c) 2018-2019    Status Research & Development GmbH
-# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
-# Licensed and distributed under either of
-#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
-#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
-# at your option. This file may not be copied, modified, or distributed except according to those terms.
-
-import
-  ../config/[common, curves, type_ff],
-  ../towers,
-  ../elliptic/[
-    ec_shortweierstrass_affine,
-    ec_shortweierstrass_projective
-  ],
-  ../curves/zoo_pairings,
-  ./lines_projective, ./mul_fp12_by_lines,
-  ./miller_loops
-
-# ############################################################
-#
-#                 Optimal ATE pairing for
-#                      BLS12-381
-#
-# ############################################################
-#
-# - Software Implementation, Algorithm 11.2 & 11.3
-#   Aranha, Dominguez Perez, A. Mrabet, Schwabe,
-#   Guide to Pairing-Based Cryptography, 2015
-#
-# - Physical Attacks,
-#   N. El Mrabet, Goubin, Guilley, Fournier, Jauvart, Moreau, Rauzy, Rondepierre,
-#   Guide to Pairing-Based Cryptography, 2015
-#
-# - Pairing Implementation Revisited
-#   Mike Scott, 2019
-#   https://eprint.iacr.org/2019/077.pdf
-#
-# Fault attacks:
-# To limite exposure to some fault attacks (flipping bits with a laser on embedded):
-# - changing the number of Miller loop iterations
-# - flipping the bits in the Miller loop
-# we hardcode unrolled addition chains.
-# This should also contribute to performance.
-#
-# Multi-pairing discussion:
-# Aranha & Scott proposes 2 different approaches for multi-pairing.
-#
-# -----
-# Scott
-#
-# Algorithm 2: Calculate and store line functions for BLS12 curve
-# Input: Q ∈ G2, P ∈ G1 , curve parameter u
-# Output: An array g of blog2(u)c line functions ∈ Fp12
-#   1 T ← Q
-#   2 for i ← ceil(log2(u)) − 1 to 0 do
-#   3   g[i] ← lT,T(P), T ← 2T
-#   4   if ui = 1 then
-#   5     g[i] ← g[i].lT,Q(P), T ← T + Q
-#   6 return g
-#
-# And to accumulate lines from a new (P, Q) tuple of points
-#
-# Algorithm 4: Accumulate another set of line functions into g
-# Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
-# Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
-#   1 T ← Qj
-#   2 for i ← blog2 (u)c − 1 to 0 do
-#   3   t ← lT,T (Pj), T ← 2T
-#   4   if ui = 1 then
-#   5     t ← t.lT,Qj (Pj), T ← T + Qj
-#   6   g[i] ← g[i].t
-#   7 return g
-#
-# ------
-# Aranha
-#
-# Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
-# (we extract the Miller Loop part only)
-# Input : P1 , P2 , . . . Pn ∈ G1 ,
-#         Q1 , Q2, . . . Qn ∈ G2
-# Output: (we focus on the Miller Loop)
-#
-# Write l in binary form, l = sum(0 ..< m-1)
-# f ← 1, l ← abs(AteParam)
-# for j ← 1 to n do
-#   Tj ← Qj
-# end
-#
-# for i = m-2 down to 0 do
-#   f ← f²
-#   for j ← 1 to n do
-#     f ← f gTj,Tj(Pj), Tj ← [2]Tj
-#     if li = 1 then
-#       f ← f gTj,Qj(Pj), Tj ← Tj + Qj
-#     end
-#   end
-# end
-#
-# -----
-# Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
-# and I operations to do in our Miller loop:
-# - I = HammingWeight(AteParam) + Bitwidth(AteParam)
-#   - HammingWeight(AteParam) corresponds to line additions
-#   - Bitwidth(AteParam) corresponds to line doublings
-#
-# Scott approach is to have:
-# - I Fp12 accumulators `g`
-# - 1 G2 accumulator `T`
-# and then accumulating each (Pj, Qj) into their corresponding `g` accumulator.
-#
-# Aranha approach is to have:
-# - 1 Fp12 accumulator `f`
-# - N G2 accumulators  `T`
-# and accumulate N points per I.
-#
-# Scott approach is fully "online"/"streaming",
-# while Aranha's saves space.
-# For BLS12_381,
-# I = 68 hence we would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes)
-# G2 has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
-# and we choose N (which can be 1 for single pairing or reverting to Scott approach).
-#
-# In actual use, "streaming pairings" are not used, pairings to compute are receive
-# by batch, for example for blockchain you receive a batch of N blocks to verify from one peer.
-# Furthermore, 39kB would be over L1 cache size and incurs cache misses.
-# Additionally Aranha approach would make it easier to batch inversions
-# using Montgomery's simultaneous inversion technique.
-# Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
-# for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
-#
-# Regarding optimizations, as the Fp12 accumulator is dense
-# and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
-# - Dense-sparse             is 13m
-# - sparse-sparse            is 6m
-# - Dense-(somewhat sparse)  is 17m
-# Hence when accumulating lines from multiple points:
-# - 2x Dense-sparse is 26m
-# - sparse-sparse then Dense-(somewhat sparse) is 23m
-# a 11.5% speedup
-#
-# We can use Aranha approach but process lines function 2-by-2 merging them
-# before merging them to the dense Fp12 accumulator
-
-# Miller Loop
-# -------------------------------------------------------------------------------------------------------
-
-{.push raises: [].}
-
-import
-  strutils,
-  ../io/io_towers
-
-func miller_first_iter[N: static int](
-       f: var Fp12[BLS12_381],
-       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
-       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
-       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
-     ) =
-  ## Start a Miller Loop
-  ## This means
-  ## - 1 doubling
-  ## - 1 add
-  ##
-  ## f is overwritten
-  ## Ts are overwritten by Qs
-  static:
-    doAssert N >= 1
-    doAssert f.c0 is Fp4
-
-  {.push checks: off.} # No OverflowError or IndexError allowed
-  var line {.noInit.}: Line[Fp2[BLS12_381]]
-
-  # First step: T <- Q, f = 1 (mod p¹²), f *= line
-  # ----------------------------------------------
-  for i in 0 ..< N:
-    Ts[i].projectiveFromAffine(Qs[i])
-
-  line.line_double(Ts[0], Ps[0])
-
-  # f *= line <=> f = line for the first iteration
-  # With Fp2 -> Fp4 -> Fp12 towering and a M-Twist
-  # The line corresponds to a sparse xy000z Fp12
-  f.c0.c0 = line.x
-  f.c0.c1 = line.y
-  f.c1.c0.setZero()
-  f.c1.c1.setZero()
-  f.c2.c0.setZero()
-  f.c2.c1 = line.z
-
-  when N >= 2:
-    line.line_double(Ts[1], Ps[1])
-    f.mul_sparse_by_line_xy000z(line)  # TODO: sparse-sparse mul
-
-    # Sparse merge 2 by 2, starting from 2
-    for i in countup(2, N-1, 2):
-      # var f2 {.noInit.}: Fp12[BLS12_381] # TODO: sparse-sparse mul
-      var line2 {.noInit.}: Line[Fp2[BLS12_381]]
-
-      line.line_double(Ts[i], Ps[i])
-      line2.line_double(Ts[i+1], Ps[i+1])
-
-      # f2.mul_sparse_sparse(line, line2)
-      # f.mul_somewhat_sparse(f2)
-      f.mul_sparse_by_line_xy000z(line)
-      f.mul_sparse_by_line_xy000z(line2)
-
-    when N and 1 == 1: # N >= 2 and N is odd, there is a leftover
-      line.line_double(Ts[N-1], Ps[N-1])
-      f.mul_sparse_by_line_xy000z(line)
-
-  # 2nd step: Line addition as MSB is always 1
-  # ----------------------------------------------
-  when N >= 2: # f is dense, there are already many lines accumulated
-    # Sparse merge 2 by 2, starting from 0
-    for i in countup(0, N-1, 2):
-      # var f2 {.noInit.}: Fp12[BLS12_381] # TODO: sparse-sparse mul
-      var line2 {.noInit.}: Line[Fp2[BLS12_381]]
-
-      line.line_add(Ts[i], Qs[i], Ps[i])
-      line2.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
-
-      # f2.mul_sparse_sparse(line, line2)
-      # f.mul_somewhat_sparse(f2)
-      f.mul_sparse_by_line_xy000z(line)
-      f.mul_sparse_by_line_xy000z(line2)
-
-    when N and 1 == 1: # N >= 2 and N is odd, there is a leftover
-      line.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
-      f.mul_sparse_by_line_xy000z(line)
-
-  else: # N = 1, f is sparse
-    line.line_add(Ts[0], Qs[0], Ps[0])
-    # f.mul_sparse_sparse(line)
-    f.mul_sparse_by_line_xy000z(line)
-
-  {.pop.} # No OverflowError or IndexError allowed
-
-func miller_accum_doublings[N: static int](
-       f: var Fp12[BLS12_381],
-       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
-       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]],
-       numDoublings: int
-     ) =
-  ## Accumulate `numDoublings` Miller loop doubling steps into `f`
-  static: doAssert N >= 1
-  {.push checks: off.} # No OverflowError or IndexError allowed
-
-  var line {.noInit.}: Line[Fp2[BLS12_381]]
-
-  for _ in 0 ..< numDoublings:
-    f.square()
-    when N >= 2:
-      for i in countup(0, N-1, 2):
-        # var f2 {.noInit.}: Fp12[BLS12_381] # TODO: sparse-sparse mul
-        var line2 {.noInit.}: Line[Fp2[BLS12_381]]
-
-        line.line_double(Ts[i], Ps[i])
-        line2.line_double(Ts[i+1], Ps[i+1])
-
-        # f2.mul_sparse_sparse(line, line2)
-        # f.mul_somewhat_sparse(f2)
-        f.mul_sparse_by_line_xy000z(line)
-        f.mul_sparse_by_line_xy000z(line2)
-
-      when N and 1 == 1: # N >= 2 and N is odd, there is a leftover
-        line.line_double(Ts[N-1], Ps[N-1])
-        f.mul_sparse_by_line_xy000z(line)
-    else:
-      line.line_double(Ts[0], Ps[0])
-      f.mul_sparse_by_line_xy000z(line)
-
-  {.pop.} # No OverflowError or IndexError allowed
-
-func miller_accum_addition[N: static int](
-       f: var Fp12[BLS12_381],
-       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
-       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
-       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
-     ) =
-  ## Accumulate a Miller loop addition step into `f`
-  static: doAssert N >= 1
-  {.push checks: off.} # No OverflowError or IndexError allowed
-
-  var line {.noInit.}: Line[Fp2[BLS12_381]]
-
-  when N >= 2:
-    # Sparse merge 2 by 2, starting from 0
-    for i in countup(0, N-1, 2):
-      # var f2 {.noInit.}: Fp12[BLS12_381] # TODO: sparse-sparse mul
-      var line2 {.noInit.}: Line[Fp2[BLS12_381]]
-
-      line.line_add(Ts[i], Qs[i], Ps[i])
-      line2.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
-
-      # f2.mul_sparse_sparse(line, line2)
-      # f.mul_somewhat_sparse(f2)
-      f.mul_sparse_by_line_xy000z(line)
-      f.mul_sparse_by_line_xy000z(line2)
-
-    when N and 1 == 1: # N >= 2 and N is odd, there is a leftover
-      line.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
-      f.mul_sparse_by_line_xy000z(line)
-
-  else:
-    line.line_add(Ts[0], Qs[0], Ps[0])
-    f.mul_sparse_by_line_xy000z(line)
-
-  {.pop.} # No OverflowError or IndexError allowed
-
-func millerLoop_opt_BLS12_381*[N: static int](
-       f: var Fp12[BLS12_381],
-       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
-       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
-     ) {.meter.} =
-  ## Generic Miller Loop for BLS12 curve
-  ## Computes f{u,Q}(P) with u the BLS curve parameter
-
-  var Ts {.noInit.}: array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]]
-
-  # Ate param addition chain
-  # Hex: 0xd201000000010000
-  # Bin: 0b1101001000000001000000000000000000000000000000010000000000000000
-
-  var iter = 1'u64
-
-  f.miller_first_iter(Ts, Qs, Ps)       # 0b11
-  f.miller_accum_doublings(Ts, Ps, 2)   # 0b1100
-  f.miller_accum_addition(Ts, Qs, Ps)   # 0b1101
-  f.miller_accum_doublings(Ts, Ps, 3)   # 0b1101000
-  f.miller_accum_addition(Ts, Qs, Ps)   # 0b1101001
-  f.miller_accum_doublings(Ts, Ps, 9)   # 0b1101001000000000
-  f.miller_accum_addition(Ts, Qs, Ps)   # 0b1101001000000001
-  f.miller_accum_doublings(Ts, Ps, 32)  # 0b110100100000000100000000000000000000000000000000
-  f.miller_accum_addition(Ts, Qs, Ps)   # 0b110100100000000100000000000000000000000000000001
-  f.miller_accum_doublings(Ts, Ps, 16)  # 0b1101001000000001000000000000000000000000000000010000000000000000
-
-  # TODO: what is the threshold for Karabina's compressed squarings?
diff --git a/tests/t_pairing_bls12_381_multi.nim b/tests/t_pairing_bls12_381_multi.nim
new file mode 100644
index 0000000..a384318
--- /dev/null
+++ b/tests/t_pairing_bls12_381_multi.nim
@@ -0,0 +1,65 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import
+  # Standard library
+  std/[os, times, strformat],
+  # Internals
+  ../constantine/config/common,
+  ../constantine/[
+    arithmetic, primitives,
+    towers, ec_shortweierstrass
+  ],
+  ../constantine/io/io_towers,
+  ../constantine/config/curves,
+  ../constantine/pairing/pairing_bls12,
+  # Test utilities
+  ../helpers/prng_unsafe
+
+# Testing multipairing
+# ----------------------------------------------
+
+var rng: RngState
+let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32
+seed(rng, timeseed)
+echo "\n------------------------------------------------------\n"
+echo "test_pairing_bls12_381_multi xoshiro512** seed: ", timeseed
+
+proc testMultiPairing(rng: var RngState, N: static int) =
+  var
+    Ps {.noInit.}: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
+    Qs {.noInit.}: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]]
+    # Individual results of pairing (Ps[i], Qs[i]), used by the reference computation
+    GTs {.noInit.}: array[N, Fp12[BLS12_381]]
+  # Draw N random (P, Q) pairs
+  for i in 0 ..< N:
+    Ps[i] = rng.random_unsafe(typeof(Ps[0]))
+    Qs[i] = rng.random_unsafe(typeof(Qs[0]))
+
+  # Reference: N single pairings, each computed with its own pairing_bls12 call
+  let clockSimpleStart = cpuTime()
+  var GTsimple {.noInit.}: Fp12[BLS12_381]
+  for i in 0 ..< N:
+    GTs[i].pairing_bls12(Ps[i], Qs[i])
+  # Multiply the N pairing results together
+  GTsimple = GTs[0]
+  for i in 1 ..< N:
+    GTsimple *= GTs[i]
+  let clockSimpleStop = cpuTime()
+
+  # Multipairing: all N pairs handled by a single pairing_bls12 call
+  let clockMultiStart = cpuTime()
+  var GTmulti {.noInit.}: Fp12[BLS12_381]
+  GTmulti.pairing_bls12(Ps, Qs)
+  let clockMultiStop = cpuTime()
+  # Timings are printed for information; only the equality below is asserted
+  echo &"N={N}, Simple: {clockSimpleStop - clockSimpleStart:>4.4f}s, Multi: {clockMultiStop - clockMultiStart:>4.4f}s"
+  doAssert bool GTsimple == GTmulti
+
+staticFor i, 1, 17:
+  rng.testMultiPairing(N = i)
diff --git a/tests/t_pairing_mul_fp12_by_lines.nim b/tests/t_pairing_mul_fp12_by_lines.nim
index 1be5464..d057142 100644
--- a/tests/t_pairing_mul_fp12_by_lines.nim
+++ b/tests/t_pairing_mul_fp12_by_lines.nim
@@ -259,7 +259,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             r.prod(f0, f1)
 
             var rl: Fp12[C]
-            rl.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
+            rl.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
 
             check: bool(r == rl)
 
@@ -297,7 +297,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             r.prod(f0, f1)
 
             var rl: Fp12[C]
-            rl.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
+            rl.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
 
             check: bool(r == rl)
 
@@ -337,7 +337,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             )
 
             var rl: Fp12[C]
-            rl.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
+            rl.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
 
             var f = rng.random_elem(Fp12[C], gen)
             var f2 = f
@@ -383,7 +383,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
             )
 
             var rl: Fp12[C]
-            rl.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
+            rl.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
 
             var f = rng.random_elem(Fp12[C], gen)
             var f2 = f
diff --git a/tests/t_sig_bls_lowlevel.nim b/tests/t_sig_bls_lowlevel.nim
index be3d144..977b7f5 100644
--- a/tests/t_sig_bls_lowlevel.nim
+++ b/tests/t_sig_bls_lowlevel.nim
@@ -132,6 +132,37 @@ func verifyG2[T: byte|char](
 
   return e0 == e1
 
+func verifyG2_multi[T: byte|char](
+       pubkey: ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist],
+       message: openarray[T],
+       signature: ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]
+     ): SecretBool =
+  doAssert not pubkey.isInf.bool
+  doAssert not signature.isInf.bool
+  # Hash the message to a curve point, obtained in projective coordinates
+  var Qprj {.noInit.}: ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]
+  hashToCurve(
+    H = sha256, k = 128,
+    output = Qprj,
+    augmentation = "",
+    message = message,
+    domainSepTag = DomainSepTag
+  )
+  # Inputs of the double pairing: G1s[i] is paired with G2s[i]
+  var G2s: array[2, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]]
+  var G1s: array[2, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
+  # Pair 0: (pubkey, H(message)), with the hash converted to affine
+  G1s[0] = pubkey
+  G2s[0].affineFromprojective(Qprj)
+  # Pair 1: (-G1 generator, signature)
+  G1s[1].neg(BLS12_381_G1_generator)
+  G2s[1] = signature
+  # e = e(pubkey, H(message)) * e(-G1 generator, signature)
+  var e: Fp12[BLS12_381]
+  e.pairing_bls12(G1s, G2s)
+  # The signature is accepted iff the product of pairings is one
+  return e.isOne()
+
 proc bls_signature_test(rng: var RngState, i: int) =
   var
     seckey: Fr[BLS12_381]
@@ -143,8 +174,11 @@ proc bls_signature_test(rng: var RngState, i: int) =
   pubkey.publicKeyG1(seckey)
   signature.signG2(message, seckey)
 
-  let ok = pubkey.verifyG2(message, signature)
-  doAssert bool ok
+  let okSingle = pubkey.verifyG2(message, signature)
+  doAssert bool okSingle
+
+  let okMulti = pubkey.verifyG2_multi(message, signature)
+  doAssert bool okMulti
 
 for i in 0 ..< 500:
   rng.bls_signature_test(i)