Multipairing (#165)

* Productionize multipairings for BLS12-381

* typo

* arg order + benchmark

* Introduce mul_3way_sparse_sparse

* cleanup MultiMiller loop

* fix init sparse optimization in multimiller loop [skip ci]
This commit is contained in:
Mamy Ratsimbazafy 2021-08-16 22:22:51 +02:00 committed by GitHub
parent 979d183657
commit f5c0b6245d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 473 additions and 473 deletions

View File

@ -77,8 +77,8 @@ Supports:
- [x] Field arithmetics
- [x] Curve arithmetic
- [x] Pairing
- [ ] Multi-Pairing
- [ ] Hash-To-Curve
- [x] Multi-Pairing
- [x] Hash-To-Curve
Families:
- BN: Barreto-Naehrig

View File

@ -49,7 +49,11 @@ proc main() =
finalExpBLS12Bench(curve, Iters)
separator()
pairingBLS12Bench(curve, Iters)
pairing_multipairing_BLS12Bench(curve, 1, Iters)
separator()
staticFor j, 2, 17:
pairing_multisingle_BLS12Bench(curve, j, Iters div j)
pairing_multipairing_BLS12Bench(curve, j, Iters div j)
main()
notes()

View File

@ -105,7 +105,7 @@ proc mulLinebyLine_xyz000_Bench*(C: static Curve, iters: int) =
var f = rng.random_unsafe(Fp12[C])
bench("Mul line xyz000 by line xyz000", C, iters):
f.mul_xyz000_xyz000_into_abcdefghij00(l0, l1)
f.prod_xyz000_xyz000_into_abcdefghij00(l0, l1)
proc mulLinebyLine_xy000z_Bench*(C: static Curve, iters: int) =
var l0, l1: Line[Fp2[C]]
@ -116,7 +116,7 @@ proc mulLinebyLine_xy000z_Bench*(C: static Curve, iters: int) =
var f = rng.random_unsafe(Fp12[C])
bench("Mul line xy000z by line xy000z", C, iters):
f.mul_xy000z_xy000z_into_abcd00efghij(l0, l1)
f.prod_xy000z_xy000z_into_abcd00efghij(l0, l1)
proc mulFp12by_abcdefghij00_Bench*(C: static Curve, iters: int) =
var f = rng.random_unsafe(Fp12[C])
@ -154,7 +154,7 @@ proc mulFp12_by_2lines_v2_xyz000_Bench*(C: static Curve, iters: int) =
bench("mulFp12 by 2 lines v2", C, iters):
var f2 {.noInit.}: Fp12[C]
f2.mul_xyz000_xyz000_into_abcdefghij00(l0, l1)
f2.prod_xyz000_xyz000_into_abcdefghij00(l0, l1)
f.mul_sparse_by_abcdefghij00(f2)
proc mulFp12_by_2lines_v1_xy000z_Bench*(C: static Curve, iters: int) =
@ -179,7 +179,7 @@ proc mulFp12_by_2lines_v2_xy000z_Bench*(C: static Curve, iters: int) =
bench("mulFp12 by 2 lines v2", C, iters):
var f2 {.noInit.}: Fp12[C]
f2.mul_xy000z_xy000z_into_abcd00efghij(l0, l1)
f2.prod_xy000z_xy000z_into_abcd00efghij(l0, l1)
f.mul_sparse_by_abcd00efghij(f2)
proc millerLoopBLS12Bench*(C: static Curve, iters: int) =
@ -238,6 +238,43 @@ proc pairingBLS12Bench*(C: static Curve, iters: int) =
bench("Pairing BLS12", C, iters):
f.pairing_bls12(P, Q)
proc pairing_multisingle_BLS12Bench*(C: static Curve, N: static int, iters: int) =
  ## Benchmark the product of `N` pairings where every pairing is
  ## computed on its own with the single-pair `pairing_bls12`
  ## and the `N` results are then multiplied together in Fp12.
  ##
  ## Intended to be compared with `pairing_multipairing_BLS12Bench`
  ## which computes the same kind of product with a single call.
  ##
  ## - `C`:     curve to benchmark
  ## - `N`:     number of (P, Q) pairs
  ## - `iters`: number of benchmark iterations
  var
    Ps {.noInit.}: array[N, ECP_ShortW_Aff[Fp[C], NotOnTwist]]
    Qs {.noInit.}: array[N, ECP_ShortW_Aff[Fp2[C], OnTwist]]

    GTs {.noInit.}: array[N, Fp12[C]]

  # Random inputs, drawn outside of the timed section
  for i in 0 ..< N:
    Ps[i] = rng.random_unsafe(typeof(Ps[0]))
    Qs[i] = rng.random_unsafe(typeof(Qs[0]))

  var f: Fp12[C]
  bench("Pairing BLS12 multi-single " & $N & " pairings", C, iters):
    # N independent pairings ...
    for i in 0 ..< N:
      GTs[i].pairing_bls12(Ps[i], Qs[i])

    # ... merged with N-1 Fp12 multiplications
    f = GTs[0]
    for i in 1 ..< N:
      f *= GTs[i]
proc pairing_multipairing_BLS12Bench*(C: static Curve, N: static int, iters: int) =
  ## Benchmark one call to the array overload of `pairing_bls12`
  ## over `N` random (P, Q) pairs.
  ##
  ## - `C`:     curve to benchmark
  ## - `N`:     number of (P, Q) pairs
  ## - `iters`: number of benchmark iterations
  type
    G1Aff = ECP_ShortW_Aff[Fp[C], NotOnTwist]
    G2Aff = ECP_ShortW_Aff[Fp2[C], OnTwist]

  var
    Ps {.noInit.}: array[N, G1Aff]
    Qs {.noInit.}: array[N, G2Aff]

  # Inputs are drawn pair by pair (P first, then Q), outside of the timed section
  for idx in 0 ..< N:
    Ps[idx] = rng.random_unsafe(G1Aff)
    Qs[idx] = rng.random_unsafe(G2Aff)

  var f: Fp12[C]
  bench("Pairing BLS12 multipairing " & $N & " pairings", C, iters):
    f.pairing_bls12(Ps, Qs)
proc pairingBNBench*(C: static Curve, iters: int) =
let
P = rng.random_point(ECP_ShortW_Aff[Fp[C], NotOnTwist])

View File

@ -148,6 +148,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
("tests/t_pairing_bn254_snarks_optate.nim", false),
("tests/t_pairing_bls12_377_optate.nim", false),
("tests/t_pairing_bls12_381_optate.nim", false),
("tests/t_pairing_bls12_381_multi.nim", false),
# Hashing vs OpenSSL
# ----------------------------------------------------------

View File

@ -13,6 +13,9 @@ import
../towers,
../isogeny/frobenius
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Gϕ₁₂, Cyclotomic subgroup of Fp12

View File

@ -9,12 +9,14 @@
import
std/typetraits,
../primitives,
../config/curves,
../arithmetic,
../towers,
../elliptic/ec_shortweierstrass_affine,
../io/io_towers
# No exceptions allowed
{.push raises: [].}
type
Line*[F] = object
## Packed line representation over a E'(Fp^k/d)

View File

@ -19,6 +19,9 @@ import
export lines_common
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Miller Loop's Line Evaluation

View File

@ -15,6 +15,9 @@ import
./lines_projective,
./mul_fp6_by_lines, ./mul_fp12_by_lines
# No exceptions allowed
{.push raises: [].}
# ############################################################
# #
# Basic Miller Loop #
@ -111,118 +114,6 @@ func millerCorrectionBN*[FT, F1, F2](
# we hardcode unrolled addition chains.
# This should also contribute to performance.
#
# Multi-pairing discussion:
# Aranha & Scott proposes 2 different approaches for multi-pairing.
#
# -----
# Scott
#
# Algorithm 2: Calculate and store line functions for BLS12 curve
# Input: Q ∈ G2, P ∈ G1 , curve parameter u
# Output: An array g of blog2(u)c line functions ∈ Fp12
# 1 T ← Q
# 2 for i ← ceil(log2(u)) 1 to 0 do
# 3 g[i] ← lT,T(P), T ← 2T
# 4 if ui = 1 then
# 5 g[i] ← g[i].lT,Q(P), T ← T + Q
# 6 return g
#
# And to accumulate lines from a new (P, Q) tuple of points
#
# Algorithm 4: Accumulate another set of line functions into g
# Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
# Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
# 1 T ← Qj
# 2 for i ← blog2 (u)c 1 to 0 do
# 3 t ← lT,T (Pj), T ← 2T
# 4 if ui = 1 then
# 5 t ← t.lT,Qj (Pj), T ← T + Qj
# 6 g[i] ← g[i].t
# 7 return g
#
# ------
# Aranha
#
# Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
# (we extract the Miller Loop part only)
# Input : P1 , P2 , . . . Pn ∈ G1 ,
# Q1 , Q2, . . . Qn ∈ G2
# Output: (we focus on the Miller Loop)
#
# Write l in binary form, l = sum(0 ..< m-1)
# f ← 1, l ← abs(AteParam)
# for j ← 1 to n do
# Tj ← Qj
# end
#
# for i = m-2 down to 0 do
# f ← f²
# for j ← 1 to n do
# f ← f gTj,Tj(Pj), Tj ← [2]Tj
# if li = 1 then
# f ← f gTj,Qj(Pj), Tj ← Tj + Qj
# end
# end
# end
#
# -----
# Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
# and I operations to do in our Miller loop:
# - I = HammingWeight(AteParam) + Bitwidth(AteParam)
# - HammingWeight(AteParam) corresponds to line additions
# - Bitwidth(AteParam) corresponds to line doublings
#
# Scott approach is to have:
# - I Fp12 accumulators `g`
# - 1 G2 accumulator `T`
# and then accumulating each (Pj, Qj) into their corresponding `g` accumulator.
#
# Aranha approach is to have:
# - 1 Fp12 accumulator `f`
# - N G2 accumulators `T`
# and accumulate N points per I.
#
# Scott approach is fully "online"/"streaming",
# while Aranha's saves space.
# For BLS12_381,
# I = 68 hence we would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes)
# G2 has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
# and we choose N (which can be 1 for single pairing or reverting to Scott approach).
#
# In actual use, "streaming pairings" are not used, pairings to compute are receive
# by batch, for example for blockchain you receive a batch of N blocks to verify from one peer.
# Furthermore, 39kB would be over L1 cache size and incurs cache misses.
# Additionally Aranha approach would make it easier to batch inversions
# using Montgomery's simultaneous inversion technique.
# Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
# for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
#
# Regarding optimizations, as the Fp12 accumulator is dense
# and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
# - squaring is 11m
# - Dense-sparse is 13m
# - sparse-sparse is 6m
# - Dense-(somewhat sparse) is 17m
# Hence when accumulating lines from multiple points:
# - 2x Dense-sparse is 26m
# - sparse-sparse then Dense-(somewhat sparse) is 23m
# a 11.5% speedup
#
# We can use Aranha approach but process lines function 2-by-2 merging them
# before merging them to the dense Fp12 accumulator.
#
# In benchmarks though, the speedup doesn't work for BN curves but does for BLS curves.
#
# For single pairings
# Unfortunately, it's BN254_Snarks which requires a lot of addition in the Miller loop.
# BLS12-377 and BLS12-381 require 6 and 7 line addition in their Miller loop,
# the saving is about 150 cycles per addition for about 1000 cycles saved.
# A full pairing is ~2M cycles so this is only 0.5% for significantly
# more maintenance and bounds analysis complexity.
#
# For multipairing it is interesting since for a BLS signature verification (double pairing)
# we would save 1000 cycles per Ate iteration so ~70000 cycles, while a Miller loop is ~800000 cycles.
# Miller Loop - single pairing
# ----------------------------------------------------------------------------
@ -263,7 +154,7 @@ func miller_init_double_then_add*[FT, F1, F2](
# - The first line is squared (sparse * sparse)
# - The second is (somewhat-sparse * sparse)
when numDoublings >= 2:
f.mul_sparse_sparse(line, line)
f.prod_sparse_sparse(line, line)
line.line_double(T, P)
f.mul(line)
for _ in 2 ..< numDoublings:
@ -278,13 +169,10 @@ func miller_init_double_then_add*[FT, F1, F2](
# we special case the addition as
# - The first line and second are sparse (sparse * sparse)
when numDoublings == 1:
# TODO: sparse * sparse
# f *= line <=> f = line for the first iteration
# With Fp2 -> Fp4 -> Fp12 towering and a M-Twist
# The line corresponds to a sparse xy000z Fp12
var line2 {.noInit.}: Line[F2]
line2.line_add(T, Q, P)
f.mul_sparse_sparse(line, line2)
f.prod_sparse_sparse(line, line2)
else:
line.line_add(T, Q, P)
f.mul(line)
@ -324,3 +212,137 @@ func miller_accum_double_then_add*[FT, F1, F2](
# Miller Loop - multi-pairing
# ----------------------------------------------------------------------------
#
# Multi-pairing discussion:
# Aranha & Scott propose 2 different approaches for multi-pairing.
# See `multi_pairing.md`.
# We implement Aranha's approach.
func double_jToN[N: static int, FT, F1, F2](
       f: var FT,
       j: static int,
       line0, line1: var Line[F2],
       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]]) =
  ## Doubling step of the multi-pairing Miller loop
  ## for the pairings starting at index `j`.
  ##
  ## Lines are computed with `line_double` and accumulated into `f`.
  ## `line0` and `line1` are scratch buffers, they are overwritten.
  ## The `Ts` are passed mutably to `line_double`.
  ##
  ## NOTE(review): the leftover handling below only looks at the parity of N,
  ## so `j` is presumably expected to be even - the visible callers use 0 and 2.
  {.push checks: off.} # No OverflowError or IndexError allowed
  # Complete pairs (k, k+1): the 2 lines are merged together
  # with `f` in a single 3-way multiplication
  var k = j
  while k + 1 < N:
    line0.line_double(Ts[k], Ps[k])
    line1.line_double(Ts[k+1], Ps[k+1])
    f.mul_3way_sparse_sparse(line0, line1)
    k += 2

  when (N and 1) == 1: # Odd N: the last pairing has no partner
    line0.line_double(Ts[N-1], Ps[N-1])
    f.mul(line0)
  {.pop.}
func add_jToN[N: static int, FT, F1, F2](
       f: var FT,
       j: static int,
       line0, line1: var Line[F2],
       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]]) =
  ## Addition step of the multi-pairing Miller loop
  ## for the pairings starting at index `j`.
  ##
  ## Lines are computed with `line_add` and accumulated into `f`.
  ## `line0` and `line1` are scratch buffers, they are overwritten.
  ## The `Ts` are passed mutably to `line_add`.
  ##
  ## NOTE(review): the leftover handling below only looks at the parity of N,
  ## so `j` is presumably expected to be even - the visible callers use 0.
  {.push checks: off.} # No OverflowError or IndexError allowed
  # Complete pairs (first, first+1), starting from j:
  # the 2 lines are merged together with `f` in a single 3-way multiplication.
  # Stopping at N-2 guarantees that `first+1` is a valid index.
  for first in countup(j, N-2, 2):
    let second = first + 1
    line0.line_add(Ts[first], Qs[first], Ps[first])
    line1.line_add(Ts[second], Qs[second], Ps[second])
    f.mul_3way_sparse_sparse(line0, line1)

  when (N and 1) == 1: # Odd N: the last pairing has no partner
    line0.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
    f.mul(line0)
  {.pop.}
func miller_init_double_then_add*[N: static int, FT, F1, F2](
       f: var FT,
       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]],
       numDoublings: static int
     ) =
  ## Start a multi-pairing Miller Loop
  ## This means
  ## - `numDoublings` doublings (the first one is the MSB doubling)
  ## - 1 add
  ##
  ## f is overwritten
  ## Ts are overwritten by Qs
  static:
    doAssert N >= 1
    doAssert f.c0 is Fp4
    doAssert FT.C == F1.C
    doAssert FT.C == F2.C

  {.push checks: off.} # No OverflowError or IndexError allowed
  var line0 {.noInit.}, line1 {.noInit.}: Line[F2]

  # First step: T <- Q, f = 1 (mod p¹²), f *= line
  # ----------------------------------------------
  for i in 0 ..< N:
    Ts[i].projectiveFromAffine(Qs[i])

  line0.line_double(Ts[0], Ps[0])
  when N >= 2:
    # f is initialized from the product of the first 2 lines,
    # the remaining pairings (from index 2) are then accumulated.
    line1.line_double(Ts[1], Ps[1])
    f.prod_sparse_sparse(line0, line1)
    f.double_jToN(j=2, line0, line1, Ts, Ps)
  # When N == 1, f is conceptually equal to line0 but is not written yet:
  # both N == 1 branches below overwrite f with a product involving line0.

  # Doubling steps: 0b10...00
  # ------------------------------------------------
  when numDoublings > 1: # Already did the MSB doubling
    when N == 1:         # f = line0
      f.prod_sparse_sparse(line0, line0) # f.square()
      # There is a single pairing: its accumulator is at index 0
      line0.line_double(Ts[0], Ps[0])
      f.mul(line0)
      for _ in 2 ..< numDoublings:
        f.square()
        f.double_jToN(j=0, line0, line1, Ts, Ps)
    else:
      # 1 doubling was done in the first step,
      # numDoublings-1 are remaining
      for _ in 1 ..< numDoublings:
        f.square()
        f.double_jToN(j=0, line0, line1, Ts, Ps)

  # Addition step: 0b10...01
  # ------------------------------------------------
  when numDoublings == 1 and N == 1: # f = line0
    line1.line_add(Ts[0], Qs[0], Ps[0])
    f.prod_sparse_sparse(line0, line1)
  else:
    f.add_jToN(j=0, line0, line1, Ts, Qs, Ps)

  {.pop.} # No OverflowError or IndexError allowed
func miller_accum_double_then_add*[N: static int, FT, F1, F2](
       f: var FT,
       Ts: var array[N, ECP_ShortW_Prj[F2, OnTwist]],
       Qs: array[N, ECP_ShortW_Aff[F2, OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[F1, NotOnTwist]],
       numDoublings: int,
       add = true
     ) =
  ## Continue a multi-pairing Miller Loop with
  ## - `numDoublings` doubling steps, each preceded by a squaring of `f`
  ## - then 1 addition step, unless `add` is false
  ##
  ## f and Ts are updated
  var lineA {.noInit.}, lineB {.noInit.}: Line[F2]

  var remaining = numDoublings
  while remaining > 0:
    f.square()
    f.double_jToN(j=0, lineA, lineB, Ts, Ps)
    dec remaining

  if not add:
    return
  f.add_jToN(j=0, lineA, lineB, Ts, Qs, Ps)

View File

@ -13,6 +13,8 @@ import
../towers,
./lines_projective
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
@ -203,7 +205,7 @@ func mul_sparse_by_line_xyz000*[C: static Curve](
f2x.sum2xMod(f2x, V1)
f.c2.redc2x(f2x)
func mul_xyz000_xyz000_into_abcdefghij00*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
func prod_xyz000_xyz000_into_abcdefghij00*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
## Multiply 2 lines together
## The result is sparse in f.c1.c1
# In the following equations (taken from cubic extension implementation)
@ -407,7 +409,7 @@ func mul_sparse_by_line_xy000z*[C: static Curve](
f2x.sum2xMod(f2x, V2)
f.c1.redc2x(f2x)
func mul_xy000z_xy000z_into_abcd00efghij*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
func prod_xy000z_xy000z_into_abcd00efghij*[C: static Curve](f: var Fp12[C], l0, l1: Line[Fp2[C]]) =
## Multiply 2 lines together
## The result is sparse in f.c1.c0
# In the following equations (taken from cubic extension implementation)
@ -529,6 +531,7 @@ func mul_sparse_by_abcd00efghij*[C: static Curve](
# ------------------------------------------------------------
func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =
## Multiply an element of Fp12 by a sparse line function (xyz000 or xy000z)
when C.getSexticTwist() == D_Twist:
f.mul_sparse_by_line_xyz000(line)
elif C.getSexticTwist() == M_Twist:
@ -536,10 +539,26 @@ func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =
else:
{.error: "A line function assumes that the curve has a twist".}
func mul_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
func prod_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
## Multiply 2 lines function (xyz000 or xy000z)
## and store the result in f
## f is overwritten
when C.getSexticTwist() == D_Twist:
f.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
f.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
elif C.getSexticTwist() == M_Twist:
f.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
f.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
else:
{.error: "A line function assumes that the curve has a twist".}
func mul_3way_sparse_sparse*[C](f: var Fp12[C], line0, line1: Line[Fp2[C]]) {.inline.} =
  ## In-place 3-way product: f <- f * line0 * line1
  ## with line0 and line1 sparse line functions (xyz000 or xy000z).
  ##
  ## The 2 lines are first multiplied together into a temporary,
  ## then `f` is multiplied by that temporary.
  ## The routines used depend on the sextic twist of the curve.
  var linesProd {.noInit.}: Fp12[C]
  when C.getSexticTwist() == M_Twist:
    linesProd.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
    f.mul_sparse_by_abcd00efghij(linesProd)
  elif C.getSexticTwist() == D_Twist:
    linesProd.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
    f.mul_sparse_by_abcdefghij00(linesProd)
  else:
    {.error: "A line function assumes that the curve has a twist".}

View File

@ -13,6 +13,8 @@ import
../towers,
./lines_projective
# No exceptions allowed
{.push raises: [].}
# ############################################################
#

View File

@ -0,0 +1,127 @@
# Multi-pairing discussion:
Aranha & Scott propose 2 different approaches for multi-pairing.
- Software Implementation, Algorithm 11.2 & 11.3\
Aranha, Dominguez Perez, A. Mrabet, Schwabe,\
Guide to Pairing-Based Cryptography, 2015
- Pairing Implementation Revisited
Mike Scott, 2019
https://eprint.iacr.org/2019/077.pdf
## Scott approach
```
Algorithm 2: Calculate and store line functions for BLS12 curve
Input: Q ∈ G2, P ∈ G1 , curve parameter u
Output: An array g of ceil(log2(u)) line functions ∈ Fp12
1 T ← Q
2 for i ← ceil(log2(u)) - 1 to 0 do
3 g[i] ← lT,T(P), T ← 2T
4 if ui = 1 then
5 g[i] ← g[i].lT,Q(P), T ← T + Q
6 return g
```
And to accumulate lines from a new (P, Q) tuple of points
```
Algorithm 4: Accumulate another set of line functions into g
Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
1 T ← Qj
2 for i ← ceil(log2(u)) - 1 to 0 do
3 t ← lT,T (Pj), T ← 2T
4 if ui = 1 then
5 t ← t.lT,Qj (Pj), T ← T + Qj
6 g[i] ← g[i].t
7 return g
```
## Aranha approach
```
Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
(we extract the Miller Loop part only)
Input : P1 , P2 , . . . Pn ∈ G1 ,
Q1 , Q2, . . . Qn ∈ G2
Output: (we focus on the Miller Loop)
Write l in binary form, l = Σ_{i=0}^{m-1} l_i·2^i
f ← 1, l ← abs(AteParam)
for j ← 1 to n do
Tj ← Qj
end
for i = m-2 down to 0 do
f ← f²
for j ← 1 to n do
f ← f.gTj,Tj(Pj), Tj ← [2]Tj
if li = 1 then
f ← f.gTj,Qj(Pj), Tj ← Tj + Qj
end
end
end
```
## Analysis
Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
and M operations to do in our Miller loop:
- M = HammingWeight(AteParam) + Bitwidth(AteParam)
- HammingWeight(AteParam) corresponds to line additions
- Bitwidth(AteParam) corresponds to line doublings
Scott approach is to have:
- M Fp12 line accumulators `g`
- 1 G2 accumulator `T`
and then accumulating each (Pj, Qj) lines into their corresponding `g` accumulator.
Then those precomputed lines are merged into the final GT result.
Aranha approach is to have:
- 1 Fp12 accumulator `f`
- N G2 accumulators `T`
and then pairings of each tuple are directly merged on GT.
Scott approach is fully "online"/"streaming",
while Aranha's saves space.
For BLS12_381,
M = 68 hence we would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes)
G2 has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
and we can choose N to be anything (N can be 1 for a single pairing or for reverting to Scott approach).
In practice, "streaming pairings" are not used: pairings to compute are received
in batches, for example for a blockchain you receive a batch of N blocks to verify from one peer.
Furthermore, 39kB would be over L1 cache size and incurs cache misses.
Additionally Aranha approach would make it easier to batch inversions
using Montgomery's simultaneous inversion technique.
Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
## Further optimizations
Regarding optimizations, as the Fp12 accumulator is dense
and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
- squaring is 11m
- Dense-sparse is 13m
- sparse-sparse is 6m
- Dense-(somewhat sparse) is 17m
Hence when accumulating lines from multiple points:
- 2x Dense-sparse is 26m
- sparse-sparse then Dense-(somewhat sparse) is 23m
an 11.5% speedup
We can use Aranha approach but process line functions 2-by-2, merging them together
before merging the result into the dense Fp12 accumulator.
In benchmarks though, the speedup doesn't work for BN curves but does for BLS curves.
For single pairings:
Unfortunately, it is BN254_Snarks that requires a lot of additions in the Miller loop.
BLS12-377 and BLS12-381 require 6 and 7 line additions in their Miller loop,
the saving is about 150 cycles per addition for about 1000 cycles saved.
A full pairing is ~2M cycles so this is only 0.5% for significantly
more maintenance and bounds analysis complexity.
For multipairing it is interesting since for a BLS signature verification (double pairing)
we would save 1000 cycles per Ate iteration so ~70000 cycles, while a Miller loop is ~800000 cycles.

View File

@ -21,6 +21,9 @@ import
export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11225
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Optimal ATE pairing for
@ -154,3 +157,16 @@ func pairing_bls12*[C](
gt.millerLoopAddchain(Q, P)
gt.finalExpEasy()
gt.finalExpHard_BLS12()
func pairing_bls12*[N: static int, C](
       gt: var Fp12[C],
       Ps: array[N, ECP_ShortW_Aff[Fp[C], NotOnTwist]],
       Qs: array[N, ECP_ShortW_Aff[Fp2[C], OnTwist]]) {.meter.} =
  ## Multi-pairing: optimal Ate pairing for BLS12 curves
  ## over N pairs of points.
  ##
  ## Input:
  ##   - `Ps`: N points of G1
  ##   - `Qs`: N points of G2
  ## Output, stored in `gt`:
  ##   The product of pairings
  ##   e(P₀, Q₀) * e(P₁, Q₁) * ... * e(Pₙ₋₁, Qₙ₋₁) ∈ Gt
  ##
  ## A single Miller loop is run over all the pairs,
  ## followed by a single final exponentiation (easy part then hard part).
  millerLoopAddchain(gt, Qs, Ps)
  finalExpEasy(gt)
  finalExpHard_BLS12(gt)

View File

@ -22,6 +22,9 @@ import
export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11225
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Optimal ATE pairing for

View File

@ -1,338 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/[common, curves, type_ff],
../towers,
../elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective
],
../curves/zoo_pairings,
./lines_projective, ./mul_fp12_by_lines,
./miller_loops
# ############################################################
#
# Optimal ATE pairing for
# BLS12-381
#
# ############################################################
#
# - Software Implementation, Algorithm 11.2 & 11.3
# Aranha, Dominguez Perez, A. Mrabet, Schwabe,
# Guide to Pairing-Based Cryptography, 2015
#
# - Physical Attacks,
# N. El Mrabet, Goubin, Guilley, Fournier, Jauvart, Moreau, Rauzy, Rondepierre,
# Guide to Pairing-Based Cryptography, 2015
#
# - Pairing Implementation Revisited
# Mike Scott, 2019
# https://eprint.iacr.org/2019/077.pdf
#
# Fault attacks:
# To limite exposure to some fault attacks (flipping bits with a laser on embedded):
# - changing the number of Miller loop iterations
# - flipping the bits in the Miller loop
# we hardcode unrolled addition chains.
# This should also contribute to performance.
#
# Multi-pairing discussion:
# Aranha & Scott proposes 2 different approaches for multi-pairing.
#
# -----
# Scott
#
# Algorithm 2: Calculate and store line functions for BLS12 curve
# Input: Q ∈ G2, P ∈ G1 , curve parameter u
# Output: An array g of blog2(u)c line functions ∈ Fp12
# 1 T ← Q
# 2 for i ← ceil(log2(u)) 1 to 0 do
# 3 g[i] ← lT,T(P), T ← 2T
# 4 if ui = 1 then
# 5 g[i] ← g[i].lT,Q(P), T ← T + Q
# 6 return g
#
# And to accumulate lines from a new (P, Q) tuple of points
#
# Algorithm 4: Accumulate another set of line functions into g
# Input: The array g, Qj ∈ G2 , Pj ∈ G1 , curve parameter u
# Output: Updated array g of ceil(log2(u)) line functions ∈ Fp12
# 1 T ← Qj
# 2 for i ← blog2 (u)c 1 to 0 do
# 3 t ← lT,T (Pj), T ← 2T
# 4 if ui = 1 then
# 5 t ← t.lT,Qj (Pj), T ← T + Qj
# 6 g[i] ← g[i].t
# 7 return g
#
# ------
# Aranha
#
# Algorithm 11.2 Explicit multipairing version of Algorithm 11.1.
# (we extract the Miller Loop part only)
# Input : P1 , P2 , . . . Pn ∈ G1 ,
# Q1 , Q2, . . . Qn ∈ G2
# Output: (we focus on the Miller Loop)
#
# Write l in binary form, l = sum(0 ..< m-1)
# f ← 1, l ← abs(AteParam)
# for j ← 1 to n do
# Tj ← Qj
# end
#
# for i = m-2 down to 0 do
# f ← f²
# for j ← 1 to n do
# f ← f gTj,Tj(Pj), Tj ← [2]Tj
# if li = 1 then
# f ← f gTj,Qj(Pj), Tj ← Tj + Qj
# end
# end
# end
#
# -----
# Assuming we have N tuples (Pj, Qj) of points j in 0 ..< N
# and I operations to do in our Miller loop:
# - I = HammingWeight(AteParam) + Bitwidth(AteParam)
# - HammingWeight(AteParam) corresponds to line additions
# - Bitwidth(AteParam) corresponds to line doublings
#
# Scott approach is to have:
# - I Fp12 accumulators `g`
# - 1 G2 accumulator `T`
# and then accumulating each (Pj, Qj) into their corresponding `g` accumulator.
#
# Aranha approach is to have:
# - 1 Fp12 accumulator `f`
# - N G2 accumulators `T`
# and accumulate N points per I.
#
# Scott approach is fully "online"/"streaming",
# while Aranha's saves space.
# For BLS12_381,
# I = 68 hence we would need 68*12*48 = 39168 bytes (381-bit needs 48 bytes)
# G2 has size 3*2*48 = 288 bytes (3 proj coordinates on Fp2)
# and we choose N (which can be 1 for single pairing or reverting to Scott approach).
#
# In actual use, "streaming pairings" are not used, pairings to compute are receive
# by batch, for example for blockchain you receive a batch of N blocks to verify from one peer.
# Furthermore, 39kB would be over L1 cache size and incurs cache misses.
# Additionally Aranha approach would make it easier to batch inversions
# using Montgomery's simultaneous inversion technique.
# Lastly, while a higher level API will need to store N (Pj, Qj) pairs for multi-pairings
# for Aranha approach, it can decide how big N is depending on hardware and/or protocol.
#
# Regarding optimizations, as the Fp12 accumulator is dense
# and lines are sparse (xyz000 or xy000z) Scott mentions the following costs:
# - Dense-sparse is 13m
# - sparse-sparse is 6m
# - Dense-(somewhat sparse) is 17m
# Hence when accumulating lines from multiple points:
# - 2x Dense-sparse is 26m
# - sparse-sparse then Dense-(somewhat sparse) is 23m
# a 11.5% speedup
#
# We can use Aranha approach but process lines function 2-by-2 merging them
# before merging them to the dense Fp12 accumulator
# Miller Loop
# -------------------------------------------------------------------------------------------------------
{.push raises: [].}
import
strutils,
../io/io_towers
func miller_first_iter[N: static int](
       f: var Fp12[BLS12_381],
       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
     ) =
  ## Start a Miller Loop
  ## This means
  ## - 1 doubling
  ## - 1 add
  ##
  ## f is overwritten
  ## Ts are overwritten by Qs
  static:
    doAssert N >= 1
    doAssert f.c0 is Fp4

  {.push checks: off.} # No OverflowError or IndexError allowed
  var line {.noInit.}: Line[Fp2[BLS12_381]]

  # First step: T <- Q, f = 1 (mod p¹²), f *= line
  # ----------------------------------------------
  for i in 0 ..< N:
    Ts[i].projectiveFromAffine(Qs[i])

  line.line_double(Ts[0], Ps[0])

  # f *= line <=> f = line for the first iteration
  # With Fp2 -> Fp4 -> Fp12 towering and a M-Twist
  # The line corresponds to a sparse xy000z Fp12
  f.c0.c0 = line.x
  f.c0.c1 = line.y
  f.c1.c0.setZero()
  f.c1.c1.setZero()
  f.c2.c0.setZero()
  f.c2.c1 = line.z

  when N >= 2:
    line.line_double(Ts[1], Ps[1])
    f.mul_sparse_by_line_xy000z(line) # TODO: sparse-sparse mul

    # Sparse merge 2 by 2, starting from 2.
    # Stopping at N-2 guarantees that i+1 is a valid index
    # and that the leftover of an odd N is only processed once, below.
    for i in countup(2, N-2, 2):
      # TODO: sparse-sparse mul of the 2 lines, then a single mul into f
      var line2 {.noInit.}: Line[Fp2[BLS12_381]]

      line.line_double(Ts[i], Ps[i])
      line2.line_double(Ts[i+1], Ps[i+1])
      f.mul_sparse_by_line_xy000z(line)
      f.mul_sparse_by_line_xy000z(line2)

    # Parenthesized: `and` binds less tightly than `==`
    when (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
      line.line_double(Ts[N-1], Ps[N-1])
      f.mul_sparse_by_line_xy000z(line)

  # 2nd step: Line addition as MSB is always 1
  # ----------------------------------------------
  when N >= 2: # f is dense, there are already many lines accumulated
    # Sparse merge 2 by 2, starting from 0
    for i in countup(0, N-2, 2):
      # TODO: sparse-sparse mul of the 2 lines, then a single mul into f
      var line2 {.noInit.}: Line[Fp2[BLS12_381]]

      line.line_add(Ts[i], Qs[i], Ps[i])
      line2.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
      f.mul_sparse_by_line_xy000z(line)
      f.mul_sparse_by_line_xy000z(line2)

    when (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
      line.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
      f.mul_sparse_by_line_xy000z(line)

  else: # N = 1, f is sparse
    line.line_add(Ts[0], Qs[0], Ps[0])
    # TODO: sparse-sparse mul
    f.mul_sparse_by_line_xy000z(line)

  {.pop.} # No OverflowError or IndexError allowed
func miller_accum_doublings[N: static int](
       f: var Fp12[BLS12_381],
       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]],
       numDoublings: int
     ) =
  ## Accumulate `numDoublings` Miller loop doubling steps into `f`
  ##
  ## Each step squares `f` once, then multiplies it
  ## by the doubling line of each of the N pairings.
  ## f and Ts are updated
  static: doAssert N >= 1
  {.push checks: off.} # No OverflowError or IndexError allowed

  var line {.noInit.}: Line[Fp2[BLS12_381]]

  for _ in 0 ..< numDoublings:
    f.square()
    when N >= 2:
      # Process pairings 2 by 2.
      # Stopping at N-2 guarantees that i+1 is a valid index
      # and that the leftover of an odd N is only processed once, below.
      for i in countup(0, N-2, 2):
        # TODO: sparse-sparse mul of the 2 lines, then a single mul into f
        var line2 {.noInit.}: Line[Fp2[BLS12_381]]

        line.line_double(Ts[i], Ps[i])
        line2.line_double(Ts[i+1], Ps[i+1])
        f.mul_sparse_by_line_xy000z(line)
        f.mul_sparse_by_line_xy000z(line2)

      # Parenthesized: `and` binds less tightly than `==`
      when (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
        line.line_double(Ts[N-1], Ps[N-1])
        f.mul_sparse_by_line_xy000z(line)
    else:
      line.line_double(Ts[0], Ps[0])
      f.mul_sparse_by_line_xy000z(line)

  {.pop.} # No OverflowError or IndexError allowed
func miller_accum_addition[N: static int](
       f: var Fp12[BLS12_381],
       Ts: var array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]],
       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
     ) =
  ## Accumulate a Miller loop addition step into `f`
  ##
  ## `f` is multiplied by the addition line of each of the N pairings.
  ## f and Ts are updated
  static: doAssert N >= 1
  {.push checks: off.} # No OverflowError or IndexError allowed

  var line {.noInit.}: Line[Fp2[BLS12_381]]

  when N >= 2:
    # Sparse merge 2 by 2, starting from 0.
    # Stopping at N-2 guarantees that i+1 is a valid index
    # and that the leftover of an odd N is only processed once, below.
    for i in countup(0, N-2, 2):
      # TODO: sparse-sparse mul of the 2 lines, then a single mul into f
      var line2 {.noInit.}: Line[Fp2[BLS12_381]]

      line.line_add(Ts[i], Qs[i], Ps[i])
      line2.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
      f.mul_sparse_by_line_xy000z(line)
      f.mul_sparse_by_line_xy000z(line2)

    # Parenthesized: `and` binds less tightly than `==`
    when (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
      line.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
      f.mul_sparse_by_line_xy000z(line)
  else:
    line.line_add(Ts[0], Qs[0], Ps[0])
    f.mul_sparse_by_line_xy000z(line)

  {.pop.} # No OverflowError or IndexError allowed
func millerLoop_opt_BLS12_381*[N: static int](
       f: var Fp12[BLS12_381],
       Qs: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]],
       Ps: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
     ) {.meter.} =
  ## Miller Loop for BLS12-381 over N pairs of points (Pj, Qj),
  ## unrolled following the hardcoded addition chain
  ## of the Ate parameter below.
  ## All the pairs are accumulated into the single Fp12 `f`.
  ##
  ## f is overwritten

  var Ts {.noInit.}: array[N, ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]]

  # Ate param addition chain
  # Hex: 0xd201000000010000
  # Bin: 0b1101001000000001000000000000000000000000000000010000000000000000

  f.miller_first_iter(Ts, Qs, Ps)      # 0b11
  f.miller_accum_doublings(Ts, Ps, 2)  # 0b1100
  f.miller_accum_addition(Ts, Qs, Ps)  # 0b1101
  f.miller_accum_doublings(Ts, Ps, 3)  # 0b1101000
  f.miller_accum_addition(Ts, Qs, Ps)  # 0b1101001
  f.miller_accum_doublings(Ts, Ps, 9)  # 0b1101001000000000
  f.miller_accum_addition(Ts, Qs, Ps)  # 0b1101001000000001
  f.miller_accum_doublings(Ts, Ps, 32) # 0b110100100000000100000000000000000000000000000000
  f.miller_accum_addition(Ts, Qs, Ps)  # 0b110100100000000100000000000000000000000000000001
  f.miller_accum_doublings(Ts, Ps, 16) # 0b1101001000000001000000000000000000000000000000010000000000000000

  # TODO: what is the threshold for Karabina's compressed squarings?

View File

@ -0,0 +1,65 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Standard library
std/[os, times, strformat],
# Internals
../constantine/config/common,
../constantine/[
arithmetic, primitives,
towers, ec_shortweierstrass
],
../constantine/io/io_towers,
../constantine/config/curves,
../constantine/pairing/pairing_bls12,
# Test utilities
../helpers/prng_unsafe
# Testing multipairing
# ----------------------------------------------

var rng: RngState
# The seed is the low 32 bits of the current Unix time;
# it is printed so that a failing run can be reproduced.
let timeseed = uint32(getTime().toUnix() and ((1'i64 shl 32) - 1)) # unixTime mod 2^32
rng.seed(timeseed)
echo "\n------------------------------------------------------\n"
echo "test_pairing_bls12_381_multi xoshiro512** seed: ", timeseed
proc testMultiPairing(rng: var RngState, N: static int) =
  ## Draws N random (P, Q) pairs, then checks that one N-way
  ## multipairing call yields the same result as the product of
  ## N individual pairings. The CPU time of both approaches is printed.
  var Ps {.noInit.}: array[N, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
  var Qs {.noInit.}: array[N, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]]
  var GTs {.noInit.}: array[N, Fp12[BLS12_381]]

  # Inputs are drawn pair by pair: P first, then Q
  for idx in 0 ..< N:
    Ps[idx] = random_unsafe(rng, typeof(Ps[0]))
    Qs[idx] = random_unsafe(rng, typeof(Qs[0]))

  # Simple pairing: one pairing per pair, multiplied together afterwards
  let simpleStart = cpuTime()
  var GTsimple {.noInit.}: Fp12[BLS12_381]
  for idx in 0 ..< N:
    pairing_bls12(GTs[idx], Ps[idx], Qs[idx])

  GTsimple = GTs[0]
  for idx in 1 ..< N:
    GTsimple *= GTs[idx]
  let simpleStop = cpuTime()

  # Multipairing: all pairs in a single call
  let multiStart = cpuTime()
  var GTmulti {.noInit.}: Fp12[BLS12_381]
  pairing_bls12(GTmulti, Ps, Qs)
  let multiStop = cpuTime()

  let simpleElapsed = simpleStop - simpleStart
  let multiElapsed = multiStop - multiStart
  echo &"N={N}, Simple: {simpleElapsed:>4.4f}s, Multi: {multiElapsed:>4.4f}s"
  doAssert bool GTsimple == GTmulti
# Instantiate and run the comparison once per compile-time value of `i`
staticFor i, 1, 17:
  testMultiPairing(rng, N = i)

View File

@ -259,7 +259,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
r.prod(f0, f1)
var rl: Fp12[C]
rl.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
rl.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
check: bool(r == rl)
@ -297,7 +297,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
r.prod(f0, f1)
var rl: Fp12[C]
rl.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
rl.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
check: bool(r == rl)
@ -337,7 +337,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
)
var rl: Fp12[C]
rl.mul_xyz000_xyz000_into_abcdefghij00(line0, line1)
rl.prod_xyz000_xyz000_into_abcdefghij00(line0, line1)
var f = rng.random_elem(Fp12[C], gen)
var f2 = f
@ -383,7 +383,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi
)
var rl: Fp12[C]
rl.mul_xy000z_xy000z_into_abcd00efghij(line0, line1)
rl.prod_xy000z_xy000z_into_abcd00efghij(line0, line1)
var f = rng.random_elem(Fp12[C], gen)
var f2 = f

View File

@ -132,6 +132,37 @@ func verifyG2[T: byte|char](
return e0 == e1
func verifyG2_multi[T: byte|char](
       pubkey: ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist],
       message: openarray[T],
       signature: ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]
     ): SecretBool =
  ## Verifies `signature` over `message` for `pubkey` using a single
  ## 2-way multipairing on the pairs
  ##   (pubkey, hashToCurve(message)) and (-G1 generator, signature)
  ## and returns whether the result is one.
  ##
  ## Neither `pubkey` nor `signature` may be the point at infinity
  ## (enforced with `doAssert`).
  doAssert not pubkey.isInf.bool
  doAssert not signature.isInf.bool

  # Hash the message to a projective point
  var hashedMsg {.noInit.}: ECP_ShortW_Prj[Fp2[BLS12_381], OnTwist]
  hashToCurve(
    H = sha256,
    k = 128,
    output = hashedMsg,
    augmentation = "",
    message = message,
    domainSepTag = DomainSepTag
  )

  var g1Inputs: array[2, ECP_ShortW_Aff[Fp[BLS12_381], NotOnTwist]]
  var g2Inputs: array[2, ECP_ShortW_Aff[Fp2[BLS12_381], OnTwist]]

  # First pair: public key with the hashed message (converted to affine)
  g1Inputs[0] = pubkey
  g2Inputs[0].affineFromprojective(hashedMsg)

  # Second pair: negated G1 generator with the signature
  g1Inputs[1].neg(BLS12_381_G1_generator)
  g2Inputs[1] = signature

  var pairingOut: Fp12[BLS12_381]
  pairingOut.pairing_bls12(g1Inputs, g2Inputs)
  result = pairingOut.isOne()
proc bls_signature_test(rng: var RngState, i: int) =
var
seckey: Fr[BLS12_381]
@ -143,8 +174,11 @@ proc bls_signature_test(rng: var RngState, i: int) =
pubkey.publicKeyG1(seckey)
signature.signG2(message, seckey)
let ok = pubkey.verifyG2(message, signature)
doAssert bool ok
let okSingle = pubkey.verifyG2(message, signature)
doAssert bool okSingle
let okMulti = pubkey.verifyG2_multi(message, signature)
doAssert bool okMulti
for i in 0 ..< 500:
rng.bls_signature_test(i)