From 082cd1deb973618c38930e54f7f7b58f3e8835c7 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Tue, 7 Feb 2023 16:27:53 +0100 Subject: [PATCH] MSB-to-LSB minimum Hamming Weight Recoding (#219) * signed recoding * use recoding --- benchmarks/bench_ec_g1.nim | 3 + benchmarks/bench_ec_g2.nim | 3 + benchmarks/bench_elliptic_template.nim | 12 +++ constantine.nimble | 2 +- constantine/math/arithmetic/bigints.nim | 49 ++++++++++- .../elliptic/ec_shortweierstrass_jacobian.nim | 10 ++- .../ec_shortweierstrass_projective.nim | 10 ++- .../math/pairings/cyclotomic_subgroups.nim | 81 ++++++++++--------- constantine/math/pairings/lines_eval.nim | 62 +++++++------- constantine/math/pairings/miller_loops.nim | 6 +- constantine/signatures/bls_signatures.nim | 55 +++++++------ .../math/support/ec_reference_scalar_mult.nim | 31 +++++-- tests/math/t_ec_template.nim | 6 +- 13 files changed, 216 insertions(+), 114 deletions(-) diff --git a/benchmarks/bench_ec_g1.nim b/benchmarks/bench_ec_g1.nim index aad7948..38e8825 100644 --- a/benchmarks/bench_ec_g1.nim +++ b/benchmarks/bench_ec_g1.nim @@ -57,6 +57,9 @@ proc main() = scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp[curve], G1], MulIters) scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp[curve], G1], MulIters) separator() + scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp[curve], G1], MulIters) + scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp[curve], G1], MulIters) + separator() scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 2, MulIters) scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 3, MulIters) scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 4, MulIters) diff --git a/benchmarks/bench_ec_g2.nim b/benchmarks/bench_ec_g2.nim index e43099a..dab49ec 100644 --- a/benchmarks/bench_ec_g2.nim +++ b/benchmarks/bench_ec_g2.nim @@ -58,6 +58,9 @@ proc main() = scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp2[curve], G2], MulIters) scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp2[curve], G2], MulIters) separator() + scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp2[curve], G2], MulIters) + scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp2[curve], G2], MulIters) + separator() scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 2, MulIters) scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 3, MulIters) scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 4, MulIters) diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim index a4b157d..5051065 100644 --- a/benchmarks/bench_elliptic_template.nim +++ b/benchmarks/bench_elliptic_template.nim @@ -143,6 +143,18 @@ proc scalarMulUnsafeDoubleAddBench*(EC: typedesc, iters: int) = r = P r.unsafe_ECmul_double_add(exponent) +proc scalarMulUnsafeMinHammingWeightRecodingBench*(EC: typedesc, iters: int) = + const bits = EC.F.C.getCurveOrderBitwidth() + + var r {.noInit.}: EC + var P = rng.random_unsafe(EC) # TODO: clear cofactor + + let exponent = rng.random_unsafe(BigInt[bits]) + + bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe min Hamming Weight recoding)", EC, iters): + r = P + r.unsafe_ECmul_minHammingWeight(exponent) + proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) = var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints) diff --git a/constantine.nimble b/constantine.nimble index d47ed2d..65d5d5c 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -200,7 +200,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ # ("tests/math/t_pairing_bls12_377_line_functions.nim", false), # ("tests/math/t_pairing_bls12_381_line_functions.nim", false), # ("tests/math/t_pairing_mul_fp12_by_lines.nim", false), - # ("tests/math/t_pairing_cyclotomic_subgroup.nim", false), + ("tests/math/t_pairing_cyclotomic_subgroup.nim", false), ("tests/math/t_pairing_bn254_nogami_optate.nim", false), ("tests/math/t_pairing_bn254_snarks_optate.nim", false), ("tests/math/t_pairing_bls12_377_optate.nim", false), diff --git a/constantine/math/arithmetic/bigints.nim b/constantine/math/arithmetic/bigints.nim index 25cb4f3..7483f7a 100644 --- a/constantine/math/arithmetic/bigints.nim +++ b/constantine/math/arithmetic/bigints.nim @@ -476,7 +476,7 @@ func invmod*[bits]( F, M: static BigInt[bits]) = ## Compute the modular inverse of ``a`` modulo M ## r ≡ F.a⁻¹ (mod M) - ## + ## ## with F and M known at compile-time ## ## M MUST be odd, M does not need to be prime. @@ -491,5 +491,52 @@ func invmod*[bits](r: var BigInt[bits], a, M: BigInt[bits]) = one.setOne() r.invmod(a, one, M) +# ############################################################ +# +# Recoding +# +# ############################################################ + +iterator recoding_l2r_vartime*(a: BigInt): int8 = + ## This is a minimum-Hamming-Weight left-to-right recoding. + ## It outputs signed {-1, 0, 1} bits from MSB to LSB + ## with minimal Hamming Weight to minimize operations + ## in Miller Loop and vartime scalar multiplications + ## + ## Tagged vartime as it returns an int8 + ## - Optimal Left-to-Right Binary Signed-Digit Recoding + ## Joye, Yen, 2000 + ## https://marcjoye.github.io/papers/JY00sd2r.pdf + + # As the caller is copy-pasted at each yield + # we rework the algorithm so that we have a single yield point + # We rely on the compiler for loop hoisting and/or loop peeling + + var bi, bi1, ri, ri1, ri2: int8 + + var i = a.bits + while true: + if i == a.bits: # We rely on compiler to hoist this branch out of the loop. + ri = 0 + ri1 = int8 a.bit(a.bits-1) + ri2 = int8 a.bit(a.bits-2) + bi = 0 + else: + bi = bi1 + ri = ri1 + ri1 = ri2 + if i < 2: + ri2 = 0 + else: + ri2 = int8 a.bit(i-2) + + bi1 = (bi + ri1 + ri2) shr 1 + yield -2*bi + ri + bi1 + + if i > 0: + i -= 1 + else: + break + {.pop.} # inline {.pop.} # raises no exceptions diff --git a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim index 3145618..9515d36 100644 --- a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim +++ b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim @@ -630,14 +630,18 @@ func double*(P: var ECP_ShortW_Jac) {.inline.} = ## In-place point doubling P.double(P) -func diff*(r: var ECP_ShortW_Jac, - P, Q: ECP_ShortW_Jac - ) {.inline.} = +func diff*(r: var ECP_ShortW_Jac, P, Q: ECP_ShortW_Jac) {.inline.} = ## r = P - Q var nQ {.noInit.}: typeof(Q) nQ.neg(Q) r.sum(P, nQ) +func `-=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} = + ## In-place point substraction + var nQ {.noInit.}: typeof(Q) + nQ.neg(Q) + P.sum(P, nQ) + func affine*[F; G]( aff: var ECP_ShortW_Aff[F, G], jac: ECP_ShortW_Jac[F, G]) = diff --git a/constantine/math/elliptic/ec_shortweierstrass_projective.nim b/constantine/math/elliptic/ec_shortweierstrass_projective.nim index 2badd19..8e7bb4e 100644 --- a/constantine/math/elliptic/ec_shortweierstrass_projective.nim +++ b/constantine/math/elliptic/ec_shortweierstrass_projective.nim @@ -417,15 +417,19 @@ func double*(P: var ECP_ShortW_Prj) {.inline.} = ## In-place EC doubling P.double(P) -func diff*(r: var ECP_ShortW_Prj, - P, Q: ECP_ShortW_Prj - ) {.inline.} = +func diff*(r: var ECP_ShortW_Prj, P, Q: ECP_ShortW_Prj) {.inline.} = ## r = P - Q ## Can handle r and Q aliasing var nQ {.noInit.}: typeof(Q) nQ.neg(Q) r.sum(P, nQ) +func `-=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Prj) {.inline.} = + ## In-place point substraction + var nQ {.noInit.}: typeof(Q) + nQ.neg(Q) + P.sum(P, nQ) + func affine*[F, G]( aff: var ECP_ShortW_Aff[F, G], proj: ECP_ShortW_Prj[F, G]) = diff --git a/constantine/math/pairings/cyclotomic_subgroups.nim b/constantine/math/pairings/cyclotomic_subgroups.nim index 179d641..dc614a4 100644 --- a/constantine/math/pairings/cyclotomic_subgroups.nim +++ b/constantine/math/pairings/cyclotomic_subgroups.nim @@ -243,7 +243,7 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = # From here on, r aliasing with a is only for the first operation # and only read/write the exact same coordinates - + # 3v₀₀ - 2a₀ r.c0.c0.diff(v0.c0, a0) r.c0.c0.double() @@ -305,21 +305,21 @@ func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt # v₁ = (b₃, b₂) = (a.c1.c0, a.c0.c2) # v₂ = (b₁, b₅) = (a.c0.c1, a.c1.c2) var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: QuadraticExt[typeof(r.c0.c0)] - + template b0: untyped = a.c0.c0 template b1: untyped = a.c0.c1 template b2: untyped = a.c0.c2 template b3: untyped = a.c1.c0 template b4: untyped = a.c1.c1 template b5: untyped = a.c1.c2 - + v0.square_disjoint(b0, b4) v1.square_disjoint(b3, b2) v2.square_disjoint(b1, b5) - + # From here on, r aliasing with a is only for the first operation # and only read/write the exact same coordinates - + # 3v₀₀ - 2b₀ r.c0.c0.diff(v0.c0, b0) r.c0.c0.double() @@ -385,28 +385,31 @@ func cycl_sqr_repeated*[FT](r: var FT, a: FT, num: int) {.inline, meter.} = for _ in 1 ..< num: r.cyclotomic_square() -iterator unpack(scalarByte: byte): bool = - yield bool((scalarByte and 0b10000000) shr 7) - yield bool((scalarByte and 0b01000000) shr 6) - yield bool((scalarByte and 0b00100000) shr 5) - yield bool((scalarByte and 0b00010000) shr 4) - yield bool((scalarByte and 0b00001000) shr 3) - yield bool((scalarByte and 0b00000100) shr 2) - yield bool((scalarByte and 0b00000010) shr 1) - yield bool( scalarByte and 0b00000001) +func cyclotomic_exp*[FT](r: var FT, a: FT, exponent: static BigInt, invert: bool) {.meter.} = + ## Assumes public exponent + var na {.noInit.}: FT + na.cyclotomic_inv(a) -func cyclotomic_exp*[FT](r: var FT, a: FT, exponent: BigInt, invert: bool) {.meter.} = - var eBytes: array[(exponent.bits+7) div 8, byte] - eBytes.marshal(exponent, bigEndian) + r.setOne() + var init = false + for bit in recoding_l2r_vartime(exponent): + if init: + r.cyclotomic_square() + if bit == 1: + if not init: + r = a + init = true + else: + r *= a + elif bit == -1: + if not init: + r = na + init = true + else: + r *= na - r.setOne() - for b in eBytes: - for bit in unpack(b): - r.cyclotomic_square() - if bit: - r *= a - if invert: - r.cyclotomic_inv() + if invert: + r.cyclotomic_inv() func isInCyclotomicSubgroup*[C](a: Fp6[C]): SecretBool = ## Check if a ∈ Fpⁿ: a^Φₙ(p) = 1 @@ -491,7 +494,7 @@ func cyclotomic_square_compressed*[F](g: var G2345[F]) = # h₃ = 3(g₄² + g₅²ξ) - 2g₃ # h₄ = 3(g₂² + g₃²ξ) - 2g₄ # h₅ = 2g₅ + 3 ((g₂+g₃)²-g₂²-g₃²) - # (6 sqr) + # (6 sqr) # # or with quadratic arithmetic # (h₂+h₃u) = 3u(g₄+g₅u)² + 2(g₂-g₃u) @@ -523,14 +526,14 @@ func recover_g1*[F](g1_num, g1_den: var F, g: G2345[F]) = # g₁ = (g₅²ξ + 3g₄² - 2g₃)/4g₂ # if g₂ == 0 # g₁ = 2g₄g₅/g₃ - # + # # Theorem 3.1, this is well-defined for all # g in Gϕₙ \ {1} # if g₂=g₃=0 then g₄=g₅=0 as well # and g₀ = 1 let g2NonZero = not g.g2.isZero() var t{.noInit.}: F - + g1_num = g.g4 t = g.g5 t.ccopy(g.g4, g2NonZero) @@ -543,7 +546,7 @@ func recover_g1*[F](g1_num, g1_den: var F, g: G2345[F]) = t.square(g.g5) t *= NonResidue g1_num.cadd(t, g2NonZero) # g₅²ξ + 3g₄² - 2g₃ or 2g₄g₅ - + t.prod(g.g2, 4) g1_den = g.g3 g1_den.ccopy(t, g2NonZero) # 4g₂ or g₃ @@ -556,7 +559,7 @@ func batch_ratio_g1s*[N: static int, F]( ## This requires that all g1_den != 0 or all g1_den == 0 ## which is the case if this is used to implement ## exponentiation in cyclotomic subgroup. - + # Algorithm: Montgomery's batch inversion # - Speeding the Pollard and Elliptic Curve Methods of Factorization # Section 10.3.1 @@ -581,7 +584,7 @@ func batch_ratio_g1s*[N: static int, F]( dst[i] *= src[i].g1_num # Next iteration accInv *= src[i].g1_den - + dst[0].prod(accInv, src[0].g1_num) func recover_g0*[F]( @@ -607,12 +610,12 @@ func fromFpk*[Fpkdiv6, Fpk]( a: Fpk) = ## Convert from a sextic extension to the Karabina g₂₃₄₅ ## representation. - + # GT representations isomorphisms # =============================== # # Given a sextic twist, we can express all elements in terms of z = SNR¹ᐟ⁶ - # + # # The canonical direct sextic representation uses coefficients # # c₀ + c₁ z + c₂ z² + c₃ z³ + c₄ z⁴ + c₅ z⁵ @@ -699,14 +702,14 @@ func asFpk*[Fpkdiv6, Fpk]( {.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏ towering (direct sextic) is not implemented.".} func cyclotomic_exp_compressed*[N: static int, Fpk]( - r: var Fpk, a: Fpk, + r: var Fpk, a: Fpk, squarings: static array[N, int]) = ## Exponentiation on the cyclotomic subgroup ## via compressed repeated squarings ## Exponentiation is done least-signigicant bits first ## `squarings` represents the number of squarings ## to do before the next multiplication. - + type Fpkdiv6 = typeof(a.c0.c0) var gs {.noInit.}: array[N, G2345[Fpkdiv6]] @@ -724,7 +727,7 @@ func cyclotomic_exp_compressed*[N: static int, Fpk]( var g1s_ratio {.noInit.}: array[N, tuple[g1_num, g1_den: Fpkdiv6]] for i in 0 ..< N: recover_g1(g1s_ratio[i].g1_num, g1s_ratio[i].g1_den, gs[i]) - + var g1s {.noInit.}: array[N, Fpkdiv6] g1s.batch_ratio_g1s(g1s_ratio) @@ -746,10 +749,10 @@ func cyclotomic_exp_compressed*[N: static int, Fpk]( ## Exponentiation is done least-signigicant bits first ## `squarings` represents the number of squarings ## to do before the next multiplication. - ## + ## ## `accumSquarings` stores the accumulated squarings so far ## iff N != 1 - + type Fpkdiv6 = typeof(a.c0.c0) var gs {.noInit.}: array[N, G2345[Fpkdiv6]] @@ -767,7 +770,7 @@ func cyclotomic_exp_compressed*[N: static int, Fpk]( var g1s_ratio {.noInit.}: array[N, tuple[g1_num, g1_den: Fpkdiv6]] for i in 0 ..< N: recover_g1(g1s_ratio[i].g1_num, g1s_ratio[i].g1_den, gs[i]) - + var g1s {.noInit.}: array[N, Fpkdiv6] g1s.batch_ratio_g1s(g1s_ratio) diff --git a/constantine/math/pairings/lines_eval.nim b/constantine/math/pairings/lines_eval.nim index 367582f..6d5d7e8 100644 --- a/constantine/math/pairings/lines_eval.nim +++ b/constantine/math/pairings/lines_eval.nim @@ -24,7 +24,7 @@ import # ############################################################ # -# Lines functions +# Lines functions # # ############################################################ @@ -32,7 +32,7 @@ import # =============================== # # Given a sextic twist, we can express all elements in terms of z = SNR¹ᐟ⁶ -# +# # The canonical direct sextic representation uses coefficients # # c₀ + c₁ z + c₂ z² + c₃ z³ + c₄ z⁴ + c₅ z⁵ @@ -84,16 +84,16 @@ type ## For a D-twist ## in canonical coordinates over sextic polynomial [1, w, w², w³, w⁴, w⁵] ## when evaluating the line at P(xₚ, yₚ) - ## a.yₚ + b.xₚ w + c w³ - ## + ## a.yₚ + b.xₚ w + c w³ + ## ## This translates in 𝔽pᵏ to ## - acb000 (cubic over quadratic towering) ## - a00bc0 (quadratic over cubic towering) ## For a M-Twist ## in canonical coordinates over sextic polynomial [1, w, w², w³, w⁴, w⁵] ## when evaluating the line at the twist ψ(P)(xₚw², yₚw³) - ## a.yₚ w³ + b.xₚ w² + c - ## + ## a.yₚ w³ + b.xₚ w² + c + ## ## This translates in 𝔽pᵏ to ## - ca00b0 (cubic over quadratic towering) ## - cb00a0 (quadratic over cubic towering) @@ -136,7 +136,7 @@ func line_update[F1, F2](line: var Line[F2], P: ECP_ShortW_Aff[F1, G1]) = # a.yₚ + b.xₚ w + c w³ # # M-Twist: line at ψ(P)(xₚw², yₚw³) - # a.yₚ w³ + b.xₚ w² + c + # a.yₚ w³ + b.xₚ w² + c line.a *= P.y line.b *= P.x @@ -236,11 +236,11 @@ func line_eval_fused_double[Field]( var A {.noInit.}, B {.noInit.}, C {.noInit.}: Field var E {.noInit.}, F {.noInit.}, G {.noInit.}: Field - + template H: untyped = line.a template I: untyped = line.b template J: untyped = line.c - + A.prod(T.x, T.y) A.div2() # A = XY/2 B.square(T.y) # B = Y² @@ -383,7 +383,7 @@ func line_add*[F1, F2]( # ############################################################ # -# 𝔽pᵏ by line - 𝔽pᵏ quadratic over cubic +# 𝔽pᵏ by line - 𝔽pᵏ quadratic over cubic # # ############################################################ @@ -430,7 +430,7 @@ func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = v1 *= NonResidue f.c0.sum(v0, v1) - + else: # Lazy reduction var V0 {.noInit.}: doubleprec(Fpkdiv2) @@ -438,13 +438,13 @@ func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = V {.noInit.}: doubleprec(Fpkdiv2) t {.noInit.}: Fpkdiv6 u {.noInit.}: Fpkdiv2 - + u.sum(f.c0, f.c1) t.sum(l.a, l.b) V.mul2x_sparse_by_xy0(u, t, l.c) V1.mul2x_sparse_by_xy0(f.c1, l.b, l.c) V0.mul2x_sparse_by_x00(f.c0, l.a) - + V.diff2xMod(V, V1) V.diff2xMod(V, V0) f.c1.redc2x(V) @@ -460,7 +460,7 @@ func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be # (a0', a1', 0).(b0', b1', 0) - # + # # r0' = a0'b0' # r1' = a0'b1 + a1'b0' # or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1' @@ -476,7 +476,7 @@ func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # # a0b0 = ( x, 0, 0).( x', 0 , 0) = (xx', 0, 0 ) # a1b1 = ( y, z, 0).( y', z', 0) = (yy', yz'+zy', zz') - + # r0 = a0 b0 + ξ a1 b1 # r1 = a0 b1 + a1 b0 (Schoolbook) # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) @@ -502,11 +502,11 @@ func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin YY.prod2x(l0.b, l1.b) ZZ.prod2x(l0.c, l1.c) - var V{.noInit.}: doublePrec(Fpkdiv6) + var V{.noInit.}: doublePrec(Fpkdiv6) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 # r0 = (xx'+ξzz', yy', yz'+zy') - # r00 = xx'+ξzz'' + # r00 = xx'+ξzz'' # r01 = yy' # r02 = yz'+zy' = (y+z)(y'+z') - yy' - zz' V.prod2x(ZZ, NonResidue) @@ -592,13 +592,13 @@ func mul_sparse_by_line_cb00a0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = V {.noInit.}: doubleprec(Fpkdiv2) t {.noInit.}: Fpkdiv6 u {.noInit.}: Fpkdiv2 - + u.sum(f.c0, f.c1) t.sum(l.b, l.a) V.mul2x_sparse_by_xy0(u, l.c, t) V1.mul2x_sparse_by_0y0(f.c1, l.a) V0.mul2x_sparse_by_xy0(f.c0, l.c, l.b) - + V.diff2xMod(V, V1) V.diff2xMod(V, V0) f.c1.redc2x(V) @@ -614,7 +614,7 @@ func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be # (a0', a1', 0).(b0', b1', 0) - # + # # r0' = a0'b0' # r1' = a0'b1 + a1'b0' # or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1' @@ -630,7 +630,7 @@ func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # # a0b0 = ( z, y, 0).(z', y', 0) = (zz', zy'+yz', yy') # a1b1 = ( 0, x, 0).( 0, x', 0) = ( 0, 0, xx') - + # r0 = a0 b0 + ξ a1 b1 # r1 = a0 b1 + a1 b0 (Schoolbook) # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) @@ -656,7 +656,7 @@ func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin YY.prod2x(l0.b, l1.b) ZZ.prod2x(l0.c, l1.c) - var V{.noInit.}: doublePrec(Fpkdiv6) + var V{.noInit.}: doublePrec(Fpkdiv6) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 # r0 = (zz'+ξxx', zy'+z'y, yy') @@ -698,7 +698,7 @@ func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # ############################################################ # -# 𝔽pᵏ by line - 𝔽pᵏ cubic over quadratic +# 𝔽pᵏ by line - 𝔽pᵏ cubic over quadratic # # ############################################################ @@ -815,7 +815,7 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # = v1 # # r1 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field) - # + # # If we dive deeper: # r1 = a0b1 + a1b0 = (xy'+yx', zy'+yz') (Schoolbook - 4 muls) # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy' @@ -835,7 +835,7 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin YY.prod2x(l0.b, l1.b) ZZ.prod2x(l0.c, l1.c) - var V{.noInit.}: doublePrec(Fpkdiv6) + var V{.noInit.}: doublePrec(Fpkdiv6) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 # r0 = a0 b0 = (x, z).(x', z') @@ -844,7 +844,7 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin V.prod2x(ZZ, NonResidue) V.sum2xMod(V, XX) f.c0.c0.redc2x(V) - + t0.sum(l0.a, l0.c) t1.sum(l1.a, l1.c) V.prod2x(t0, t1) @@ -987,7 +987,7 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # = (a0 + a2) * (b0 + b2) - v0 - v2 # # r2 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field) - # + # # If we dive deeper: # r2 = a0b2 + a2b0 = (zy'+yz', xy'+x'y) (Schoolbook - 4 muls) # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy' @@ -1007,7 +1007,7 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin YY.prod2x(l0.b, l1.b) ZZ.prod2x(l0.c, l1.c) - var V{.noInit.}: doublePrec(Fpkdiv6) + var V{.noInit.}: doublePrec(Fpkdiv6) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 # r0 = a0 b0 = (z, x).(z', x') @@ -1016,7 +1016,7 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin V.prod2x(XX, NonResidue) V.sum2xMod(V, ZZ) f.c0.c0.redc2x(V) - + t0.sum(l0.c, l0.a) t1.sum(l1.c, l1.a) V.prod2x(t0, t1) @@ -1046,7 +1046,7 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # ############################################################ # -# Sparse multiplication +# Sparse multiplication # # ############################################################ @@ -1397,3 +1397,5 @@ func mul_by_2_lines*[Fpk, Fpkdiv6](f: var Fpk, line0, line1: Line[Fpkdiv6]) {.in var t{.noInit.}: Fpk t.prod_from_2_lines(line0, line1) f.mul_by_prod_of_2_lines(t) + +# func asFpk \ No newline at end of file diff --git a/constantine/math/pairings/miller_loops.nim b/constantine/math/pairings/miller_loops.nim index fae3968..3616b07 100644 --- a/constantine/math/pairings/miller_loops.nim +++ b/constantine/math/pairings/miller_loops.nim @@ -45,7 +45,8 @@ func basicMillerLoop*[FT, F1, F2]( var u3 = ate_param u3 *= 3 for i in countdown(u3.bits - 2, 1): - f.square() + if i != u3.bits - 2: + f.square() line.line_double(T, P) f.mul_by_line(line) @@ -320,7 +321,8 @@ func basicMillerLoop*[FT, F1, F2]( var u3 = ate_param u3 *= 3 for i in countdown(u3.bits - 2, 1): - f.square() + if i != u3.bits - 2: + f.square() f.double_jToN(j=0, line0, line1, Ts, Ps, N) let naf = u3.bit(i).int8 - u.bit(i).int8 # This can throw exception diff --git a/constantine/signatures/bls_signatures.nim b/constantine/signatures/bls_signatures.nim index 460bc91..a44aa51 100644 --- a/constantine/signatures/bls_signatures.nim +++ b/constantine/signatures/bls_signatures.nim @@ -8,6 +8,7 @@ import ../math/[ec_shortweierstrass, extension_fields], + ../math/io/io_bigints, ../math/elliptic/ec_shortweierstrass_batch_ops, ../math/pairings/[pairings_generic, miller_accumulators], ../math/constants/zoo_generators, @@ -365,41 +366,30 @@ func init*[T0, T1: char|byte]( H.hash(ctx.secureBlinding, secureRandomBytes, accumSepTag) -iterator unpack(scalarByte: byte): bool = - yield bool((scalarByte and 0b10000000) shr 7) - yield bool((scalarByte and 0b01000000) shr 6) - yield bool((scalarByte and 0b00100000) shr 5) - yield bool((scalarByte and 0b00010000) shr 4) - yield bool((scalarByte and 0b00001000) shr 3) - yield bool((scalarByte and 0b00000100) shr 2) - yield bool((scalarByte and 0b00000010) shr 1) - yield bool( scalarByte and 0b00000001) - -func scalarMul_doubleAdd_vartime[EC]( +func scalarMul_minHammingWeight_vartime[EC]( P: var EC, - scalarCanonical: openArray[byte], + scalar: BigInt, ) = ## **Variable-time** Elliptic Curve Scalar Multiplication ## ## P <- [k] P ## - ## This uses the double-and-add algorithm - ## This is UNSAFE to use with secret data and is only intended for signature verification - ## to multiply by random blinding scalars. + ## This uses an online recoding with minimum Hamming Weight + ## (which is not NAF, NAF is least-significant bit to most) ## Due to those scalars being 64-bit, window-method or endomorphism acceleration are slower ## than double-and-add. ## ## This is highly VULNERABLE to timing attacks and power analysis attacks. - var t0{.noInit.}, t1{.noInit.}: typeof(P) + ## For our usecase, scaling with a random number not in attacker control, + ## leaking the scalar bits is not an issue. + var t0{.noInit.}: typeof(P) t0.setInf() - t1.setInf() - for scalarByte in scalarCanonical: - for bit in unpack(scalarByte): - t1.double(t0) - if bit: - t0.sum(t1, P) - else: - t0 = t1 + for bit in recoding_l2r_vartime(scalar): + t0.double() + if bit == 1: + t0 += P + elif bit == -1: + t0 -= P P = t0 func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff]( @@ -434,7 +424,12 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff]( # we only use a 1..<2^64 random blinding factor. # We assume that the attacker cannot resubmit 2^64 times # forged public keys and signatures. + # # Discussion https://ethresear.ch/t/fast-verification-of-multiple-bls-signatures/5407 + # See also + # - Faster batch forgery identification + # Daniel J. Bernstein, Jeroen Doumen, Tanja Lange, and Jan-Jaap Oosterwijk, 2012 + # https://eprint.iacr.org/2012/549 # We only use the first 8 bytes for blinding # but use the full 32 bytes to derive new random scalar @@ -459,8 +454,10 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff]( pkG1_jac.fromAffine(pubkey) sigG2_jac.fromAffine(signature) - pkG1_jac.scalarMul_doubleAdd_vartime(ctx.secureBlinding.toOpenArray(0, 7)) - sigG2_jac.scalarMul_doubleAdd_vartime(ctx.secureBlinding.toOpenArray(0, 7)) + var randFactor{.noInit.}: BigInt[64] + randFactor.unmarshal(ctx.secureBlinding.toOpenArray(0, 7), bigEndian) + pkG1_jac.scalarMul_minHammingWeight_vartime(randFactor) + sigG2_jac.scalarMul_minHammingWeight_vartime(randFactor) if ctx.aggSigOnce == false: ctx.aggSig = sigG2_jac @@ -493,8 +490,10 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff]( sigG1_jac.fromAffine(signature) - hmsgG1_jac.scalarMul_doubleAdd_vartime(ctx.secureBlinding.toOpenArray(0, 7)) - sigG1_jac.scalarMul_doubleAdd_vartime(ctx.secureBlinding.toOpenArray(0, 7)) + var randFactor{.noInit.}: BigInt[64] + randFactor.unmarshal(ctx.secureBlinding.toOpenArray(0, 7), bigEndian) + hmsgG1_jac.scalarMul_minHammingWeight_vartime(randFactor) + sigG1_jac.scalarMul_minHammingWeight_vartime(randFactor) if ctx.aggSigOnce == false: ctx.aggSig = sigG1_jac diff --git a/tests/math/support/ec_reference_scalar_mult.nim b/tests/math/support/ec_reference_scalar_mult.nim index bae7185..e73816f 100644 --- a/tests/math/support/ec_reference_scalar_mult.nim +++ b/tests/math/support/ec_reference_scalar_mult.nim @@ -39,14 +39,33 @@ func unsafe_ECmul_double_add*[EC]( var scalarCanonical: array[(scalar.bits+7) div 8, byte] scalarCanonical.marshal(scalar, bigEndian) - var t0{.noInit.}, t1{.noInit.}: typeof(P) + var t0: typeof(P) t0.setInf() - t1.setInf() for scalarByte in scalarCanonical: for bit in unpack(scalarByte): - t1.double(t0) + t0.double() if bit: - t0.sum(t1, P) - else: - t0 = t1 + t0 += P P = t0 + +func unsafe_ECmul_minHammingWeight*[EC]( + P: var EC, + scalar: BigInt) = + ## **Unsafe** Elliptic Curve Scalar Multiplication + ## + ## P <- [k] P + ## + ## This uses an online recoding with minimum Hamming Weight + ## (which is not NAF, NAF is least-significant bit to most) + ## This is UNSAFE to use in production and only intended for testing purposes. + ## + ## This is highly VULNERABLE to timing attacks and power analysis attacks + var t0{.noInit.}: typeof(P) + t0.setInf() + for bit in recoding_l2r_vartime(scalar): + t0.double() + if bit == 1: + t0 += P + elif bit == -1: + t0 -= P + P = t0 \ No newline at end of file diff --git a/tests/math/t_ec_template.nim b/tests/math/t_ec_template.nim index 8faf79a..b75d6b4 100644 --- a/tests/math/t_ec_template.nim +++ b/tests/math/t_ec_template.nim @@ -421,11 +421,15 @@ proc run_EC_mul_vs_ref_impl*( var impl = a reference = a + refMinWeight = a impl.scalarMulGeneric(exponent) reference.unsafe_ECmul_double_add(exponent) + refMinWeight.unsafe_ECmul_minHammingWeight(exponent) - check: bool(impl == reference) + check: + bool(impl == reference) + bool(impl == refMinWeight) test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform) test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)