Batch additions (#207)

* Batch elliptic curve addition

* accelerate chained muls

* jac mixed add handle doubling. jac additions handle aliasing when adding infinity

* properly skip sanitizer on BLS signature test

* properly skip sanitizer² on BLS signature test
This commit is contained in:
Mamy Ratsimbazafy 2022-10-29 22:43:40 +02:00 committed by GitHub
parent 93654d580e
commit 928f515582
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1027 additions and 219 deletions

View File

@ -0,0 +1,68 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
# Helpers
../helpers/static_for,
./bench_elliptic_template,
# Standard library
std/strutils
# ############################################################
#
# Benchmark of the G1 group of
# Short Weierstrass elliptic curves
# in (homogeneous) projective coordinates
#
# ############################################################
const Iters = 10_000
const AvailableCurves = [
# BN254_Snarks,
BLS12_381,
]
proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
addBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
addBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
separator()
main()
notes()

View File

@ -22,6 +22,7 @@ import
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_batch_ops,
ec_scalar_mul, ec_endomorphism_accel],
# Helpers
../helpers/[prng_unsafe, static_for],
@ -35,10 +36,10 @@ export abstractions # generic sandwich on SecretBool and SecretBool in Jacobian
proc separator*() = separator(177)
macro fixEllipticDisplay(T: typedesc): untyped =
macro fixEllipticDisplay(EC: typedesc): untyped =
# At compile-time, enums are integers and their display is buggy
# we get the Curve ID instead of the curve name.
let instantiated = T.getTypeInst()
let instantiated = EC.getTypeInst()
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
let fieldName = $instantiated[1][1][0]
let curveName = $Curve(instantiated[1][1][1].intVal)
@ -53,100 +54,108 @@ proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int6
else:
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
template bench(op: string, EC: typedesc, iters: int, body: untyped): untyped =
measure(iters, startTime, stopTime, startClk, stopClk, body)
report(op, fixEllipticDisplay(T), startTime, stopTime, startClk, stopClk, iters)
report(op, fixEllipticDisplay(EC), startTime, stopTime, startClk, stopClk, iters)
proc addBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: T
let P = rng.random_unsafe(T)
let Q = rng.random_unsafe(T)
bench("EC Add " & G1_or_G2, T, iters):
proc addBench*(EC: typedesc, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC)
let Q = rng.random_unsafe(EC)
bench("EC Add " & $EC.G, EC, iters):
r.sum(P, Q)
proc mixedAddBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: T
let P = rng.random_unsafe(T)
let Q = rng.random_unsafe(T)
var Qaff: ECP_ShortW_Aff[T.F, T.G]
proc mixedAddBench*(EC: typedesc, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC)
let Q = rng.random_unsafe(EC)
var Qaff: ECP_ShortW_Aff[EC.F, EC.G]
Qaff.affine(Q)
bench("EC Mixed Addition " & G1_or_G2, T, iters):
bench("EC Mixed Addition " & $EC.G, EC, iters):
r.madd(P, Qaff)
proc doublingBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: T
let P = rng.random_unsafe(T)
bench("EC Double " & G1_or_G2, T, iters):
proc doublingBench*(EC: typedesc, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC)
bench("EC Double " & $EC.G, EC, iters):
r.double(P)
proc affFromProjBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
let P = rng.random_unsafe(T)
bench("EC Projective to Affine " & G1_or_G2, T, iters):
proc affFromProjBench*(EC: typedesc, iters: int) =
var r {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
let P = rng.random_unsafe(EC)
bench("EC Projective to Affine " & $EC.G, EC, iters):
r.affine(P)
proc affFromJacBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
let P = rng.random_unsafe(T)
bench("EC Jacobian to Affine " & G1_or_G2, T, iters):
proc affFromJacBench*(EC: typedesc, iters: int) =
var r {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
let P = rng.random_unsafe(EC)
bench("EC Jacobian to Affine " & $EC.G, EC, iters):
r.affine(P)
proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
proc scalarMulGenericBench*(EC: typedesc, window: static int, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
var r {.noInit.}: T
let P = rng.random_unsafe(T) # TODO: clear cofactor
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (window-" & $window & ", generic)", T, iters):
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-" & $window & ", generic)", EC, iters):
r = P
r.scalarMulGeneric(exponent, window)
proc scalarMulEndo*(T: typedesc, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
proc scalarMulEndo*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
var r {.noInit.}: T
let P = rng.random_unsafe(T) # TODO: clear cofactor
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (endomorphism accelerated)", T, iters):
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (endomorphism accelerated)", EC, iters):
r = P
r.scalarMulEndo(exponent)
proc scalarMulEndoWindow*(T: typedesc, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
proc scalarMulEndoWindow*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
var r {.noInit.}: T
let P = rng.random_unsafe(T) # TODO: clear cofactor
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (window-2, endomorphism accelerated)", T, iters):
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-2, endomorphism accelerated)", EC, iters):
r = P
when T.F is Fp:
when EC.F is Fp:
r.scalarMulGLV_m2w2(exponent)
else:
{.error: "Not implemented".}
proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
proc scalarMulUnsafeDoubleAddBench*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
var r {.noInit.}: T
let P = rng.random_unsafe(T) # TODO: clear cofactor
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (unsafe reference DoubleAdd)", T, iters):
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe reference DoubleAdd)", EC, iters):
r = P
r.unsafe_ECmul_double_add(exponent)
proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
for i in 0 ..< numPoints:
points[i] = rng.random_unsafe(ECP_ShortW_Aff[EC.F, EC.G])
var r{.noInit.}: EC
if useBatching:
bench("EC Multi-Addition batched " & $EC.G & " (" & $numPoints & " points)", EC, iters):
r.sum_batch_vartime(points)
else:
bench("EC Multi-Addition unbatched mixed add " & $EC.G & " (" & $numPoints & " points)", EC, iters):
r.setInf()
for i in 0 ..< numPoints:
r += points[i]

View File

@ -96,13 +96,13 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# Elliptic curve arithmetic G1
# ----------------------------------------------------------
# ("tests/math/t_ec_shortw_prj_g1_add_double.nim", false),
("tests/math/t_ec_shortw_prj_g1_add_double.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mul_sanity.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mul_distri.nim", false),
("tests/math/t_ec_shortw_prj_g1_mul_vs_ref.nim", false),
("tests/math/t_ec_shortw_prj_g1_mixed_add.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_add_double.nim", false),
("tests/math/t_ec_shortw_jac_g1_add_double.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mul_sanity.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mul_distri.nim", false),
("tests/math/t_ec_shortw_jac_g1_mul_vs_ref.nim", false),
@ -115,49 +115,49 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# Elliptic curve arithmetic G2
# ----------------------------------------------------------
# ("tests/math/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_prj_g2_mixed_add_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_381.nim", false),
("tests/math/t_ec_shortw_prj_g2_add_double_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_381.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_381.nim", false),
("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_377.nim", false),
("tests/math/t_ec_shortw_prj_g2_add_double_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_377.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_377.nim", false),
("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bw6_761.nim", false),
("tests/math/t_ec_shortw_prj_g2_add_double_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bw6_761.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bw6_761.nim", false),
("tests/math/t_ec_shortw_prj_g2_mixed_add_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_jac_g2_add_double_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_jac_g2_mixed_add_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_381.nim", false),
("tests/math/t_ec_shortw_jac_g2_add_double_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_381.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_381.nim", false),
("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_377.nim", false),
("tests/math/t_ec_shortw_jac_g2_add_double_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_377.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_377.nim", false),
("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bw6_761.nim", false),
("tests/math/t_ec_shortw_jac_g2_add_double_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bw6_761.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bw6_761.nim", false),
@ -177,6 +177,11 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# ----------------------------------------------------------
("tests/math/t_ec_shortw_prj_edge_cases.nim", false),
# Elliptic curve arithmetic - batch computation
# ----------------------------------------------------------
("tests/math/t_ec_shortw_prj_g1_batch_add.nim", false),
("tests/math/t_ec_shortw_jac_g1_batch_add.nim", false),
# Subgroups and cofactors
# ----------------------------------------------------------
("tests/math/t_ec_subgroups_bn254_nogami.nim", false),
@ -232,6 +237,7 @@ const benchDesc = [
"bench_fp6",
"bench_fp12",
"bench_ec_g1",
"bench_ec_g1_batch",
"bench_ec_g2",
"bench_pairing_bls12_377",
"bench_pairing_bls12_381",
@ -262,13 +268,14 @@ const skipSanitizers = [
"tests/math/t_ec_sage_bn254_snarks.nim",
"tests/math/t_ec_sage_bls12_377.nim",
"tests/math/t_ec_sage_bls12_381.nim",
"tests/t_blssig_pop_on_bls12381_g2.nim",
"tests/t_hash_to_field.nim",
"tests/t_hash_to_curve.nim",
"tests/t_hash_to_curve_random.nim",
"tests/t_mac_poly1305.nim",
"tests/t_mac_hmac.nim",
"tests/t_kdf_hkdf.nim",
"tests/t_ethereum_eip2333_bls12381_key_derivation"
"tests/t_ethereum_eip2333_bls12381_key_derivation.nim"
]
when defined(windows):
@ -302,8 +309,8 @@ template setupCommand(): untyped {.dirty.} =
var flags = flags
when not defined(windows):
# Not available in MinGW https://github.com/libressl-portable/portable/issues/54
flags &= " --passC:-fstack-protector-all"
let command = "nim " & lang & cc & " " & flags &
flags &= " --passC:-fstack-protector-strong"
let command = "nim " & lang & cc & " -d:release " & flags &
" --verbosity:0 --outdir:build/testsuite -r --hints:off --warnings:off " &
" --nimcache:nimcache/" & path & " " &
path
@ -672,37 +679,55 @@ task bench_fp12_clang_noasm, "Run benchmark 𝔽p12 with clang - no Assembly":
# Elliptic curve G1
# ------------------------------------------
task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Default compiler":
task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Default compiler":
runBench("bench_ec_g1")
task bench_ec_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
task bench_ec_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - GCC":
runBench("bench_ec_g1", "gcc")
task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang":
task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Clang":
runBench("bench_ec_g1", "clang")
task bench_ec_g1_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC no Assembly":
task bench_ec_g1_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - GCC no Assembly":
runBench("bench_ec_g1", "gcc", useAsm = false)
task bench_ec_g1_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang no Assembly":
task bench_ec_g1_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Clang no Assembly":
runBench("bench_ec_g1", "clang", useAsm = false)
# Elliptic curve G1 - batch operations
# ------------------------------------------
task bench_ec_g1_batch, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Default compiler":
runBench("bench_ec_g1_batch")
task bench_ec_g1_batch_gcc, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - GCC":
runBench("bench_ec_g1_batch", "gcc")
task bench_ec_g1_batch_clang, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Clang":
runBench("bench_ec_g1_batch", "clang")
task bench_ec_g1_batch_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - GCC no Assembly":
runBench("bench_ec_g1_batch", "gcc", useAsm = false)
task bench_ec_g1_batch_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Clang no Assembly":
runBench("bench_ec_g1", "clang", useAsm = false)
# Elliptic curve G2
# ------------------------------------------
task bench_ec_g2, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Default compiler":
task bench_ec_g2, "Run benchmark on Elliptic Curve group 𝔾2 - Default compiler":
runBench("bench_ec_g2")
task bench_ec_g2_gcc, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - GCC":
task bench_ec_g2_gcc, "Run benchmark on Elliptic Curve group 𝔾2 - GCC":
runBench("bench_ec_g2", "gcc")
task bench_ec_g2_clang, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Clang":
task bench_ec_g2_clang, "Run benchmark on Elliptic Curve group 𝔾2 - Clang":
runBench("bench_ec_g2", "clang")
task bench_ec_g2_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - GCC no Assembly":
task bench_ec_g2_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - GCC no Assembly":
runBench("bench_ec_g2", "gcc", useAsm = false)
task bench_ec_g2_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Clang no Assembly":
task bench_ec_g2_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Clang no Assembly":
runBench("bench_ec_g2", "clang", useAsm = false)
# Pairings

View File

@ -62,7 +62,7 @@ Ry = λ(Px - Rx) - Py
```
but in the case of addition
```
λ = (Qy - Py) / (Px - Qx)
λ = (Qy - Py) / (Qx - Px)
```
which is undefined for P == Q or P == -Q (as `-(x, y) = (x, -y)`)

View File

@ -14,6 +14,9 @@ import
../io/[io_fields, io_extfields],
../constants/zoo_constants
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Elliptic Curve in Short Weierstrass form

View File

@ -0,0 +1,303 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/[abstractions, allocs],
../arithmetic,
../extension_fields,
./ec_shortweierstrass_affine,
./ec_shortweierstrass_jacobian,
./ec_shortweierstrass_projective
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Elliptic Curve in Short Weierstrass form
# Batch addition
#
# ############################################################
# Affine primitives
# ------------------------------------------------------------
#
# The equation for elliptic curve addition is in affine (x, y) coordinates:
#
# P + Q = R
# (Px, Py) + (Qx, Qy) = (Rx, Ry)
#
# with
# Rx = λ² - Px - Qx
# Ry = λ(Px - Rx) - Py
#
# in the case of addition
# λ = (Qy - Py) / (Qx - Px)
#
# which is undefined for P == Q or P == -Q as -(x, y) = (x, -y)
#
# if P = Q, the doubling formula uses the slope of the tangent at the limit
# λ = (3 Px² + a) / (2 Px)
#
# if P = -Q, the sum is the point at infinity
#
# ~~~~
#
# Those formulas require
# addition: 2M + 1S + 1I
# doubling: 2M + 2S + 1I
#
# Inversion is very expensive:
# 119.5x multiplications (with ADX) for BN254
# 98.4x multiplications (with ADX) for BLS12-381
#
# However, n inversions can use Montgomery's batch inversion
# at the cost of 3(n-1)M + 1I
#
# Hence batch addition can have an asymptotic cost of
# 5M + 1S
# Compared to
# Jacobian addition: 12M + 4S
# Jacobian mixed addition: 7M + 4S
# Projective addition: 12M (for curves in the form y² = x³ + b)
# Projective mixed addition: 11M (for curves in the form y² = x³ + b)
func lambdaAdd[F; G: static Subgroup](lambda_num, lambda_den: var F, P, Q: ECP_ShortW_Aff[F, G]) =
## Compute the slope of the line (PQ)
lambda_num.diff(Q.y, P.y)
lambda_den.diff(Q.x, P.x)
func lambdaDouble[F; G: static Subgroup](lambda_num, lambda_den: var F, P: ECP_ShortW_Aff[F, G]) =
## Compute the tangent at P
lambda_num.square(P.x)
lambda_num *= 3
when F.C.getCoefA() != 0:
t += F.C.getCoefA()
lambda_den.double(P.y)
func affineAdd[F; G: static Subgroup](
r: var ECP_ShortW_Aff[F, G],
lambda: var F,
P, Q: ECP_ShortW_Aff[F, G]) =
r.x.square(lambda)
r.x -= P.x
r.x -= Q.x
r.y.diff(P.x, r.x)
r.y *= lambda
r.y -= P.y
{.push checks:off.}
func accum_half_vartime[F; G: static Subgroup](
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
lambdas: ptr UncheckedArray[tuple[num, den: F]],
len: uint) {.noinline.} =
## Affine accumulation of half the points into the other half
## Warning ⚠️ : variable-time
##
## Accumulate `len` points pairwise into `len/2`
##
## Input/output:
## - points: `len/2` affine points to add (must be even)
## Partial sums are stored in [0, len/2)
## [len/2, len) data has been destroyed
##
## Scratchspace:
## - Lambdas
##
## Output:
## - r
##
## Warning ⚠️ : cannot be inlined if used in loop due to the use of alloca
debug: doAssert len and 1 == 0, "There must be an even number of points"
let N = len div 2
# Step 1: Compute numerators and denominators of λᵢ = λᵢ_num / λᵢ_den
for i in 0 ..< N:
let p = i
let q = i+N
let q_prev = i-1+N
# As we can't divide by 0 in normal cases, λᵢ_den != 0,
# so we use it to indicate special handling.
template markSpecialCase(): untyped {.dirty.} =
# we use Qy as an accumulator, so we save Qy in λᵢ_num
lambdas[i].num = points[q].y
# Mark for special handling
lambdas[i].den.setZero()
# Step 2: Accumulate denominators in Qy, which is not used anymore.
if i == 0:
points[q].y.setOne()
else:
points[q].y = points[q_prev].y
# Special case 1: infinity points have affine coordinates (0, 0) by convention
# it doesn't match the y²=x³+ax+b equation so slope formula need special handling
if points[p].isInf().bool() or points[q].isInf().bool():
markSpecialCase()
continue
# Special case 2: λ = (Qy-Py)/(Qx-Px) which is undefined when Px == Qx
# This happens when P == Q or P == -Q
if bool(points[p].x == points[q].x):
if bool(points[p].y == points[q].y):
lambdaDouble(lambdas[i].num, lambdas[i].den, points[p])
else: # P = -Q, so P+Q = inf
markSpecialCase()
continue
else:
lambdaAdd(lambdas[i].num, lambdas[i].den, points[p], points[q])
# Step 2: Accumulate denominators in Qy, which is not used anymore.
if i == 0:
points[q].y = lambdas[i].den
else:
points[q].y.prod(points[q_prev].y, lambdas[i].den, skipFinalSub = true)
# Step 3: batch invert
var accInv {.noInit.}: F
accInv.setZero()
points[len-1].y += accInv # Undo skipFinalSub, ensure that the last accum is in canonical form, before inversion
accInv.inv(points[len-1].y)
# Step 4: Compute the partial sums
template recallSpecialCase(i, p, q): untyped {.dirty.} =
# As Qy is used as an accumulator, we saved Qy in λᵢ_num
# For special caseshandling, restore it.
points[q].y = lambdas[i].num
if points[p].isInf().bool():
points[i] = points[q]
elif points[q].x.isZero().bool() and lambdas[i].num.isZero().bool():
discard "points[i] = points[p]" # i == p
else:
points[i].setInf()
for i in countdown(N-1, 1):
let p = i
let q = i+N
let q_prev = i-1+N
if lambdas[i].den.isZero().bool():
recallSpecialCase(i, p, q)
continue
# Compute lambda
points[q].y.prod(accInv, points[q_prev].y, skipFinalSub = true)
points[q].y.prod(points[q].y, lambdas[i].num, skipFinalSub = true)
# Compute EC addition
var r{.noInit.}: ECP_ShortW_Aff[F, G]
r.affineAdd(lambda = points[q].y, points[p], points[q])
# Store result
points[i] = r
# Next iteration
accInv.prod(accInv, lambdas[i].den, skipFinalSub = true)
block: # Tail
let i = 0
let p = 0
let q = N
if lambdas[0].den.isZero().bool():
recallSpecialCase(i, p, q)
else:
# Compute lambda
points[q].y.prod(lambdas[0].num, accInv, skipFinalSub = true)
# Compute EC addition
var r{.noInit.}: ECP_ShortW_Aff[F, G]
r.affineAdd(lambda = points[q].y, points[p], points[q])
# Store result
points[0] = r
{.pop.}
# Batch addition: jacobian
# ------------------------------------------------------------
{.push checks:off.}
func accumSum_chunk_vartime[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
lambdas: ptr UncheckedArray[tuple[num, den: F]],
len: uint) =
## Accumulate `points` into r.
## `r` is NOT overwritten
## r += ∑ points
const ChunkThreshold = 16
var n = len
while n >= ChunkThreshold:
if (n and 1) == 1: # odd number of points
## Accumulate the last
r += points[n-1]
n -= 1
# Compute [0, n/2) += [n/2, n)
accum_half_vartime(points, lambdas, n)
# Next chunk
n = n div 2
# Tail
for i in 0'u ..< n:
r += points[i]
{.pop.}
{.push checks:off.}
func sum_batch_vartime*[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: openArray[ECP_ShortW_Aff[F, G]]) =
## Batch addition of `points` into `r`
## `r` is overwritten
# We chunk the addition to limit memory usage
# especially as we allocate on the stack.
# From experience in high-performance computing,
# here are the constraints we want to optimize for
# 1. MSVC limits stack to 1MB by default, we want to use a fraction of that.
# 2. We want to use a large fraction of L2 cache, but not more.
# 3. We want to use a large fraction of the memory addressable by the TLB.
# 4. We optimize for hyperthreading with 2 sibling threads (Xeon Phi hyperthreads have 4 siblings).
# Meaning we want to use less than half the L2 cache so that if run on siblings threads (same physical core),
# the chunks don't evict each other.
#
# Hardware:
# - a Raspberry Pi 4 (2019, Cortex A72) has 1MB L2 cache size
# - Intel Ice Lake (2019, Core 11XXX) and AMD Zen 2 (2019, Ryzen 3XXX) have 512kB L2 cache size
#
# After one chunk is processed we are well within all 64-bit CPU L2 cache bounds
# as we halve after each chunk.
r.setInf()
const maxChunkSize = 262144 # 2¹⁸ = 262144
const maxStride = maxChunkSize div sizeof(ECP_ShortW_Aff[F, G])
let n = min(maxStride, points.len)
let accumulators = alloca(ECP_ShortW_Aff[F, G], n)
let lambdas = alloca(tuple[num, den: F], n)
for i in countup(0, points.len-1, maxStride):
let n = min(maxStride, points.len - i)
let size = n * sizeof(ECP_ShortW_Aff[F, G])
copyMem(accumulators[0].addr, points[i].unsafeAddr, size)
r.accumSum_chunk_vartime(accumulators, lambdas, uint n)
{.pop.}

View File

@ -15,6 +15,9 @@ import
export Subgroup
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Elliptic Curve in Short Weierstrass form
@ -47,15 +50,15 @@ func `==`*(P, Q: ECP_ShortW_Jac): SecretBool =
var z1z1 {.noInit.}, z2z2 {.noInit.}: F
var a{.noInit.}, b{.noInit.}: F
z1z1.square(P.z)
z2z2.square(Q.z)
z1z1.square(P.z, skipFinalSub = true)
z2z2.square(Q.z, skipFinalSub = true)
a.prod(P.x, z2z2)
b.prod(Q.x, z1z1)
result = a == b
a.prod(P.y, Q.z)
b.prod(Q.y, P.z)
a.prod(P.y, Q.z, skipFinalSub = true)
b.prod(Q.y, P.z, skipFinalSub = true)
a *= z2z2
b *= z1z1
result = result and a == b
@ -108,9 +111,9 @@ func trySetFromCoordsXandZ*[F; G](
result = sqrt_if_square(P.y)
var z2 {.noInit.}: F
z2.square(z)
z2.square(z, skipFinalSub = true)
P.x.prod(x, z2)
P.y *= z2
P.y.prod(P.y, z2, skipFinalSub = true)
P.y *= z
P.z = z
@ -209,13 +212,13 @@ template sumImpl[F; G: static Subgroup](
block: # Addition-only, check for exceptional cases
var Z2Z2 {.noInit.}, U2 {.noInit.}, S2 {.noInit.}: F
Z2Z2.square(Q.z)
S1.prod(Q.z, Z2Z2)
Z2Z2.square(Q.z, skipFinalSub = true)
S1.prod(Q.z, Z2Z2, skipFinalSub = true)
S1 *= P.y # S₁ = Y₁*Z₂³
U1.prod(P.x, Z2Z2) # U₁ = X₁*Z₂²
Z1Z1.square(P.z)
S2.prod(P.z, Z1Z1)
Z1Z1.square(P.z, skipFinalSub = true)
S2.prod(P.z, Z1Z1, skipFinalSub = true)
S2 *= Q.y # S₂ = Y₂*Z₁³
U2.prod(Q.x, Z1Z1) # U₂ = X₂*Z₁²
@ -285,10 +288,10 @@ template sumImpl[F; G: static Subgroup](
var b{.noInit.} = HH_or_YY
a.ccopy(P.x, isDbl)
b.ccopy(P.x, isDbl)
HHH_or_Mpre.prod(a, b) # HHH or X₁²
HHH_or_Mpre.prod(a, b, true) # HHH or X₁²
# Assuming doubling path
a.square(HHH_or_Mpre)
a.square(HHH_or_Mpre, skipFinalSub = true)
a *= HHH_or_Mpre # a = 3X₁²
b.square(Z1Z1)
# b.mulCheckSparse(CoefA) # TODO: broken static compile-time type inference
@ -302,31 +305,33 @@ template sumImpl[F; G: static Subgroup](
# - R_or_M is set with R (add) or M (dbl)
# - HHH_or_Mpre contains HHH (add) or garbage precomputation (dbl)
# - V_or_S is set with V = U₁*HH (add) or S = X₁*YY (dbl)
var o {.noInit.}: typeof(r)
block: # Finishing line
# we can start using r, while carefully handling r and P or Q aliasing
var t {.noInit.}: F
t.double(V_or_S)
r.x.square(R_or_M)
r.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
r.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
o.x.square(R_or_M)
o.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
o.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
V_or_S -= r.x # V-X₃ (add) or S-X₃ (dbl)
r.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
V_or_S -= o.x # V-X₃ (add) or S-X₃ (dbl)
o.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
HHH_or_Mpre.ccopy(HH_or_YY, isDbl) # HHH (add) or YY (dbl)
S1.ccopy(HH_or_YY, isDbl) # S1 (add) or YY (dbl)
HHH_or_Mpre *= S1 # HHH*S1 (add) or YY² (dbl)
r.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
o.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
t = Q.z
t.ccopy(H_or_Y, isDbl) # Z₂ (add) or Y₁ (dbl)
t *= P.z # Z₁Z₂ (add) or Y₁Z₁ (dbl)
r.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
r.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
t.prod(t, P.z, true) # Z₁Z₂ (add) or Y₁Z₁ (dbl)
o.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
o.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
# if P or R were infinity points they would have spread 0 with Z₁Z₂
block: # Infinity points
r.ccopy(Q, P.isInf())
r.ccopy(P, Q.isInf())
o.ccopy(Q, P.isInf())
o.ccopy(P, Q.isInf())
r = o
func sum*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
@ -379,81 +384,176 @@ func madd*[F; G: static Subgroup](
P: ECP_ShortW_Jac[F, G],
Q: ECP_ShortW_Aff[F, G]
) =
## Elliptic curve mixed addition for Short Weierstrass curves
## with p in Jacobian coordinates and Q in affine coordinates
## Elliptic curve mixed addition for Short Weierstrass curves in Jacobian coordinates
## with the curve ``a`` being a parameter for summing on isogenous curves.
##
## R = P + Q
##
## TODO: ⚠️ cannot handle P == Q
# "madd-2007-bl" mixed addition formula - https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
# with conditional copies to handle infinity points
# Assumptions: Z2=1.
# Cost: 7M + 4S + 9add + 3*2 + 1*4.
# Source: 2007 BernsteinLange.
# Explicit formulas:
##
## Short Weierstrass curves have the following equation in Jacobian coordinates
## Y² = X³ + aXZ⁴ + bZ⁶
## from the affine equation
## y² = x³ + a x + b
##
## ``r`` is initialized/overwritten with the sum
## ``CoefA`` allows fast path for curve with a == 0 or a == -3
## and also allows summing on curve isogenies.
##
## Implementation is constant-time, in particular it will not expose
## that P == Q or P == -Q or P or Q are the infinity points
## to simple side-channel attacks (SCA)
## This is done by using a "complete" or "exception-free" addition law.
#
# Z1Z1 = Z1²
# U2 = X2*Z1Z1
# S2 = Y2*Z1*Z1Z1
# H = U2-X1
# HH = H2
# I = 4*HH
# J = H*I
# r = 2*(S2-Y1)
# V = X1*I
# X3 = r²-J-2*V
# Y3 = r*(V-X3)-2*Y1*J
# Z3 = (Z1+H)²-Z1Z1-HH
var Z1Z1 {.noInit.}, H {.noInit.}, HH {.noInit.}, I{.noInit.}, J {.noInit.}: F
# Implementation, see write-up at the bottom.
# We fuse addition and doubling with condition copy by swapping
# terms with the following table
#
# | Addition, Cohen et al, 1998 | Doubling, Cohen et al, 1998 | Doubling = -3 | Doubling a = 0 |
# | 12M + 4S + 6add + 1*2 | 3M + 6S + 1*a + 4add + 1*2 + 1*3 + 1half | | |
# | ----------------------------- | -----------------------------------------| ----------------- | -------------- |
# | Z₁Z₁ = Z₁² | Z₁Z₁ = Z₁² | | |
# | Z₂Z₂ = Z₂² | | | |
# | | | | |
# | U₁ = X₁*Z₂Z₂ | | | |
# | U₂ = X₂*Z₁Z₁ | | | |
# | S₁ = Y₁*Z₂*Z₂Z₂ | | | |
# | S₂ = Y₂*Z₁*Z₁Z₁ | | | |
# | H = U₂-U₁ # P=-Q, P=Inf, P=Q | | | |
# | R = S₂-S₁ # Q=Inf | | | |
# | | | | |
# | HH = H² | YY = Y₁² | | |
# | V = U₁*HH | S = X₁*YY | | |
# | HHH = H*HH | M = (3*X₁²+a*ZZ²)/2 | 3(X₁-ZZ)(X₁+ZZ)/2 | 3X₁²/2 |
# | | | | |
# | X₃ = R²-HHH-2*V | X₃ = M²-2*S | | |
# | Y₃ = R*(V-X₃)-S₁*HHH | Y₃ = M*(S-X₃)-YY*YY | | |
# | Z₃ = Z₁*Z₂*H | Z₃ = Y₁*Z₁ | | |
#
# For mixed adddition we just set Z₂ = 1
var Z1Z1 {.noInit.}, U1 {.noInit.}, S1 {.noInit.}, H {.noInit.}, R {.noinit.}: F
# Preload P and Q in cache
let pIsInf = P.isInf()
let qIsInf = Q.isInf()
block: # Addition-only, check for exceptional cases
var U2 {.noInit.}, S2 {.noInit.}: F
U1 = P.x
S1 = P.y
Z1Z1.square(P.z) # Z₁Z₁ = Z₁²
r.z.prod(P.z, Z1Z1) # P.Z is hot in cache, keep it in same register.
r.z *= Q.y # S₂ = Y₂Z₁Z₁Z₁ -- r.z used as S₂
Z1Z1.square(P.z, skipFinalSub = true)
S2.prod(P.z, Z1Z1, skipFinalSub = true)
S2 *= Q.y # S₂ = Y₂*Z₁³
U2.prod(Q.x, Z1Z1) # U₂ = X₂*Z₁²
H.prod(Q.x, Z1Z1) # U₂ = X₂Z₁Z₁
H -= P.x # H = U₂ - X₁
H.diff(U2, U1) # H = U₂-U
R.diff(S2, S1) # R = S₂-S
HH.square(H) # HH = H²
# Exceptional cases
# Expressing H as affine, if H == 0, P == Q or -Q
# H = U₂-U₁ = X₂*Z₁² - X₁*Z₂² = x₂*Z₂²*Z₁² - x₁*Z₁²*Z₂²
# if H == 0 && R == 0, P = Q -> doubling
# if only H == 0, P = -Q -> infinity, implied in Z₃ = Z₁*Z₂*H = 0
# if only R == 0, P and Q are related by the cubic root endomorphism
let isDbl = H.isZero() and R.isZero()
I.double(HH)
I.double() # I = 4HH
# Rename buffers under the form (add_or_dbl)
template R_or_M: untyped = R
template H_or_Y: untyped = H
template V_or_S: untyped = U1
var HH_or_YY {.noInit.}: F
var HHH_or_Mpre {.noInit.}: F
J.prod(H, I) # J = H*I
r.y.prod(P.x, I) # V = X₁*I -- r.y used as V
H_or_Y.ccopy(P.y, isDbl) # H (add) or Y₁ (dbl)
HH_or_YY.square(H_or_Y) # H² (add) or Y₁² (dbl)
r.z -= P.y #
r.z.double() # r = 2*(S₂-Y₁) -- r.z used as r
V_or_S.ccopy(P.x, isDbl) # U₁ (add) or X₁ (dbl)
V_or_S *= HH_or_YY # V = U₁*HH (add) or S = X₁*YY (dbl)
r.x.square(r.z) # r²
r.x -= J
r.x -= r.y
r.x -= r.y # X₃ = r²-J-2*V -- r.x computed
block: # Compute M for doubling
# "when" static evaluation doesn't shortcut booleans :/
# which causes issues when CoefA isn't an int but Fp or Fp2
const CoefA = F.C.getCoefA()
when CoefA is int:
const CoefA_eq_zero = CoefA == 0
const CoefA_eq_minus3 {.used.} = CoefA == -3
else:
const CoefA_eq_zero = false
const CoefA_eq_minus3 = false
r.y -= r.x # V-X₃
r.y *= r.z # r*(V-X₃)
when CoefA_eq_zero:
var a {.noInit.} = H
var b {.noInit.} = HH_or_YY
a.ccopy(P.x, isDbl) # H or X₁
b.ccopy(P.x, isDbl) # HH or X₁
HHH_or_Mpre.prod(a, b) # HHH or X₁²
J *= P.y # Y₁J -- J reused as Y₁J
r.y -= J
r.y -= J # Y₃ = r*(V-X₃) - 2*Y₁J -- r.y computed
var M{.noInit.} = HHH_or_Mpre # Assuming on doubling path
M.div2() # X₁²/2
M += HHH_or_Mpre # 3X₁²/2
R_or_M.ccopy(M, isDbl)
r.z.sum(P.z, H) # Z₁ + H
r.z.square()
r.z -= Z1Z1
r.z -= HH # Z₃ = (Z1+H)²-Z1Z1-HH
elif CoefA_eq_minus3:
var a{.noInit.}, b{.noInit.}: F
a.sum(P.x, Z1Z1)
b.diff(P.z, Z1Z1)
a.ccopy(H_or_Y, not isDbl) # H or X₁+ZZ
b.ccopy(HH_or_YY, not isDbl) # HH or X₁-ZZ
HHH_or_Mpre.prod(a, b) # HHH or X₁²-ZZ²
# Now handle points at infinity
proc one(): F =
result.setOne()
var M{.noInit.} = HHH_or_Mpre # Assuming on doubling path
M.div2() # (X₁²-ZZ²)/2
M += HHH_or_Mpre # 3(X₁²-ZZ²)/2
R_or_M.ccopy(M, isDbl)
r.x.ccopy(Q.x, pIsInf)
r.y.ccopy(Q.y, pIsInf)
r.z.ccopy(static(one()), pIsInf)
else:
# TODO: Costly `a` coefficients can be computed
# by merging their computation with Z₃ = Z₁*Z₂*H (add) or Z₃ = Y₁*Z₁ (dbl)
var a{.noInit.} = H
var b{.noInit.} = HH_or_YY
a.ccopy(P.x, isDbl)
b.ccopy(P.x, isDbl)
HHH_or_Mpre.prod(a, b, true) # HHH or X₁²
r.ccopy(P, qIsInf)
# Assuming doubling path
a.square(HHH_or_Mpre, skipFinalSub = true)
a *= HHH_or_Mpre # a = 3X₁²
b.square(Z1Z1)
# b.mulCheckSparse(CoefA) # TODO: broken static compile-time type inference
b *= CoefA # b = αZZ, with α the "a" coefficient of the curve
a += b
a.div2()
R_or_M.ccopy(a, isDbl) # (3X₁² - αZZ)/2
# Let's count our horses, at this point:
# - R_or_M is set with R (add) or M (dbl)
# - HHH_or_Mpre contains HHH (add) or garbage precomputation (dbl)
# - V_or_S is set with V = U₁*HH (add) or S = X₁*YY (dbl)
var o {.noInit.}: typeof(r)
block: # Finishing line
var t {.noInit.}: F
t.double(V_or_S)
o.x.square(R_or_M)
o.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
o.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
V_or_S -= o.x # V-X₃ (add) or S-X₃ (dbl)
o.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
HHH_or_Mpre.ccopy(HH_or_YY, isDbl) # HHH (add) or YY (dbl)
S1.ccopy(HH_or_YY, isDbl) # S1 (add) or YY (dbl)
HHH_or_Mpre *= S1 # HHH*S1 (add) or YY² (dbl)
o.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
t.setOne()
t.ccopy(H_or_Y, isDbl) # Z₂ (add) or Y₁ (dbl)
t.prod(t, P.z, true) # Z₁Z₂ (add) or Y₁Z₁ (dbl)
o.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
o.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
block: # Infinity points
o.x.ccopy(Q.x, P.isInf())
o.y.ccopy(Q.y, P.isInf())
o.z.csetOne(P.isInf())
o.ccopy(P, Q.isInf())
r = o
func double*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
@ -524,11 +624,7 @@ func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} =
func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
## In-place mixed point addition
##
## TODO: ⚠️ cannot handle P == Q
var t{.noInit.}: typeof(P)
t.madd(P, Q)
P = t
P.madd(P, Q)
func double*(P: var ECP_ShortW_Jac) {.inline.} =
## In-place point doubling
@ -547,10 +643,10 @@ func affine*[F; G](
jac: ECP_ShortW_Jac[F, G]) =
var invZ {.noInit.}, invZ2{.noInit.}: F
invZ.inv(jac.z)
invZ2.square(invZ)
invZ2.square(invZ, skipFinalSub = true)
aff.x.prod(jac.x, invZ2)
invZ *= invZ2
invZ.prod(invZ, invZ2, skipFinalSub = true)
aff.y.prod(jac.y, invZ)
func fromAffine*[F; G](
@ -586,38 +682,38 @@ func batchAffine*[N: static int, F, G](
zeroes[i] = z.isZero()
z.csetOne(zeroes[i])
affs[i].x.prod(affs[i-1].x, z)
if i != N-1:
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = true)
else:
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = false)
var accInv {.noInit.}: F
accInv.inv(affs[N-1].x)
for i in countdown(N-1, 1):
# Skip zero z-coordinates (infinity points)
var z = affs[i].x
# Extract 1/Pᵢ
var invi {.noInit.}: F
invi.prod(accInv, affs[i-1].x)
invi.prod(accInv, affs[i-1].x, skipFinalSub = true)
invi.csetZero(zeroes[i])
# Now convert Pᵢ to affine
var invi2 {.noinit.}: F
invi2.square(invi)
invi2.square(invi, skipFinalSub = true)
affs[i].x.prod(jacs[i].x, invi2)
invi *= invi2
invi.prod(invi, invi2, skipFinalSub = true)
affs[i].y.prod(jacs[i].y, invi)
# next iteration
invi = jacs[i].z
invi.csetOne(zeroes[i])
accInv *= invi
accInv.prod(accInv, invi, skipFinalSub = true)
block: # tail
var invi2 {.noinit.}: F
accInv.csetZero(zeroes[0])
invi2.square(accInv)
invi2.square(accInv, skipFinalSub = true)
affs[0].x.prod(jacs[0].x, invi2)
accInv *= invi2
accInv.prod(accInv, invi2, skipFinalSub = true)
affs[0].y.prod(jacs[0].y, accInv)
# ############################################################

View File

@ -15,6 +15,9 @@ import
export Subgroup
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Elliptic Curve in Short Weierstrass form
@ -261,7 +264,6 @@ func madd*[F; G: static Subgroup](
## ``r`` may alias P
# TODO: static doAssert odd order
when F.C.getCoefA() == 0:
var t0 {.noInit.}, t1 {.noInit.}, t2 {.noInit.}, t3 {.noInit.}, t4 {.noInit.}: F
var x3 {.noInit.}, y3 {.noInit.}, z3 {.noInit.}: F
@ -273,6 +275,10 @@ func madd*[F; G: static Subgroup](
# Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ 3bZ₁)
# + 9bX₁X₂ (X₁ + X₂Z₁)
# Z₃= (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3 X₁X₂ (X₁Y₂ + X₂Y₁)
#
# Note¹⁰ mentions that due to Qz = 1, cannot be
# the point at infinity.
# We solve that by conditional copies.
t0.prod(P.x, Q.x) # 1. t₀ <- X₁ X₂
t1.prod(P.y, Q.y) # 2. t₁ <- Y₁ Y₂
t3.sum(P.x, P.y) # 3. t₃ <- X₁ + Y₁ ! error in paper
@ -304,13 +310,24 @@ func madd*[F; G: static Subgroup](
y3 *= SexticNonResidue
x3.prod(t4, y3) # 18. X₃ <- t₄ Y₃, X₃ = (Y₁ + Y₂Z₁) 3b(X₁ + X₂Z₁)
t2.prod(t3, t1) # 19. t₂ <- t₃ t₁, t₂ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁)
r.x.diff(t2, x3) # 20. X₃ <- t₂ - X₃, X₃ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁) - 3b(Y₁ + Y₂Z₁)(X₁ + X₂Z₁)
x3.diff(t2, x3) # 20. X₃ <- t₂ - X₃, X₃ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁) - 3b(Y₁ + Y₂Z₁)(X₁ + X₂Z₁)
y3 *= t0 # 21. Y₃ <- Y₃ t₀, Y₃ = 9bX₁X₂ (X₁ + X₂Z₁)
t1 *= z3 # 22. t₁ <- t₁ Z₃, t₁ = (Y₁Y₂ - 3bZ₁)(Y₁Y₂ + 3bZ₁)
r.y.sum(y3, t1) # 23. Y₃ <- t₁ + Y₃, Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ - 3bZ₁) + 9bX₁X₂ (X₁ + X₂Z₁)
y3 += t1 # 23. Y₃ <- t₁ + Y₃, Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ - 3bZ₁) + 9bX₁X₂ (X₁ + X₂Z₁)
t0 *= t3 # 31. t₀ <- t₀ t₃, t₀ = 3X₁X₂ (X₁Y₂ + X₂Y₁)
z3 *= t4 # 32. Z₃ <- Z₃ t₄, Z₃ = (Y₁Y₂ + 3bZ₁)(Y₁ + Y₂Z₁)
r.z.sum(z3, t0) # 33. Z₃ <- Z₃ + t₀, Z₃ = (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3X₁X₂ (X₁Y₂ + X₂Y₁)
z3 += t0 # 33. Z₃ <- Z₃ + t₀, Z₃ = (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3X₁X₂ (X₁Y₂ + X₂Y₁)
# Deal with infinity point. r and P might alias.
let inf = Q.isInf()
x3.ccopy(P.x, inf)
y3.ccopy(P.y, inf)
z3.ccopy(P.z, inf)
r.x = x3
r.y = y3
r.z = z3
else:
{.error: "Not implemented.".}
@ -454,18 +471,18 @@ func batchAffine*[N: static int, F, G](
zeroes[i] = z.isZero()
z.csetOne(zeroes[i])
affs[i].x.prod(affs[i-1].x, z)
if i != N-1:
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = true)
else:
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = false)
var accInv {.noInit.}: F
accInv.inv(affs[N-1].x)
for i in countdown(N-1, 1):
# Skip zero z-coordinates (infinity points)
var z = affs[i].x
# Extract 1/Pᵢ
var invi {.noInit.}: F
invi.prod(accInv, affs[i-1].x)
invi.prod(accInv, affs[i-1].x, skipFinalSub = true)
invi.csetZero(zeroes[i])
# Now convert Pᵢ to affine
@ -475,7 +492,7 @@ func batchAffine*[N: static int, F, G](
# next iteration
invi = projs[i].z
invi.csetOne(zeroes[i])
accInv *= invi
accInv.prod(accInv, invi, skipFinalSub = true)
block: # tail
accInv.csetZero(zeroes[0])

View File

@ -276,6 +276,10 @@ func double*[Field](
E -= D # C stores E-D
r.y *= E
func `+=`*(P: var ECP_TwEdwards_Prj, Q: ECP_TwEdwards_Prj) {.inline.} =
## In-place point addition
P.sum(P, Q)
func double*(P: var ECP_TwEdwards_Prj) {.inline.} =
## In-place EC doubling
P.double(P)

View File

@ -2156,5 +2156,29 @@ func inv*(a: var CubicExt) =
## to affine for elliptic curve
a.invImpl(a)
# Convenience functions
# ----------------------------------------------------------------------
template square*(a: var ExtensionField, skipFinalSub: static bool) =
# Square alias,
# this allows using the same code for
# the base field and its extensions while benefitting from skipping
# the final substraction on Fp
a.square()
template square*(r: var ExtensionField, a: ExtensionField, skipFinalSub: static bool) =
# Square alias,
# this allows using the same code for
# the base field and its extensions while benefitting from skipping
# the final substraction on Fp
r.square(a)
template prod*(r: var ExtensionField, a, b: ExtensionField, skipFinalSub: static bool) =
# Prod alias,
# this allows using the same code for
# the base field and its extensions while benefitting from skipping
# the final substraction on Fp
r.prod(a, b)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -0,0 +1,32 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# Allocators
#
# ############################################################
# Due to the following constraints:
# - No dynamic allocation in single-threaded codepaths (for compatibility with embedded devices like TPM or secure hardware)
# - Avoiding cryptographic material in third-party libraries (like a memory allocator)
# - Giving full control of the library user on allocation strategy
# - Performance, especially for long-running processes (fragmentation, multithreaded allocation...)
#
# stack allocation is strongly preferred where necessary.
when defined(windows):
proc alloca(size: int): pointer {.header: "<malloc.h>".}
else:
proc alloca(size: int): pointer {.header: "<alloca.h>".}
template alloca*(T: typedesc): ptr T =
cast[ptr T](alloca(sizeof(T)))
template alloca*(T: typedesc, len: Natural): ptr UncheckedArray[T] =
cast[ptr UncheckedArray[T]](alloca(sizeof(T) * len))

View File

@ -13,7 +13,7 @@ import
# Pararun is a parallel shell command runner
# ------------------------------------------
# Usage: pararun <file-with-1-command-per-line> <numWorkers
# Usage: pararun <file-with-1-command-per-line> <numWorkers>
# AsyncSemaphore
# ----------------------------------------------------------------

View File

@ -0,0 +1,30 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_jacobian,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
const
numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384, 32768] # 262144, 1048576]
run_EC_batch_add_impl(
ec = ECP_ShortW_Jac[Fp[BN254_Snarks], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_batch_add_" & $BN254_Snarks
)
run_EC_batch_add_impl(
ec = ECP_ShortW_Jac[Fp[BLS12_381], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_batch_add_" & $BLS12_381
)

View File

@ -0,0 +1,30 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_projective,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
const
numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384, 32768] # 262144, 1048576]
run_EC_batch_add_impl(
ec = ECP_ShortW_Prj[Fp[BN254_Snarks], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_projective_batch_add_" & $BN254_Snarks
)
run_EC_batch_add_impl(
ec = ECP_ShortW_Prj[Fp[BLS12_381], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_projective_batch_add_" & $BLS12_381
)

View File

@ -23,6 +23,7 @@ import
ec_shortweierstrass_affine,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_projective,
ec_shortweierstrass_batch_ops,
ec_twistededwards_affine,
ec_twistededwards_projective,
ec_scalar_mul],
@ -41,7 +42,7 @@ type
Long01Sequence
func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen): EC {.noInit.} =
if not randZ:
when EC is ECP_ShortW_Aff:
if gen == Uniform:
result = rng.random_unsafe(EC)
elif gen == HighHammingWeight:
@ -49,12 +50,20 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen)
else:
result = rng.random_long01Seq(EC)
else:
if gen == Uniform:
result = rng.random_unsafe_with_randZ(EC)
elif gen == HighHammingWeight:
result = rng.random_highHammingWeight_with_randZ(EC)
if not randZ:
if gen == Uniform:
result = rng.random_unsafe(EC)
elif gen == HighHammingWeight:
result = rng.random_highHammingWeight(EC)
else:
result = rng.random_long01Seq(EC)
else:
result = rng.random_long01Seq_with_randZ(EC)
if gen == Uniform:
result = rng.random_unsafe_with_randZ(EC)
elif gen == HighHammingWeight:
result = rng.random_highHammingWeight_with_randZ(EC)
else:
result = rng.random_long01Seq_with_randZ(EC)
template pairingGroup(EC: typedesc): string =
when EC is (ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac):
@ -99,6 +108,15 @@ proc run_EC_addition_tests*(
r.sum(inf, P)
check: bool(r == P)
# Aliasing tests
r = P
r += inf
check: bool(r == P)
r.setInf()
r += P
check: bool(r == P)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
@ -438,7 +456,7 @@ proc run_EC_mixed_add_impl*(
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
test "EC " & G1_or_G2 & " mixed addition is consistent with general addition":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
let b = rng.random_point(EC, randZ, gen)
@ -452,12 +470,83 @@ proc run_EC_mixed_add_impl*(
check: bool(r_generic == r_mixed)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - doubling":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
var aAff: ECP_ShortW_Aff[EC.F, EC.G]
aAff.affine(a)
var r_generic, r_mixed: EC
r_generic.double(a)
r_mixed.madd(a, aAff)
check: bool(r_generic == r_mixed)
# Aliasing test
r_mixed = a
r_mixed += aAff
check: bool(r_generic == r_mixed)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - adding infinity LHS":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
var a{.noInit.}: EC
a.setInf()
let bAff = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)
var r_mixed{.noInit.}: EC
r_mixed.madd(a, bAff)
var r{.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
r.affine(r_mixed)
a += bAff
check:
bool(r == bAff)
bool(a == r_mixed)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - adding infinity RHS":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
var bAff{.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
bAff.setInf()
var r{.noInit.}: EC
r.madd(a, bAff)
check: bool(r == a)
r = a
r += bAff
check: bool(r == a)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
proc run_EC_subgroups_cofactors_impl*(
ec: typedesc,
@ -480,7 +569,7 @@ proc run_EC_subgroups_cofactors_impl*(
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
test "Effective cofactor matches accelerated cofactor clearing" & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let P = rng.random_point(EC, randZ, gen)
var cPeff = P
@ -491,17 +580,17 @@ proc run_EC_subgroups_cofactors_impl*(
check: bool(cPeff == cPfast)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "Subgroup checks and cofactor clearing consistency":
var inSubgroup = 0
var offSubgroup = 0
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
stdout.write " "
for _ in 0 ..< ItersMul:
let P = rng.random_point(EC, randZ, gen)
@ -526,12 +615,12 @@ proc run_EC_subgroups_cofactors_impl*(
stdout.write '\n'
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", G1_or_G2, " subgroup and ",
offSubgroup, " points on curve but not in subgroup (before cofactor clearing)"
@ -699,4 +788,82 @@ proc run_EC_conversion_failures*(
doAssert bool(Qs[i] == Rs[i])
test_bn254_snarks_g1(ECP_ShortW_Prj[Fp[BN254_Snarks], G1])
test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1])
test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1])
proc run_EC_batch_add_impl*[N: static int](
ec: typedesc,
numPoints: array[N, int],
moduleName: string
) =
# Random seed for reproducibility
var rng: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
when ec.G == G1:
const G1_or_G2 = "G1"
else:
const G1_or_G2 = "G2"
const testSuiteDesc = "Elliptic curve batch addition for Short Weierstrass form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
for n in numPoints:
test $ec & " batch addition (N=" & $n & ")":
proc test(EC: typedesc, gen: RandomGen) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
for i in 0 ..< n:
points[i] = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)
var r_batch{.noinit.}, r_ref{.noInit.}: EC
r_ref.setInf()
for i in 0 ..< n:
r_ref += points[i]
r_batch.sum_batch_vartime(points)
check: bool(r_batch == r_ref)
test(ec, gen = Uniform)
test(ec, gen = HighHammingWeight)
test(ec, gen = Long01Sequence)
test "EC " & G1_or_G2 & " batch addition (N=" & $n & ") - special cases":
proc test(EC: typedesc, gen: RandomGen) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
let halfN = n div 2
for i in 0 ..< halfN:
points[i] = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)
for i in halfN ..< n:
# The special cases test relies on internal knowledge that we sum(points[i], points[i+n/2]
# It should be changed if scheduling change, for example if we sum(points[2*i], points[2*i+1])
let c = rng.random_unsafe(3)
if c == 0:
points[i] = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)
elif c == 1:
points[i] = points[i-halfN]
else:
points[i].neg(points[i-halfN])
var r_batch{.noinit.}, r_ref{.noInit.}: EC
r_ref.setInf()
for i in 0 ..< n:
r_ref += points[i]
r_batch.sum_batch_vartime(points)
check: bool(r_batch == r_ref)
test(ec, gen = Uniform)
test(ec, gen = HighHammingWeight)
test(ec, gen = Long01Sequence)