mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-04 06:03:08 +00:00
Batch additions (#207)
* Batch elliptic curve addition * accelerate chained muls * jac mixed add handle doubling. jac additions handle aliasing when adding infinity * properly skip sanitizer on BLS signature test * properly skip sanitizer² on BLS signature test
This commit is contained in:
parent
93654d580e
commit
928f515582
68
benchmarks/bench_ec_g1_batch.nim
Normal file
68
benchmarks/bench_ec_g1_batch.nim
Normal file
@ -0,0 +1,68 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
# Internals
|
||||
../constantine/math/config/curves,
|
||||
../constantine/math/arithmetic,
|
||||
../constantine/math/elliptic/[
|
||||
ec_shortweierstrass_affine,
|
||||
ec_shortweierstrass_projective,
|
||||
ec_shortweierstrass_jacobian],
|
||||
# Helpers
|
||||
../helpers/static_for,
|
||||
./bench_elliptic_template,
|
||||
# Standard library
|
||||
std/strutils
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of the G1 group of
|
||||
# Short Weierstrass elliptic curves
|
||||
#      in projective and Jacobian coordinates, including batch addition
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
|
||||
const Iters = 10_000
|
||||
const AvailableCurves = [
|
||||
# BN254_Snarks,
|
||||
BLS12_381,
|
||||
]
|
||||
|
||||
proc main() =
  ## Benchmark driver for 𝔾1 additions on every curve in `AvailableCurves`.
  ##
  ## For each curve it runs, in order:
  ## - single addition, mixed addition and doubling,
  ##   for projective then Jacobian coordinates
  ## - multi-addition over increasing input sizes, unbatched then batched,
  ##   for projective then Jacobian coordinates
  ##
  ## The iteration count of multi-addition benches is scaled down
  ## with the number of points (`Iters div numPoints`, at least 1).
  const PointCounts = [10, 100, 1000, 10000, 100000, 1000000]

  separator()
  staticFor i, 0, AvailableCurves.len:
    const curve = AvailableCurves[i]

    # Single-operation baselines
    addBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
    addBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
    mixedAddBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
    mixedAddBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
    doublingBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
    doublingBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
    separator()

    # Projective accumulator, sequential mixed additions
    for numPoints in PointCounts:
      let reps = max(1, Iters div numPoints)
      multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = false, iters = reps)
    separator()

    # Projective accumulator, batched affine additions
    for numPoints in PointCounts:
      let reps = max(1, Iters div numPoints)
      multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = true, iters = reps)
    separator()

    # Jacobian accumulator, sequential mixed additions
    for numPoints in PointCounts:
      let reps = max(1, Iters div numPoints)
      multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = false, iters = reps)
    separator()

    # Jacobian accumulator, batched affine additions
    for numPoints in PointCounts:
      let reps = max(1, Iters div numPoints)
      multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = true, iters = reps)
    separator()
  separator()
|
||||
|
||||
main()
|
||||
notes()
|
||||
@ -22,6 +22,7 @@ import
|
||||
ec_shortweierstrass_affine,
|
||||
ec_shortweierstrass_projective,
|
||||
ec_shortweierstrass_jacobian,
|
||||
ec_shortweierstrass_batch_ops,
|
||||
ec_scalar_mul, ec_endomorphism_accel],
|
||||
# Helpers
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
@ -35,10 +36,10 @@ export abstractions # generic sandwich on SecretBool and SecretBool in Jacobian
|
||||
|
||||
proc separator*() = separator(177)
|
||||
|
||||
macro fixEllipticDisplay(T: typedesc): untyped =
|
||||
macro fixEllipticDisplay(EC: typedesc): untyped =
|
||||
# At compile-time, enums are integers and their display is buggy
|
||||
# we get the Curve ID instead of the curve name.
|
||||
let instantiated = T.getTypeInst()
|
||||
let instantiated = EC.getTypeInst()
|
||||
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
|
||||
let fieldName = $instantiated[1][1][0]
|
||||
let curveName = $Curve(instantiated[1][1][1].intVal)
|
||||
@ -53,100 +54,108 @@ proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int6
|
||||
else:
|
||||
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||
|
||||
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
|
||||
template bench(op: string, EC: typedesc, iters: int, body: untyped): untyped =
|
||||
measure(iters, startTime, stopTime, startClk, stopClk, body)
|
||||
report(op, fixEllipticDisplay(T), startTime, stopTime, startClk, stopClk, iters)
|
||||
report(op, fixEllipticDisplay(EC), startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
proc addBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T)
|
||||
let Q = rng.random_unsafe(T)
|
||||
bench("EC Add " & G1_or_G2, T, iters):
|
||||
proc addBench*(EC: typedesc, iters: int) =
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC)
|
||||
let Q = rng.random_unsafe(EC)
|
||||
bench("EC Add " & $EC.G, EC, iters):
|
||||
r.sum(P, Q)
|
||||
|
||||
proc mixedAddBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T)
|
||||
let Q = rng.random_unsafe(T)
|
||||
var Qaff: ECP_ShortW_Aff[T.F, T.G]
|
||||
proc mixedAddBench*(EC: typedesc, iters: int) =
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC)
|
||||
let Q = rng.random_unsafe(EC)
|
||||
var Qaff: ECP_ShortW_Aff[EC.F, EC.G]
|
||||
Qaff.affine(Q)
|
||||
bench("EC Mixed Addition " & G1_or_G2, T, iters):
|
||||
bench("EC Mixed Addition " & $EC.G, EC, iters):
|
||||
r.madd(P, Qaff)
|
||||
|
||||
proc doublingBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T)
|
||||
bench("EC Double " & G1_or_G2, T, iters):
|
||||
proc doublingBench*(EC: typedesc, iters: int) =
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC)
|
||||
bench("EC Double " & $EC.G, EC, iters):
|
||||
r.double(P)
|
||||
|
||||
proc affFromProjBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
|
||||
let P = rng.random_unsafe(T)
|
||||
bench("EC Projective to Affine " & G1_or_G2, T, iters):
|
||||
proc affFromProjBench*(EC: typedesc, iters: int) =
|
||||
var r {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
|
||||
let P = rng.random_unsafe(EC)
|
||||
bench("EC Projective to Affine " & $EC.G, EC, iters):
|
||||
r.affine(P)
|
||||
|
||||
proc affFromJacBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
|
||||
let P = rng.random_unsafe(T)
|
||||
bench("EC Jacobian to Affine " & G1_or_G2, T, iters):
|
||||
proc affFromJacBench*(EC: typedesc, iters: int) =
|
||||
var r {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
|
||||
let P = rng.random_unsafe(EC)
|
||||
bench("EC Jacobian to Affine " & $EC.G, EC, iters):
|
||||
r.affine(P)
|
||||
|
||||
proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) =
|
||||
const bits = T.F.C.getCurveOrderBitwidth()
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
proc scalarMulGenericBench*(EC: typedesc, window: static int, iters: int) =
|
||||
const bits = EC.F.C.getCurveOrderBitwidth()
|
||||
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T) # TODO: clear cofactor
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC) # TODO: clear cofactor
|
||||
|
||||
let exponent = rng.random_unsafe(BigInt[bits])
|
||||
|
||||
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (window-" & $window & ", generic)", T, iters):
|
||||
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-" & $window & ", generic)", EC, iters):
|
||||
r = P
|
||||
r.scalarMulGeneric(exponent, window)
|
||||
|
||||
proc scalarMulEndo*(T: typedesc, iters: int) =
|
||||
const bits = T.F.C.getCurveOrderBitwidth()
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
proc scalarMulEndo*(EC: typedesc, iters: int) =
|
||||
const bits = EC.F.C.getCurveOrderBitwidth()
|
||||
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T) # TODO: clear cofactor
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC) # TODO: clear cofactor
|
||||
|
||||
let exponent = rng.random_unsafe(BigInt[bits])
|
||||
|
||||
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (endomorphism accelerated)", T, iters):
|
||||
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (endomorphism accelerated)", EC, iters):
|
||||
r = P
|
||||
r.scalarMulEndo(exponent)
|
||||
|
||||
proc scalarMulEndoWindow*(T: typedesc, iters: int) =
|
||||
const bits = T.F.C.getCurveOrderBitwidth()
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
proc scalarMulEndoWindow*(EC: typedesc, iters: int) =
|
||||
const bits = EC.F.C.getCurveOrderBitwidth()
|
||||
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T) # TODO: clear cofactor
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC) # TODO: clear cofactor
|
||||
|
||||
let exponent = rng.random_unsafe(BigInt[bits])
|
||||
|
||||
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (window-2, endomorphism accelerated)", T, iters):
|
||||
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-2, endomorphism accelerated)", EC, iters):
|
||||
r = P
|
||||
when T.F is Fp:
|
||||
when EC.F is Fp:
|
||||
r.scalarMulGLV_m2w2(exponent)
|
||||
else:
|
||||
{.error: "Not implemented".}
|
||||
|
||||
proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
|
||||
const bits = T.F.C.getCurveOrderBitwidth()
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
proc scalarMulUnsafeDoubleAddBench*(EC: typedesc, iters: int) =
|
||||
const bits = EC.F.C.getCurveOrderBitwidth()
|
||||
|
||||
var r {.noInit.}: T
|
||||
let P = rng.random_unsafe(T) # TODO: clear cofactor
|
||||
var r {.noInit.}: EC
|
||||
let P = rng.random_unsafe(EC) # TODO: clear cofactor
|
||||
|
||||
let exponent = rng.random_unsafe(BigInt[bits])
|
||||
|
||||
bench("EC ScalarMul " & $bits & "-bit " & G1_or_G2 & " (unsafe reference DoubleAdd)", T, iters):
|
||||
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe reference DoubleAdd)", EC, iters):
|
||||
r = P
|
||||
r.unsafe_ECmul_double_add(exponent)
|
||||
|
||||
proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmark the sum of `numPoints` random affine points
  ## into an accumulator of type `EC`.
  ##
  ## - useBatching = true:  a single call to `sum_batch_vartime`
  ## - useBatching = false: the accumulator is reset to infinity, then
  ##                        each point is added one by one with `+=`
  ##
  ## `iters` is the number of times the whole sum is repeated.
  var inputs = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
  for idx in 0 ..< numPoints:
    inputs[idx] = rng.random_unsafe(ECP_ShortW_Aff[EC.F, EC.G])

  var acc{.noInit.}: EC

  if not useBatching:
    bench("EC Multi-Addition unbatched mixed add " & $EC.G & " (" & $numPoints & " points)", EC, iters):
      acc.setInf()
      for idx in 0 ..< numPoints:
        acc += inputs[idx]
  else:
    bench("EC Multi-Addition batched " & $EC.G & " (" & $numPoints & " points)", EC, iters):
      acc.sum_batch_vartime(inputs)
|
||||
@ -96,13 +96,13 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||
|
||||
# Elliptic curve arithmetic G1
|
||||
# ----------------------------------------------------------
|
||||
# ("tests/math/t_ec_shortw_prj_g1_add_double.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g1_add_double.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g1_mul_sanity.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g1_mul_distri.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g1_mul_vs_ref.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g1_mixed_add.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_jac_g1_add_double.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g1_add_double.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g1_mul_sanity.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g1_mul_distri.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g1_mul_vs_ref.nim", false),
|
||||
@ -115,49 +115,49 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||
|
||||
# Elliptic curve arithmetic G2
|
||||
# ----------------------------------------------------------
|
||||
# ("tests/math/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bn254_snarks.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mixed_add_bn254_snarks.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_add_double_bls12_381.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_381.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_381.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_add_double_bls12_377.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_377.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_377.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_prj_g2_add_double_bw6_761.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_add_double_bw6_761.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bw6_761.nim", false),
|
||||
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bw6_761.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bw6_761.nim", false),
|
||||
("tests/math/t_ec_shortw_prj_g2_mixed_add_bw6_761.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_jac_g2_add_double_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_add_double_bn254_snarks.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bn254_snarks.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bn254_snarks.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mixed_add_bn254_snarks.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_add_double_bls12_381.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_381.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_381.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_381.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_add_double_bls12_377.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_377.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_377.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_377.nim", false),
|
||||
|
||||
# ("tests/math/t_ec_shortw_jac_g2_add_double_bw6_761.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_add_double_bw6_761.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bw6_761.nim", false),
|
||||
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bw6_761.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bw6_761.nim", false),
|
||||
@ -177,6 +177,11 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||
# ----------------------------------------------------------
|
||||
("tests/math/t_ec_shortw_prj_edge_cases.nim", false),
|
||||
|
||||
# Elliptic curve arithmetic - batch computation
|
||||
# ----------------------------------------------------------
|
||||
("tests/math/t_ec_shortw_prj_g1_batch_add.nim", false),
|
||||
("tests/math/t_ec_shortw_jac_g1_batch_add.nim", false),
|
||||
|
||||
# Subgroups and cofactors
|
||||
# ----------------------------------------------------------
|
||||
("tests/math/t_ec_subgroups_bn254_nogami.nim", false),
|
||||
@ -232,6 +237,7 @@ const benchDesc = [
|
||||
"bench_fp6",
|
||||
"bench_fp12",
|
||||
"bench_ec_g1",
|
||||
"bench_ec_g1_batch",
|
||||
"bench_ec_g2",
|
||||
"bench_pairing_bls12_377",
|
||||
"bench_pairing_bls12_381",
|
||||
@ -262,13 +268,14 @@ const skipSanitizers = [
|
||||
"tests/math/t_ec_sage_bn254_snarks.nim",
|
||||
"tests/math/t_ec_sage_bls12_377.nim",
|
||||
"tests/math/t_ec_sage_bls12_381.nim",
|
||||
"tests/t_blssig_pop_on_bls12381_g2.nim",
|
||||
"tests/t_hash_to_field.nim",
|
||||
"tests/t_hash_to_curve.nim",
|
||||
"tests/t_hash_to_curve_random.nim",
|
||||
"tests/t_mac_poly1305.nim",
|
||||
"tests/t_mac_hmac.nim",
|
||||
"tests/t_kdf_hkdf.nim",
|
||||
"tests/t_ethereum_eip2333_bls12381_key_derivation"
|
||||
"tests/t_ethereum_eip2333_bls12381_key_derivation.nim"
|
||||
]
|
||||
|
||||
when defined(windows):
|
||||
@ -302,8 +309,8 @@ template setupCommand(): untyped {.dirty.} =
|
||||
var flags = flags
|
||||
when not defined(windows):
|
||||
# Not available in MinGW https://github.com/libressl-portable/portable/issues/54
|
||||
flags &= " --passC:-fstack-protector-all"
|
||||
let command = "nim " & lang & cc & " " & flags &
|
||||
flags &= " --passC:-fstack-protector-strong"
|
||||
let command = "nim " & lang & cc & " -d:release " & flags &
|
||||
" --verbosity:0 --outdir:build/testsuite -r --hints:off --warnings:off " &
|
||||
" --nimcache:nimcache/" & path & " " &
|
||||
path
|
||||
@ -672,37 +679,55 @@ task bench_fp12_clang_noasm, "Run benchmark 𝔽p12 with clang - no Assembly":
|
||||
# Elliptic curve G1
|
||||
# ------------------------------------------
|
||||
|
||||
task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Default compiler":
|
||||
task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Default compiler":
|
||||
runBench("bench_ec_g1")
|
||||
|
||||
task bench_ec_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC":
|
||||
task bench_ec_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - GCC":
|
||||
runBench("bench_ec_g1", "gcc")
|
||||
|
||||
task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang":
|
||||
task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Clang":
|
||||
runBench("bench_ec_g1", "clang")
|
||||
|
||||
task bench_ec_g1_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC no Assembly":
|
||||
task bench_ec_g1_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - GCC no Assembly":
|
||||
runBench("bench_ec_g1", "gcc", useAsm = false)
|
||||
|
||||
task bench_ec_g1_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang no Assembly":
|
||||
task bench_ec_g1_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 - Clang no Assembly":
|
||||
runBench("bench_ec_g1", "clang", useAsm = false)
|
||||
|
||||
# Elliptic curve G1 - batch operations
|
||||
# ------------------------------------------
|
||||
|
||||
task bench_ec_g1_batch, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Default compiler":
|
||||
runBench("bench_ec_g1_batch")
|
||||
|
||||
task bench_ec_g1_batch_gcc, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - GCC":
|
||||
runBench("bench_ec_g1_batch", "gcc")
|
||||
|
||||
task bench_ec_g1_batch_clang, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Clang":
|
||||
runBench("bench_ec_g1_batch", "clang")
|
||||
|
||||
task bench_ec_g1_batch_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - GCC no Assembly":
|
||||
runBench("bench_ec_g1_batch", "gcc", useAsm = false)
|
||||
|
||||
task bench_ec_g1_batch_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Clang no Assembly":
  # Run the batch-operations benchmark, like the other bench_ec_g1_batch_* tasks
  # (this task previously launched the non-batch "bench_ec_g1" by mistake).
  runBench("bench_ec_g1_batch", "clang", useAsm = false)
|
||||
|
||||
# Elliptic curve G2
|
||||
# ------------------------------------------
|
||||
|
||||
task bench_ec_g2, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Default compiler":
|
||||
task bench_ec_g2, "Run benchmark on Elliptic Curve group 𝔾2 - Default compiler":
|
||||
runBench("bench_ec_g2")
|
||||
|
||||
task bench_ec_g2_gcc, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - GCC":
|
||||
task bench_ec_g2_gcc, "Run benchmark on Elliptic Curve group 𝔾2 - GCC":
|
||||
runBench("bench_ec_g2", "gcc")
|
||||
|
||||
task bench_ec_g2_clang, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Clang":
|
||||
task bench_ec_g2_clang, "Run benchmark on Elliptic Curve group 𝔾2 - Clang":
|
||||
runBench("bench_ec_g2", "clang")
|
||||
|
||||
task bench_ec_g2_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - GCC no Assembly":
|
||||
task bench_ec_g2_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - GCC no Assembly":
|
||||
runBench("bench_ec_g2", "gcc", useAsm = false)
|
||||
|
||||
task bench_ec_g2_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Short Weierstrass with Projective Coordinates - Clang no Assembly":
|
||||
task bench_ec_g2_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Clang no Assembly":
|
||||
runBench("bench_ec_g2", "clang", useAsm = false)
|
||||
|
||||
# Pairings
|
||||
|
||||
@ -62,7 +62,7 @@ Ry = λ(Px - Rx) - Py
|
||||
```
|
||||
but in the case of addition
|
||||
```
|
||||
λ = (Qy - Py) / (Px - Qx)
|
||||
λ = (Qy - Py) / (Qx - Px)
|
||||
```
|
||||
which is undefined for P == Q or P == -Q (as `-(x, y) = (x, -y)`)
|
||||
|
||||
|
||||
@ -14,6 +14,9 @@ import
|
||||
../io/[io_fields, io_extfields],
|
||||
../constants/zoo_constants
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Elliptic Curve in Short Weierstrass form
|
||||
|
||||
303
constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim
Normal file
303
constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim
Normal file
@ -0,0 +1,303 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../../platforms/[abstractions, allocs],
|
||||
../arithmetic,
|
||||
../extension_fields,
|
||||
./ec_shortweierstrass_affine,
|
||||
./ec_shortweierstrass_jacobian,
|
||||
./ec_shortweierstrass_projective
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Elliptic Curve in Short Weierstrass form
|
||||
# Batch addition
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# Affine primitives
|
||||
# ------------------------------------------------------------
|
||||
#
|
||||
# The equation for elliptic curve addition is in affine (x, y) coordinates:
|
||||
#
|
||||
# P + Q = R
|
||||
# (Px, Py) + (Qx, Qy) = (Rx, Ry)
|
||||
#
|
||||
# with
|
||||
# Rx = λ² - Px - Qx
|
||||
# Ry = λ(Px - Rx) - Py
|
||||
#
|
||||
# in the case of addition
|
||||
# λ = (Qy - Py) / (Qx - Px)
|
||||
#
|
||||
# which is undefined for P == Q or P == -Q as -(x, y) = (x, -y)
|
||||
#
|
||||
# if P = Q, the doubling formula uses the slope of the tangent at the limit
|
||||
#   λ = (3 Px² + a) / (2 Py)
|
||||
#
|
||||
# if P = -Q, the sum is the point at infinity
|
||||
#
|
||||
# ~~~~
|
||||
#
|
||||
# Those formulas require
|
||||
# addition: 2M + 1S + 1I
|
||||
# doubling: 2M + 2S + 1I
|
||||
#
|
||||
# Inversion is very expensive:
|
||||
# 119.5x multiplications (with ADX) for BN254
|
||||
# 98.4x multiplications (with ADX) for BLS12-381
|
||||
#
|
||||
# However, n inversions can use Montgomery's batch inversion
|
||||
# at the cost of 3(n-1)M + 1I
|
||||
#
|
||||
# Hence batch addition can have an asymptotic cost of
|
||||
# 5M + 1S
|
||||
# Compared to
|
||||
# Jacobian addition: 12M + 4S
|
||||
# Jacobian mixed addition: 7M + 4S
|
||||
# Projective addition: 12M (for curves in the form y² = x³ + b)
|
||||
# Projective mixed addition: 11M (for curves in the form y² = x³ + b)
|
||||
|
||||
func lambdaAdd[F; G: static Subgroup](lambda_num, lambda_den: var F, P, Q: ECP_ShortW_Aff[F, G]) =
  ## Slope of the line (PQ), returned as an unreduced fraction
  ##   lambda_num = Qy - Py
  ##   lambda_den = Qx - Px
  ## No division is performed here.
  diff(lambda_num, Q.y, P.y)
  diff(lambda_den, Q.x, P.x)
|
||||
|
||||
func lambdaDouble[F; G: static Subgroup](lambda_num, lambda_den: var F, P: ECP_ShortW_Aff[F, G]) =
  ## Compute the slope of the tangent at P, as an unreduced fraction
  ##   lambda_num = 3 Px² + a
  ##   lambda_den = 2 Py
  ## where `a` is the curve coefficient returned by `F.C.getCoefA()`.
  ## No division is performed here.
  lambda_num.square(P.x)
  lambda_num *= 3
  when F.C.getCoefA() != 0:
    # The coefficient must be added to the numerator.
    # (The previous code added it to an undeclared identifier `t`,
    #  which cannot compile for curves with a != 0.)
    # NOTE(review): relies on a `+=` overload accepting the value
    #               returned by getCoefA() - confirm for curves with a != 0.
    lambda_num += F.C.getCoefA()

  lambda_den.double(P.y)
|
||||
|
||||
func affineAdd[F; G: static Subgroup](
       r: var ECP_ShortW_Aff[F, G],
       lambda: var F,
       P, Q: ECP_ShortW_Aff[F, G]) =
  ## Affine addition given a precomputed slope `lambda`
  ##   Rx = λ² - Px - Qx
  ##   Ry = λ(Px - Rx) - Py
  ##
  ## `lambda` is only read.
  ## `Q.y` is not read.
  ## `r.x` is written before `P.x` and `P.y` are read,
  ## so `r` should not alias `P`.

  # Rx = λ² - Px - Qx
  square(r.x, lambda)
  r.x -= P.x
  r.x -= Q.x

  # Ry = λ(Px - Rx) - Py
  diff(r.y, P.x, r.x)
  r.y *= lambda
  r.y -= P.y
|
||||
|
||||
{.push checks:off.}
|
||||
func accum_half_vartime[F; G: static Subgroup](
       points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       lambdas: ptr UncheckedArray[tuple[num, den: F]],
       len: uint) {.noinline.} =
  ## Affine accumulation of half the points into the other half
  ## Warning ⚠️ : variable-time
  ##
  ## Accumulate `len` points pairwise into `len/2`
  ##   points[i] <- points[i] + points[i + len/2]   for i in [0, len/2)
  ## All the slope denominators are inverted with a single field inversion.
  ##
  ## Input/output:
  ## - points: `len` affine points to add (`len` must be even)
  ##           Partial sums are stored in [0, len/2)
  ##           [len/2, len) data has been destroyed
  ##
  ## Scratchspace:
  ## - lambdas: one (numerator, denominator) entry per pair,
  ##            entries [0, len/2) are overwritten
  ##
  ## Nothing is done if `len` is 0.
  ##
  ## Warning ⚠️ : cannot be inlined if used in loop due to the use of alloca

  # `and` binds less tightly than `==` in Nim, hence the parentheses.
  debug: doAssert (len and 1) == 0, "There must be an even number of points"

  if len == 0:
    # No pair to accumulate. Without this guard, `points[len-1]` below
    # would wrap around and the tail block would process a non-existent pair 0.
    return

  let N = len div 2

  # Step 1: Compute numerators and denominators of λᵢ = λᵢ_num / λᵢ_den
  for i in 0 ..< N:
    let p = i
    let q = i+N
    let q_prev = i-1+N # Wraps around for i == 0 (unsigned), but is only read when i != 0

    # As we can't divide by 0 in normal cases, λᵢ_den != 0,
    # so we use it to indicate special handling.
    template markSpecialCase(): untyped {.dirty.} =
      # we use Qy as an accumulator, so we save Qy in λᵢ_num
      lambdas[i].num = points[q].y
      # Mark for special handling
      lambdas[i].den.setZero()

      # This pair contributes no denominator:
      # carry the running product of denominators over unchanged in Qy.
      if i == 0:
        points[q].y.setOne()
      else:
        points[q].y = points[q_prev].y

    # Special case 1: infinity points have affine coordinates (0, 0) by convention
    # it doesn't match the y²=x³+ax+b equation so slope formula need special handling
    if points[p].isInf().bool() or points[q].isInf().bool():
      markSpecialCase()
      continue

    # Special case 2: λ = (Qy-Py)/(Qx-Px) which is undefined when Px == Qx
    # This happens when P == Q or P == -Q
    if bool(points[p].x == points[q].x):
      if bool(points[p].y == points[q].y):
        lambdaDouble(lambdas[i].num, lambdas[i].den, points[p])
      else: # P = -Q, so P+Q = inf
        markSpecialCase()
        continue
    else:
      lambdaAdd(lambdas[i].num, lambdas[i].den, points[p], points[q])

    # Step 2: Accumulate denominators in Qy, which is not used anymore.
    if i == 0:
      points[q].y = lambdas[i].den
    else:
      points[q].y.prod(points[q_prev].y, lambdas[i].den, skipFinalSub = true)

  # Step 3: batch invert
  var accInv {.noInit.}: F
  accInv.setZero()
  points[len-1].y += accInv # Undo skipFinalSub, ensure that the last accum is in canonical form, before inversion
  accInv.inv(points[len-1].y)

  # Step 4: Compute the partial sums

  template recallSpecialCase(i, p, q): untyped {.dirty.} =
    # As Qy is used as an accumulator, we saved Qy in λᵢ_num
    # For special cases handling, restore it.
    points[q].y = lambdas[i].num
    if points[p].isInf().bool():
      # inf + Q = Q
      points[i] = points[q]
    elif points[q].x.isZero().bool() and lambdas[i].num.isZero().bool():
      # Q has coordinates (0, 0), i.e. infinity: P + inf = P
      discard "points[i] = points[p]" # i == p
    else:
      # Remaining marked case: the sum is the point at infinity
      points[i].setInf()

  # Walk the pairs backward: at iteration i, accInv is the inverse of
  # the product of the denominators of pairs [0, i].
  for i in countdown(N-1, 1):
    let p = i
    let q = i+N
    let q_prev = i-1+N

    if lambdas[i].den.isZero().bool():
      recallSpecialCase(i, p, q)
      continue

    # Compute lambda
    points[q].y.prod(accInv, points[q_prev].y, skipFinalSub = true)
    points[q].y.prod(points[q].y, lambdas[i].num, skipFinalSub = true)

    # Compute EC addition
    var r{.noInit.}: ECP_ShortW_Aff[F, G]
    r.affineAdd(lambda = points[q].y, points[p], points[q])

    # Store result
    points[i] = r

    # Next iteration
    accInv.prod(accInv, lambdas[i].den, skipFinalSub = true)

  block: # Tail
    let i = 0
    let p = 0
    let q = N

    if lambdas[0].den.isZero().bool():
      recallSpecialCase(i, p, q)
    else:
      # Compute lambda
      points[q].y.prod(lambdas[0].num, accInv, skipFinalSub = true)

      # Compute EC addition
      var r{.noInit.}: ECP_ShortW_Aff[F, G]
      r.affineAdd(lambda = points[q].y, points[p], points[q])

      # Store result
      points[0] = r
|
||||
|
||||
{.pop.}
|
||||
|
||||
# Batch addition: jacobian
|
||||
# ------------------------------------------------------------
|
||||
|
||||
{.push checks:off.}
|
||||
func accumSum_chunk_vartime[F; G: static Subgroup](
       r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
       points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       lambdas: ptr UncheckedArray[tuple[num, den: F]],
       len: uint) =
  ## Add the `len` affine `points` to `r`
  ##   r += ∑ points
  ## `r` is accumulated into, it is not reset.
  ##
  ## `points` is used as working memory and is overwritten.
  ## `lambdas` is scratchspace for `accum_half_vartime`.

  const ChunkThreshold = 16
  var remaining = len

  # While there are enough points, fold the upper half into the lower half.
  while remaining >= ChunkThreshold:
    if (remaining and 1) != 0:
      # Odd count: move the last point into `r` so that an even number is left
      remaining -= 1
      r += points[remaining]

    # [0, remaining/2) += [remaining/2, remaining)
    accum_half_vartime(points, lambdas, remaining)
    remaining = remaining shr 1

  # Fewer than ChunkThreshold points left: add them one by one
  var i = 0'u
  while i < remaining:
    r += points[i]
    i += 1
|
||||
{.pop.}
|
||||
|
||||
{.push checks:off.}
|
||||
func sum_batch_vartime*[F; G: static Subgroup](
       r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
       points: openArray[ECP_ShortW_Aff[F, G]]) =
  ## Sets `r` to the sum of all `points`.
  ## Any previous value of `r` is discarded.
  ## `points` is left untouched: the work happens on a copy.

  # The input is processed chunk by chunk to bound memory usage,
  # which matters all the more since the buffers live on the stack.
  #
  # Constraints the chunk size is tuned for:
  # 1. MSVC limits the stack to 1MB by default, we only want a fraction of it.
  # 2. Use a large fraction of the L2 cache, but not more.
  # 3. Use a large fraction of the memory addressable by the TLB.
  # 4. Hyperthreading with 2 sibling threads (Xeon Phi has 4 siblings):
  #    stay below half the L2 cache so that chunks processed on
  #    sibling threads (same physical core) do not evict each other.
  #
  # Hardware reference points:
  # - Raspberry Pi 4 (2019, Cortex A72): 1MB L2 cache
  # - Intel Ice Lake (2019, Core 11XXX) and AMD Zen 2 (2019, Ryzen 3XXX): 512kB L2 cache
  #
  # The working set halves after each pass over a chunk, so once
  # the first pass is done we are well within the L2 cache
  # of all 64-bit CPUs.

  r.setInf()

  const
    maxChunkSize = 262144 # 2¹⁸ bytes
    maxStride = maxChunkSize div sizeof(ECP_ShortW_Aff[F, G]) # points per chunk

  # Stack buffers, sized for the largest chunk that will be processed.
  let bufLen = min(maxStride, points.len)
  let accumulators = alloca(ECP_ShortW_Aff[F, G], bufLen)
  let lambdas = alloca(tuple[num, den: F], bufLen)

  for start in countup(0, points.len-1, maxStride):
    # The last chunk may be shorter than maxStride.
    let chunkLen = min(maxStride, points.len - start)

    # Work on a private copy, the accumulation overwrites its input buffer.
    copyMem(
      accumulators[0].addr,
      points[start].unsafeAddr,
      chunkLen * sizeof(ECP_ShortW_Aff[F, G]))

    r.accumSum_chunk_vartime(accumulators, lambdas, uint chunkLen)
|
||||
|
||||
{.pop.}
|
||||
@ -15,6 +15,9 @@ import
|
||||
|
||||
export Subgroup
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Elliptic Curve in Short Weierstrass form
|
||||
@ -47,15 +50,15 @@ func `==`*(P, Q: ECP_ShortW_Jac): SecretBool =
|
||||
var z1z1 {.noInit.}, z2z2 {.noInit.}: F
|
||||
var a{.noInit.}, b{.noInit.}: F
|
||||
|
||||
z1z1.square(P.z)
|
||||
z2z2.square(Q.z)
|
||||
z1z1.square(P.z, skipFinalSub = true)
|
||||
z2z2.square(Q.z, skipFinalSub = true)
|
||||
|
||||
a.prod(P.x, z2z2)
|
||||
b.prod(Q.x, z1z1)
|
||||
result = a == b
|
||||
|
||||
a.prod(P.y, Q.z)
|
||||
b.prod(Q.y, P.z)
|
||||
a.prod(P.y, Q.z, skipFinalSub = true)
|
||||
b.prod(Q.y, P.z, skipFinalSub = true)
|
||||
a *= z2z2
|
||||
b *= z1z1
|
||||
result = result and a == b
|
||||
@ -108,9 +111,9 @@ func trySetFromCoordsXandZ*[F; G](
|
||||
result = sqrt_if_square(P.y)
|
||||
|
||||
var z2 {.noInit.}: F
|
||||
z2.square(z)
|
||||
z2.square(z, skipFinalSub = true)
|
||||
P.x.prod(x, z2)
|
||||
P.y *= z2
|
||||
P.y.prod(P.y, z2, skipFinalSub = true)
|
||||
P.y *= z
|
||||
P.z = z
|
||||
|
||||
@ -209,13 +212,13 @@ template sumImpl[F; G: static Subgroup](
|
||||
|
||||
block: # Addition-only, check for exceptional cases
|
||||
var Z2Z2 {.noInit.}, U2 {.noInit.}, S2 {.noInit.}: F
|
||||
Z2Z2.square(Q.z)
|
||||
S1.prod(Q.z, Z2Z2)
|
||||
Z2Z2.square(Q.z, skipFinalSub = true)
|
||||
S1.prod(Q.z, Z2Z2, skipFinalSub = true)
|
||||
S1 *= P.y # S₁ = Y₁*Z₂³
|
||||
U1.prod(P.x, Z2Z2) # U₁ = X₁*Z₂²
|
||||
|
||||
Z1Z1.square(P.z)
|
||||
S2.prod(P.z, Z1Z1)
|
||||
Z1Z1.square(P.z, skipFinalSub = true)
|
||||
S2.prod(P.z, Z1Z1, skipFinalSub = true)
|
||||
S2 *= Q.y # S₂ = Y₂*Z₁³
|
||||
U2.prod(Q.x, Z1Z1) # U₂ = X₂*Z₁²
|
||||
|
||||
@ -285,10 +288,10 @@ template sumImpl[F; G: static Subgroup](
|
||||
var b{.noInit.} = HH_or_YY
|
||||
a.ccopy(P.x, isDbl)
|
||||
b.ccopy(P.x, isDbl)
|
||||
HHH_or_Mpre.prod(a, b) # HHH or X₁²
|
||||
HHH_or_Mpre.prod(a, b, true) # HHH or X₁²
|
||||
|
||||
# Assuming doubling path
|
||||
a.square(HHH_or_Mpre)
|
||||
a.square(HHH_or_Mpre, skipFinalSub = true)
|
||||
a *= HHH_or_Mpre # a = 3X₁²
|
||||
b.square(Z1Z1)
|
||||
# b.mulCheckSparse(CoefA) # TODO: broken static compile-time type inference
|
||||
@ -302,31 +305,33 @@ template sumImpl[F; G: static Subgroup](
|
||||
# - R_or_M is set with R (add) or M (dbl)
|
||||
# - HHH_or_Mpre contains HHH (add) or garbage precomputation (dbl)
|
||||
# - V_or_S is set with V = U₁*HH (add) or S = X₁*YY (dbl)
|
||||
var o {.noInit.}: typeof(r)
|
||||
block: # Finishing line
|
||||
# we can start using r, while carefully handling r and P or Q aliasing
|
||||
var t {.noInit.}: F
|
||||
t.double(V_or_S)
|
||||
r.x.square(R_or_M)
|
||||
r.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
|
||||
r.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
|
||||
o.x.square(R_or_M)
|
||||
o.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
|
||||
o.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
|
||||
|
||||
V_or_S -= r.x # V-X₃ (add) or S-X₃ (dbl)
|
||||
r.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
|
||||
V_or_S -= o.x # V-X₃ (add) or S-X₃ (dbl)
|
||||
o.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
|
||||
HHH_or_Mpre.ccopy(HH_or_YY, isDbl) # HHH (add) or YY (dbl)
|
||||
S1.ccopy(HH_or_YY, isDbl) # S1 (add) or YY (dbl)
|
||||
HHH_or_Mpre *= S1 # HHH*S1 (add) or YY² (dbl)
|
||||
r.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
|
||||
o.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
|
||||
|
||||
t = Q.z
|
||||
t.ccopy(H_or_Y, isDbl) # Z₂ (add) or Y₁ (dbl)
|
||||
t *= P.z # Z₁Z₂ (add) or Y₁Z₁ (dbl)
|
||||
r.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
|
||||
r.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
|
||||
t.prod(t, P.z, true) # Z₁Z₂ (add) or Y₁Z₁ (dbl)
|
||||
o.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
|
||||
o.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
|
||||
|
||||
# if P or R were infinity points they would have spread 0 with Z₁Z₂
|
||||
block: # Infinity points
|
||||
r.ccopy(Q, P.isInf())
|
||||
r.ccopy(P, Q.isInf())
|
||||
o.ccopy(Q, P.isInf())
|
||||
o.ccopy(P, Q.isInf())
|
||||
|
||||
r = o
|
||||
|
||||
func sum*[F; G: static Subgroup](
|
||||
r: var ECP_ShortW_Jac[F, G],
|
||||
@ -379,81 +384,176 @@ func madd*[F; G: static Subgroup](
|
||||
P: ECP_ShortW_Jac[F, G],
|
||||
Q: ECP_ShortW_Aff[F, G]
|
||||
) =
|
||||
## Elliptic curve mixed addition for Short Weierstrass curves
|
||||
## with p in Jacobian coordinates and Q in affine coordinates
|
||||
## Elliptic curve mixed addition for Short Weierstrass curves in Jacobian coordinates
|
||||
## with the curve ``a`` being a parameter for summing on isogenous curves.
|
||||
##
|
||||
## R = P + Q
|
||||
##
|
||||
## TODO: ⚠️ cannot handle P == Q
|
||||
# "madd-2007-bl" mixed addition formula - https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
|
||||
# with conditional copies to handle infinity points
|
||||
# Assumptions: Z2=1.
|
||||
# Cost: 7M + 4S + 9add + 3*2 + 1*4.
|
||||
# Source: 2007 Bernstein–Lange.
|
||||
# Explicit formulas:
|
||||
##
|
||||
## Short Weierstrass curves have the following equation in Jacobian coordinates
|
||||
## Y² = X³ + aXZ⁴ + bZ⁶
|
||||
## from the affine equation
|
||||
## y² = x³ + a x + b
|
||||
##
|
||||
## ``r`` is initialized/overwritten with the sum
|
||||
## ``CoefA`` allows fast path for curve with a == 0 or a == -3
|
||||
## and also allows summing on curve isogenies.
|
||||
##
|
||||
## Implementation is constant-time, in particular it will not expose
|
||||
## that P == Q or P == -Q or P or Q are the infinity points
|
||||
## to simple side-channel attacks (SCA)
|
||||
## This is done by using a "complete" or "exception-free" addition law.
|
||||
#
|
||||
# Z1Z1 = Z1²
|
||||
# U2 = X2*Z1Z1
|
||||
# S2 = Y2*Z1*Z1Z1
|
||||
# H = U2-X1
|
||||
# HH = H2
|
||||
# I = 4*HH
|
||||
# J = H*I
|
||||
# r = 2*(S2-Y1)
|
||||
# V = X1*I
|
||||
# X3 = r²-J-2*V
|
||||
# Y3 = r*(V-X3)-2*Y1*J
|
||||
# Z3 = (Z1+H)²-Z1Z1-HH
|
||||
var Z1Z1 {.noInit.}, H {.noInit.}, HH {.noInit.}, I{.noInit.}, J {.noInit.}: F
|
||||
# Implementation, see write-up at the bottom.
|
||||
# We fuse addition and doubling with condition copy by swapping
|
||||
# terms with the following table
|
||||
#
|
||||
# | Addition, Cohen et al, 1998 | Doubling, Cohen et al, 1998 | Doubling a = -3 | Doubling a = 0 |
|
||||
# | 12M + 4S + 6add + 1*2 | 3M + 6S + 1*a + 4add + 1*2 + 1*3 + 1half | | |
|
||||
# | ----------------------------- | -----------------------------------------| ----------------- | -------------- |
|
||||
# | Z₁Z₁ = Z₁² | Z₁Z₁ = Z₁² | | |
|
||||
# | Z₂Z₂ = Z₂² | | | |
|
||||
# | | | | |
|
||||
# | U₁ = X₁*Z₂Z₂ | | | |
|
||||
# | U₂ = X₂*Z₁Z₁ | | | |
|
||||
# | S₁ = Y₁*Z₂*Z₂Z₂ | | | |
|
||||
# | S₂ = Y₂*Z₁*Z₁Z₁ | | | |
|
||||
# | H = U₂-U₁ # P=-Q, P=Inf, P=Q | | | |
|
||||
# | R = S₂-S₁ # Q=Inf | | | |
|
||||
# | | | | |
|
||||
# | HH = H² | YY = Y₁² | | |
|
||||
# | V = U₁*HH | S = X₁*YY | | |
|
||||
# | HHH = H*HH | M = (3*X₁²+a*ZZ²)/2 | 3(X₁-ZZ)(X₁+ZZ)/2 | 3X₁²/2 |
|
||||
# | | | | |
|
||||
# | X₃ = R²-HHH-2*V | X₃ = M²-2*S | | |
|
||||
# | Y₃ = R*(V-X₃)-S₁*HHH | Y₃ = M*(S-X₃)-YY*YY | | |
|
||||
# | Z₃ = Z₁*Z₂*H | Z₃ = Y₁*Z₁ | | |
|
||||
#
|
||||
# For mixed addition we just set Z₂ = 1
|
||||
var Z1Z1 {.noInit.}, U1 {.noInit.}, S1 {.noInit.}, H {.noInit.}, R {.noinit.}: F
|
||||
|
||||
# Preload P and Q in cache
|
||||
let pIsInf = P.isInf()
|
||||
let qIsInf = Q.isInf()
|
||||
block: # Addition-only, check for exceptional cases
|
||||
var U2 {.noInit.}, S2 {.noInit.}: F
|
||||
U1 = P.x
|
||||
S1 = P.y
|
||||
|
||||
Z1Z1.square(P.z) # Z₁Z₁ = Z₁²
|
||||
r.z.prod(P.z, Z1Z1) # P.Z is hot in cache, keep it in same register.
|
||||
r.z *= Q.y # S₂ = Y₂Z₁Z₁Z₁ -- r.z used as S₂
|
||||
Z1Z1.square(P.z, skipFinalSub = true)
|
||||
S2.prod(P.z, Z1Z1, skipFinalSub = true)
|
||||
S2 *= Q.y # S₂ = Y₂*Z₁³
|
||||
U2.prod(Q.x, Z1Z1) # U₂ = X₂*Z₁²
|
||||
|
||||
H.prod(Q.x, Z1Z1) # U₂ = X₂Z₁Z₁
|
||||
H -= P.x # H = U₂ - X₁
|
||||
H.diff(U2, U1) # H = U₂-U₁
|
||||
R.diff(S2, S1) # R = S₂-S₁
|
||||
|
||||
HH.square(H) # HH = H²
|
||||
# Exceptional cases
|
||||
# Expressing H as affine, if H == 0, P == Q or -Q
|
||||
# H = U₂-U₁ = X₂*Z₁² - X₁*Z₂² = x₂*Z₂²*Z₁² - x₁*Z₁²*Z₂²
|
||||
# if H == 0 && R == 0, P = Q -> doubling
|
||||
# if only H == 0, P = -Q -> infinity, implied in Z₃ = Z₁*Z₂*H = 0
|
||||
# if only R == 0, P and Q are related by the cubic root endomorphism
|
||||
let isDbl = H.isZero() and R.isZero()
|
||||
|
||||
I.double(HH)
|
||||
I.double() # I = 4HH
|
||||
# Rename buffers under the form (add_or_dbl)
|
||||
template R_or_M: untyped = R
|
||||
template H_or_Y: untyped = H
|
||||
template V_or_S: untyped = U1
|
||||
var HH_or_YY {.noInit.}: F
|
||||
var HHH_or_Mpre {.noInit.}: F
|
||||
|
||||
J.prod(H, I) # J = H*I
|
||||
r.y.prod(P.x, I) # V = X₁*I -- r.y used as V
|
||||
H_or_Y.ccopy(P.y, isDbl) # H (add) or Y₁ (dbl)
|
||||
HH_or_YY.square(H_or_Y) # H² (add) or Y₁² (dbl)
|
||||
|
||||
r.z -= P.y #
|
||||
r.z.double() # r = 2*(S₂-Y₁) -- r.z used as r
|
||||
V_or_S.ccopy(P.x, isDbl) # U₁ (add) or X₁ (dbl)
|
||||
V_or_S *= HH_or_YY # V = U₁*HH (add) or S = X₁*YY (dbl)
|
||||
|
||||
r.x.square(r.z) # r²
|
||||
r.x -= J
|
||||
r.x -= r.y
|
||||
r.x -= r.y # X₃ = r²-J-2*V -- r.x computed
|
||||
block: # Compute M for doubling
|
||||
# "when" static evaluation doesn't shortcut booleans :/
|
||||
# which causes issues when CoefA isn't an int but Fp or Fp2
|
||||
const CoefA = F.C.getCoefA()
|
||||
when CoefA is int:
|
||||
const CoefA_eq_zero = CoefA == 0
|
||||
const CoefA_eq_minus3 {.used.} = CoefA == -3
|
||||
else:
|
||||
const CoefA_eq_zero = false
|
||||
const CoefA_eq_minus3 = false
|
||||
|
||||
r.y -= r.x # V-X₃
|
||||
r.y *= r.z # r*(V-X₃)
|
||||
when CoefA_eq_zero:
|
||||
var a {.noInit.} = H
|
||||
var b {.noInit.} = HH_or_YY
|
||||
a.ccopy(P.x, isDbl) # H or X₁
|
||||
b.ccopy(P.x, isDbl) # HH or X₁
|
||||
HHH_or_Mpre.prod(a, b) # HHH or X₁²
|
||||
|
||||
J *= P.y # Y₁J -- J reused as Y₁J
|
||||
r.y -= J
|
||||
r.y -= J # Y₃ = r*(V-X₃) - 2*Y₁J -- r.y computed
|
||||
var M{.noInit.} = HHH_or_Mpre # Assuming on doubling path
|
||||
M.div2() # X₁²/2
|
||||
M += HHH_or_Mpre # 3X₁²/2
|
||||
R_or_M.ccopy(M, isDbl)
|
||||
|
||||
r.z.sum(P.z, H) # Z₁ + H
|
||||
r.z.square()
|
||||
r.z -= Z1Z1
|
||||
r.z -= HH # Z₃ = (Z1+H)²-Z1Z1-HH
|
||||
elif CoefA_eq_minus3:
|
||||
var a{.noInit.}, b{.noInit.}: F
|
||||
a.sum(P.x, Z1Z1)
|
||||
b.diff(P.z, Z1Z1)
|
||||
a.ccopy(H_or_Y, not isDbl) # H or X₁+ZZ
|
||||
b.ccopy(HH_or_YY, not isDbl) # HH or X₁-ZZ
|
||||
HHH_or_Mpre.prod(a, b) # HHH or X₁²-ZZ²
|
||||
|
||||
# Now handle points at infinity
|
||||
proc one(): F =
|
||||
result.setOne()
|
||||
var M{.noInit.} = HHH_or_Mpre # Assuming on doubling path
|
||||
M.div2() # (X₁²-ZZ²)/2
|
||||
M += HHH_or_Mpre # 3(X₁²-ZZ²)/2
|
||||
R_or_M.ccopy(M, isDbl)
|
||||
|
||||
r.x.ccopy(Q.x, pIsInf)
|
||||
r.y.ccopy(Q.y, pIsInf)
|
||||
r.z.ccopy(static(one()), pIsInf)
|
||||
else:
|
||||
# TODO: Costly `a` coefficients can be computed
|
||||
# by merging their computation with Z₃ = Z₁*Z₂*H (add) or Z₃ = Y₁*Z₁ (dbl)
|
||||
var a{.noInit.} = H
|
||||
var b{.noInit.} = HH_or_YY
|
||||
a.ccopy(P.x, isDbl)
|
||||
b.ccopy(P.x, isDbl)
|
||||
HHH_or_Mpre.prod(a, b, true) # HHH or X₁²
|
||||
|
||||
r.ccopy(P, qIsInf)
|
||||
# Assuming doubling path
|
||||
a.square(HHH_or_Mpre, skipFinalSub = true)
|
||||
a *= HHH_or_Mpre # a = 3X₁²
|
||||
b.square(Z1Z1)
|
||||
# b.mulCheckSparse(CoefA) # TODO: broken static compile-time type inference
|
||||
b *= CoefA # b = αZZ, with α the "a" coefficient of the curve
|
||||
|
||||
a += b
|
||||
a.div2()
|
||||
R_or_M.ccopy(a, isDbl) # (3X₁² - αZZ)/2
|
||||
|
||||
# Let's count our horses, at this point:
|
||||
# - R_or_M is set with R (add) or M (dbl)
|
||||
# - HHH_or_Mpre contains HHH (add) or garbage precomputation (dbl)
|
||||
# - V_or_S is set with V = U₁*HH (add) or S = X₁*YY (dbl)
|
||||
var o {.noInit.}: typeof(r)
|
||||
block: # Finishing line
|
||||
var t {.noInit.}: F
|
||||
t.double(V_or_S)
|
||||
o.x.square(R_or_M)
|
||||
o.x -= t # X₃ = R²-2*V (add) or M²-2*S (dbl)
|
||||
o.x.csub(HHH_or_Mpre, not isDbl) # X₃ = R²-HHH-2*V (add) or M²-2*S (dbl)
|
||||
|
||||
V_or_S -= o.x # V-X₃ (add) or S-X₃ (dbl)
|
||||
o.y.prod(R_or_M, V_or_S) # Y₃ = R(V-X₃) (add) or M(S-X₃) (dbl)
|
||||
HHH_or_Mpre.ccopy(HH_or_YY, isDbl) # HHH (add) or YY (dbl)
|
||||
S1.ccopy(HH_or_YY, isDbl) # S1 (add) or YY (dbl)
|
||||
HHH_or_Mpre *= S1 # HHH*S1 (add) or YY² (dbl)
|
||||
o.y -= HHH_or_Mpre # Y₃ = R(V-X₃)-S₁*HHH (add) or M(S-X₃)-YY² (dbl)
|
||||
|
||||
t.setOne()
|
||||
t.ccopy(H_or_Y, isDbl) # Z₂ (add) or Y₁ (dbl)
|
||||
t.prod(t, P.z, true) # Z₁Z₂ (add) or Y₁Z₁ (dbl)
|
||||
o.z.prod(t, H_or_Y) # Z₁Z₂H (add) or garbage (dbl)
|
||||
o.z.ccopy(t, isDbl) # Z₁Z₂H (add) or Y₁Z₁ (dbl)
|
||||
|
||||
block: # Infinity points
|
||||
o.x.ccopy(Q.x, P.isInf())
|
||||
o.y.ccopy(Q.y, P.isInf())
|
||||
o.z.csetOne(P.isInf())
|
||||
|
||||
o.ccopy(P, Q.isInf())
|
||||
|
||||
r = o
|
||||
|
||||
func double*[F; G: static Subgroup](
|
||||
r: var ECP_ShortW_Jac[F, G],
|
||||
@ -524,11 +624,7 @@ func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} =
|
||||
|
||||
func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
|
||||
## In-place mixed point addition
|
||||
##
|
||||
## TODO: ⚠️ cannot handle P == Q
|
||||
var t{.noInit.}: typeof(P)
|
||||
t.madd(P, Q)
|
||||
P = t
|
||||
P.madd(P, Q)
|
||||
|
||||
func double*(P: var ECP_ShortW_Jac) {.inline.} =
|
||||
## In-place point doubling
|
||||
@ -547,10 +643,10 @@ func affine*[F; G](
|
||||
jac: ECP_ShortW_Jac[F, G]) =
|
||||
var invZ {.noInit.}, invZ2{.noInit.}: F
|
||||
invZ.inv(jac.z)
|
||||
invZ2.square(invZ)
|
||||
invZ2.square(invZ, skipFinalSub = true)
|
||||
|
||||
aff.x.prod(jac.x, invZ2)
|
||||
invZ *= invZ2
|
||||
invZ.prod(invZ, invZ2, skipFinalSub = true)
|
||||
aff.y.prod(jac.y, invZ)
|
||||
|
||||
func fromAffine*[F; G](
|
||||
@ -586,38 +682,38 @@ func batchAffine*[N: static int, F, G](
|
||||
zeroes[i] = z.isZero()
|
||||
z.csetOne(zeroes[i])
|
||||
|
||||
affs[i].x.prod(affs[i-1].x, z)
|
||||
if i != N-1:
|
||||
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = true)
|
||||
else:
|
||||
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = false)
|
||||
|
||||
var accInv {.noInit.}: F
|
||||
accInv.inv(affs[N-1].x)
|
||||
|
||||
for i in countdown(N-1, 1):
|
||||
# Skip zero z-coordinates (infinity points)
|
||||
var z = affs[i].x
|
||||
|
||||
# Extract 1/Pᵢ
|
||||
var invi {.noInit.}: F
|
||||
invi.prod(accInv, affs[i-1].x)
|
||||
invi.prod(accInv, affs[i-1].x, skipFinalSub = true)
|
||||
invi.csetZero(zeroes[i])
|
||||
|
||||
# Now convert Pᵢ to affine
|
||||
var invi2 {.noinit.}: F
|
||||
invi2.square(invi)
|
||||
invi2.square(invi, skipFinalSub = true)
|
||||
affs[i].x.prod(jacs[i].x, invi2)
|
||||
invi *= invi2
|
||||
invi.prod(invi, invi2, skipFinalSub = true)
|
||||
affs[i].y.prod(jacs[i].y, invi)
|
||||
|
||||
# next iteration
|
||||
invi = jacs[i].z
|
||||
invi.csetOne(zeroes[i])
|
||||
accInv *= invi
|
||||
accInv.prod(accInv, invi, skipFinalSub = true)
|
||||
|
||||
block: # tail
|
||||
var invi2 {.noinit.}: F
|
||||
accInv.csetZero(zeroes[0])
|
||||
invi2.square(accInv)
|
||||
invi2.square(accInv, skipFinalSub = true)
|
||||
affs[0].x.prod(jacs[0].x, invi2)
|
||||
accInv *= invi2
|
||||
accInv.prod(accInv, invi2, skipFinalSub = true)
|
||||
affs[0].y.prod(jacs[0].y, accInv)
|
||||
|
||||
# ############################################################
|
||||
|
||||
@ -15,6 +15,9 @@ import
|
||||
|
||||
export Subgroup
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Elliptic Curve in Short Weierstrass form
|
||||
@ -261,7 +264,6 @@ func madd*[F; G: static Subgroup](
|
||||
## ``r`` may alias P
|
||||
|
||||
# TODO: static doAssert odd order
|
||||
|
||||
when F.C.getCoefA() == 0:
|
||||
var t0 {.noInit.}, t1 {.noInit.}, t2 {.noInit.}, t3 {.noInit.}, t4 {.noInit.}: F
|
||||
var x3 {.noInit.}, y3 {.noInit.}, z3 {.noInit.}: F
|
||||
@ -273,6 +275,10 @@ func madd*[F; G: static Subgroup](
|
||||
# Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ − 3bZ₁)
|
||||
# + 9bX₁X₂ (X₁ + X₂Z₁)
|
||||
# Z₃= (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3 X₁X₂ (X₁Y₂ + X₂Y₁)
|
||||
#
|
||||
# Note¹⁰ mentions that, due to Qz = 1, Q cannot be
|
||||
# the point at infinity.
|
||||
# We solve that by conditional copies.
|
||||
t0.prod(P.x, Q.x) # 1. t₀ <- X₁ X₂
|
||||
t1.prod(P.y, Q.y) # 2. t₁ <- Y₁ Y₂
|
||||
t3.sum(P.x, P.y) # 3. t₃ <- X₁ + Y₁ ! error in paper
|
||||
@ -304,13 +310,24 @@ func madd*[F; G: static Subgroup](
|
||||
y3 *= SexticNonResidue
|
||||
x3.prod(t4, y3) # 18. X₃ <- t₄ Y₃, X₃ = (Y₁ + Y₂Z₁) 3b(X₁ + X₂Z₁)
|
||||
t2.prod(t3, t1) # 19. t₂ <- t₃ t₁, t₂ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁)
|
||||
r.x.diff(t2, x3) # 20. X₃ <- t₂ - X₃, X₃ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁) - 3b(Y₁ + Y₂Z₁)(X₁ + X₂Z₁)
|
||||
x3.diff(t2, x3) # 20. X₃ <- t₂ - X₃, X₃ = (X₁Y₂ + X₂Y₁)(Y₁Y₂ - 3bZ₁) - 3b(Y₁ + Y₂Z₁)(X₁ + X₂Z₁)
|
||||
y3 *= t0 # 21. Y₃ <- Y₃ t₀, Y₃ = 9bX₁X₂ (X₁ + X₂Z₁)
|
||||
t1 *= z3 # 22. t₁ <- t₁ Z₃, t₁ = (Y₁Y₂ - 3bZ₁)(Y₁Y₂ + 3bZ₁)
|
||||
r.y.sum(y3, t1) # 23. Y₃ <- t₁ + Y₃, Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ - 3bZ₁) + 9bX₁X₂ (X₁ + X₂Z₁)
|
||||
y3 += t1 # 23. Y₃ <- t₁ + Y₃, Y₃ = (Y₁Y₂ + 3bZ₁)(Y₁Y₂ - 3bZ₁) + 9bX₁X₂ (X₁ + X₂Z₁)
|
||||
t0 *= t3 # 31. t₀ <- t₀ t₃, t₀ = 3X₁X₂ (X₁Y₂ + X₂Y₁)
|
||||
z3 *= t4 # 32. Z₃ <- Z₃ t₄, Z₃ = (Y₁Y₂ + 3bZ₁)(Y₁ + Y₂Z₁)
|
||||
r.z.sum(z3, t0) # 33. Z₃ <- Z₃ + t₀, Z₃ = (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3X₁X₂ (X₁Y₂ + X₂Y₁)
|
||||
z3 += t0 # 33. Z₃ <- Z₃ + t₀, Z₃ = (Y₁ + Y₂Z₁)(Y₁Y₂ + 3bZ₁) + 3X₁X₂ (X₁Y₂ + X₂Y₁)
|
||||
|
||||
# Deal with infinity point. r and P might alias.
|
||||
let inf = Q.isInf()
|
||||
x3.ccopy(P.x, inf)
|
||||
y3.ccopy(P.y, inf)
|
||||
z3.ccopy(P.z, inf)
|
||||
|
||||
r.x = x3
|
||||
r.y = y3
|
||||
r.z = z3
|
||||
|
||||
else:
|
||||
{.error: "Not implemented.".}
|
||||
|
||||
@ -454,18 +471,18 @@ func batchAffine*[N: static int, F, G](
|
||||
zeroes[i] = z.isZero()
|
||||
z.csetOne(zeroes[i])
|
||||
|
||||
affs[i].x.prod(affs[i-1].x, z)
|
||||
if i != N-1:
|
||||
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = true)
|
||||
else:
|
||||
affs[i].x.prod(affs[i-1].x, z, skipFinalSub = false)
|
||||
|
||||
var accInv {.noInit.}: F
|
||||
accInv.inv(affs[N-1].x)
|
||||
|
||||
for i in countdown(N-1, 1):
|
||||
# Skip zero z-coordinates (infinity points)
|
||||
var z = affs[i].x
|
||||
|
||||
# Extract 1/Pᵢ
|
||||
var invi {.noInit.}: F
|
||||
invi.prod(accInv, affs[i-1].x)
|
||||
invi.prod(accInv, affs[i-1].x, skipFinalSub = true)
|
||||
invi.csetZero(zeroes[i])
|
||||
|
||||
# Now convert Pᵢ to affine
|
||||
@ -475,7 +492,7 @@ func batchAffine*[N: static int, F, G](
|
||||
# next iteration
|
||||
invi = projs[i].z
|
||||
invi.csetOne(zeroes[i])
|
||||
accInv *= invi
|
||||
accInv.prod(accInv, invi, skipFinalSub = true)
|
||||
|
||||
block: # tail
|
||||
accInv.csetZero(zeroes[0])
|
||||
|
||||
@ -276,6 +276,10 @@ func double*[Field](
|
||||
E -= D # C stores E-D
|
||||
r.y *= E
|
||||
|
||||
func `+=`*(P: var ECP_TwEdwards_Prj, Q: ECP_TwEdwards_Prj) {.inline.} =
  ## In-place point addition: P <- P + Q
  sum(P, P, Q)
|
||||
|
||||
func double*(P: var ECP_TwEdwards_Prj) {.inline.} =
  ## In-place EC doubling: P <- [2]P
  double(P, P)
|
||||
|
||||
@ -2156,5 +2156,29 @@ func inv*(a: var CubicExt) =
|
||||
## to affine for elliptic curve
|
||||
a.invImpl(a)
|
||||
|
||||
# Convenience functions
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
template square*(a: var ExtensionField, skipFinalSub: static bool) =
  # In-place squaring alias.
  # `skipFinalSub` is accepted and ignored, so that the same code
  # can be used for the base field and for its extensions
  # while still benefitting from skipping the final subtraction on Fp.
  square(a)
|
||||
|
||||
template square*(r: var ExtensionField, a: ExtensionField, skipFinalSub: static bool) =
  # Squaring alias, r <- a².
  # `skipFinalSub` is accepted and ignored, so that the same code
  # can be used for the base field and for its extensions
  # while still benefitting from skipping the final subtraction on Fp.
  square(r, a)
|
||||
|
||||
template prod*(r: var ExtensionField, a, b: ExtensionField, skipFinalSub: static bool) =
  # Product alias, r <- a*b.
  # `skipFinalSub` is accepted and ignored, so that the same code
  # can be used for the base field and for its extensions
  # while still benefitting from skipping the final subtraction on Fp.
  prod(r, a, b)
|
||||
|
||||
{.pop.} # inline
|
||||
{.pop.} # raises no exceptions
|
||||
|
||||
32
constantine/platforms/allocs.nim
Normal file
32
constantine/platforms/allocs.nim
Normal file
@ -0,0 +1,32 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Allocators
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# Due to the following constraints:
|
||||
# - No dynamic allocation in single-threaded codepaths (for compatibility with embedded devices like TPM or secure hardware)
|
||||
# - Avoiding cryptographic material in third-party libraries (like a memory allocator)
|
||||
# - Giving full control of the library user on allocation strategy
|
||||
# - Performance, especially for long-running processes (fragmentation, multithreaded allocation...)
|
||||
#
|
||||
# stack allocation is strongly preferred where necessary.
|
||||
|
||||
# C header that declares `alloca` on the target platform.
const AllocaHeader =
  when defined(windows): "<malloc.h>"
  else: "<alloca.h>"

proc alloca(size: int): pointer {.header: AllocaHeader.}
  ## Raw binding to the C `alloca`, kept private to this module.

template alloca*(T: typedesc): ptr T =
  ## Room for a single `T`, obtained from the C `alloca`.
  ## This is a template, so the call to `alloca` is expanded
  ## in the caller. The memory is not initialized.
  cast[ptr T](alloca(sizeof(T)))

template alloca*(T: typedesc, len: Natural): ptr UncheckedArray[T] =
  ## Room for `len` contiguous `T`, obtained from the C `alloca`.
  ## This is a template, so the call to `alloca` is expanded
  ## in the caller. The memory is not initialized.
  cast[ptr UncheckedArray[T]](alloca(len * sizeof(T)))
|
||||
@ -13,7 +13,7 @@ import
|
||||
|
||||
# Pararun is a parallel shell command runner
|
||||
# ------------------------------------------
|
||||
# Usage: pararun <file-with-1-command-per-line> <numWorkers
|
||||
# Usage: pararun <file-with-1-command-per-line> <numWorkers>
|
||||
|
||||
# AsyncSemaphore
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
30
tests/math/t_ec_shortw_jac_g1_batch_add.nim
Normal file
30
tests/math/t_ec_shortw_jac_g1_batch_add.nim
Normal file
@ -0,0 +1,30 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
# Internals
|
||||
../../constantine/math/config/curves,
|
||||
../../constantine/math/elliptic/ec_shortweierstrass_jacobian,
|
||||
../../constantine/math/arithmetic,
|
||||
# Test utilities
|
||||
./t_ec_template
|
||||
|
||||
const
  # Batch sizes under test; 262144 and 1048576 are commented out.
  numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384, 32768] # 262144, 1048576]
  # Common prefix of the module names, the curve name is appended.
  moduleBase = "test_ec_shortweierstrass_jacobian_batch_add_"

# Batch addition into Jacobian coordinates, G1 of BN254_Snarks
run_EC_batch_add_impl(
  ec = ECP_ShortW_Jac[Fp[BN254_Snarks], G1],
  numPoints = numPoints,
  moduleName = moduleBase & $BN254_Snarks
)

# Batch addition into Jacobian coordinates, G1 of BLS12_381
run_EC_batch_add_impl(
  ec = ECP_ShortW_Jac[Fp[BLS12_381], G1],
  numPoints = numPoints,
  moduleName = moduleBase & $BLS12_381
)
|
||||
30
tests/math/t_ec_shortw_prj_g1_batch_add.nim
Normal file
30
tests/math/t_ec_shortw_prj_g1_batch_add.nim
Normal file
@ -0,0 +1,30 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
# Internals
|
||||
../../constantine/math/config/curves,
|
||||
../../constantine/math/elliptic/ec_shortweierstrass_projective,
|
||||
../../constantine/math/arithmetic,
|
||||
# Test utilities
|
||||
./t_ec_template
|
||||
|
||||
const
  # Batch sizes under test; 262144 and 1048576 are commented out.
  numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384, 32768] # 262144, 1048576]
  # Common prefix of the module names, the curve name is appended.
  moduleBase = "test_ec_shortweierstrass_projective_batch_add_"

# Batch addition into projective coordinates, G1 of BN254_Snarks
run_EC_batch_add_impl(
  ec = ECP_ShortW_Prj[Fp[BN254_Snarks], G1],
  numPoints = numPoints,
  moduleName = moduleBase & $BN254_Snarks
)

# Batch addition into projective coordinates, G1 of BLS12_381
run_EC_batch_add_impl(
  ec = ECP_ShortW_Prj[Fp[BLS12_381], G1],
  numPoints = numPoints,
  moduleName = moduleBase & $BLS12_381
)
|
||||
@ -23,6 +23,7 @@ import
|
||||
ec_shortweierstrass_affine,
|
||||
ec_shortweierstrass_jacobian,
|
||||
ec_shortweierstrass_projective,
|
||||
ec_shortweierstrass_batch_ops,
|
||||
ec_twistededwards_affine,
|
||||
ec_twistededwards_projective,
|
||||
ec_scalar_mul],
|
||||
@ -41,7 +42,7 @@ type
|
||||
Long01Sequence
|
||||
|
||||
func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen): EC {.noInit.} =
|
||||
if not randZ:
|
||||
when EC is ECP_ShortW_Aff:
|
||||
if gen == Uniform:
|
||||
result = rng.random_unsafe(EC)
|
||||
elif gen == HighHammingWeight:
|
||||
@ -49,12 +50,20 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen)
|
||||
else:
|
||||
result = rng.random_long01Seq(EC)
|
||||
else:
|
||||
if gen == Uniform:
|
||||
result = rng.random_unsafe_with_randZ(EC)
|
||||
elif gen == HighHammingWeight:
|
||||
result = rng.random_highHammingWeight_with_randZ(EC)
|
||||
if not randZ:
|
||||
if gen == Uniform:
|
||||
result = rng.random_unsafe(EC)
|
||||
elif gen == HighHammingWeight:
|
||||
result = rng.random_highHammingWeight(EC)
|
||||
else:
|
||||
result = rng.random_long01Seq(EC)
|
||||
else:
|
||||
result = rng.random_long01Seq_with_randZ(EC)
|
||||
if gen == Uniform:
|
||||
result = rng.random_unsafe_with_randZ(EC)
|
||||
elif gen == HighHammingWeight:
|
||||
result = rng.random_highHammingWeight_with_randZ(EC)
|
||||
else:
|
||||
result = rng.random_long01Seq_with_randZ(EC)
|
||||
|
||||
template pairingGroup(EC: typedesc): string =
|
||||
when EC is (ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac):
|
||||
@ -99,6 +108,15 @@ proc run_EC_addition_tests*(
|
||||
r.sum(inf, P)
|
||||
check: bool(r == P)
|
||||
|
||||
# Aliasing tests
|
||||
r = P
|
||||
r += inf
|
||||
check: bool(r == P)
|
||||
|
||||
r.setInf()
|
||||
r += P
|
||||
check: bool(r == P)
|
||||
|
||||
test(ec, randZ = false, gen = Uniform)
|
||||
test(ec, randZ = true, gen = Uniform)
|
||||
test(ec, randZ = false, gen = HighHammingWeight)
|
||||
@ -438,7 +456,7 @@ proc run_EC_mixed_add_impl*(
|
||||
|
||||
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
|
||||
test "EC " & G1_or_G2 & " mixed addition is consistent with general addition":
|
||||
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
|
||||
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
|
||||
for _ in 0 ..< Iters:
|
||||
let a = rng.random_point(EC, randZ, gen)
|
||||
let b = rng.random_point(EC, randZ, gen)
|
||||
@ -452,12 +470,83 @@ proc run_EC_mixed_add_impl*(
|
||||
|
||||
check: bool(r_generic == r_mixed)
|
||||
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
|
||||
test(ec, randZ = false, gen = Uniform)
|
||||
test(ec, randZ = true, gen = Uniform)
|
||||
test(ec, randZ = false, gen = HighHammingWeight)
|
||||
test(ec, randZ = true, gen = HighHammingWeight)
|
||||
test(ec, randZ = false, gen = Long01Sequence)
|
||||
test(ec, randZ = true, gen = Long01Sequence)
|
||||
|
||||
test "EC " & G1_or_G2 & " mixed addition - doubling":
|
||||
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
|
||||
for _ in 0 ..< Iters:
|
||||
let a = rng.random_point(EC, randZ, gen)
|
||||
var aAff: ECP_ShortW_Aff[EC.F, EC.G]
|
||||
aAff.affine(a)
|
||||
|
||||
var r_generic, r_mixed: EC
|
||||
|
||||
r_generic.double(a)
|
||||
r_mixed.madd(a, aAff)
|
||||
check: bool(r_generic == r_mixed)
|
||||
|
||||
# Aliasing test
|
||||
r_mixed = a
|
||||
r_mixed += aAff
|
||||
check: bool(r_generic == r_mixed)
|
||||
|
||||
test(ec, randZ = false, gen = Uniform)
|
||||
test(ec, randZ = true, gen = Uniform)
|
||||
test(ec, randZ = false, gen = HighHammingWeight)
|
||||
test(ec, randZ = true, gen = HighHammingWeight)
|
||||
test(ec, randZ = false, gen = Long01Sequence)
|
||||
test(ec, randZ = true, gen = Long01Sequence)
|
||||
|
||||
test "EC " & G1_or_G2 & " mixed addition - adding infinity LHS":
  proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
    ## For `Iters` random affine points Q, checks that adding Q to a
    ## left operand set with `setInf` gives back Q, both with `madd`
    ## and with the in-place `+=`.
    ## `randZ` is accepted for signature parity with sibling tests;
    ## the affine point is always drawn with `randZ = false`.
    for _ in 0 ..< Iters:
      var lhs{.noInit.}: EC
      lhs.setInf()
      let rhsAff = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)

      # Out-of-place mixed addition: infinity + Q
      var mixedSum{.noInit.}: EC
      mixedSum.madd(lhs, rhsAff)

      # Convert the result back to affine to compare it with Q directly
      var sumAff{.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
      sumAff.affine(mixedSum)

      # In-place variant: the accumulator aliases the infinity operand
      lhs += rhsAff

      check:
        bool(sumAff == rhsAff)
        bool(lhs == mixedSum)

  for gen in [Uniform, HighHammingWeight, Long01Sequence]:
    test(ec, randZ = false, gen = gen)
|
||||
|
||||
test "EC " & G1_or_G2 & " mixed addition - adding infinity RHS":
  proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
    ## For `Iters` random points P, checks that adding an affine point
    ## set with `setInf` leaves P unchanged, both with `madd` and with
    ## the in-place `+=`.
    for _ in 0 ..< Iters:
      let pt = rng.random_point(EC, randZ, gen)
      var infAff{.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
      infAff.setInf()

      # Out-of-place: P + infinity
      var acc{.noInit.}: EC
      acc.madd(pt, infAff)
      check bool(acc == pt)

      # In-place: the accumulator aliases the left-hand operand
      acc = pt
      acc += infAff
      check bool(acc == pt)

  # Same order as the explicit call list: for each generator,
  # first randZ = false, then randZ = true.
  for gen in [Uniform, HighHammingWeight, Long01Sequence]:
    for randZ in [false, true]:
      test(ec, randZ = randZ, gen = gen)
|
||||
|
||||
proc run_EC_subgroups_cofactors_impl*(
|
||||
ec: typedesc,
|
||||
@ -480,7 +569,7 @@ proc run_EC_subgroups_cofactors_impl*(
|
||||
|
||||
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
|
||||
test "Effective cofactor matches accelerated cofactor clearing" & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
|
||||
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
|
||||
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
|
||||
for _ in 0 ..< ItersMul:
|
||||
let P = rng.random_point(EC, randZ, gen)
|
||||
var cPeff = P
|
||||
@ -491,17 +580,17 @@ proc run_EC_subgroups_cofactors_impl*(
|
||||
|
||||
check: bool(cPeff == cPfast)
|
||||
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
|
||||
test(ec, randZ = false, gen = Uniform)
|
||||
test(ec, randZ = true, gen = Uniform)
|
||||
test(ec, randZ = false, gen = HighHammingWeight)
|
||||
test(ec, randZ = true, gen = HighHammingWeight)
|
||||
test(ec, randZ = false, gen = Long01Sequence)
|
||||
test(ec, randZ = true, gen = Long01Sequence)
|
||||
|
||||
test "Subgroup checks and cofactor clearing consistency":
|
||||
var inSubgroup = 0
|
||||
var offSubgroup = 0
|
||||
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
|
||||
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
|
||||
stdout.write " "
|
||||
for _ in 0 ..< ItersMul:
|
||||
let P = rng.random_point(EC, randZ, gen)
|
||||
@ -526,12 +615,12 @@ proc run_EC_subgroups_cofactors_impl*(
|
||||
|
||||
stdout.write '\n'
|
||||
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = HighHammingWeight)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
|
||||
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
|
||||
test(ec, randZ = false, gen = Uniform)
|
||||
test(ec, randZ = true, gen = Uniform)
|
||||
test(ec, randZ = false, gen = HighHammingWeight)
|
||||
test(ec, randZ = true, gen = HighHammingWeight)
|
||||
test(ec, randZ = false, gen = Long01Sequence)
|
||||
test(ec, randZ = true, gen = Long01Sequence)
|
||||
|
||||
echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", G1_or_G2, " subgroup and ",
|
||||
offSubgroup, " points on curve but not in subgroup (before cofactor clearing)"
|
||||
@ -699,4 +788,82 @@ proc run_EC_conversion_failures*(
|
||||
doAssert bool(Qs[i] == Rs[i])
|
||||
|
||||
test_bn254_snarks_g1(ECP_ShortW_Prj[Fp[BN254_Snarks], G1])
|
||||
test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1])
|
||||
test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1])
|
||||
|
||||
proc run_EC_batch_add_impl*[N: static int](
       ec: typedesc,
       numPoints: array[N, int],
       moduleName: string
     ) =
  ## Test suite comparing `sum_batch_vartime` against a reference sum
  ## built by repeated in-place `+=` starting from `setInf`.
  ##
  ## - `ec`: elliptic curve point type used for the accumulators
  ## - `numPoints`: batch sizes; two tests are registered per size
  ## - `moduleName`: label echoed next to the RNG seed

  # Time-derived seed, echoed so that a failing run can be reproduced
  var rng: RngState
  let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
  rng.seed(seed)
  echo "\n------------------------------------------------------\n"
  echo moduleName, " xoshiro512** seed: ", seed

  const G1_or_G2 = when ec.G == G1: "G1" else: "G2"
  const testSuiteDesc = "Elliptic curve batch addition for Short Weierstrass form"

  suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
    for n in numPoints:
      test $ec & " batch addition (N=" & $n & ")":
        proc test(EC: typedesc, gen: RandomGen) =
          ## Batch sum of `n` independently drawn affine points.
          var pts = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
          for pt in pts.mitems():
            pt = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)

          var expected{.noInit.}, batched{.noInit.}: EC

          # Reference: accumulate the points one at a time
          expected.setInf()
          for pt in pts:
            expected += pt

          batched.sum_batch_vartime(pts)

          check bool(batched == expected)

        for gen in [Uniform, HighHammingWeight, Long01Sequence]:
          test(ec, gen = gen)

      test "EC " & G1_or_G2 & " batch addition (N=" & $n & ") - special cases":
        proc test(EC: typedesc, gen: RandomGen) =
          ## Batch sum where each second-half point is, at random, a
          ## fresh point, a copy of its first-half partner, or that
          ## partner's negation.
          var pts = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
          let half = n div 2

          # First half: fresh random points
          for i in 0 ..< half:
            pts[i] = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)

          for i in half ..< n:
            # The special cases test relies on internal knowledge that
            # we sum(pts[i], pts[i+n/2]).
            # It should be changed if the scheduling changes, for example
            # if we sum(pts[2*i], pts[2*i+1]).
            let pick = rng.random_unsafe(3)
            if pick == 0:
              # Unrelated point
              pts[i] = rng.random_point(ECP_ShortW_Aff[EC.F, EC.G], randZ = false, gen)
            elif pick == 1:
              # Same point as its first-half partner
              pts[i] = pts[i-half]
            else:
              # Negation of its first-half partner
              pts[i].neg(pts[i-half])

          var expected{.noInit.}, batched{.noInit.}: EC

          # Reference: accumulate the points one at a time
          expected.setInf()
          for pt in pts:
            expected += pt

          batched.sum_batch_vartime(pts)

          check bool(batched == expected)

        for gen in [Uniform, HighHammingWeight, Long01Sequence]:
          test(ec, gen = gen)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user