mirror of
https://github.com/codex-storage/constantine.git
synced 2025-01-12 03:54:14 +00:00
b9c911ba37
* accel FFT by 30+% with vartime endomorphism support * silly error fix * endomorphism + wNAF, closes #253, FFT 20% speedup * vartime EC addition for all repr * implement vartime EC add * finishing touches, renam to fft_vartime
298 lines
11 KiB
Nim
298 lines
11 KiB
Nim
# Constantine
|
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
|
# Licensed and distributed under either of
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
# ############################################################
|
|
#
|
|
# Benchmark of elliptic curves
|
|
#
|
|
# ############################################################
|
|
|
|
import
|
|
# Internals
|
|
../constantine/platforms/abstractions,
|
|
../constantine/math/config/curves,
|
|
../constantine/math/arithmetic,
|
|
../constantine/math/io/io_bigints,
|
|
../constantine/math/elliptic/[
|
|
ec_shortweierstrass_affine,
|
|
ec_shortweierstrass_projective,
|
|
ec_shortweierstrass_jacobian,
|
|
ec_shortweierstrass_jacobian_extended,
|
|
ec_shortweierstrass_batch_ops,
|
|
ec_scalar_mul, ec_endomorphism_accel],
|
|
../constantine/math/constants/zoo_subgroups,
|
|
# Helpers
|
|
../helpers/prng_unsafe,
|
|
./platforms,
|
|
./bench_blueprint,
|
|
# Reference unsafe scalar multiplication
|
|
../constantine/math/elliptic/ec_scalar_mul_vartime
|
|
|
|
export notes
|
|
export abstractions # generic sandwich on SecretBool and SecretBool in Jacobian sum
|
|
|
|
proc separator*() =
  ## Zero-argument convenience overload: forwards to the one-argument
  ## `separator` with the fixed value 179.
  separator(179)
|
|
|
|
macro fixEllipticDisplay(EC: typedesc): untyped =
  ## Produces a string literal `Coordinates[Field[CurveName]]` describing
  ## the instantiated elliptic curve type `EC`.
  ##
  ## At compile-time, enums are integers and their display is buggy:
  ## we would get the Curve ID instead of the curve name. The ID is
  ## therefore converted back through `Curve` before being stringified.
  let typeInst = EC.getTypeInst()
  let ecNode = typeInst[1]
  let fieldNode = ecNode[1]

  let coordsName = $ecNode[0] # EllipticEquationFormCoordinates
  let fieldName = $fieldNode[0]
  let curveName = $Curve(fieldNode[1].intVal)

  result = newLit(coordsName & "[" & fieldName & "[" & curveName & "]]")
|
|
|
|
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one benchmark result line for operation `op` on curve type
  ## `elliptic`: throughput in ops/s, average ns/op over `iters`
  ## iterations and, when `SupportsGetTicks` is true, the average number
  ## of clock ticks per iteration.
  let elapsed = stop - start
  let ns = inNanoseconds(elapsed div iters)
  let throughput = 1e9 / ns.float64
  when SupportsGetTicks:
    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
  else:
    echo &"{op:<68} {elliptic:<36} {throughput:>15.3f} ops/s {ns:>16} ns/op"
|
|
|
|
template bench*(op: string, EC: typedesc, iters: int, body: untyped): untyped =
  ## Hands `body` to `measure` together with `iters`, then prints the
  ## result via `report`, labelled with `op` and the display name of `EC`.
  ##
  ## `startTime`, `stopTime`, `startClk` and `stopClk` are not declared
  ## here; presumably `measure` introduces them in the caller's scope,
  ## which would explain why call sites wrap repeated uses in `block:`
  ## (to be confirmed against `measure`).
  measure(iters, startTime, stopTime, startClk, stopClk):
    body
  report(op, fixEllipticDisplay(EC), startTime, stopTime, startClk, stopClk, iters)
|
|
|
|
func `+=`[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_JacExt[F, G]) {.inline.} =
  ## In-place addition `P <- P + Q` for extended Jacobian points,
  ## implemented with `sum_vartime`.
  sum_vartime(P, P, Q)
|
|
func `+=`[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_Aff[F, G]) {.inline.} =
  ## In-place mixed addition `P <- P + Q` of an affine point into an
  ## extended Jacobian point, implemented with `madd_vartime`.
  madd_vartime(P, P, Q)
|
|
|
|
proc addBench*(EC: typedesc, iters: int) =
  ## Benchmarks the addition of two random points of type `EC`.
  ##
  ## For `ECP_ShortW_JacExt` only `sum_vartime` is measured; for every
  ## other representation both `sum` and `sum_vartime` are measured.
  let lhs = rng.random_unsafe(EC)
  let rhs = rng.random_unsafe(EC)
  var dst {.noInit.}: EC

  when EC is ECP_ShortW_JacExt:
    let label = "EC Add vartime " & $EC.G
    bench(label, EC, iters):
      dst.sum_vartime(lhs, rhs)
  else:
    # Each `bench` lives in its own block so the two uses do not share a scope.
    block:
      let label = "EC Add " & $EC.G
      bench(label, EC, iters):
        dst.sum(lhs, rhs)
    block:
      let label = "EC Add vartime " & $EC.G
      bench(label, EC, iters):
        dst.sum_vartime(lhs, rhs)
|
|
|
|
proc mixedAddBench*(EC: typedesc, iters: int) =
  ## Benchmarks mixed addition: a random point of type `EC` plus a second
  ## random point that has first been converted to affine coordinates.
  ##
  ## For `ECP_ShortW_JacExt` only `madd_vartime` is measured; for every
  ## other representation both `madd` and `madd_vartime` are measured.
  let lhs = rng.random_unsafe(EC)
  let rhs = rng.random_unsafe(EC)
  var rhsAff: ECP_ShortW_Aff[EC.F, EC.G]
  rhsAff.affine(rhs)
  var dst {.noInit.}: EC

  when EC is ECP_ShortW_JacExt:
    let label = "EC Mixed Addition vartime " & $EC.G
    bench(label, EC, iters):
      dst.madd_vartime(lhs, rhsAff)
  else:
    # Each `bench` lives in its own block so the two uses do not share a scope.
    block:
      let label = "EC Mixed Addition " & $EC.G
      bench(label, EC, iters):
        dst.madd(lhs, rhsAff)
    block:
      let label = "EC Mixed Addition vartime " & $EC.G
      bench(label, EC, iters):
        dst.madd_vartime(lhs, rhsAff)
|
|
|
|
proc doublingBench*(EC: typedesc, iters: int) =
  ## Benchmarks `double` on a random point of type `EC`.
  let src = rng.random_unsafe(EC)
  var dst {.noInit.}: EC
  let label = "EC Double " & $EC.G
  bench(label, EC, iters):
    dst.double(src)
|
|
|
|
proc affFromProjBench*(EC: typedesc, iters: int) =
  ## Benchmarks the conversion of a single random point of type `EC` to
  ## affine coordinates, reported under the "Projective to Affine" label.
  let src = rng.random_unsafe(EC)
  var dst {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
  let label = "EC Projective to Affine " & $EC.G
  bench(label, EC, iters):
    dst.affine(src)
|
|
|
|
proc affFromJacBench*(EC: typedesc, iters: int) =
  ## Benchmarks the conversion of a single random point of type `EC` to
  ## affine coordinates, reported under the "Jacobian to Affine" label.
  let src = rng.random_unsafe(EC)
  var dst {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
  let label = "EC Jacobian to Affine " & $EC.G
  bench(label, EC, iters):
    dst.affine(src)
|
|
|
|
proc affFromProjBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmarks the conversion of `numPoints` random points of type `EC`
  ## to affine coordinates, reported under the "Projective to Affine"
  ## label.
  ##
  ## With `useBatching` a single `batchAffine` call is measured;
  ## otherwise a loop calling `affine` once per point is measured.
  var points = newSeq[EC](numPoints)
  for pt in points.mitems:
    pt = rng.random_unsafe(EC)

  var r = newSeq[affine(EC)](numPoints)
  let suffix = $EC.G & " (" & $numPoints & " points)"

  if useBatching:
    let label = "EC Projective to Affine - batched " & suffix
    bench(label, EC, iters):
      r.asUnchecked().batchAffine(points.asUnchecked(), numPoints)
  else:
    let label = "EC Projective to Affine - unbatched " & suffix
    bench(label, EC, iters):
      for i in 0 ..< numPoints:
        r[i].affine(points[i])
|
|
|
|
proc affFromJacBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmarks the conversion of `numPoints` random points of type `EC`
  ## to affine coordinates, reported under the "Jacobian to Affine" label.
  ##
  ## With `useBatching` a single `batchAffine` call is measured;
  ## otherwise a loop calling `affine` once per point is measured.
  var points = newSeq[EC](numPoints)
  for pt in points.mitems:
    pt = rng.random_unsafe(EC)

  var r = newSeq[affine(EC)](numPoints)
  let suffix = $EC.G & " (" & $numPoints & " points)"

  if useBatching:
    let label = "EC Jacobian to Affine - batched " & suffix
    bench(label, EC, iters):
      r.asUnchecked().batchAffine(points.asUnchecked(), numPoints)
  else:
    let label = "EC Jacobian to Affine - unbatched " & suffix
    bench(label, EC, iters):
      for i in 0 ..< numPoints:
        r[i].affine(points[i])
|
|
|
|
proc scalarMulGenericBench*(EC: typedesc, bits, window: static int, iters: int) =
  ## Benchmarks `scalarMulGeneric` with the given `window` on a random
  ## point (after `clearCofactor`) and a random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-" & $window & ", generic)"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMulGeneric(exponent, window)
|
|
|
|
proc scalarMulEndo*(EC: typedesc, bits: static int, iters: int) =
  ## Benchmarks `scalarMulEndo` on a random point (after `clearCofactor`)
  ## and a random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (endomorphism accelerated)"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMulEndo(exponent)
|
|
|
|
proc scalarMulEndoWindow*(EC: typedesc, bits: static int, iters: int) =
  ## Benchmarks `scalarMulGLV_m2w2` on a random point (after
  ## `clearCofactor`) and a random `bits`-bit scalar.
  ##
  ## Only available when `EC.F` is `Fp`; any other field is rejected at
  ## compile time with "Not implemented".
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (window-2, endomorphism accelerated)"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    when EC.F isnot Fp:
      {.error: "Not implemented".}
    else:
      acc.scalarMulGLV_m2w2(exponent)
|
|
|
|
proc scalarMulVartimeDoubleAddBench*(EC: typedesc, bits: static int, iters: int) =
  ## Benchmarks `scalarMul_doubleAdd_vartime` on a random point (after
  ## `clearCofactor`) and a random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (vartime reference DoubleAdd)"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMul_doubleAdd_vartime(exponent)
|
|
|
|
proc scalarMulVartimeMinHammingWeightRecodingBench*(EC: typedesc, bits: static int, iters: int) =
  ## Benchmarks `scalarMul_minHammingWeight_vartime` on a random point
  ## (after `clearCofactor`) and a random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (vartime min Hamming Weight recoding)"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMul_minHammingWeight_vartime(exponent)
|
|
|
|
proc scalarMulVartimeWNAFBench*(EC: typedesc, bits, window: static int, iters: int) =
  ## Benchmarks `scalarMul_minHammingWeight_windowed_vartime` with the
  ## given `window` on a random point (after `clearCofactor`) and a
  ## random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (vartime wNAF-" & $window & ")"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMul_minHammingWeight_windowed_vartime(exponent, window)
|
|
|
|
proc scalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iters: int) =
  ## Benchmarks `scalarMulEndo_minHammingWeight_windowed_vartime` with the
  ## given `window` on a random point (after `clearCofactor`) and a
  ## random `bits`-bit scalar.
  var base = rng.random_unsafe(EC)
  base.clearCofactor()
  let exponent = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (vartime endomorphism + wNAF-" & $window & ")"
  bench(label, EC, iters):
    # The multiplication works in place, so start again from the base point.
    acc = base
    acc.scalarMulEndo_minHammingWeight_windowed_vartime(exponent, window)
|
|
|
|
proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmarks summing `numPoints` random affine points into a single
  ## accumulator of type `EC`.
  ##
  ## With `useBatching` one `sum_reduce_vartime` call is measured;
  ## otherwise the accumulator is set to infinity and every point is
  ## added with `+=` in a loop.
  var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
  for pt in points.mitems:
    pt = rng.random_unsafe(ECP_ShortW_Aff[EC.F, EC.G])

  var r {.noInit.}: EC
  let suffix = $EC.G & " (" & $numPoints & " points)"

  if useBatching:
    let label = "EC Multi Add batched " & suffix
    bench(label, EC, iters):
      r.sum_reduce_vartime(points)
  else:
    let label = "EC Multi Mixed-Add unbatched " & suffix
    bench(label, EC, iters):
      r.setInf()
      for i in 0 ..< numPoints:
        r += points[i]
|
|
|
|
|
|
proc msmBench*(EC: typedesc, numPoints: int, iters: int) =
  ## Benchmarks the linear combination of `numPoints` (scalar, point)
  ## pairs and prints speedup ratios between the measured strategies:
  ##
  ## - a naive loop of `scalarMul` followed by `+=`, skipped when
  ##   `numPoints` exceeds `naiveMaxPoints`,
  ## - `multiScalarMul_reference_vartime` (baseline),
  ## - `multiScalarMul_vartime` (optimized).
  ##
  ## Points are random points of type `EC` that went through
  ## `clearCofactor` and were converted to affine coordinates; scalars
  ## are random `BigInt`s with the bit width of the curve order.
  const bits = EC.F.C.getCurveOrderBitwidth()
  const naiveMaxPoints = 100000 # the naive strategy is not run above this size

  var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
  var scalars = newSeq[BigInt[bits]](numPoints)

  for i in 0 ..< numPoints:
    var tmp = rng.random_unsafe(EC)
    tmp.clearCofactor()
    points[i].affine(tmp)
    scalars[i] = rng.random_unsafe(BigInt[bits])

  var r{.noInit.}: EC
  var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt: MonoTime
  let runNaive = numPoints <= naiveMaxPoints

  if runNaive:
    bench("EC scalar muls " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      startNaive = getMonotime()
      var tmp: EC
      r.setInf()
      for i in 0 ..< points.len:
        tmp.fromAffine(points[i])
        tmp.scalarMul(scalars[i])
        r += tmp
      stopNaive = getMonotime()

  block:
    bench("EC multi-scalar-mul baseline " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      startMSMbaseline = getMonotime()
      r.multiScalarMul_reference_vartime(scalars, points)
      stopMSMbaseline = getMonotime()

  block:
    bench("EC multi-scalar-mul optimized " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      startMSMopt = getMonotime()
      r.multiScalarMul_vartime(scalars, points)
      stopMSMopt = getMonotime()

  # Every start/stop pair is assigned inside the benchmarked body, so it is
  # overwritten on each run and its difference is the duration of a single
  # run. It must therefore NOT be divided by `iters` again: doing so
  # under-reports the time and the integer truncation can reach 0 ns,
  # which turns the ratios below into inf/NaN.
  let perfNaive = inNanoseconds(stopNaive - startNaive)
  let perfMSMbaseline = inNanoseconds(stopMSMbaseline - startMSMbaseline)
  let perfMSMopt = inNanoseconds(stopMSMopt - startMSMopt)

  if runNaive:
    let speedupBaseline = float(perfNaive) / float(perfMSMbaseline)
    echo &"Speedup ratio baseline over naive linear combination: {speedupBaseline:>6.3f}x"

    let speedupOpt = float(perfNaive) / float(perfMSMopt)
    echo &"Speedup ratio optimized over naive linear combination: {speedupOpt:>6.3f}x"

  let speedupOptBaseline = float(perfMSMbaseline) / float(perfMSMopt)
  echo &"Speedup ratio optimized over baseline linear combination: {speedupOptBaseline:>6.3f}x"
|