2020-04-15 17:38:02 +00:00
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# Benchmark of elliptic curves
#
# ############################################################
import
# Internals
2020-07-24 20:02:30 +00:00
.. / constantine / config / [ curves , common ] ,
2020-06-04 18:37:29 +00:00
.. / constantine / arithmetic ,
.. / constantine / io / io_bigints ,
2020-09-26 07:16:29 +00:00
.. / constantine / elliptic / [ ec_weierstrass_affine , ec_weierstrass_projective , ec_scalar_mul , ec_endomorphism_accel ] ,
2020-04-15 17:38:02 +00:00
# Helpers
2020-04-15 19:24:18 +00:00
.. / helpers / [ prng_unsafe , static_for ] ,
. / platforms ,
2020-04-15 17:38:02 +00:00
# Standard library
2020-06-14 13:39:06 +00:00
std / [ monotimes , times , strformat , strutils , macros ] ,
# Reference unsafe scalar multiplication
.. / tests / support / ec_reference_scalar_mult
2020-04-15 17:38:02 +00:00
# Module-level PRNG state used by the benchmark procs in this file.
# It is seeded from the current Unix time and the seed is echoed at start-up.
var rng: RngState
let seed = block:
  # Keep only the low 32 bits of the timestamp: unixTime mod 2^32
  const low32Mask = (1'i64 shl 32) - 1
  uint32(toUnix(getTime()) and low32Mask)
rng.seed(seed)
echo " bench xoshiro512** seed: ", seed
# warmup
proc warmup*() =
  ## Runs 300 million iterations of integer arithmetic and prints the CPU
  ## time it took together with the computed value.
  # Warmup - make sure cpu is on max perf
  let start = cpuTime()
  var foo = 123
  for i in countup(0, 300_000_000 - 1):
    foo += i * i mod 456
    foo = foo mod 789
  # `foo` is printed below so that the compiler cannot optimize the loop
  # away; `cpuTime` itself relies on side effects.
  let stop = cpuTime()
  echo &" Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away) \n "
warmup()

# Name of the C compiler backend, selected at compile time.
const compilerBanner =
  when defined(gcc): " \n Compiled with GCC "
  elif defined(clang): " \n Compiled with Clang "
  elif defined(vcc): " \n Compiled with MSVC "
  elif defined(icc): " \n Compiled with ICC "
  else: " \n Compiled with an unknown compiler "
echo compilerBanner

# Build flags the binary was compiled with.
echo " Optimization level => "
echo " no optimization: ", not defined(release)
echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

# Limb width: 32-bit when `int` is 4 bytes or when Constantine32 is defined.
const limbBanner =
  when defined(Constantine32) or (sizeof(int) == 4):
    " ⚠️ Warning: using Constantine with 32-bit limbs "
  else:
    " Using Constantine with 64-bit limbs "
echo limbBanner

when SupportsCPUName:
  echo " Running on ", cpuName(), " "

when SupportsGetTicks:
  echo " \n ⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them. "
  echo " i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling) "

echo " \n ================================================================================================================= \n "
2020-04-15 17:38:02 +00:00
proc separator*() =
  ## Prints a horizontal rule made of the string literal below repeated
  ## 177 times.
  echo repeat(" - ", 177)
2020-04-15 17:38:02 +00:00
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one result row for benchmark `op` on the type named `elliptic`.
  ##
  ## `start` and `stop` bracket the whole run of `iters` iterations. The row
  ## shows operations per second and whole nanoseconds per iteration. When
  ## `SupportsGetTicks` is true, the tick difference `stopClk - startClk`
  ## divided by `iters` is appended as approximate CPU cycles.
  let perIteration = (stop - start) div iters
  let ns = perIteration.inNanoseconds
  let throughput = 1e9 / ns.float64
  when not SupportsGetTicks:
    echo &" {op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op "
  else:
    echo &" {op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx) "
2020-04-15 17:38:02 +00:00
2020-07-24 20:02:30 +00:00
proc notes*() =
  ## Prints a fixed list of caveats about the benchmark results: compiler
  ## behaviour on multiprecision arithmetic, the nimble tasks to use for a
  ## specific compiler, and the risk of trivial operations being optimized
  ## away.
  echo " Notes: "
  echo " - Compilers: "
  echo " Compilers are severely limited on multiprecision arithmetic. "
  echo " Constantine compile-time assembler is used by default (nimble bench_fp). "
  echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries. "
  echo " GCC also seems to have issues with large temporaries and register spilling. "
  echo " This is somewhat alleviated by Constantine compile-time assembler. "
  echo " Bench on specific compiler with assembler: \" nimble bench_ec_g1_gcc \" or \" nimble bench_ec_g1_clang \" . "
  # The `_noasm` tasks are the ones WITHOUT the assembler; the label used to
  # repeat "with assembler" from the line above.
  echo " Bench on specific compiler without assembler: \" nimble bench_ec_g1_gcc_noasm \" or \" nimble bench_ec_g1_clang_noasm \" . "
  echo " - The simplest operations might be optimized away by the compiler. "
  echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits) "
2020-04-15 17:38:02 +00:00
macro fixEllipticDisplay(T: typedesc): untyped =
  ## Expands to a string literal describing `T`, assembled from the nodes
  ## of its type instantiation: the outer type name, then the field name
  ## and the curve name in nested brackets.
  # At compile-time, enums are integers and their display is buggy
  # we get the Curve ID instead of the curve name.
  let typeInst = getTypeInst(T)
  let ecNode = typeInst[1]
  let fieldNode = ecNode[1]
  let ecName = $ecNode[0] # EllipticEquationFormCoordinates
  let fieldName = $fieldNode[0]
  # The curve is stored as an integer literal; convert it back to the enum
  # so that `$` yields its name.
  let curveName = $Curve(fieldNode[1].intVal)
  result = newLit(ecName & " [ " & fieldName & " [ " & curveName & " ]] ")
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
  ## Executes `body` `iters` times and passes the measurements to `report`
  ## under the label `op` and the display name of `T`.
  ##
  ## The monotonic clock is read outermost. When `SupportsGetTicks` is true
  ## the tick counter is read just inside it, around the loop; otherwise both
  ## tick values are set to -1.
  let wallStart = getMonotime()
  when SupportsGetTicks:
    let ticksStart = getTicks()
  else:
    let ticksStart = -1'i64
  for _ in countup(1, iters):
    body
  when SupportsGetTicks:
    let ticksStop = getTicks()
  else:
    let ticksStop = -1'i64
  let wallStop = getMonotime()
  report(op, fixEllipticDisplay(T), wallStart, wallStop, ticksStart, ticksStop, iters)
proc addBench*(T: typedesc, iters: int) =
  ## Benchmarks `sum` on two values of type `T` drawn from the module `rng`,
  ## repeated `iters` times through `bench`.
  ## The label suffix depends on whether `T.F` is `Fp`.
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  var acc {.noInit.}: T
  bench(" EC Add " & groupTag, T, iters):
    acc.sum(lhs, rhs)
2020-09-26 07:16:29 +00:00
proc mixedAddBench*(T: typedesc, iters: int) =
  ## Benchmarks `madd` with one operand of type `T` and one operand of type
  ## `ECP_SWei_Aff[T.F]`, repeated `iters` times through `bench`.
  ## The second operand is converted with `affineFromProjective` once,
  ## before the timed section.
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  var rhsAffine: ECP_SWei_Aff[T.F]
  rhsAffine.affineFromProjective(rhs)
  var acc {.noInit.}: T
  bench(" EC Mixed Addition " & groupTag, T, iters):
    acc.madd(lhs, rhsAffine)
2020-04-15 20:23:46 +00:00
proc doublingBench*(T: typedesc, iters: int) =
  ## Benchmarks `double` on a value of type `T` drawn from the module `rng`,
  ## repeated `iters` times through `bench`.
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let point = rng.random_unsafe(T)
  var acc {.noInit.}: T
  bench(" EC Double " & groupTag, T, iters):
    acc.double(point)
2020-06-04 18:37:29 +00:00
2020-09-03 21:10:48 +00:00
proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) =
  ## Benchmarks `scalarMulGeneric` with the compile-time `window` value,
  ## repeated `iters` times through `bench`.
  ##
  ## The scalar is a random `BigInt` whose bit width comes from
  ## `T.F.C.getCurveOrderBitwidth()`. The label reports `window` and
  ## `1 shl window` as the scratch size.
  const scalarBits = T.F.C.getCurveOrderBitwidth()
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let point = rng.random_unsafe(T) # TODO: clear cofactor
  let scalar = rng.random_unsafe(BigInt[scalarBits])
  var acc {.noInit.}: T
  bench(" EC ScalarMul Generic " & groupTag & " (window = " & $window & " , scratchsize = " & $(1 shl window) & ')', T, iters):
    # The result is reset to the input point on every iteration; this
    # assignment is part of the timed body.
    acc = point
    acc.scalarMulGeneric(scalar, window)
2020-06-04 18:37:29 +00:00
2020-06-15 20:58:56 +00:00
proc scalarMulEndo*(T: typedesc, iters: int) =
  ## Benchmarks the `scalarMulEndo` operation on a value of type `T`,
  ## repeated `iters` times through `bench`.
  ##
  ## The scalar is a random `BigInt` whose bit width comes from
  ## `T.F.C.getCurveOrderBitwidth()`.
  const scalarBits = T.F.C.getCurveOrderBitwidth()
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let point = rng.random_unsafe(T) # TODO: clear cofactor
  let scalar = rng.random_unsafe(BigInt[scalarBits])
  var acc {.noInit.}: T
  bench(" EC ScalarMul " & groupTag & " (endomorphism accelerated) ", T, iters):
    # The result is reset to the input point on every iteration; this
    # assignment is part of the timed body.
    acc = point
    acc.scalarMulEndo(scalar)
2020-06-14 13:39:06 +00:00
2020-08-24 22:02:30 +00:00
proc scalarMulEndoWindow*(T: typedesc, iters: int) =
  ## Benchmarks `scalarMulGLV_m2w2` on a value of type `T`, repeated `iters`
  ## times through `bench`.
  ##
  ## Only instantiations where `T.F` is `Fp` compile; any other field type
  ## hits the `error` pragma below.
  const scalarBits = T.F.C.getCurveOrderBitwidth()
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let point = rng.random_unsafe(T) # TODO: clear cofactor
  let scalar = rng.random_unsafe(BigInt[scalarBits])
  var acc {.noInit.}: T
  bench(" EC ScalarMul Window-2 " & groupTag & " (endomorphism accelerated) ", T, iters):
    # The result is reset to the input point on every iteration; this
    # assignment is part of the timed body.
    acc = point
    when T.F isnot Fp:
      {.error: " Not implemented ".}
    else:
      acc.scalarMulGLV_m2w2(scalar)
2020-06-14 13:39:06 +00:00
proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) =
  ## Benchmarks `unsafe_ECmul_double_add` on a value of type `T`, repeated
  ## `iters` times through `bench`.
  ##
  ## The scalar is a random `BigInt` whose bit width comes from
  ## `T.F.C.getCurveOrderBitwidth()`.
  const scalarBits = T.F.C.getCurveOrderBitwidth()
  const groupTag =
    when T.F is Fp: " G1 "
    else: " G2 "
  let point = rng.random_unsafe(T) # TODO: clear cofactor
  let scalar = rng.random_unsafe(BigInt[scalarBits])
  var acc {.noInit.}: T
  bench(" EC ScalarMul " & groupTag & " (unsafe reference DoubleAdd) ", T, iters):
    # The result is reset to the input point on every iteration; this
    # assignment is part of the timed body.
    acc = point
    acc.unsafe_ECmul_double_add(scalar)