import
  ../constantine/math/arithmetic,
  ../constantine/math/io/[io_bigints, io_fields],
  ../constantine/math/config/curves,
  ../constantine/platforms/[abstractions, codecs],
  ../constantine/math_arbitrary_precision/arithmetic/bigints_views,
  ../helpers/prng_unsafe,
  ./platforms, ./bench_blueprint

import stint, gmp
from bigints import nil # force qualified import to avoid conflicts on BigInt

# Benchmarks for modular exponentiation implementations:
#
# - Constantine has 2 backends
#   - The cryptographic backend uses fixed-sized integers.
#     Often the modulus is known at compile-time (specific elliptic curves),
#     except for RSA.
#
#     This allows reducing precomputation time,
#     and unrolling all loops.
#     This is significant as incrementing a loop counter messes up carry propagation.
#
#     That backend requires the modulus to be prime.
#
#     As cryptography only uses primes (which are odd), this is not a limitation.
#     However it is not suitable for general-purpose use.
#
#   - The arbitrary-sized integer backend.
#     Some protocols like Ethereum modexp (EIP-198) require
#     modular exponentiation on arbitrary inputs.
#
# - Stint, GMP, nim-bigints are also benchmarked
#   for reference. GMP and nim-bigints require dynamic allocation.
#   - For GMP, we reuse buffers to limit allocation to the first benchmark
#   - nim-bigints doesn't allow reusing buffers
#
# Stint requires all inputs to be the same size
# so we use 256-bits for all.
#
# To benchmark the cryptographic backend, we use Secp256k1 (the Bitcoin curve).
# Note that Constantine implements it generically;
# due to the special form of the prime (2²⁵⁶ - 2³² - 977),
# even faster algorithms can be used.
#   This gives an upper-bound

proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) =
  ## Prints one summary line for the benchmark labelled `op`.
  ##
  ## `elapsedNs` and `elapsedCycles` are totals accumulated over `iters`
  ## timed calls. Both are averaged with integer division, and the
  ## throughput (ops/s) is derived from the truncated per-call nanoseconds.
  ## The CPU cycle column is only printed when `SupportsGetTicks` is true.
  ##
  ## When `iters` is not positive nothing can be averaged, so a short notice
  ## is printed instead of dividing by zero.
  if iters <= 0:
    echo op, ": no iterations were timed"
    return

  let ns = elapsedNs div iters
  let cycles = elapsedCycles div iters
  let throughput = 1e9 / float64(ns)
  when SupportsGetTicks:
    echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
  else:
    echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op"

const # https://gmplib.org/manual/Integer-Import-and-Export.html
  GMP_WordLittleEndian = -1'i32
  GMP_WordNativeEndian = 0'i32
  GMP_WordBigEndian = 1'i32

  GMP_MostSignificantWordFirst = 1'i32
  GMP_LeastSignificantWordFirst = -1'i32

# Bit width used for every operand of every benchmarked implementation.
const bits = 256

type BenchDesc = object
  ## One benchmark input: base `a`, exponent `e` and modulus `M`.
  # Hex strings
  a: string
  e: string
  M: string

proc genBench(iters: int): seq[BenchDesc] =
  ## Builds `iters` benchmark inputs.
  ##
  ## Each of `a`, `e` and `M` is drawn independently with
  ## `rng.random_long01Seq(BigInt[bits])` and stored as a hex string.
  for _ in 0 ..< iters:
    let a = rng.random_long01Seq(BigInt[bits])
    let e = rng.random_long01Seq(BigInt[bits])
    let M = rng.random_long01Seq(BigInt[bits])
    result.add BenchDesc(
      a: a.toHex(),
      e: e.toHex(),
      M: M.toHex())

template bench(fnCall: untyped, ticks, ns: var int64): untyped =
  ## Executes `fnCall` once and accumulates its cost:
  ## the tick delta is added to `ticks` and the monotonic-clock delta,
  ## in nanoseconds, is added to `ns`.
  block:
    # The tick counter is read innermost so that it brackets `fnCall`
    # as tightly as possible.
    let startTime = getMonotime()
    let startClock = getTicks()
    fnCall
    let stopClock = getTicks()
    let stopTime = getMonotime()

    ticks += stopClock - startClock
    ns += inNanoseconds(stopTime-startTime)

proc benchAll(desc: seq[BenchDesc]) =
  ## Runs every input of `desc` through each implementation, prints one
  ## `report` line per implementation, then prints the total time of each
  ## one relative to Constantine's arbitrary-precision backend.
  ##
  ## Only the exponentiation call itself is timed; hex parsing and buffer
  ## imports happen outside of `bench`.
  if desc.len == 0:
    # Without inputs every average below would be a division by zero.
    echo "No benchmark inputs, nothing to run"
    return

  var perfCttArb, perfCttCrypto, perfGmp, perfStint, perfNimBigInt: int64

  block: # Constantine Arbitrary-precision
    var ticks, nanoseconds: int64

    for d in desc:
      # The implementation is view based and uses unowned-buffers (seq or arrays)
      # but for hex parsing simplicity we reuse BigInt buffers
      # and we directly access the array behind with .limbs
      var r: BigInt[bits]
      let a = BigInt[bits].fromHex(d.a)
      let M = BigInt[bits].fromHex(d.M)
      let e = array[bits div 8, byte].fromHex(d.e)

      bench(
        r.limbs.powMod_varTime(a.limbs, e, M.limbs, window = 4),
        ticks, nanoseconds)

    report("Constantine (generic arbitrary-precision)", nanoseconds, ticks, desc.len)
    perfCttArb = nanoseconds

  block: # Constantine Cryptographic backend
    var ticks, nanoseconds: int64

    # Exponent byte buffer, allocated once and refilled on every iteration.
    var e = newSeq[byte](bits div 8)
    for d in desc:
      # `d.M` is not read here: the arithmetic is done in Fp[Secp256k1].
      var r: Fp[Secp256k1]
      let a = Fp[Secp256k1].fromHex(d.a)
      e.paddedFromHex(d.e, bigEndian)

      bench(
        (r = a; r.pow_varTime(e)),
        ticks, nanoseconds)

    report("Constantine (crypto fixed 256-bit precision)", nanoseconds, ticks, desc.len)
    perfCttCrypto = nanoseconds

  block: # GMP
    var ticks, nanoseconds: int64

    # The four mpz_t are initialized once and reused for every input.
    var a, e, M, r: mpz_t
    mpz_init(a)
    mpz_init(e)
    mpz_init(M)
    mpz_init(r)

    for d in desc:
      # Inputs are parsed as Constantine BigInt, then their limbs are
      # imported into GMP (least-significant word first, native endianness).
      let aCtt = BigInt[bits].fromHex(d.a)
      a.mpz_import(aCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, aCtt.limbs[0].unsafeAddr)
      let eCtt = BigInt[bits].fromHex(d.e)
      e.mpz_import(eCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, eCtt.limbs[0].unsafeAddr)
      let mCtt = BigInt[bits].fromHex(d.M)
      M.mpz_import(mCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, mCtt.limbs[0].unsafeAddr)

      bench(
        r.mpz_powm(a, e, M),
        ticks, nanoseconds)

    report("GMP", nanoseconds, ticks, desc.len)
    perfGmp = nanoseconds

    mpz_clear(r)
    mpz_clear(M)
    mpz_clear(e)
    mpz_clear(a)

  block: # Stint
    var ticks, nanoseconds: int64

    for d in desc:
      let a = StUint[bits].fromHex(d.a)
      let e = StUint[bits].fromHex(d.e)
      let M = StUint[bits].fromHex(d.M)

      bench(
        (let r = powmod(a, e, M)),
        ticks, nanoseconds)

    report("Stint", nanoseconds, ticks, desc.len)
    perfStint = nanoseconds

  block: # Nim bigints
    var ticks, nanoseconds: int64

    for d in desc:
      # Drop the 0x prefix
      let a = bigints.initBigInt(d.a[2..^1], base = 16)
      let e = bigints.initBigInt(d.e[2..^1], base = 16)
      let M = bigints.initBigInt(d.M[2..^1], base = 16)

      bench(
        (let r = bigints.powmod(a, e, M)),
        ticks, nanoseconds)

    report("nim-bigints", nanoseconds, ticks, desc.len)
    perfNimBigInt = nanoseconds

  # Each ratio is "total time of X" / "total time of Constantine generic".
  let ratioCrypto = float64(perfCttCrypto) / float64(perfCttArb)
  let ratioGMP = float64(perfGmp) / float64(perfCttArb)
  let ratioStint = float64(perfStint) / float64(perfCttArb)
  let ratioNimBigInt = float64(perfNimBigInt) / float64(perfCttArb)

  echo ""
  echo &"Perf ratio Constantine generic vs crypto fixed precision: {ratioCrypto:>8.3f}x"
  echo &"Perf ratio Constantine generic vs GMP: {ratioGMP:>8.3f}x"
  echo &"Perf ratio Constantine generic vs Stint: {ratioStint:>8.3f}x"
  echo &"Perf ratio Constantine generic vs nim-bigints: {ratioNimBigInt:>8.3f}x"

when isMainModule:
  let benchDesc = genBench(100)
  benchDesc.benchAll()