constantine/benchmarks/bench_powmod.nim
Mamy Ratsimbazafy b7687ddc4a
Accelerate eth_evm_modexp by 25x by dividing input size by 8 (#249)
* Accelerate eth_evm_modexp by 25x by dividing input size by 8 (scales quadratically)

* instant exponentiation by power of 2 depending on trailing zeroes

* improve bench report

* rename

* rewrite the pow2k even/trailingZero accel

* eth_evm_modexp: remove leftover TimeEffect
2023-07-03 01:45:36 +02:00

205 lines
6.7 KiB
Nim

import
../constantine/math/arithmetic,
../constantine/math/io/[io_bigints, io_fields],
../constantine/math/config/curves,
../constantine/platforms/[abstractions, codecs],
../constantine/math_arbitrary_precision/arithmetic/bigints_views,
../helpers/prng_unsafe,
./platforms, ./bench_blueprint
import stint, gmp
from bigints import nil # force qualified import to avoid conflicts on BigInt
# Benchmarks for modular exponentiation implementations:
#
# - Constantine has 2 backends
# - The cryptographic backend uses fixed-size integers.
# Often the modulus is known at compile-time (specific elliptic curves),
# except for RSA.
#
# This allows reducing precomputation time,
# and unrolling all loops.
# This is significant as incrementing a loop counter messes up carry propagation.
#
# That backend requires the modulus to be prime.
#
# As cryptography only uses primes (which are odd), this is not a limitation.
# However, it is not suitable for general-purpose modular exponentiation,
# where the modulus is arbitrary.
#
# - The arbitrary-sized integer backend.
# Some protocols, like Ethereum modexp (EIP-198), require
# modular exponentiation on arbitrary inputs.
#
# - Stint, GMP, nim-bigints are also benchmarked
# for reference. GMP and nim-bigints require dynamic allocation.
# - For GMP, we reuse buffers to limit allocation to the first benchmark
# - nim-bigints doesn't allow reusing buffers
#
# Stint requires all inputs to be the same size
# so we use 256-bits for all.
#
# To benchmark the cryptographic backend, we use Secp256k1 (the Bitcoin curve).
# Note that Constantine implements it generically;
# given the special form of the prime (2²⁵⁶ - 2³² - 977),
# even faster algorithms could be used.
# This gives an upper bound.
proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) =
  ## Prints one aligned result line for the benchmark labelled `op`.
  ##
  ## - `elapsedNs`: total elapsed nanoseconds accumulated over all iterations
  ## - `elapsedCycles`: total tick count accumulated over all iterations
  ## - `iters`: number of iterations the totals were accumulated over
  ##
  ## The line shows throughput (ops/s) and ns/op; the CPU cycles column
  ## is only printed when `SupportsGetTicks` is true.

  # Clamp the divisor so that a run without iterations does not raise
  # DivByZeroDefect on the integer divisions below.
  let n = max(iters, 1)
  let ns = elapsedNs div n
  let cycles = elapsedCycles div n
  # Derive throughput from the untruncated totals: going through the
  # integer `ns` loses precision for fast operations and degenerates
  # to `inf` as soon as ns/op truncates to 0.
  let throughput = 1e9 * float64(n) / float64(elapsedNs)
  when SupportsGetTicks:
    echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
  else:
    echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op"
# Argument values for GMP's mpz_import / mpz_export, see
# https://gmplib.org/manual/Integer-Import-and-Export.html
const
  # `endian` argument: byte order inside each word
  GMP_WordLittleEndian: int32 = -1
  GMP_WordNativeEndian: int32 = 0
  GMP_WordBigEndian: int32 = 1
  # `order` argument: ordering of the words themselves
  GMP_MostSignificantWordFirst: int32 = 1
  GMP_LeastSignificantWordFirst: int32 = -1
# Operand width in bits, used for every benchmarked backend.
const bits = 256

type
  BenchDesc = object
    ## One benchmark input, stored as hex strings:
    ## `a` is the base, `e` the exponent and `M` the modulus.
    a, e, M: string
proc genBench(iters: int): seq[BenchDesc] =
  ## Builds `iters` benchmark inputs. Each input holds three `bits`-wide
  ## values drawn from `rng` with `random_long01Seq` and stored as hex strings.
  for _ in 1 .. iters:
    # The values are drawn in the order a, e, M.
    let base = rng.random_long01Seq(BigInt[bits])
    let exponent = rng.random_long01Seq(BigInt[bits])
    let modulus = rng.random_long01Seq(BigInt[bits])
    result.add BenchDesc(
      a: base.toHex(),
      e: exponent.toHex(),
      M: modulus.toHex())
template bench(fnCall: untyped, ticks, ns: var int64): untyped =
  ## Runs `fnCall` once and accumulates its cost into the caller's counters:
  ## elapsed nanoseconds (monotonic clock) are added to `ns` and, when
  ## `SupportsGetTicks` is true, the elapsed tick count is added to `ticks`.
  block:
    let startTime = getMonotime()
    # `report` treats tick support as optional, so the tick counter is only
    # sampled when it is available.
    # NOTE(review): presumably `getTicks` is not provided at all when
    # `SupportsGetTicks` is false — confirm against ./platforms.
    when SupportsGetTicks:
      let startClock = getTicks()
    fnCall
    when SupportsGetTicks:
      let stopClock = getTicks()
    let stopTime = getMonotime()
    when SupportsGetTicks:
      ticks += stopClock - startClock
    ns += inNanoseconds(stopTime-startTime)
proc benchCttArbitrary(desc: openArray[BenchDesc]): int64 =
  ## Benchmarks Constantine's arbitrary-precision `powMod_varTime` on every
  ## input, reports it and returns the total elapsed nanoseconds.
  var ticks, nanoseconds: int64
  for d in desc:
    # The implementation is view based and uses unowned-buffers (seq or arrays)
    # but for hex parsing simplicity we reuse BigInt buffers
    # and we directly access the array behind with .limbs
    var r: BigInt[bits]
    let a = BigInt[bits].fromHex(d.a)
    let M = BigInt[bits].fromHex(d.M)
    let e = array[bits div 8, byte].fromHex(d.e)
    bench(
      r.limbs.powMod_varTime(a.limbs, e, M.limbs, window = 4),
      ticks, nanoseconds)
  report("Constantine (generic arbitrary-precision)", nanoseconds, ticks, desc.len)
  result = nanoseconds

proc benchCttCrypto(desc: openArray[BenchDesc]): int64 =
  ## Benchmarks Constantine's fixed-size backend (`pow_varTime` on
  ## `Fp[Secp256k1]`) on every input, reports it and returns the total
  ## elapsed nanoseconds.
  var ticks, nanoseconds: int64
  # The exponent buffer is allocated once and refilled for each input.
  var e = newSeq[byte](bits div 8)
  for d in desc:
    var r: Fp[Secp256k1]
    let a = Fp[Secp256k1].fromHex(d.a)
    e.paddedFromHex(d.e, bigEndian)
    bench(
      (r = a; r.pow_varTime(e)),
      ticks, nanoseconds)
  report("Constantine (crypto fixed 256-bit precision)", nanoseconds, ticks, desc.len)
  result = nanoseconds

proc benchGmp(desc: openArray[BenchDesc]): int64 =
  ## Benchmarks GMP's `mpz_powm` on every input, reports it and returns the
  ## total elapsed nanoseconds. The four `mpz_t` values are initialized once
  ## and reused for all inputs.
  var ticks, nanoseconds: int64
  var a, e, M, r: mpz_t
  mpz_init(a)
  mpz_init(e)
  mpz_init(M)
  mpz_init(r)
  try:
    for d in desc:
      # Inputs are parsed with Constantine, then their limbs are handed
      # to GMP, least-significant word first, in native byte order.
      let aCtt = BigInt[bits].fromHex(d.a)
      a.mpz_import(aCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, aCtt.limbs[0].unsafeAddr)
      let eCtt = BigInt[bits].fromHex(d.e)
      e.mpz_import(eCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, eCtt.limbs[0].unsafeAddr)
      let mCtt = BigInt[bits].fromHex(d.M)
      M.mpz_import(mCtt.limbs.len, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, mCtt.limbs[0].unsafeAddr)
      bench(
        r.mpz_powm(a, e, M),
        ticks, nanoseconds)
    report("GMP", nanoseconds, ticks, desc.len)
  finally:
    # Release the GMP values even if parsing or reporting raised.
    mpz_clear(r)
    mpz_clear(M)
    mpz_clear(e)
    mpz_clear(a)
  result = nanoseconds

proc benchStint(desc: openArray[BenchDesc]): int64 =
  ## Benchmarks Stint's `powmod` on every input, reports it and returns the
  ## total elapsed nanoseconds.
  var ticks, nanoseconds: int64
  for d in desc:
    let a = Stuint[bits].fromHex(d.a)
    let e = Stuint[bits].fromHex(d.e)
    let M = Stuint[bits].fromHex(d.M)
    bench(
      (let r = powmod(a, e, M)),
      ticks, nanoseconds)
  report("Stint", nanoseconds, ticks, desc.len)
  result = nanoseconds

proc benchNimBigints(desc: openArray[BenchDesc]): int64 =
  ## Benchmarks nim-bigints' `powmod` on every input, reports it and returns
  ## the total elapsed nanoseconds.
  var ticks, nanoseconds: int64
  for d in desc:
    # Drop the 0x prefix
    let a = bigints.initBigInt(d.a[2..^1], base = 16)
    let e = bigints.initBigInt(d.e[2..^1], base = 16)
    let M = bigints.initBigInt(d.M[2..^1], base = 16)
    bench(
      (let r = bigints.powmod(a, e, M)),
      ticks, nanoseconds)
  report("nim-bigints", nanoseconds, ticks, desc.len)
  result = nanoseconds

proc benchAll(desc: seq[BenchDesc]) =
  ## Runs the modular exponentiation benchmark on every input in `desc` for
  ## each backend in turn (Constantine arbitrary-precision, Constantine
  ## fixed-size, GMP, Stint, nim-bigints), printing one report line per
  ## backend followed by each backend's total time relative to Constantine's
  ## arbitrary-precision backend.
  ##
  ## Does nothing when `desc` is empty.
  if desc.len == 0:
    # Nothing to measure; also avoids reporting over zero iterations,
    # which would divide by zero.
    return

  let perfCttArb = benchCttArbitrary(desc)
  let perfCttCrypto = benchCttCrypto(desc)
  let perfGMP = benchGmp(desc)
  let perfStint = benchStint(desc)
  let perfNimBigInt = benchNimBigints(desc)

  # Each ratio is (other backend's total time) / (arbitrary-precision total time).
  let ratioCrypto = float64(perfCttCrypto) / float64(perfCttArb)
  let ratioGMP = float64(perfGMP) / float64(perfCttArb)
  let ratioStint = float64(perfStint) / float64(perfCttArb)
  let ratioNimBigInt = float64(perfNimBigInt) / float64(perfCttArb)

  echo ""
  echo &"Perf ratio Constantine generic vs crypto fixed precision: {ratioCrypto:>8.3f}x"
  echo &"Perf ratio Constantine generic vs GMP: {ratioGMP:>8.3f}x"
  echo &"Perf ratio Constantine generic vs Stint: {ratioStint:>8.3f}x"
  echo &"Perf ratio Constantine generic vs nim-bigints: {ratioNimBigInt:>8.3f}x"
when isMainModule:
  # Generate 100 benchmark inputs and run every backend on them.
  genBench(100).benchAll()