constantine/benchmarks/bn254_fp.nim
Mamy Ratsimbazafy 4ff0e3d90b
Internals refactor + renewed focus on perf (#17)
* Lay out the refactoring objectives and tradeoffs

* Refactor the 32 and 64-bit primitives [skip ci]

* BigInts and Modular BigInts compile

* Make the bigints test compile

* Fix modular reduction

* Fix reduction tests vs GMP

* Implement Montgomery mul, pow, inverse, WIP finite field compilation

* Make FiniteField compile

* Fix exponentiation compilation

* Fix Montgomery magic constant computation  for 2^64 words

* Fix typo in non-optimized CIOS - passing finite fields IO tests

* Add limbs comparisons [skip ci]

* Fix on precomputation of the Montgomery magic constant

* Passing all tests including 𝔽p2

* modular addition, the test for mersenne prime was wrong

* update benches

* Fix "nimble test" + typo on out-of-place field addition

* bigint division, normalization is needed: https://travis-ci.com/github/mratsim/constantine/jobs/298359743

* missing conversion in subborrow non-x86 fallback - https://travis-ci.com/github/mratsim/constantine/jobs/298359744

* Fix little-endian serialization

* Constantine32 flag to run 32-bit constantine on 64-bit machines

* IO Field test, ensure that BaseType is used instead of uint64 when the prime can fit in uint32

* Implement proper addcarry and subborrow fallback for the compile-time VM

* Fix export issue when the logical wordbitwidth == physical wordbitwidth - passes all tests (32-bit and 64-bit)

* Fix uint128 on ARM

* Fix C++ conditional copy and ARM addcarry/subborrow

* Add investigation for SIGFPE in Travis

* Fix debug display for unsafeDiv2n1n

* multiplexer typo

* moveMem bug in glibc of Ubuntu 16.04?

* Was probably missing an early clobbered register annotation on conditional mov

* Note on Montgomery-friendly moduli

* Strongly suspect a GCC before GCC 7 codegen bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87139)

* hex conversion was (for debugging) not taking requested order into account + inlining comment

* Use 32-bit limbs on ARM64, uint128 builtin __udivti4 bug?

* Revert "Use 32-bit limbs on ARM64, uint128 builtin __udivti4 bug?"

This reverts commit 087f9aa7fb40bbd058d05cbd8eec7fc082911f49.

* Fix subborrow fallback for non-x86 (need to mask the borrow)
2020-03-16 16:33:51 +01:00

156 lines
4.8 KiB
Nim

# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# Benchmark of 𝔽p arithmetic on the BN254 field
# (addition, subtraction, negation, multiplication, squaring, inversion)
#
# ############################################################
# Note on modular exponentiation: 2 implementations are available
# - 1 is constant time
# - 1 exposes the exponent bits to:
#     timing attack,
#     memory access analysis,
#     power analysis (i.e. oscilloscopes on embedded)
#   It is suitable for public exponents, for example
#   to compute modular inversion via the Fermat method
import
../constantine/config/[common, curves],
../constantine/arithmetic/[bigints, finite_fields],
../constantine/io/[io_bigints, io_fields],
random, std/monotimes, times, strformat,
./timers
# Number of timed loop iterations for the addition, subtraction, negation,
# multiplication and squaring benchmarks below.
const Iters = 1_000_000
# Number of timed loop iterations for the inversion benchmark below.
const InvIters = 1000
# Seed the global RNG of the `random` module with a fixed value.
# NOTE(review): nothing in the visible part of this file draws random
# numbers - confirm whether the seed is still needed.
randomize(1234)
# warmup
proc warmup*() =
  ## Runs a fixed integer busy-loop and prints how long it took (CPU time)
  ## together with the value it computed.
  ## The intent, per the original note, is to get the CPU to its maximum
  ## performance state before the timed benchmarks run.
  const rounds = 300_000_000
  var foo = 123

  let start = cpuTime()
  for i in 0 ..< rounds:
    # Same two steps as before, folded into one expression:
    # add (i*i mod 456), then reduce the running value mod 789.
    foo = (foo + (i * i) mod 456) mod 789
  # The loop result is printed below so the compiler has a reason
  # to keep the loop instead of optimizing it away.
  let stop = cpuTime()

  echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"

warmup()
# Print a caveat about the accuracy of the measurements, followed by a
# separator line, before any benchmark result is reported.
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
echo "==========================================================================================================\n"
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one result line for a benchmark.
  ##
  ## - `op`, `field`: labels, each left-aligned in a 15-character column.
  ## - `start`, `stop`: monotonic timestamps taken around the timed loop;
  ##   the elapsed duration is divided by `iters` and shown in nanoseconds.
  ## - `startClk`, `stopClk`: tick counter readings taken around the timed
  ##   loop; their difference is divided by `iters` and shown as "cycles".
  ## - `iters`: number of iterations the timed loop executed.
  let line = &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles"
  echo line
proc addBench() =
  ## Times `Iters` in-place additions on Fp[BN254] and reports the
  ## per-iteration average through `report`.
  var acc, addend: Fp[BN254]
  # Operand loaded from the hex encoding of the BN254 field modulus
  fromHex(acc, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
  # Operand loaded from a truncated BLS12-381 prime
  fromHex(addend, "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")

  # The tick counter is read innermost, the monotonic clock outermost.
  let wallBegin = getMonotime()
  let tickBegin = getTicks()
  for _ in 1 .. Iters:
    acc += addend
  let tickEnd = getTicks()
  let wallEnd = getMonotime()

  report(
    op = "Addition", field = "Fp[BN254]",
    start = wallBegin, stop = wallEnd,
    startClk = tickBegin, stopClk = tickEnd,
    iters = Iters)

addBench()
proc subBench() =
  ## Times `Iters` in-place subtractions on Fp[BN254] and reports the
  ## per-iteration average through `report`.
  var x, y: Fp[BN254]
  # Operand loaded from the hex encoding of the BN254 field modulus
  x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
  # Operand loaded from a truncated BLS12-381 prime
  y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")

  # The tick counter is read innermost, the monotonic clock outermost.
  let start = getMonotime()
  let startClk = getTicks()
  for _ in 0 ..< Iters:
    x -= y
  let stopClk = getTicks()
  let stop = getMonotime()

  # Label spelling fixed: "Subtraction" (was "Substraction").
  report("Subtraction", "Fp[BN254]", start, stop, startClk, stopClk, Iters)

subBench()
proc negBench() =
  ## Times `Iters` out-of-place negations on Fp[BN254] and reports the
  ## per-iteration average through `report`.
  var negated, operand: Fp[BN254]
  # Operand loaded from the hex encoding of the BN254 field modulus
  fromHex(operand, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")

  # NOTE(review): `negated` is never read after the loop - confirm the
  # compiler does not elide the timed work.
  let wallBegin = getMonotime()
  let tickBegin = getTicks()
  for _ in 1 .. Iters:
    neg(negated, operand)
  let tickEnd = getTicks()
  let wallEnd = getMonotime()

  report(
    op = "Negation", field = "Fp[BN254]",
    start = wallBegin, stop = wallEnd,
    startClk = tickBegin, stopClk = tickEnd,
    iters = Iters)

negBench()
proc mulBench() =
  ## Times `Iters` out-of-place multiplications on Fp[BN254] and reports
  ## the per-iteration average through `report`.
  var product, lhs, rhs: Fp[BN254]
  # Operand loaded from the hex encoding of the BN254 field modulus
  fromHex(lhs, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
  # Operand loaded from a truncated BLS12-381 prime
  fromHex(rhs, "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")

  # NOTE(review): `product` is never read after the loop - confirm the
  # compiler does not elide the timed work.
  let wallBegin = getMonotime()
  let tickBegin = getTicks()
  for _ in 1 .. Iters:
    prod(product, lhs, rhs)
  let tickEnd = getTicks()
  let wallEnd = getMonotime()

  report(
    op = "Multiplication", field = "Fp[BN254]",
    start = wallBegin, stop = wallEnd,
    startClk = tickBegin, stopClk = tickEnd,
    iters = Iters)

mulBench()
proc sqrBench() =
  ## Times `Iters` out-of-place squarings on Fp[BN254] and reports the
  ## per-iteration average through `report`.
  var squared, operand: Fp[BN254]
  # Operand loaded from the hex encoding of the BN254 field modulus
  fromHex(operand, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")

  # NOTE(review): `squared` is never read after the loop - confirm the
  # compiler does not elide the timed work.
  let wallBegin = getMonotime()
  let tickBegin = getTicks()
  for _ in 1 .. Iters:
    square(squared, operand)
  let tickEnd = getTicks()
  let wallEnd = getMonotime()

  report(
    op = "Squaring", field = "Fp[BN254]",
    start = wallBegin, stop = wallEnd,
    startClk = tickBegin, stopClk = tickEnd,
    iters = Iters)

sqrBench()
proc invBench() =
  ## Times `InvIters` in-place inversions on Fp[BN254] and reports the
  ## per-iteration average through `report`.
  # TODO: keeping the operand on the stack triggers stack smashing
  #       detection, hence the heap allocation. To be investigated.
  var x: ref Fp[BN254]
  new(x)
  # Operand loaded from the hex encoding of the BN254 field modulus
  fromHex(x[], "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")

  let wallBegin = getMonotime()
  let tickBegin = getTicks()
  for _ in 1 .. InvIters:
    # The operand is inverted in place and never restored, so it
    # alternates between x and x^-1 from one iteration to the next.
    x[].inv()
  let tickEnd = getTicks()
  let wallEnd = getMonotime()

  report(
    op = "Inversion", field = "Fp[BN254]",
    start = wallBegin, stop = wallEnd,
    startClk = tickBegin, stopClk = tickEnd,
    iters = InvIters)

invBench()