mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-05 14:43:08 +00:00
Add benchmark clock timers
This commit is contained in:
parent
ca817fcb69
commit
1fdb1df80a
@ -25,12 +25,16 @@ import
|
|||||||
../constantine/config/[common, curves],
|
../constantine/config/[common, curves],
|
||||||
../constantine/arithmetic/[bigints_checked, finite_fields],
|
../constantine/arithmetic/[bigints_checked, finite_fields],
|
||||||
../constantine/io/[io_bigints, io_fields],
|
../constantine/io/[io_bigints, io_fields],
|
||||||
random, std/monotimes, times, strformat
|
random, std/monotimes, times, strformat,
|
||||||
|
./timers
|
||||||
|
|
||||||
const Iters = 1_000_000
|
const Iters = 1_000_000
|
||||||
|
|
||||||
randomize(1234)
|
randomize(1234)
|
||||||
|
|
||||||
|
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||||
|
echo "==========================================================================================================\n"
|
||||||
|
|
||||||
proc addBench() =
|
proc addBench() =
|
||||||
var r, x, y: Fp[BLS12_381]
|
var r, x, y: Fp[BLS12_381]
|
||||||
# BN254 field modulus
|
# BN254 field modulus
|
||||||
@ -39,12 +43,15 @@ proc addBench() =
|
|||||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||||
|
|
||||||
let start = getMonotime()
|
let start = getMonotime()
|
||||||
|
let startClk = getTicks()
|
||||||
for _ in 0 ..< Iters:
|
for _ in 0 ..< Iters:
|
||||||
x += y
|
x += y
|
||||||
|
let stopClk = getTicks()
|
||||||
let stop = getMonotime()
|
let stop = getMonotime()
|
||||||
|
|
||||||
echo &"Time for {Iters} additions in 𝔽p (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
|
echo &"Time for {Iters} additions in 𝔽p (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
|
||||||
echo &"Time for 1 addition in 𝔽p ==> {inNanoseconds((stop-start) div Iters)} ns"
|
echo &"Time for 1 addition in 𝔽p ==> {inNanoseconds((stop-start) div Iters)} ns"
|
||||||
|
echo &"Cycles per addition 𝔽p ==> {(stopClk - startClk) div Iters} cycles"
|
||||||
|
|
||||||
addBench()
|
addBench()
|
||||||
|
|
||||||
@ -56,11 +63,14 @@ proc mulBench() =
|
|||||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||||
|
|
||||||
let start = getMonotime()
|
let start = getMonotime()
|
||||||
|
let startClk = getTicks()
|
||||||
for _ in 0 ..< Iters:
|
for _ in 0 ..< Iters:
|
||||||
r.prod(x, y)
|
r.prod(x, y)
|
||||||
|
let stopClk = getTicks()
|
||||||
let stop = getMonotime()
|
let stop = getMonotime()
|
||||||
|
|
||||||
echo &"Time for {Iters} multiplications 𝔽p (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
|
echo &"Time for {Iters} multiplications 𝔽p (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
|
||||||
echo &"Time for 1 multiplication 𝔽p ==> {inNanoseconds((stop-start) div Iters)} ns"
|
echo &"Time for 1 multiplication 𝔽p ==> {inNanoseconds((stop-start) div Iters)} ns"
|
||||||
|
echo &"Cycles per multiplication 𝔽p ==> {(stopClk - startClk) div Iters} cycles"
|
||||||
|
|
||||||
mulBench()
|
mulBench()
|
||||||
|
|||||||
49
benchmarks/timers.nim
Normal file
49
benchmarks/timers.nim
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
when defined(i386) or defined(amd64):
  # From Linux
  #
  # The RDTSC instruction is not ordered relative to memory
  # access.  The Intel SDM and the AMD APM are both vague on this
  # point, but empirically an RDTSC instruction can be
  # speculatively executed before prior loads.  An RDTSC
  # immediately after an appropriate barrier appears to be
  # ordered as a normal load, that is, it provides the same
  # ordering guarantees as reading from a global memory location
  # that some other imaginary CPU is updating continuously with a
  # time stamp.
  #
  # From Intel SDM
  # https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
  when not defined(vcc):
    when defined(amd64):
      proc getTicks*(): int64 {.inline.} =
        ## Returns the current value of the CPU time-stamp counter.
        ##
        ## An `lfence` is issued right before `rdtsc` so the counter read
        ## is not executed ahead of preceding loads (see the note above).
        var lo, hi: int64
        # TODO: Provide a compile-time flag for RDTSCP support
        #       and use it instead of lfence + RDTSC
        {.emit: """asm volatile(
          "lfence\n"
          "rdtsc\n"
          : "=a"(`lo`), "=d"(`hi`)
          :
          : "memory"
        );""".}
        # The low half is read from the `a` register and the high half
        # from the `d` register; stitch them into one 64-bit tick count.
        return (hi shl 32) or lo
    else:
      proc getTicks*(): int64 {.inline.} =
        ## Returns the current value of the CPU time-stamp counter.
        ##
        ## 32-bit x86 variant: the 64-bit counter is delivered in the
        ## EDX:EAX register pair.
        # TODO: Provide a compile-time flag for RDTSCP support
        #       and use it instead of lfence + RDTSC
        #
        # "=A" binds the 64-bit result to the EDX:EAX pair. Using "=a"
        # alone would only capture EAX and would leave EDX clobbered
        # without the compiler knowing about it.
        {.emit: """asm volatile(
          "lfence\n"
          "rdtsc\n"
          : "=A"(`result`)
          :
          : "memory"
        );""".}
  else:
    # Visual C++ has no GCC-style inline assembly here; use the compiler
    # intrinsics declared in <intrin.h> instead.
    proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "<intrin.h>".}
    # The MSVC intrinsic is spelled `_mm_lfence` (single leading underscore).
    proc lfence() {.importc: "_mm_lfence", header: "<intrin.h>".}

    proc getTicks*(): int64 {.inline.} =
      ## Returns the current value of the CPU time-stamp counter,
      ## issuing a load fence before reading it.
      lfence()
      return rdtsc()
else:
  {.error: "getticks is not supported on this CPU architecture".}
|
||||||
Loading…
x
Reference in New Issue
Block a user