benchmarking skips cycle counting for ARM
This commit is contained in:
parent
d7e170288f
commit
7ae0f51000
|
@ -16,7 +16,8 @@ import
|
|||
# Internals
|
||||
../constantine/config/curves,
|
||||
# Helpers
|
||||
../helpers/[timers, prng_unsafe, static_for],
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
|
||||
|
@ -40,9 +41,6 @@ proc warmup*() =
|
|||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
echo "All benchmarks are using constant-time implementations to protect against side-channel attacks."
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
|
@ -54,9 +52,21 @@ elif defined(icc):
|
|||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
when defined(i386) or defined(amd64):
|
||||
import ../helpers/x86
|
||||
echo "Running on ", cpuName(), "\n\n"
|
||||
echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger)
|
||||
|
||||
when (sizeof(int) == 4) or defined(Constantine32):
|
||||
echo "⚠️ Warning: using Constantine with 32-bit limbs"
|
||||
else:
|
||||
echo "Using Constantine with 64-bit limbs"
|
||||
|
||||
when SupportsCPUName:
|
||||
echo "Running on ", cpuName(), ""
|
||||
|
||||
when SupportsGetTicks:
|
||||
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
|
||||
|
||||
echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*() =
|
||||
echo "-".repeat(132)
|
||||
|
|
|
@ -18,7 +18,8 @@ import
|
|||
../constantine/arithmetic,
|
||||
../constantine/towers,
|
||||
# Helpers
|
||||
../helpers/[timers, prng_unsafe, static_for],
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
|
||||
|
@ -42,9 +43,6 @@ proc warmup*() =
|
|||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
echo "All benchmarks are using constant-time implementations to protect against side-channel attacks."
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
|
@ -56,17 +54,29 @@ elif defined(icc):
|
|||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
when defined(i386) or defined(amd64):
|
||||
import ../helpers/x86
|
||||
echo "Running on ", cpuName(), "\n\n"
|
||||
echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger)
|
||||
|
||||
when (sizeof(int) == 4) or defined(Constantine32):
|
||||
echo "⚠️ Warning: using Constantine with 32-bit limbs"
|
||||
else:
|
||||
echo "Using Constantine with 64-bit limbs"
|
||||
|
||||
when SupportsCPUName:
|
||||
echo "Running on ", cpuName(), ""
|
||||
|
||||
when SupportsGetTicks:
|
||||
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
|
||||
|
||||
echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*() =
|
||||
echo "-".repeat(107)
|
||||
echo "-".repeat(110)
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stop-start) div iters)
|
||||
let throughput = 1e9 / float64(ns)
|
||||
echo &"{op:<15} {field:<15} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
|
||||
echo &"{op:<15} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
|
||||
|
||||
macro fixFieldDisplay(T: typedesc): untyped =
|
||||
# At compile-time, enums are integers and their display is buggy
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# Compile-time platform capability switch.
# On i386/amd64 targets the x86 helper module is imported and its
# `getTicks` and `cpuName` symbols are re-exported; both capability
# flags are then true. On every other target neither symbol is
# provided and both flags are false, so callers are expected to guard
# their use with `when SupportsCPUName` / `when SupportsGetTicks`.
when defined(i386) or defined(amd64):
|
||||
import platforms/x86
|
||||
export getTicks, cpuName
|
||||
|
||||
const SupportsCPUName* = true
|
||||
const SupportsGetTicks* = true
|
||||
else:
|
||||
const SupportsCPUName* = false
|
||||
const SupportsGetTicks* = false
|
||||
|
||||
# Prevent the compiler from optimizing the benchmark away
|
||||
# -----------------------------------------------
|
||||
# This doesn't always work unfortunately ...
|
||||
|
||||
# No-op proc: the body only `discard`s. The `codegenDecl` pragma overrides
# the generated C declaration so the parameter is emitted as
# `char const volatile *x`. Used by the `preventOptimAway` templates.
proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} =
|
||||
discard
|
||||
|
||||
# Overload for mutable (`var`) arguments: takes the address of `x`,
# casts it to `ptr byte` and passes it to `volatilize`.
template preventOptimAway*[T](x: var T) =
|
||||
volatilize(cast[ptr byte](unsafeAddr x))
|
||||
|
||||
# Overload for non-`var` arguments: casts the value of `x` itself (not its
# address) to `ptr byte` and passes it to `volatilize`.
# NOTE(review): presumably meant for pointer-like arguments — confirm.
template preventOptimAway*[T](x: T) =
|
||||
volatilize(cast[ptr byte](x))
|
|
@ -0,0 +1,16 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# TODO cycle counting on ARM
|
||||
#
|
||||
# - see writeup: http://zhiyisun.github.io/2016/03/02/How-to-Use-Performance-Monitor-Unit-(PMU)-of-64-bit-ARMv8-A-in-Linux.html
|
||||
#
|
||||
# Otherwise, the approach used by Google Benchmark or FFTW might work, but it may require perf counter privileges (`kernel.perf_event_paranoid=0`?)
|
||||
# - https://github.com/google/benchmark/blob/0ab2c290/src/cycleclock.h#L127-L151
|
||||
# - https://github.com/FFTW/fftw3/blob/ef15637f/kernel/cycle.h#L518-L564
|
||||
# - https://github.com/vesperix/FFTW-for-ARMv7/blob/22ec5c0b/kernel/cycle.h#L404-L457
|
|
@ -1,3 +1,11 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# Cpu Name
|
||||
# -------------------------------------------------------
|
||||
|
|
@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne
|
|||
# -------------------------------------------------------------------------
|
||||
|
||||
when isMainModule:
|
||||
import random, std/monotimes, times, strformat, ../helpers/timers
|
||||
import random, std/monotimes, times, strformat, ../benchmarks/platforms
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
when defined(i386) or defined(amd64):
|
||||
import x86
|
||||
export getTicks
|
||||
|
||||
# This doesn't always work unfortunately ...
|
||||
|
||||
proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} =
|
||||
discard
|
||||
|
||||
template preventOptimAway*[T](x: var T) =
|
||||
volatilize(cast[ptr byte](unsafeAddr x))
|
||||
|
||||
template preventOptimAway*[T](x: T) =
|
||||
volatilize(cast[ptr byte](x))
|
Loading…
Reference in New Issue