benchmarking skips cycle counting for ARM

This commit is contained in:
Mamy André-Ratsimbazafy 2020-04-15 21:24:18 +02:00
parent d7e170288f
commit 7ae0f51000
No known key found for this signature in database
GPG Key ID: 7B88AD1FE79492E1
7 changed files with 91 additions and 31 deletions

View File

@ -16,7 +16,8 @@ import
# Internals
../constantine/config/curves,
# Helpers
../helpers/[timers, prng_unsafe, static_for],
../helpers/[prng_unsafe, static_for],
./platforms,
# Standard library
std/[monotimes, times, strformat, strutils, macros]
@ -40,9 +41,6 @@ proc warmup*() =
warmup()
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
echo "==========================================================================================================\n"
echo "All benchmarks are using constant-time implementations to protect against side-channel attacks."
when defined(gcc):
echo "\nCompiled with GCC"
elif defined(clang):
@ -54,9 +52,21 @@ elif defined(icc):
else:
echo "\nCompiled with an unknown compiler"
when defined(i386) or defined(amd64):
import ../helpers/x86
echo "Running on ", cpuName(), "\n\n"
echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger)
when (sizeof(int) == 4) or defined(Constantine32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
when SupportsCPUName:
echo "Running on ", cpuName(), ""
when SupportsGetTicks:
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
echo "\n=================================================================================================================\n"
# Prints one line made of 132 '-' characters to standard output.
proc separator*() =
echo "-".repeat(132)

View File

@ -18,7 +18,8 @@ import
../constantine/arithmetic,
../constantine/towers,
# Helpers
../helpers/[timers, prng_unsafe, static_for],
../helpers/[prng_unsafe, static_for],
./platforms,
# Standard library
std/[monotimes, times, strformat, strutils, macros]
@ -42,9 +43,6 @@ proc warmup*() =
warmup()
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
echo "==========================================================================================================\n"
echo "All benchmarks are using constant-time implementations to protect against side-channel attacks."
when defined(gcc):
echo "\nCompiled with GCC"
elif defined(clang):
@ -56,17 +54,29 @@ elif defined(icc):
else:
echo "\nCompiled with an unknown compiler"
when defined(i386) or defined(amd64):
import ../helpers/x86
echo "Running on ", cpuName(), "\n\n"
echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger)
when (sizeof(int) == 4) or defined(Constantine32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
when SupportsCPUName:
echo "Running on ", cpuName(), ""
when SupportsGetTicks:
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
echo "\n=================================================================================================================\n"
# Prints a horizontal rule made of '-' characters to standard output.
# NOTE(review): two `echo` lines with different widths (107 and 110) appear
# below; this looks like the before/after pair of a diff hunk -- confirm that
# only one of them is meant to be in the final proc.
proc separator*() =
echo "-".repeat(107)
echo "-".repeat(110)
# Prints one benchmark result line.
#
# Parameters:
# - op, field: labels printed left-aligned in the first two columns.
# - start, stop: monotonic timestamps taken around the measured loop.
# - startClk, stopClk: counter values taken around the measured loop; their
#   difference divided by `iters` is printed as "CPU cycles (approx)".
# - iters: number of iterations executed between the two measurements.
#
# NOTE(review): two `echo` lines that differ only in the `field` column width
# (15 vs 18) appear below; this looks like the before/after pair of a diff
# hunk -- confirm that only one of them is meant to be in the final proc.
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
# Average wall-clock time per iteration, in nanoseconds (integer division).
let ns = inNanoseconds((stop-start) div iters)
# Operations per second derived from the per-iteration nanoseconds.
let throughput = 1e9 / float64(ns)
echo &"{op:<15} {field:<15} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
echo &"{op:<15} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
macro fixFieldDisplay(T: typedesc): untyped =
# At compile-time, enums are integers and their display is buggy

30
benchmarks/platforms.nim Normal file
View File

@ -0,0 +1,30 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Compile-time platform switch.
# On x86 targets (`i386` or `amd64`) the `platforms/x86` module is imported,
# `getTicks` and `cpuName` are re-exported, and both capability flags are true.
# On every other target nothing is imported and both flags are false, so
# callers can guard their use with `when SupportsCPUName` / `when SupportsGetTicks`.
when defined(i386) or defined(amd64):
import platforms/x86
export getTicks, cpuName
const SupportsCPUName* = true
const SupportsGetTicks* = true
else:
const SupportsCPUName* = false
const SupportsGetTicks* = false
# Prevent compiler optimizing benchmark away
# -----------------------------------------------
# This doesn't always work unfortunately ...
# No-op proc that receives a byte pointer.
# The `codegenDecl` pragma overrides the generated C declaration so that the
# parameter is declared as `char const volatile *x`; the two `$#` placeholders
# stand for the return type and the proc name. The body itself does nothing.
proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} =
discard
# Overload for mutable (`var`) arguments: takes the address of `x`,
# reinterprets it as a byte pointer and hands it to `volatilize`.
template preventOptimAway*[T](x: var T) =
volatilize(cast[ptr byte](unsafeAddr x))
# Overload for non-`var` arguments: casts the value of `x` itself (not its
# address) to a byte pointer and hands it to `volatilize`.
# NOTE(review): presumably meant for pointer-like `T` (e.g. ref/ptr) -- confirm
# against callers.
template preventOptimAway*[T](x: T) =
volatilize(cast[ptr byte](x))

View File

@ -0,0 +1,16 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# TODO cycle counting on ARM
#
# - see writeup: http://zhiyisun.github.io/2016/03/02/How-to-Use-Performance-Monitor-Unit-(PMU)-of-64-bit-ARMv8-A-in-Linux.html
#
# Otherwise the Google Benchmark or FFTW approaches might work, but they may require perf-counter privileges (`kernel.perf_event_paranoid=0`?)
# - https://github.com/google/benchmark/blob/0ab2c290/src/cycleclock.h#L127-L151
# - https://github.com/FFTW/fftw3/blob/ef15637f/kernel/cycle.h#L518-L564
# - https://github.com/vesperix/FFTW-for-ARMv7/blob/22ec5c0b/kernel/cycle.h#L404-L457

View File

@ -1,3 +1,11 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# Cpu Name
# -------------------------------------------------------

View File

@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne
# -------------------------------------------------------------------------
when isMainModule:
import random, std/monotimes, times, strformat, ../helpers/timers
import random, std/monotimes, times, strformat, ../benchmarks/platforms
const Iters = 1_000_000
const InvIters = 1000

View File

@ -1,14 +0,0 @@
# On x86 targets (`i386` or `amd64`), import the `x86` module and re-export
# `getTicks`. No branch is provided here for other architectures.
when defined(i386) or defined(amd64):
import x86
export getTicks
# This doesn't always work unfortunately ...
# No-op proc that receives a byte pointer.
# The `codegenDecl` pragma overrides the generated C declaration so that the
# parameter is declared as `char const volatile *x`; the body does nothing.
proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} =
discard
# Overload for mutable (`var`) arguments: passes the address of `x`,
# reinterpreted as a byte pointer, to `volatilize`.
template preventOptimAway*[T](x: var T) =
volatilize(cast[ptr byte](unsafeAddr x))
# Overload for non-`var` arguments: casts the value of `x` itself (not its
# address) to a byte pointer before passing it to `volatilize`.
# NOTE(review): presumably meant for pointer-like `T` -- confirm against callers.
template preventOptimAway*[T](x: T) =
volatilize(cast[ptr byte](x))