From 7ae0f510006fda15a78b2dc085139f7872078c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mamy=20Andr=C3=A9-Ratsimbazafy?= Date: Wed, 15 Apr 2020 21:24:18 +0200 Subject: [PATCH] benchmarking skips cycle counting for ARM --- benchmarks/bench_elliptic_template.nim | 24 ++++++++++++------ benchmarks/bench_fields_template.nim | 28 ++++++++++++++------- benchmarks/platforms.nim | 30 +++++++++++++++++++++++ benchmarks/platforms/arm.nim | 16 ++++++++++++ {helpers => benchmarks/platforms}/x86.nim | 8 ++++++ formal_verification/bls12_381_q_64.nim | 2 +- helpers/timers.nim | 14 ----------- 7 files changed, 91 insertions(+), 31 deletions(-) create mode 100644 benchmarks/platforms.nim create mode 100644 benchmarks/platforms/arm.nim rename {helpers => benchmarks/platforms}/x86.nim (83%) delete mode 100644 helpers/timers.nim diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim index 2ed2fde..79a0ab1 100644 --- a/benchmarks/bench_elliptic_template.nim +++ b/benchmarks/bench_elliptic_template.nim @@ -16,7 +16,8 @@ import # Internals ../constantine/config/curves, # Helpers - ../helpers/[timers, prng_unsafe, static_for], + ../helpers/[prng_unsafe, static_for], + ./platforms, # Standard library std/[monotimes, times, strformat, strutils, macros] @@ -40,9 +41,6 @@ proc warmup*() = warmup() -echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." -echo "==========================================================================================================\n" -echo "All benchmarks are using constant-time implementations to protect against side-channel attacks." when defined(gcc): echo "\nCompiled with GCC" elif defined(clang): @@ -54,9 +52,21 @@ elif defined(icc): else: echo "\nCompiled with an unknown compiler" -when defined(i386) or defined(amd64): - import ../helpers/x86 - echo "Running on ", cpuName(), "\n\n" +echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger) + +when (sizeof(int) == 4) or defined(Constantine32): + echo "⚠️ Warning: using Constantine with 32-bit limbs" +else: + echo "Using Constantine with 64-bit limbs" + +when SupportsCPUName: + echo "Running on ", cpuName(), "" + +when SupportsGetTicks: + echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." + echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)" + +echo "\n=================================================================================================================\n" proc separator*() = echo "-".repeat(132) diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim index 20dbcc2..e6ced40 100644 --- a/benchmarks/bench_fields_template.nim +++ b/benchmarks/bench_fields_template.nim @@ -18,7 +18,8 @@ import ../constantine/arithmetic, ../constantine/towers, # Helpers - ../helpers/[timers, prng_unsafe, static_for], + ../helpers/[prng_unsafe, static_for], + ./platforms, # Standard library std/[monotimes, times, strformat, strutils, macros] @@ -42,9 +43,6 @@ proc warmup*() = warmup() -echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." -echo "==========================================================================================================\n" -echo "All benchmarks are using constant-time implementations to protect against side-channel attacks." when defined(gcc): echo "\nCompiled with GCC" elif defined(clang): @@ -56,17 +54,29 @@ elif defined(icc): else: echo "\nCompiled with an unknown compiler" -when defined(i386) or defined(amd64): - import ../helpers/x86 - echo "Running on ", cpuName(), "\n\n" +echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger) + +when (sizeof(int) == 4) or defined(Constantine32): + echo "⚠️ Warning: using Constantine with 32-bit limbs" +else: + echo "Using Constantine with 64-bit limbs" + +when SupportsCPUName: + echo "Running on ", cpuName(), "" + +when SupportsGetTicks: + echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." + echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)" + +echo "\n=================================================================================================================\n" proc separator*() = - echo "-".repeat(107) + echo "-".repeat(110) proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = let ns = inNanoseconds((stop-start) div iters) let throughput = 1e9 / float64(ns) - echo &"{op:<15} {field:<15} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + echo &"{op:<15} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" macro fixFieldDisplay(T: typedesc): untyped = # At compile-time, enums are integers and their display is buggy diff --git a/benchmarks/platforms.nim b/benchmarks/platforms.nim new file mode 100644 index 0000000..a9dd92a --- /dev/null +++ b/benchmarks/platforms.nim @@ -0,0 +1,30 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +when defined(i386) or defined(amd64): + import platforms/x86 + export getTicks, cpuName + + const SupportsCPUName* = true + const SupportsGetTicks* = true +else: + const SupportsCPUName* = false + const SupportsGetTicks* = false + +# Prevent compiler optimizing benchmark away +# ----------------------------------------------- +# This doesn't always work unfortunately ... + +proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} = + discard + +template preventOptimAway*[T](x: var T) = + volatilize(cast[ptr byte](unsafeAddr x)) + +template preventOptimAway*[T](x: T) = + volatilize(cast[ptr byte](x)) diff --git a/benchmarks/platforms/arm.nim b/benchmarks/platforms/arm.nim new file mode 100644 index 0000000..076fd19 --- /dev/null +++ b/benchmarks/platforms/arm.nim @@ -0,0 +1,16 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# TODO cycle counting on ARM +# +# - see writeup: http://zhiyisun.github.io/2016/03/02/How-to-Use-Performance-Monitor-Unit-(PMU)-of-64-bit-ARMv8-A-in-Linux.html +# +# Otherwise Google or FFTW approach might work but might require perf_counter privilege (`kernel.perf_event_paranoid=0` ?) +# - https://github.com/google/benchmark/blob/0ab2c290/src/cycleclock.h#L127-L151 +# - https://github.com/FFTW/fftw3/blob/ef15637f/kernel/cycle.h#L518-L564 +# - https://github.com/vesperix/FFTW-for-ARMv7/blob/22ec5c0b/kernel/cycle.h#L404-L457 diff --git a/helpers/x86.nim b/benchmarks/platforms/x86.nim similarity index 83% rename from helpers/x86.nim rename to benchmarks/platforms/x86.nim index dbbcfea..ad3c384 100644 --- a/helpers/x86.nim +++ b/benchmarks/platforms/x86.nim @@ -1,3 +1,11 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + # Cpu Name # ------------------------------------------------------- diff --git a/formal_verification/bls12_381_q_64.nim b/formal_verification/bls12_381_q_64.nim index 53f2fb0..b5da75e 100644 --- a/formal_verification/bls12_381_q_64.nim +++ b/formal_verification/bls12_381_q_64.nim @@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne # ------------------------------------------------------------------------- when isMainModule: - import random, std/monotimes, times, strformat, ../helpers/timers + import random, std/monotimes, times, strformat, ../benchmarks/platforms const Iters = 1_000_000 const InvIters = 1000 diff --git a/helpers/timers.nim b/helpers/timers.nim deleted file mode 100644 index 9a31232..0000000 --- a/helpers/timers.nim +++ /dev/null @@ -1,14 +0,0 @@ -when defined(i386) or defined(amd64): - import x86 - export getTicks - -# This doesn't always work unfortunately ... - -proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} = - discard - -template preventOptimAway*[T](x: var T) = - volatilize(cast[ptr byte](unsafeAddr x)) - -template preventOptimAway*[T](x: T) = - volatilize(cast[ptr byte](x))