* Add metering facilities

* Metering reporting

* Add example report on metering BLS12-381 pairings
This commit is contained in:
Mamy Ratsimbazafy 2021-01-29 22:21:19 +01:00 committed by GitHub
parent 95e23339b2
commit b91ec1cb15
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 395 additions and 39 deletions

View File

@ -70,14 +70,14 @@ func toBig*(src: FF): auto {.noInit, inline.} =
# Copy
# ------------------------------------------------------------
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline.} =
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline, meter.} =
## Constant-time conditional copy
## If ctl is true: b is copied into a
## if ctl is false: b is not copied and a is unmodified
## Time and memory accesses are the same whether a copy occurs or not
ccopy(a.mres, b.mres, ctl)
func cswap*(a, b: var FF, ctl: CTBool) {.inline.} =
func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
## Swap ``a`` and ``b`` if ``ctl`` is true
##
## Constant-time:
@ -132,7 +132,7 @@ func setOne*(a: var FF) {.inline.} =
# Check if the compiler optimizes it away
a.mres = FF.getMontyOne()
func `+=`*(a: var FF, b: FF) {.inline.} =
func `+=`*(a: var FF, b: FF) {.inline, meter.} =
## In-place addition modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
addmod_asm(a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -141,7 +141,7 @@ func `+=`*(a: var FF, b: FF) {.inline.} =
overflowed = overflowed or not(a.mres < FF.fieldMod())
discard csub(a.mres, FF.fieldMod(), overflowed)
func `-=`*(a: var FF, b: FF) {.inline.} =
func `-=`*(a: var FF, b: FF) {.inline, meter.} =
## In-place substraction modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
submod_asm(a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -149,7 +149,7 @@ func `-=`*(a: var FF, b: FF) {.inline.} =
let underflowed = sub(a.mres, b.mres)
discard cadd(a.mres, FF.fieldMod(), underflowed)
func double*(a: var FF) {.inline.} =
func double*(a: var FF) {.inline, meter.} =
## Double ``a`` modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
addmod_asm(a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@ -158,7 +158,7 @@ func double*(a: var FF) {.inline.} =
overflowed = overflowed or not(a.mres < FF.fieldMod())
discard csub(a.mres, FF.fieldMod(), overflowed)
func sum*(r: var FF, a, b: FF) {.inline.} =
func sum*(r: var FF, a, b: FF) {.inline, meter.} =
## Sum ``a`` and ``b`` into ``r`` modulo p
## r is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -169,11 +169,11 @@ func sum*(r: var FF, a, b: FF) {.inline.} =
overflowed = overflowed or not(r.mres < FF.fieldMod())
discard csub(r.mres, FF.fieldMod(), overflowed)
func sumNoReduce*(r: var FF, a, b: FF) {.inline.} =
func sumNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
## Sum ``a`` and ``b`` into ``r`` without reduction
discard r.mres.sum(a.mres, b.mres)
func diff*(r: var FF, a, b: FF) {.inline.} =
func diff*(r: var FF, a, b: FF) {.inline, meter.} =
## Substract `b` from `a` and store the result into `r`.
## `r` is initialized/overwritten
## Requires r != b
@ -184,7 +184,7 @@ func diff*(r: var FF, a, b: FF) {.inline.} =
var underflowed = r.mres.diff(a.mres, b.mres)
discard cadd(r.mres, FF.fieldMod(), underflowed)
func diffAlias*(r: var FF, a, b: FF) {.inline.} =
func diffAlias*(r: var FF, a, b: FF) {.inline, meter.} =
## Substract `b` from `a` and store the result into `r`.
## `r` is initialized/overwritten
## Handles r == b
@ -196,12 +196,12 @@ func diffAlias*(r: var FF, a, b: FF) {.inline.} =
var underflowed = r.mres.diff(a.mres, b.mres)
discard cadd(r.mres, FF.fieldMod(), underflowed)
func diffNoReduce*(r: var FF, a, b: FF) {.inline.} =
func diffNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
## Substract `b` from `a` and store the result into `r`
## without reduction
discard r.mres.diff(a.mres, b.mres)
func double*(r: var FF, a: FF) {.inline.} =
func double*(r: var FF, a: FF) {.inline, meter.} =
## Double ``a`` into ``r``
## `r` is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -212,16 +212,16 @@ func double*(r: var FF, a: FF) {.inline.} =
overflowed = overflowed or not(r.mres < FF.fieldMod())
discard csub(r.mres, FF.fieldMod(), overflowed)
func prod*(r: var FF, a, b: FF) {.inline.} =
func prod*(r: var FF, a, b: FF) {.inline, meter.} =
## Store the product of ``a`` by ``b`` modulo p into ``r``
## ``r`` is initialized / overwritten
r.mres.montyMul(a.mres, b.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
func square*(r: var FF, a: FF) {.inline.} =
func square*(r: var FF, a: FF) {.inline, meter.} =
## Squaring modulo p
r.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
func neg*(r: var FF, a: FF) {.inline.} =
func neg*(r: var FF, a: FF) {.inline, meter.} =
## Negate modulo p
when UseASM_X86_64 and defined(gcc):
# Clang and every compiler besides GCC
@ -239,11 +239,11 @@ func neg*(r: var FF, a: FF) {.inline.} =
t.mres.czero(isZero)
r = t
func neg*(a: var FF) {.inline.} =
func neg*(a: var FF) {.inline, meter.} =
## Negate modulo p
a.neg(a)
func div2*(a: var FF) {.inline.} =
func div2*(a: var FF) {.inline, meter.} =
## Modular division by 2
a.mres.div2_modular(FF.getPrimePlus1div2())
@ -253,26 +253,26 @@ func div2*(a: var FF) {.inline.} =
#
# ############################################################
func cneg*(r: var FF, a: FF, ctl: SecretBool) =
func cneg*(r: var FF, a: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional negation
## The negation is only performed if ctl is "true"
r.neg(a)
r.ccopy(a, not ctl)
func cneg*(a: var FF, ctl: SecretBool) =
func cneg*(a: var FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional negation
## The negation is only performed if ctl is "true"
var t = a
a.cneg(t, ctl)
func cadd*(a: var FF, b: FF, ctl: SecretBool) =
func cadd*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional addition
## The addition is only performed if ctl is "true"
var t = a
t += b
a.ccopy(t, ctl)
func csub*(a: var FF, b: FF, ctl: SecretBool) =
func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional substraction
## The substraction is only performed if ctl is "true"
var t = a
@ -365,15 +365,15 @@ func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
# - Those that return a field element
# - Those that internally allocate a temporary field element
func `+`*(a, b: FF): FF {.noInit, inline.} =
func `+`*(a, b: FF): FF {.noInit, inline, meter.} =
## Addition modulo p
result.sum(a, b)
func `-`*(a, b: FF): FF {.noInit, inline.} =
func `-`*(a, b: FF): FF {.noInit, inline, meter.} =
## Substraction modulo p
result.diff(a, b)
func `*`*(a, b: FF): FF {.noInit, inline.} =
func `*`*(a, b: FF): FF {.noInit, inline, meter.} =
## Multiplication modulo p
##
## It is recommended to assign with {.noInit.}
@ -381,20 +381,20 @@ func `*`*(a, b: FF): FF {.noInit, inline.} =
## routine will zero init internally the result.
result.prod(a, b)
func `*=`*(a: var FF, b: FF) {.inline.} =
func `*=`*(a: var FF, b: FF) {.inline, meter.} =
## Multiplication modulo p
a.prod(a, b)
func square*(a: var FF) {.inline.} =
func square*(a: var FF) {.inline, meter.} =
## Squaring modulo p
a.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
func square_repeated*(r: var FF, num: int) {.inline.} =
func square_repeated*(r: var FF, num: int) {.inline, meter.} =
## Repeated squarings
for _ in 0 ..< num:
r.square()
func square_repeated*(r: var FF, a: FF, num: int) {.inline.} =
func square_repeated*(r: var FF, a: FF, num: int) {.inline, meter.} =
## Repeated squarings
r.square(a)
for _ in 1 ..< num:

View File

@ -13,6 +13,9 @@
# ############################################################
import ../primitives
import ../../metering/tracer
export tracer
when sizeof(int) == 8 and not defined(Constantine32):
type

View File

@ -8,7 +8,7 @@
import
../primitives,
../config/curves,
../config/[common, curves],
../arithmetic,
../towers,
../isogeny/frobenius
@ -31,7 +31,7 @@ import
# 𝔽p12 -> Gϕ₁₂ - Mapping to Cyclotomic group
# ----------------------------------------------------------------
func finalExpEasy*[C: static Curve](f: var Fp12[C]) =
func finalExpEasy*[C: static Curve](f: var Fp12[C]) {.meter.} =
## Easy part of the final exponentiation
##
## This maps the result of the Miller loop into the cyclotomic subgroup Gϕ₁₂
@ -119,19 +119,19 @@ func finalExpEasy*[C: static Curve](f: var Fp12[C]) =
#
# The result of any pairing is in a cyclotomic subgroup
func cyclotomic_inv*(a: var Fp12) =
func cyclotomic_inv*(a: var Fp12) {.meter.} =
## Fast inverse for a
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
a.conj()
func cyclotomic_inv*(r: var Fp12, a: Fp12) =
func cyclotomic_inv*(r: var Fp12, a: Fp12) {.meter.} =
## Fast inverse for a
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
r.conj(a)
func cyclotomic_square*[C](r: var Fp12[C], a: Fp12[C]) =
func cyclotomic_square*[C](r: var Fp12[C], a: Fp12[C]) {.meter.} =
## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
@ -177,7 +177,7 @@ func cyclotomic_square*[C](r: var Fp12[C], a: Fp12[C]) =
else:
{.error: "Not implemented".}
func cyclotomic_square*[C](a: var Fp12[C]) =
func cyclotomic_square*[C](a: var Fp12[C]) {.meter.} =
## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
@ -225,7 +225,7 @@ func cyclotomic_square*[C](a: var Fp12[C]) =
else:
{.error: "Not implemented".}
func cycl_sqr_repeated*(f: var Fp12, num: int) {.inline.} =
func cycl_sqr_repeated*(f: var Fp12, num: int) {.inline, meter.} =
## Repeated cyclotomic squarings
for _ in 0 ..< num:
f.cyclotomic_square()
@ -240,7 +240,7 @@ iterator unpack(scalarByte: byte): bool =
yield bool((scalarByte and 0b00000010) shr 1)
yield bool( scalarByte and 0b00000001)
func cyclotomic_exp*[C](r: var Fp12[C], a: Fp12[C], exponent: BigInt, invert: bool) =
func cyclotomic_exp*[C](r: var Fp12[C], a: Fp12[C], exponent: BigInt, invert: bool) {.meter.} =
var eBytes: array[(exponent.bits+7) div 8, byte]
eBytes.exportRawUint(exponent, bigEndian)

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/[curves, type_ff],
../config/[common, curves, type_ff],
../towers,
../elliptic/[
ec_shortweierstrass_affine,
@ -49,7 +49,7 @@ func millerLoopGenericBLS12*[C](
f: var Fp12[C],
P: ECP_ShortW_Aff[Fp[C], NotOnTwist],
Q: ECP_ShortW_Aff[Fp2[C], OnTwist]
) =
) {.meter.} =
## Generic Miller Loop for BLS12 curve
## Computes f{u,Q}(P) with u the BLS curve parameter
@ -133,7 +133,7 @@ func pairing_bls12_reference*[C](
# Optimized pairing implementation
# ----------------------------------------------------------------
func finalExpHard_BLS12*[C](f: var Fp12[C]) =
func finalExpHard_BLS12*[C](f: var Fp12[C]) {.meter.} =
## Hard part of the final exponentiation
## Specialized for BLS12 curves
##
@ -191,7 +191,7 @@ func finalExpHard_BLS12*[C](f: var Fp12[C]) =
func pairing_bls12*[C](
gt: var Fp12[C],
P: ECP_ShortW_Proj[Fp[C], NotOnTwist],
Q: ECP_ShortW_Proj[Fp2[C], OnTwist]) =
Q: ECP_ShortW_Proj[Fp2[C], OnTwist]) {.meter.} =
## Compute the optimal Ate Pairing for BLS12 curves
## Input: P ∈ G1, Q ∈ G2
## Output: e(P, Q) ∈ Gt

97
metering/README.md Normal file
View File

@ -0,0 +1,97 @@
# Metering
## Overview
This folder allows measuring an accurate cost of high-level primitives in terms of basic operations (Field mul, add, inv, ...)
### For optimization
Metering allows choosing the best algorithm or representation when several are available, for example choosing between affine, projective or Jacobian coordinates for elliptic curve points. Some representations might also be faster for certain fields (Fp or Fp2) or certain curves.
It also helps focus tuning efforts on the operations that underlie the high-level building blocks. This is not a replacement for profiling but a complement.
Metering allows reasoning at the complexity and algorithmic level while profiling allows reasoning at the hardware and timing level.
### For blockchains
Metering is important for blockchains to correctly price VM opcodes. Pricing them too low would allow denial-of-service attacks; pricing them too high would disincentivize their use.
Note: this only takes into account the number of operations
but does not take into account stack usage for temporaries.
## Measuring cost
The file m_pairings has a minimal example for the current state.
```Nim
var rng*: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "bench xoshiro512** seed: ", seed
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
result = rng.random_unsafe(EC)
result.clearCofactorReference()
proc pairingBLS12Meter*(C: static Curve) =
let
P = rng.random_point(ECP_ShortW_Proj[Fp[C], NotOnTwist])
Q = rng.random_point(ECP_ShortW_Proj[Fp2[C], OnTwist])
var f: Fp12[C]
resetMetering()
f.pairing_bls12(P, Q)
resetMetering()
pairingBLS12Meter(BLS12_381)
const flags = if UseASM_X86_64 or UseASM_X86_32: "UseAssembly" else: "NoAssembly"
reportCli(Metrics, flags)
```
After compiling with
```
nim c -r --hints:off --warnings:off --verbosity:0 -d:danger -d:CttMeter --outdir:build metering/m_pairings.nim
```
We get
```
bench xoshiro512** seed: 1611954740
CPU: Intel(R) Core(TM) i9-9980XE CPU @ 3.00GHz
The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost).
|--------------------------------------------------|--------------|--------------------|---------------|-----------------|--------------------------|--------------------------|
| Procedures | # of Calls | Throughput (ops/s) | Time (µs) | Avg Time (µs) | CPU cycles (in billions) | Avg cycles (in billions) |
| UseAssembly | | | | | indicative only | indicative only |
|--------------------------------------------------|--------------|--------------------|---------------|-----------------|--------------------------|--------------------------|
|`+=`* | 11473| inf| 0.000| 0.000|
|`-=`* | 18603| 2067000000000.000| 0.009| 0.000|
|double* | 7212| 2404000000000.000| 0.003| 0.000|
|sum* | 21058| 7019333333333.333| 0.003| 0.000|
|diff* | 8884| 2961333333333.333| 0.003| 0.000|
|diffAlias* | 10| inf| 0.000| 0.000|
|double* | 4186| inf| 0.000| 0.000|
|prod* | 14486| 1609555555555.555| 0.009| 0.000|
|square* | 16| inf| 0.000| 0.000|
|neg* | 2093| inf| 0.000| 0.000|
|neg* | 2050| inf| 0.000| 0.000|
|div2* | 512| inf| 0.000| 0.000|
|`*=`* | 5584| 620444444444.444| 0.009| 0.000|
|square* | 1116| inf| 0.000| 0.000|
|square_repeated* | 126| 1235294117.647| 0.102| 0.001|
|finalExpEasy* | 1| 5555555.556| 0.180| 0.180|
|cyclotomic_inv* | 5| 1000000000.000| 0.005| 0.001|
|cyclotomic_inv* | 1| inf| 0.000| 0.000|
|cyclotomic_square* | 6| 70588235.294| 0.085| 0.014|
|cyclotomic_square* | 309| 70499657.769| 4.383| 0.014|
|cycl_sqr_repeated* | 25| 5556790.398| 4.499| 0.180|
|millerLoopGenericBLS12* | 1| 279251.606| 3.581| 3.581|
|finalExpHard_BLS12* | 1| 178475.817| 5.603| 5.603|
|pairing_bls12* | 1| 105196.718| 9.506| 9.506|
|--------------------------------------------------|--------------|--------------------|---------------|-----------------|--------------------------|--------------------------|
```
The reporting and tracing will be improved to collect the fields and curves.
It is already useful to know how many base field operations are necessary.

42
metering/m_pairings.nim Normal file
View File

@ -0,0 +1,42 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/times,
./reports, ./tracer,
../constantine/config/[common, curves],
../constantine/[arithmetic, towers],
../constantine/elliptic/ec_shortweierstrass_projective,
../constantine/hash_to_curve/cofactors,
../constantine/pairing/pairing_bls12,
# Helpers
../helpers/prng_unsafe
# Global PRNG state used to draw the random inputs of the metered run.
var rng*: RngState

# Seed from the wall clock, keeping only the low 32 bits (unixTime mod 2^32).
# The seed is echoed so that a run can be identified afterwards.
let seed = uint32(toUnix(getTime()) and 0xFFFF_FFFF'i64)
rng.seed(seed)
echo "bench xoshiro512** seed: ", seed
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
  ## Draw a value of type `EC` with `random_unsafe`, then apply
  ## `clearCofactorReference` to it before returning it.
  result = random_unsafe(rng, EC)
  clearCofactorReference(result)
proc pairingBLS12Meter*(C: static Curve) =
  ## Run a single `pairing_bls12` on curve `C` with randomly drawn inputs
  ## so that the metered procedures record their call counts and timings.
  ## `resetMetering` is called right before the pairing.
  let P = random_point(rng, ECP_ShortW_Proj[Fp[C], NotOnTwist])
  let Q = random_point(rng, ECP_ShortW_Proj[Fp2[C], OnTwist])

  var f: Fp12[C]

  resetMetering()
  pairing_bls12(f, P, Q)
# Driver: (re)load the metrics table, run the metered pairing once on
# BLS12-381, then print the per-procedure report.
resetMetering()
pairingBLS12Meter(BLS12_381)

# Label shown in the report header row, telling whether assembly is in use.
const flags =
  if UseASM_X86_64 or UseASM_X86_32:
    "UseAssembly"
  else:
    "NoAssembly"
reportCli(Metrics, flags)

62
metering/reports.nim Normal file
View File

@ -0,0 +1,62 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[strformat, strutils],
../benchmarks/platforms,
tracer
# Reporting benchmark result
# -------------------------------------------------------
proc reportCli*(metrics: seq[Metadata], flags: string) =
  ## Print the collected metering data as a text table on stdout.
  ##
  ## - `metrics`: one entry per metered procedure. Entries whose
  ##   `numCalls` is 0 are skipped.
  ## - `flags`: free-form label, centered in the first column of the
  ##   second header row.
  ##
  ## `cumulatedTimeNs` is scaled by 1e-3 and displayed as microseconds.
  ## When `SupportsGetTicks` is true, two extra columns report the cumulated
  ## and average cycle counts (scaled by 1e-9).
  let name = when SupportsCPUName: cpuName() else: "(name auto-detection not implemented for this CPU family)"
  echo "\nCPU: ", name

  when SupportsGetTicks:
    # https://blog.trailofbits.com/2019/10/03/tsc-frequency-for-all-better-profiling-and-benchmarking/
    # https://www.agner.org/optimize/blog/read.php?i=838
    echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)."

    const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
    echo "\n"
    echo lineSep
    echo &"""|{"Procedures":^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
    echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
    echo lineSep
    for m in metrics:
      if m.numCalls == 0:
        continue
      # TODO: running variance / standard deviation but the Welford method is quite costly.
      #       https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
      let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
      let avgTimeUs = cumulTimeUs / m.numCalls.float64
      let throughput = 1e6 / avgTimeUs
      let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
      let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
      # The row must have the same 7 columns as the header: the two cycle
      # columns were computed but previously never printed.
      echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|{cumulCyclesBillions:>26.3f}|{avgCyclesBillions:>26.3f}|"""
    echo lineSep

  else:
    const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
    echo "\n"
    echo lineSep
    echo &"""|{"Procedures":^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
    echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|"""
    echo lineSep
    for m in metrics:
      if m.numCalls == 0:
        continue
      # TODO: running variance / standard deviation but the Welford method is quite costly.
      #       https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
      let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
      let avgTimeUs = cumulTimeUs / m.numCalls.float64
      let throughput = 1e6 / avgTimeUs
      echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
    echo lineSep

152
metering/tracer.nim Normal file
View File

@ -0,0 +1,152 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/[macros, times, monotimes],
../benchmarks/platforms
# ############################################################
#
# Trace operations
#
# ############################################################
# Utils
# --------------------------------------------------
const
  hasThreadSupport = defined(threads)
  someGcc = defined(gcc) or defined(llvm_gcc) or defined(clang) or defined(icc)

proc atomicInc*(memLoc: var int64, x = 1'i64): int64 =
  ## Add `x` (default 1) to `memLoc` and return the updated value.
  ## With thread support, the compiler's atomic primitive is used on
  ## GCC-like compilers (relaxed ordering) and on VCC; otherwise a plain
  ## addition is performed.
  when hasThreadSupport and someGcc:
    result = atomicAddFetch(addr memLoc, x, ATOMIC_RELAXED)
  elif hasThreadSupport and defined(vcc):
    # `x` is added on top of the value handed back by `addAndFetch`.
    result = addAndFetch(addr memLoc, x) + x
  else:
    memLoc = memLoc + x
    result = memLoc
# Types
# --------------------------------------------------
type
  Metadata* = object
    ## Per-procedure metering record: one entry is created for each
    ## procedure annotated with `meter`.
    procName*: string # Name of the metered procedure, as written at its definition
    module: string # Not filled in yet (see the TODO in `meterAnnotate`)
    package: string # Not filled in yet (see the TODO in `meterAnnotate`)
    tag: string # Can be changed to multi-tags later
    numCalls*: int64 # Number of times the procedure was entered
    # Cumulated elapsed time. The field name and the code that prints it
    # (which scales it by 1e-3 to display µs) expect nanoseconds.
    # NOTE(review): verify that the value accumulated in `fnExit` uses the same unit.
    cumulatedTimeNs*: int64
    when SupportsGetTicks:
      cumulatedCycles*: int64 # Cumulated `getTicks` deltas
var ctMetrics{.compileTime.}: seq[Metadata]
  ## Metrics are collected here, this is just a temporary holder of compileTime values.
  ## `meterAnnotate` appends one entry per metered procedure; the index of
  ## the entry is the id used at runtime to address `Metrics`.
  ## Unfortunately the "seq" is emptied when passing the compileTime/runtime boundaries
  ## due to Nim bugs
var Metrics*: seq[Metadata]
  ## Runtime table of metering records, indexed by the id assigned in `meterAnnotate`.
  ## We can't directly use it at compileTime because it doesn't exist.
  ## We need `Metrics = static(ctMetrics)`
  ## To transfer the compileTime content to runtime at an opportune time.
template mtag(tagname: string){.pragma.}
  ## This will allow tagging proc in the future with
  ## "Fp", "ec", "polynomial"
proc resetMetering*() =
  ## Overwrite the runtime `Metrics` table with the compile-time
  ## `ctMetrics` content, discarding whatever was accumulated so far.
  ## Must be called before metered procedures run, since they index
  ## `Metrics` by the id assigned at compile time.
  Metrics = static(ctMetrics)
# Compile-time switches, set with -d:CttMeter / -d:CttTrace.
# When both are off, `meter` leaves the annotated procedure untouched.
const CttMeter {.booldefine.} = off
const CttTrace {.booldefine.} = off # For manual "debug-echo"-style timing.
when CttTrace:
  # strformat doesn't work in templates.
  # These helpers are only needed by the trace echo in `fnExit`.
  from strutils import alignLeft, formatFloat
# Symbols
# --------------------------------------------------
template fnEntry(name: string, id: int, startTime, startCycle: untyped): untyped =
  ## Bench tracing to insert on function entry
  ##
  ## - `id`: index of the procedure's record in `Metrics`
  ## - `startTime`, `startCycle`: symbols to declare; `fnExit` reads them back
  ## - `name` is not used here
  {.noSideEffect, gcsafe.}:
    # Count the call first, then take the timestamps.
    discard Metrics[id].numCalls.atomicInc()
    let startTime = getMonoTime()
    when SupportsGetTicks:
      let startCycle = getTicks()
    else:
      # Still declared so that both templates share the same signature.
      let startCycle = 0
template fnExit(name: string, id: int, startTime, startCycle: untyped): untyped =
  ## Bench tracing to insert before each function exit
  ##
  ## - `name`: procedure name, only used by the optional trace echo
  ## - `id`: index of the procedure's record in `Metrics`
  ## - `startTime`, `startCycle`: symbols declared by `fnEntry`
  ##
  ## Adds the elapsed time (in nanoseconds) and, when `SupportsGetTicks`
  ## is true, the elapsed cycles to the procedure's record.
  {.noSideEffect, gcsafe.}:
    # Read the cycle counter before the clock, mirroring `fnEntry`
    # which reads the clock first and the cycle counter last.
    when SupportsGetTicks:
      let stopCycle = getTicks()
    let stopTime = getMonoTime()
    when SupportsGetTicks:
      let elapsedCycles = stopCycle - startCycle
    # Nanoseconds: the value is accumulated in `cumulatedTimeNs` and every
    # consumer scales it by 1e-3 to display microseconds. Measuring in
    # microseconds here made reported times 1000x too small and truncated
    # most short calls to 0.
    let elapsedTime = inNanoseconds(stopTime - startTime)

    discard Metrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
    when SupportsGetTicks:
      discard Metrics[id].cumulatedCycles.atomicInc(elapsedCycles)

    when CttTrace:
      # Advice: Use "when name == relevantProc" to isolate specific procedures.
      # strformat doesn't work in templates.
      when SupportsGetTicks:
        echo static(alignLeft(name, 50)),
          "Time (µs): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-3, precision=3), 10),
          "Cycles (billions): ", formatFloat(elapsedCycles.float64 * 1e-9, precision=3)
      else:
        echo static(alignLeft(name, 50)),
          "Time (µs): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-3, precision=3), 10)
macro meterAnnotate(procAst: untyped): untyped =
  ## Rewrite a proc/func definition so that its body is preceded by the
  ## `fnEntry` instrumentation and a `defer` running `fnExit`.
  ## Also registers a `Metadata` record for the procedure in `ctMetrics`.
  procAst.expectKind({nnkProcDef, nnkFuncDef})
  # The record's position in `ctMetrics` is the id baked into the
  # instrumentation, so it must be read before the `add` below.
  let id = ctMetrics.len
  let name = procAst[0].repr
  # TODO, get the module and the package the proc is coming from
  #       and the tag "Fp", "ec", "polynomial" ...
  ctMetrics.add Metadata(procName: name)
  var newBody = newStmtList()
  # Fresh symbols shared between the entry and exit instrumentation.
  let startTime = genSym(nskLet, "metering_" & name & "_startTime_")
  let startCycle = genSym(nskLet, "metering_" & name & "_startCycles_")
  newBody.add getAst(fnEntry(name, id, startTime, startCycle))
  # `defer` makes the exit instrumentation run when the body's scope is left.
  newbody.add nnkDefer.newTree(getAst(fnExit(name, id, startTime, startCycle)))
  newBody.add procAst.body
  procAst.body = newBody
  result = procAst
template meter*(procBody: untyped): untyped =
  ## Pragma-style annotation: `proc foo() {.meter.} = ...`
  ## Instruments the procedure when `CttMeter` or `CttTrace` is on,
  ## otherwise emits the procedure unchanged.
  when CttMeter or CttTrace:
    meterAnnotate(procBody)
  else:
    procBody
# Sanity checks
# ---------------------------------------------------
when isMainModule:
  # Self-test, only meaningful when compiled with -d:CttMeter or -d:CttTrace.
  static: doAssert CttMeter or CttTrace, "CttMeter or CttTrace must be on for tracing"
  # `expandMacros` prints the instrumented procedure at compile time.
  expandMacros:
    proc foo(x: int): int{.meter.} =
      echo "Hey hey hey"
      result = x
  # Load the compile-time records into `Metrics` before the first metered call.
  resetMetering()
  echo Metrics
  discard foo(10)
  echo Metrics
  # `foo` is the only metered procedure here, so its record is at index 0.
  doAssert Metrics[0].numCalls == 1