Revive nbench (#1022)

* Fix nbench compilation with HashedBeaconState

* Add nbench to tooling

* use newClone - fix 265e01e404 (r425198575)

* Detail advance_slot and hashTreeRoot

* Report throughput

* Fallback for ARM

* windows does not support inline ASM
This commit is contained in:
Mamy Ratsimbazafy 2020-05-15 00:54:10 +02:00 committed by GitHub
parent 8b4065bc19
commit 1916e1d2da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 128 additions and 55 deletions

View File

@ -25,12 +25,15 @@ TOOLS := \
process_dashboard \ process_dashboard \
stack_sizes \ stack_sizes \
state_sim \ state_sim \
block_sim block_sim \
nbench \
nbench_spec_scenarios
# bench_bls_sig_agggregation TODO reenable after bls v0.10.1 changes # bench_bls_sig_agggregation TODO reenable after bls v0.10.1 changes
TOOLS_DIRS := \ TOOLS_DIRS := \
beacon_chain \ beacon_chain \
benchmarks \ benchmarks \
ncli \ ncli \
nbench \
research \ research \
tests/simulation tests/simulation
TOOLS_CSV := $(subst $(SPACE),$(COMMA),$(TOOLS)) TOOLS_CSV := $(subst $(SPACE),$(COMMA),$(TOOLS))
@ -174,4 +177,3 @@ publish-book: | book
rm -rf tmp-book rm -rf tmp-book
endif # "variables.mk" was not included endif # "variables.mk" was not included

View File

@ -19,7 +19,8 @@ import
faststreams/[inputs, outputs, buffers], faststreams/[inputs, outputs, buffers],
serialization, serialization/testing/tracing, serialization, serialization/testing/tracing,
./spec/[crypto, datatypes, digest], ./spec/[crypto, datatypes, digest],
./ssz/[types, bytes_reader] ./ssz/[types, bytes_reader],
../nbench/bench_lab
# ################### Helper functions ################################### # ################### Helper functions ###################################
@ -607,7 +608,7 @@ func hashTreeRootImpl[T](x: T): Eth2Digest =
else: else:
unsupported T unsupported T
func hash_tree_root*(x: auto): Eth2Digest {.raises: [Defect].} = func hash_tree_root*(x: auto): Eth2Digest {.raises: [Defect], nbench.} =
trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x)) trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x))
mixin toSszType mixin toSszType
when x is TypeWithMaxLen: when x is TypeWithMaxLen:

View File

@ -39,7 +39,7 @@ import
declareGauge beacon_current_validators, """Number of status="pending|active|exited|withdrawable" validators in current epoch""" # On epoch transition declareGauge beacon_current_validators, """Number of status="pending|active|exited|withdrawable" validators in current epoch""" # On epoch transition
declareGauge beacon_previous_validators, """Number of status="pending|active|exited|withdrawable" validators in previous epoch""" # On epoch transition declareGauge beacon_previous_validators, """Number of status="pending|active|exited|withdrawable" validators in previous epoch""" # On epoch transition
func get_epoch_validator_count(state: BeaconState): int64 = func get_epoch_validator_count(state: BeaconState): int64 {.nbench.} =
# https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#additional-metrics # https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#additional-metrics
# #
# This O(n) loop doesn't add to the algorithmic complexity of the epoch # This O(n) loop doesn't add to the algorithmic complexity of the epoch
@ -124,7 +124,7 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} =
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function # https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
proc advance_slot*(state: var HashedBeaconState, proc advance_slot*(state: var HashedBeaconState,
nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) = nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) {.nbench.} =
# Special case version of process_slots that moves one slot at a time - can # Special case version of process_slots that moves one slot at a time - can
# run faster if the state root is known already (for example when replaying # run faster if the state root is known already (for example when replaying
# existing slots) # existing slots)

View File

@ -9,7 +9,7 @@ import
# Standard lib # Standard lib
macros, std/[monotimes, times], macros, std/[monotimes, times],
# Internal # Internal
platforms/x86 platforms/platforms
# Bench laboratory # Bench laboratory
# -------------------------------------------------- # --------------------------------------------------
@ -43,6 +43,7 @@ type
# TODO - replace by eth-metrics once we figure out a CSV/JSON/Console backend # TODO - replace by eth-metrics once we figure out a CSV/JSON/Console backend
numCalls*: int64 numCalls*: int64
cumulatedTimeNs*: int64 # in nanoseconds cumulatedTimeNs*: int64 # in nanoseconds
when SupportsGetTicks:
cumulatedCycles*: int64 cumulatedCycles*: int64
var ctBenchMetrics*{.compileTime.}: seq[Metadata] var ctBenchMetrics*{.compileTime.}: seq[Metadata]
@ -64,10 +65,13 @@ template ntag(tagname: string){.pragma.}
template fnEntry(name: string, id: int, startTime, startCycle: untyped): untyped = template fnEntry(name: string, id: int, startTime, startCycle: untyped): untyped =
## Bench tracing to insert on function entry ## Bench tracing to insert on function entry
{.noSideEffect.}: {.noSideEffect, gcsafe.}:
discard BenchMetrics[id].numCalls.atomicInc() discard BenchMetrics[id].numCalls.atomicInc()
let startTime = getMonoTime() let startTime = getMonoTime()
when SupportsGetTicks:
let startCycle = getTicks() let startCycle = getTicks()
else:
let startCycle = 0
const nbench_trace {.booldefine.} = off # For manual "debug-echo"-style timing. const nbench_trace {.booldefine.} = off # For manual "debug-echo"-style timing.
when nbench_trace: when nbench_trace:
@ -76,21 +80,28 @@ when nbench_trace:
template fnExit(name: string, id: int, startTime, startCycle: untyped): untyped = template fnExit(name: string, id: int, startTime, startCycle: untyped): untyped =
## Bench tracing to insert before each function exit ## Bench tracing to insert before each function exit
{.noSideEffect.}: {.noSideEffect, gcsafe.}:
when SupportsGetTicks:
let stopCycle = getTicks() let stopCycle = getTicks()
let stopTime = getMonoTime() let stopTime = getMonoTime()
when SupportsGetTicks:
let elapsedCycles = stopCycle - startCycle let elapsedCycles = stopCycle - startCycle
let elapsedTime = inNanoseconds(stopTime - startTime) let elapsedTime = inNanoseconds(stopTime - startTime)
discard BenchMetrics[id].cumulatedTimeNs.atomicInc(elapsedTime) discard BenchMetrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
when SupportsGetTicks:
discard BenchMetrics[id].cumulatedCycles.atomicInc(elapsedCycles) discard BenchMetrics[id].cumulatedCycles.atomicInc(elapsedCycles)
when nbench_trace: when nbench_trace:
# Advice: Use "when name == relevantProc" to isolate specific procedures. # Advice: Use "when name == relevantProc" to isolate specific procedures.
# strformat doesn't work in templates. # strformat doesn't work in templates.
when SupportsGetTicks:
echo static(alignLeft(name, 50)), echo static(alignLeft(name, 50)),
"Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10), "Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10),
"Cycles (billions): ", formatFloat(elapsedCycles.float64 * 1e-9, precision=3) "Cycles (billions): ", formatFloat(elapsedCycles.float64 * 1e-9, precision=3)
else:
echo static(alignLeft(name, 50)),
"Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10)
macro nbenchAnnotate(procAst: untyped): untyped = macro nbenchAnnotate(procAst: untyped): untyped =
procAst.expectKind({nnkProcDef, nnkFuncDef}) procAst.expectKind({nnkProcDef, nnkFuncDef})
@ -100,7 +111,7 @@ macro nbenchAnnotate(procAst: untyped): untyped =
# TODO, get the module and the package the proc is coming from # TODO, get the module and the package the proc is coming from
# and the tag "crypto", "ssz", "block_transition", "epoch_transition" ... # and the tag "crypto", "ssz", "block_transition", "epoch_transition" ...
ctBenchMetrics.add Metadata(procName: $name, numCalls: 0, cumulatedTimeNs: 0, cumulatedCycles: 0) ctBenchMetrics.add Metadata(procName: $name)
var newBody = newStmtList() var newBody = newStmtList()
let startTime = genSym(nskLet, "nbench_" & $name & "_startTime_") let startTime = genSym(nskLet, "nbench_" & $name & "_startTime_")
let startCycle = genSym(nskLet, "nbench_" & $name & "_startCycles_") let startCycle = genSym(nskLet, "nbench_" & $name & "_startCycles_")

View File

@ -0,0 +1,28 @@
# beacon_chain
# Copyright (c) 2018-2020 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
const GccCompatible = defined(gcc) or defined(clang)
const x86arch = defined(i386) or defined(amd64)
const supports_x86_inline_asm = block:
x86arch and (
(
GccCompatible and not defined(windows)
) or (
defined(vcc)
)
)
when supports_x86_inline_asm:
import x86
export getTicks, cpuName
const SupportsCPUName* = true
const SupportsGetTicks* = true
else:
const SupportsCPUName* = false
const SupportsGetTicks* = false

View File

@ -9,33 +9,31 @@ import
# Standard library # Standard library
strformat, strutils, strformat, strutils,
# Bench # Bench
bench_lab bench_lab, platforms/platforms
template cpuX86(body: untyped): untyped = template cpuX86(body: untyped): untyped =
when defined(i386) or defined(amd64): when defined(i386) or defined(amd64):
body body
cpuX86:
import platforms/x86
# Reporting benchmark result # Reporting benchmark result
# ------------------------------------------------------- # -------------------------------------------------------
proc reportCli*(metrics: seq[Metadata], preset, flags: string) = proc reportCli*(metrics: seq[Metadata], preset, flags: string) =
cpuX86: let name = when SupportsCPUName: cpuName() else: "(name auto-detection not implemented for this CPU family)"
let name = cpuName()
echo "\nCPU: ", name echo "\nCPU: ", name
when SupportsGetTicks:
# https://blog.trailofbits.com/2019/10/03/tsc-frequency-for-all-better-profiling-and-benchmarking/ # https://blog.trailofbits.com/2019/10/03/tsc-frequency-for-all-better-profiling-and-benchmarking/
# https://www.agner.org/optimize/blog/read.php?i=838 # https://www.agner.org/optimize/blog/read.php?i=838
echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)." echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)."
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|""" const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
echo "\n" echo "\n"
echo lineSep echo lineSep
echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|""" echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|""" echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
echo lineSep echo lineSep
for m in metrics: for m in metrics:
if m.numCalls == 0: if m.numCalls == 0:
@ -44,7 +42,26 @@ proc reportCli*(metrics: seq[Metadata], preset, flags: string) =
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm # https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6 let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6
let avgTimeMs = cumulTimeMs / m.numCalls.float64 let avgTimeMs = cumulTimeMs / m.numCalls.float64
let throughput = 1e3 / avgTimeMs
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9 let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64 let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|{cumulCyclesBillions:>26.3f}|{avgCyclesBillions:>26.3f}|""" echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|"""
echo lineSep
else:
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
echo "\n"
echo lineSep
echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|"""
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|"""
echo lineSep
for m in metrics:
if m.numCalls == 0:
continue
# TODO: running variance / standard deviation but the Welford method is quite costly.
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6
let avgTimeMs = cumulTimeMs / m.numCalls.float64
let throughput = 1e3 / avgTimeMs
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|"""
echo lineSep echo lineSep

View File

@ -125,6 +125,9 @@ type
proc parseSSZ(path: string, T: typedesc): T = proc parseSSZ(path: string, T: typedesc): T =
try: try:
when T is ref:
result = newClone(SSZ.loadFile(path, typeof(default(T)[])))
else:
result = SSZ.loadFile(path, T) result = SSZ.loadFile(path, T)
except SerializationError as err: except SerializationError as err:
writeStackTrace() writeStackTrace()
@ -140,7 +143,10 @@ proc runFullTransition*(dir, preState, blocksPrefix: string, blocksQty: int, ski
let prePath = dir / preState & ".ssz" let prePath = dir / preState & ".ssz"
echo "Running: ", prePath echo "Running: ", prePath
var state = newClone(parseSSZ(prePath, BeaconState)) let state = (ref HashedBeaconState)(
data: parseSSZ(prePath, BeaconState)
)
state.root = hash_tree_root(state.data)
for i in 0 ..< blocksQty: for i in 0 ..< blocksQty:
let blockPath = dir / blocksPrefix & $i & ".ssz" let blockPath = dir / blocksPrefix & $i & ".ssz"
@ -157,9 +163,12 @@ proc runProcessSlots*(dir, preState: string, numSlots: uint64) =
let prePath = dir / preState & ".ssz" let prePath = dir / preState & ".ssz"
echo "Running: ", prePath echo "Running: ", prePath
var state = newClone(parseSSZ(prePath, BeaconState)) let state = (ref HashedBeaconState)(
data: parseSSZ(prePath, BeaconState)
)
state.root = hash_tree_root(state.data)
process_slots(state[], state.slot + numSlots) process_slots(state[], state.data.slot + numSlots)
template processEpochScenarioImpl( template processEpochScenarioImpl(
dir, preState: string, dir, preState: string,
@ -168,16 +177,19 @@ template processEpochScenarioImpl(
let prePath = dir/preState & ".ssz" let prePath = dir/preState & ".ssz"
echo "Running: ", prePath echo "Running: ", prePath
var state = newClone(parseSSZ(prePath, BeaconState)) let state = (ref HashedBeaconState)(
data: parseSSZ(prePath, BeaconState)
)
state.root = hash_tree_root(state.data)
when needCache: when needCache:
var cache = get_empty_per_epoch_cache() var cache = get_empty_per_epoch_cache()
# Epoch transitions can't fail (TODO is this true?) # Epoch transitions can't fail (TODO is this true?)
when needCache: when needCache:
transitionFn(state[], cache) transitionFn(state.data, cache)
else: else:
transitionFn(state[]) transitionFn(state.data)
echo astToStr(transitionFn) & " status: ", "Done" # if success: "SUCCESS ✓" else: "FAILURE ⚠️" echo astToStr(transitionFn) & " status: ", "Done" # if success: "SUCCESS ✓" else: "FAILURE ⚠️"
@ -193,7 +205,10 @@ template processBlockScenarioImpl(
let prePath = dir/preState & ".ssz" let prePath = dir/preState & ".ssz"
echo "Running: ", prePath echo "Running: ", prePath
var state = newClone(parseSSZ(prePath, BeaconState)) let state = (ref HashedBeaconState)(
data: parseSSZ(prePath, BeaconState)
)
state.root = hash_tree_root(state.data)
when needCache: when needCache:
var cache = get_empty_per_epoch_cache() var cache = get_empty_per_epoch_cache()
@ -206,9 +221,9 @@ template processBlockScenarioImpl(
var consObj = parseSSZ(consObjPath, ConsensusObjectRefType) var consObj = parseSSZ(consObjPath, ConsensusObjectRefType)
when needFlags and needCache: when needFlags and needCache:
let success = transitionFn(state[], consObj[], flags, cache) let success = transitionFn(state.data, consObj[], flags, cache)
elif needFlags: elif needFlags:
let success = transitionFn(state[], consObj[], flags) let success = transitionFn(state.data, consObj[], flags)
elif needCache: elif needCache:
let success = transitionFn(state, consObj[], flags, cache) let success = transitionFn(state, consObj[], flags, cache)
else: else:
@ -286,4 +301,3 @@ genProcessBlockScenario(runProcessVoluntaryExits,
SignedVoluntaryExit, SignedVoluntaryExit,
needFlags = true, needFlags = true,
needCache = false) needCache = false)