Revive nbench (#1022)
* Fix nbench compilation with HashedBeaconState
* Add nbench to tooling
* use newClone - fix 265e01e404 (r425198575)
* Detail advance_slot and hashTreeRoot
* Report throughput
* Fallback for ARM
* windows does not support inline ASM
This commit is contained in:
parent
8b4065bc19
commit
1916e1d2da
6
Makefile
6
Makefile
|
@ -25,12 +25,15 @@ TOOLS := \
|
|||
process_dashboard \
|
||||
stack_sizes \
|
||||
state_sim \
|
||||
block_sim
|
||||
block_sim \
|
||||
nbench \
|
||||
nbench_spec_scenarios
|
||||
# bench_bls_sig_agggregation TODO reenable after bls v0.10.1 changes
|
||||
TOOLS_DIRS := \
|
||||
beacon_chain \
|
||||
benchmarks \
|
||||
ncli \
|
||||
nbench \
|
||||
research \
|
||||
tests/simulation
|
||||
TOOLS_CSV := $(subst $(SPACE),$(COMMA),$(TOOLS))
|
||||
|
@ -174,4 +177,3 @@ publish-book: | book
|
|||
rm -rf tmp-book
|
||||
|
||||
endif # "variables.mk" was not included
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ import
|
|||
faststreams/[inputs, outputs, buffers],
|
||||
serialization, serialization/testing/tracing,
|
||||
./spec/[crypto, datatypes, digest],
|
||||
./ssz/[types, bytes_reader]
|
||||
./ssz/[types, bytes_reader],
|
||||
../nbench/bench_lab
|
||||
|
||||
# ################### Helper functions ###################################
|
||||
|
||||
|
@ -607,7 +608,7 @@ func hashTreeRootImpl[T](x: T): Eth2Digest =
|
|||
else:
|
||||
unsupported T
|
||||
|
||||
func hash_tree_root*(x: auto): Eth2Digest {.raises: [Defect].} =
|
||||
func hash_tree_root*(x: auto): Eth2Digest {.raises: [Defect], nbench.} =
|
||||
trs "STARTING HASH TREE ROOT FOR TYPE ", name(type(x))
|
||||
mixin toSszType
|
||||
when x is TypeWithMaxLen:
|
||||
|
|
|
@ -39,7 +39,7 @@ import
|
|||
declareGauge beacon_current_validators, """Number of status="pending|active|exited|withdrawable" validators in current epoch""" # On epoch transition
|
||||
declareGauge beacon_previous_validators, """Number of status="pending|active|exited|withdrawable" validators in previous epoch""" # On epoch transition
|
||||
|
||||
func get_epoch_validator_count(state: BeaconState): int64 =
|
||||
func get_epoch_validator_count(state: BeaconState): int64 {.nbench.} =
|
||||
# https://github.com/ethereum/eth2.0-metrics/blob/master/metrics.md#additional-metrics
|
||||
#
|
||||
# This O(n) loop doesn't add to the algorithmic complexity of the epoch
|
||||
|
@ -124,7 +124,7 @@ func process_slot*(state: var HashedBeaconState) {.nbench.} =
|
|||
|
||||
# https://github.com/ethereum/eth2.0-specs/blob/v0.11.1/specs/phase0/beacon-chain.md#beacon-chain-state-transition-function
|
||||
proc advance_slot*(state: var HashedBeaconState,
|
||||
nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) =
|
||||
nextStateRoot: Opt[Eth2Digest], updateFlags: UpdateFlags) {.nbench.} =
|
||||
# Special case version of process_slots that moves one slot at a time - can
|
||||
# run faster if the state root is known already (for example when replaying
|
||||
# existing slots)
|
||||
|
|
|
@ -9,7 +9,7 @@ import
|
|||
# Standard lib
|
||||
macros, std/[monotimes, times],
|
||||
# Internal
|
||||
platforms/x86
|
||||
platforms/platforms
|
||||
|
||||
# Bench laboratory
|
||||
# --------------------------------------------------
|
||||
|
@ -43,7 +43,8 @@ type
|
|||
# TODO - replace by eth-metrics once we figure out a CSV/JSON/Console backend
|
||||
numCalls*: int64
|
||||
cumulatedTimeNs*: int64 # in nanoseconds
|
||||
cumulatedCycles*: int64
|
||||
when SupportsGetTicks:
|
||||
cumulatedCycles*: int64
|
||||
|
||||
var ctBenchMetrics*{.compileTime.}: seq[Metadata]
|
||||
## Metrics are collected here, this is just a temporary holder of compileTime values
|
||||
|
@ -64,10 +65,13 @@ template ntag(tagname: string){.pragma.}
|
|||
|
||||
template fnEntry(name: string, id: int, startTime, startCycle: untyped): untyped =
|
||||
## Bench tracing to insert on function entry
|
||||
{.noSideEffect.}:
|
||||
{.noSideEffect, gcsafe.}:
|
||||
discard BenchMetrics[id].numCalls.atomicInc()
|
||||
let startTime = getMonoTime()
|
||||
let startCycle = getTicks()
|
||||
when SupportsGetTicks:
|
||||
let startCycle = getTicks()
|
||||
else:
|
||||
let startCycle = 0
|
||||
|
||||
const nbench_trace {.booldefine.} = off # For manual "debug-echo"-style timing.
|
||||
when nbench_trace:
|
||||
|
@ -76,21 +80,28 @@ when nbench_trace:
|
|||
|
||||
template fnExit(name: string, id: int, startTime, startCycle: untyped): untyped =
|
||||
## Bench tracing to insert before each function exit
|
||||
{.noSideEffect.}:
|
||||
let stopCycle = getTicks()
|
||||
{.noSideEffect, gcsafe.}:
|
||||
when SupportsGetTicks:
|
||||
let stopCycle = getTicks()
|
||||
let stopTime = getMonoTime()
|
||||
let elapsedCycles = stopCycle - startCycle
|
||||
when SupportsGetTicks:
|
||||
let elapsedCycles = stopCycle - startCycle
|
||||
let elapsedTime = inNanoseconds(stopTime - startTime)
|
||||
|
||||
discard BenchMetrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
|
||||
discard BenchMetrics[id].cumulatedCycles.atomicInc(elapsedCycles)
|
||||
when SupportsGetTicks:
|
||||
discard BenchMetrics[id].cumulatedCycles.atomicInc(elapsedCycles)
|
||||
|
||||
when nbench_trace:
|
||||
# Advice: Use "when name == relevantProc" to isolate specific procedures.
|
||||
# strformat doesn't work in templates.
|
||||
echo static(alignLeft(name, 50)),
|
||||
"Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10),
|
||||
"Cycles (billions): ", formatFloat(elapsedCycles.float64 * 1e-9, precision=3)
|
||||
when SupportsGetTicks:
|
||||
echo static(alignLeft(name, 50)),
|
||||
"Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10),
|
||||
"Cycles (billions): ", formatFloat(elapsedCycles.float64 * 1e-9, precision=3)
|
||||
else:
|
||||
echo static(alignLeft(name, 50)),
|
||||
"Time (ms): ", alignLeft(formatFloat(elapsedTime.float64 * 1e-6, precision=3), 10)
|
||||
|
||||
macro nbenchAnnotate(procAst: untyped): untyped =
|
||||
procAst.expectKind({nnkProcDef, nnkFuncDef})
|
||||
|
@ -100,7 +111,7 @@ macro nbenchAnnotate(procAst: untyped): untyped =
|
|||
# TODO, get the module and the package the proc is coming from
|
||||
# and the tag "crypto", "ssz", "block_transition", "epoch_transition" ...
|
||||
|
||||
ctBenchMetrics.add Metadata(procName: $name, numCalls: 0, cumulatedTimeNs: 0, cumulatedCycles: 0)
|
||||
ctBenchMetrics.add Metadata(procName: $name)
|
||||
var newBody = newStmtList()
|
||||
let startTime = genSym(nskLet, "nbench_" & $name & "_startTime_")
|
||||
let startCycle = genSym(nskLet, "nbench_" & $name & "_startCycles_")
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
# beacon_chain
|
||||
# Copyright (c) 2018-2020 Status Research & Development GmbH
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
const GccCompatible = defined(gcc) or defined(clang)
|
||||
const x86arch = defined(i386) or defined(amd64)
|
||||
|
||||
const supports_x86_inline_asm = block:
|
||||
x86arch and (
|
||||
(
|
||||
GccCompatible and not defined(windows)
|
||||
) or (
|
||||
defined(vcc)
|
||||
)
|
||||
)
|
||||
|
||||
when supports_x86_inline_asm:
|
||||
import x86
|
||||
export getTicks, cpuName
|
||||
|
||||
const SupportsCPUName* = true
|
||||
const SupportsGetTicks* = true
|
||||
else:
|
||||
const SupportsCPUName* = false
|
||||
const SupportsGetTicks* = false
|
|
@ -9,42 +9,59 @@ import
|
|||
# Standard library
|
||||
strformat, strutils,
|
||||
# Bench
|
||||
bench_lab
|
||||
bench_lab, platforms/platforms
|
||||
|
||||
template cpuX86(body: untyped): untyped =
|
||||
when defined(i386) or defined(amd64):
|
||||
body
|
||||
|
||||
cpuX86:
|
||||
import platforms/x86
|
||||
|
||||
# Reporting benchmark result
|
||||
# -------------------------------------------------------
|
||||
|
||||
proc reportCli*(metrics: seq[Metadata], preset, flags: string) =
|
||||
|
||||
cpuX86:
|
||||
let name = cpuName()
|
||||
echo "\nCPU: ", name
|
||||
let name = when SupportsCPUName: cpuName() else: "(name auto-detection not implemented for this CPU family)"
|
||||
echo "\nCPU: ", name
|
||||
|
||||
# https://blog.trailofbits.com/2019/10/03/tsc-frequency-for-all-better-profiling-and-benchmarking/
|
||||
# https://www.agner.org/optimize/blog/read.php?i=838
|
||||
echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)."
|
||||
when SupportsGetTicks:
|
||||
# https://blog.trailofbits.com/2019/10/03/tsc-frequency-for-all-better-profiling-and-benchmarking/
|
||||
# https://www.agner.org/optimize/blog/read.php?i=838
|
||||
echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)."
|
||||
|
||||
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
|
||||
echo "\n"
|
||||
echo lineSep
|
||||
echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
|
||||
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
|
||||
echo lineSep
|
||||
for m in metrics:
|
||||
if m.numCalls == 0:
|
||||
continue
|
||||
# TODO: running variance / standard deviation but the Welford method is quite costly.
|
||||
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
|
||||
let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6
|
||||
let avgTimeMs = cumulTimeMs / m.numCalls.float64
|
||||
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
|
||||
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
|
||||
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|{cumulCyclesBillions:>26.3f}|{avgCyclesBillions:>26.3f}|"""
|
||||
echo lineSep
|
||||
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
|
||||
echo "\n"
|
||||
echo lineSep
|
||||
echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
|
||||
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
|
||||
echo lineSep
|
||||
for m in metrics:
|
||||
if m.numCalls == 0:
|
||||
continue
|
||||
# TODO: running variance / standard deviation but the Welford method is quite costly.
|
||||
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
|
||||
let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6
|
||||
let avgTimeMs = cumulTimeMs / m.numCalls.float64
|
||||
let throughput = 1e3 / avgTimeMs
|
||||
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
|
||||
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
|
||||
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|"""
|
||||
echo lineSep
|
||||
|
||||
else:
|
||||
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
|
||||
echo "\n"
|
||||
echo lineSep
|
||||
echo &"""|{"Procedures (" & preset & ')':^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (ms)":^15}|{"Avg Time (ms)":^17}|"""
|
||||
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|"""
|
||||
echo lineSep
|
||||
for m in metrics:
|
||||
if m.numCalls == 0:
|
||||
continue
|
||||
# TODO: running variance / standard deviation but the Welford method is quite costly.
|
||||
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
|
||||
let cumulTimeMs = m.cumulatedTimeNs.float64 * 1e-6
|
||||
let avgTimeMs = cumulTimeMs / m.numCalls.float64
|
||||
let throughput = 1e3 / avgTimeMs
|
||||
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeMs:>15.3f}|{avgTimeMs:>17.3f}|"""
|
||||
echo lineSep
|
||||
|
|
|
@ -125,7 +125,10 @@ type
|
|||
|
||||
proc parseSSZ(path: string, T: typedesc): T =
|
||||
try:
|
||||
result = SSZ.loadFile(path, T)
|
||||
when T is ref:
|
||||
result = newClone(SSZ.loadFile(path, typeof(default(T)[])))
|
||||
else:
|
||||
result = SSZ.loadFile(path, T)
|
||||
except SerializationError as err:
|
||||
writeStackTrace()
|
||||
stderr.write "SSZ load issue for file \"", path, "\"\n"
|
||||
|
@ -140,7 +143,10 @@ proc runFullTransition*(dir, preState, blocksPrefix: string, blocksQty: int, ski
|
|||
let prePath = dir / preState & ".ssz"
|
||||
|
||||
echo "Running: ", prePath
|
||||
var state = newClone(parseSSZ(prePath, BeaconState))
|
||||
let state = (ref HashedBeaconState)(
|
||||
data: parseSSZ(prePath, BeaconState)
|
||||
)
|
||||
state.root = hash_tree_root(state.data)
|
||||
|
||||
for i in 0 ..< blocksQty:
|
||||
let blockPath = dir / blocksPrefix & $i & ".ssz"
|
||||
|
@ -157,9 +163,12 @@ proc runProcessSlots*(dir, preState: string, numSlots: uint64) =
|
|||
let prePath = dir / preState & ".ssz"
|
||||
|
||||
echo "Running: ", prePath
|
||||
var state = newClone(parseSSZ(prePath, BeaconState))
|
||||
let state = (ref HashedBeaconState)(
|
||||
data: parseSSZ(prePath, BeaconState)
|
||||
)
|
||||
state.root = hash_tree_root(state.data)
|
||||
|
||||
process_slots(state[], state.slot + numSlots)
|
||||
process_slots(state[], state.data.slot + numSlots)
|
||||
|
||||
template processEpochScenarioImpl(
|
||||
dir, preState: string,
|
||||
|
@ -168,16 +177,19 @@ template processEpochScenarioImpl(
|
|||
let prePath = dir/preState & ".ssz"
|
||||
|
||||
echo "Running: ", prePath
|
||||
var state = newClone(parseSSZ(prePath, BeaconState))
|
||||
let state = (ref HashedBeaconState)(
|
||||
data: parseSSZ(prePath, BeaconState)
|
||||
)
|
||||
state.root = hash_tree_root(state.data)
|
||||
|
||||
when needCache:
|
||||
var cache = get_empty_per_epoch_cache()
|
||||
|
||||
# Epoch transitions can't fail (TODO is this true?)
|
||||
when needCache:
|
||||
transitionFn(state[], cache)
|
||||
transitionFn(state.data, cache)
|
||||
else:
|
||||
transitionFn(state[])
|
||||
transitionFn(state.data)
|
||||
|
||||
echo astToStr(transitionFn) & " status: ", "Done" # if success: "SUCCESS ✓" else: "FAILURE ⚠️"
|
||||
|
||||
|
@ -193,7 +205,10 @@ template processBlockScenarioImpl(
|
|||
let prePath = dir/preState & ".ssz"
|
||||
|
||||
echo "Running: ", prePath
|
||||
var state = newClone(parseSSZ(prePath, BeaconState))
|
||||
let state = (ref HashedBeaconState)(
|
||||
data: parseSSZ(prePath, BeaconState)
|
||||
)
|
||||
state.root = hash_tree_root(state.data)
|
||||
|
||||
when needCache:
|
||||
var cache = get_empty_per_epoch_cache()
|
||||
|
@ -206,9 +221,9 @@ template processBlockScenarioImpl(
|
|||
var consObj = parseSSZ(consObjPath, ConsensusObjectRefType)
|
||||
|
||||
when needFlags and needCache:
|
||||
let success = transitionFn(state[], consObj[], flags, cache)
|
||||
let success = transitionFn(state.data, consObj[], flags, cache)
|
||||
elif needFlags:
|
||||
let success = transitionFn(state[], consObj[], flags)
|
||||
let success = transitionFn(state.data, consObj[], flags)
|
||||
elif needCache:
|
||||
let success = transitionFn(state, consObj[], flags, cache)
|
||||
else:
|
||||
|
@ -286,4 +301,3 @@ genProcessBlockScenario(runProcessVoluntaryExits,
|
|||
SignedVoluntaryExit,
|
||||
needFlags = true,
|
||||
needCache = false)
|
||||
|
||||
|
|
Loading…
Reference in New Issue