nimbus-eth2/config.nims

140 lines
5.0 KiB
Plaintext
Raw Normal View History

# Release and debug builds each get their own nimcache directory, further
# split per project name.
let nimCachePath =
  when defined(release):
    "nimcache/release/" & projectName()
  else:
    "nimcache/debug/" & projectName()
switch("nimcache", nimCachePath)
use LTO in release builds (#1661) * use LTO in release builds This significantly (40%) speeds up block replay and hashing - for example replaying first 1000 blocks, without/with LTO: ``` [arnetheduck@tempus ncli]$ ../env.sh nim c -d:release ncli_db [arnetheduck@tempus ncli]$ ./ncli_db bench --db:db --network:medalla --slots:1000 Loaded 215006 blocks, head slot 307400 All time are ms Average, StdDev, Min, Max, Samples, Test Validation is turned off meaning that no BLS operations are performed 25468.481, 0.000, 25468.481, 25468.481, 1, Initialize DB 0.297, 0.516, 0.053, 13.645, 721, Load block from database 26.458, 0.000, 26.458, 26.458, 1, Load state from database 20.737, 8.288, 11.096, 199.325, 690, Apply block 333.069, 62.798, 45.225, 429.452, 31, Apply epoch block 0.000, 0.000, 0.000, 0.000, 0, Database block store ``` ``` [arnetheduck@tempus ncli]$ ../env.sh nim c -d:release --passc:-flto --passl:-flto --stacktrace:off ncli_db [arnetheduck@tempus ncli]$ ./ncli_db bench --db:db --network:medalla --slots:1000 Loaded 215006 blocks, head slot 307400 All time are ms Average, StdDev, Min, Max, Samples, Test Validation is turned off meaning that no BLS operations are performed 23903.006, 0.000, 23903.006, 23903.006, 1, Initialize DB 0.253, 0.122, 0.047, 0.731, 721, Load block from database 24.455, 0.000, 24.455, 24.455, 1, Load state from database 18.734, 7.062, 10.346, 167.397, 690, Apply block 194.869, 33.175, 29.311, 226.981, 31, Apply epoch block ``` Epoch processing is heavy on both arithmetics and hash caching, both of which get a significant boost here. This makes sense: nim creates lots of small functions spread out over many C files. 
A much worse solution is to try to annotate code with `inline` - it copies functions to multiple C files but still doesn't do intermodule optimizations significantly limiting the compilers' ability to reason about the code, causing bloat and misrepresenting the usefulness of a function to the call frequency analysis that drives actual (C-compiler) inlining and many other optimizations. In particular, many nim functions are part of `system` or the `C` backend - stack tracing, memory allocation etc - nim's inlining system is pretty incomplete in that it does not deal with these and many other cases. * windows workaround * skip LTO on windows for now
2020-09-24 18:40:28 +02:00
# `-flto` gives a significant improvement in processing speed, especially for hash tree and state transition code (basically any CPU-bound code implemented in Nim)
# With LTO enabled, optimization flags should be passed to both compiler and linker!
2020-09-25 18:15:02 +02:00
# LTO is only applied to release builds and can be opted out of with
# `-d:disableLTO`. On Linux the same flags go to both the C compiler and the
# linker; on macOS the linker additionally receives `-object_path_lto`
# pointing inside the nimcache directory.
if defined(release) and not defined(disableLTO):
  if defined(macosx): # Clang
    let ltoObjectDir = nimCachePath & "/lto"
    switch("passC", "-flto=thin")
    switch("passL", "-flto=thin -Wl,-object_path_lto," & ltoObjectDir)
  elif defined(linux):
    for ltoFlag in ["-flto=auto", "-finline-limit=100000"]:
      switch("passC", ltoFlag)
      switch("passL", ltoFlag)
  else:
    # Every other platform gets no LTO flags. On Windows, LTO needs more love
    # and attention: "gcc-ar" and "gcc-ranlib" have to be used for static
    # libraries.
    discard
# Settings that only apply when targeting Windows.
if defined(windows):
  # Keep timestamps out of the Windows PE headers, see
  # https://wiki.debian.org/ReproducibleBuilds/TimestampsInPEBinaries
  switch("passL", "-Wl,--no-insert-timestamp")

  # Increase the stack size (8388608 bytes).
  switch("passL", "-Wl,--stack,8388608")

  # https://github.com/nim-lang/Nim/issues/4057
  switch("tlsEmulation", "off")

  if defined(i386):
    # Set the IMAGE_FILE_LARGE_ADDRESS_AWARE flag so that PAE, if enabled, can
    # be used to access more than 2 GiB of RAM.
    switch("passL", "-Wl,--large-address-aware")

  # The dynamic Chronicles output currently prevents us from using colors on
  # Windows, because these require direct manipulations of the stdout File
  # object.
  switch("define", "chronicles_colors=off")
2020-02-05 18:20:05 +01:00
# This helps especially for 32-bit x86, which sans SSE2 and newer instructions
# requires quite roundabout code generation for cryptography, and other 64-bit
# and larger arithmetic use cases, along with register starvation issues. When
# engineering a more portable binary release, this should be tweaked but still
# use at least -msse2 or -msse3.
# Instruction-set selection. With LTO enabled, optimization flags should be
# passed to both compiler and linker, so every flag below is given to both.
#
# -d:disableMarchNative requests a portable binary instead of one tuned for
# the build machine.
if defined(disableMarchNative):
  # `-msse3` is an x86-only option: compilers targeting other architectures
  # (e.g. ARM) reject it, so the SSE3 baseline is only requested on x86.
  if defined(i386) or defined(amd64):
    switch("passC", "-msse3")
    switch("passL", "-msse3")
else:
  switch("passC", "-march=native")
  switch("passL", "-march=native")
  if defined(windows):
    # Turn AVX-512 off because of
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782
    # (the other workaround, "-fno-asynchronous-unwind-tables", breaks Nim's
    # exception raising, sometimes)
    switch("passC", "-mno-avx512f")
    switch("passL", "-mno-avx512f")
2020-02-05 18:20:05 +01:00
# Compiler switches applied unconditionally.
switch("threads", "on")
switch("opt", "speed")
switch("excessiveStackTrace", "on")

# enable metric collection
switch("define", "metrics")
switch("define", "chronicles_line_numbers")

# for heap-usage-by-instance-type metrics and object base-type strings
switch("define", "nimTypeNames")
2019-11-13 00:22:21 +01:00
# switch("define", "snappy_implementation=libp2p")
2020-06-11 19:41:43 +03:00
# Path of this script with the trailing "config.nims" file name cut off;
# whatever precedes the name (including the separator) is kept.
const
  scriptPath = currentSourcePath()
  currentDir = scriptPath[0 ..< scriptPath.len - len("config.nims")]

# Expose the path of the `env.sh nim` wrapper next to this script as a define.
switch("define", "nim_compiler_path=" & currentDir & "env.sh nim")
switch("define", "withoutPCRE")
2020-06-11 19:41:43 +03:00
switch("import", "testutils/moduletests")

# libbacktrace is used unless the target is macOS or Windows, or it was
# disabled explicitly with `-d:disable_libbacktrace`.
const useLibStackTrace =
  not (defined(macosx) or defined(windows) or defined(disable_libbacktrace))

when useLibStackTrace:
  switch("define", "nimStackTraceOverride")
  switch("import", "libbacktrace")
else:
  # Without libbacktrace, turn on the compiler's stacktrace and linetrace
  # switches instead.
  switch("stacktrace", "on")
  switch("linetrace", "on")
2019-11-13 00:22:21 +01:00
# Add debugging symbols and original files and line numbers - except on
# macOS, where the default open files limit (512) is too low and breaks the
# "--debugger:native" build. It can be increased with `ulimit -n 1024`.
if not defined(macosx):
  switch("debugger", "native")
# https://github.com/status-im/nim-confutils/issues/9
switch("define", "nimOldCaseObjects")

# Warnings switched off for the whole build:
#
# - CaseTransition: note that `switch("warning[CaseTransition]", "off")` fails
#   with "Error: invalid command line option: '--warning[CaseTransition]'",
#   hence the "Name:off" spelling.
# - ObservableStores: the compiler doth protest too much, methinks, about all
#   these cases where it can't do its (N)RVO pass:
#   https://github.com/nim-lang/RFCs/issues/230
# - LockLevel: too many false positives for "Warning: method has lock level
#   <unknown>, but another method has 0 [LockLevel]".
for warningSetting in [
  "CaseTransition:off",
  "ObservableStores:off",
  "LockLevel:off"
]:
  switch("warning", warningSetting)
# Useful for Chronos metrics.
#--define:chronosFutureTracking
# ############################################################
#
# No LTO for crypto
#
# ############################################################
# This applies per-file compiler flags to C files
# which do not support {.localPassC: "-fno-lto".}
# Unfortunately this is filename based instead of path-based
# Assumes GCC
# Per-file `-fno-lto` overrides, one key per C file name:
#   BLST      - server, assembly
#   Secp256k1 - secp256k1
#   BearSSL   - only RNGs: aesctr_drbg, hmac_drbg, sysrng
#   Miracl    - only ECP to derive public key from private key: ecp_BLS12381
for perFileKey in [
  "server.always",
  "assembly.always",
  "secp256k1.always",
  "aesctr_drbg.always",
  "hmac_drbg.always",
  "sysrng.always",
  "ecp_BLS12381.always"
]:
  put(perFileKey, "-fno-lto")
# ############################################################
#
# Spurious warnings
#
# ############################################################
# Compiler output recorded for sqlite3.c:
# sqlite3.c: In function sqlite3SelectNew:
# vendor/nim-sqlite3-abi/sqlite3.c:124500: warning: function may return address of local variable [-Wreturn-local-addr]
# The per-file flag actually applied is `-fno-lto`. The trailing comment names
# `-Wno-return-local-addr`, presumably the flag that would silence the warning
# directly - TODO confirm why `-fno-lto` was chosen instead.
put("sqlite3.always", "-fno-lto") # -Wno-return-local-addr