From 61108cbc13add3d9f7dddc2f8880b85ffadb232f Mon Sep 17 00:00:00 2001 From: mratsim Date: Wed, 28 Feb 2018 18:46:28 +0100 Subject: [PATCH] :fire: Speed and memory optimizations (OpenMP !): - OpenMP multiprocessing for dataset generation - remove some temporaries - add march_native flag --- README.md | 7 +++++- ethash.nimble | 4 +++- nim.cfg | 51 +++++++++++++++++++++++++++++++++++++++++++ src/ethash.nim | 9 +++++++- src/mining.nim | 4 ++-- src/proof_of_work.nim | 16 +++++++++----- tests/test_mining.nim | 10 ++++----- 7 files changed, 86 insertions(+), 15 deletions(-) create mode 100644 nim.cfg diff --git a/README.md b/README.md index 23d048d..ae6b508 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,13 @@ Implementation is based on the [spec revision 23 (2017-08-03)](https://github.co An unoptimized mining CPU backend is available through the compile-time flag ``-d:ethash_mining``. It requires compilation through the C++ backend. +## Optimizations +For maximum speed, compile Ethash with `-d:release -d:march_native -d:openmp`. +This will compile Ethash in Nim release mode, with all supported CPU extensions (AVX2 especially) and with OpenMP multiprocessing. +On macOS, OpenMP requires installing GCC-7. It can be done through Homebrew. + ## Original implementation Original Ethereum implementation is available [here](https://github.com/ethereum/ethash). -**Warning ⚠ - License notice**: the original implementation is under GPLv3 or LGPLv3 and must not be used in this project. \ No newline at end of file +**Warning ⚠ - License notice**: the original implementation is under GPLv3 or LGPLv3 and must not be used in this project. 
diff --git a/ethash.nimble b/ethash.nimble index 6cef056..a33bfa9 100644 --- a/ethash.nimble +++ b/ethash.nimble @@ -22,7 +22,9 @@ proc test(name: string, lang: string = "c") = task test, "Run Proof-of-Work tests (without mining)": test "all_tests" -task test_mining, "Run Proof-of-Work and mining tests (test in release mode)": +task test_mining, "Run Proof-of-Work and mining tests (test in release mode + OpenMP + march=native)": switch("define", "release") + switch("define", "openmp") + switch("define", "march_native") switch("define", "ethash_mining") test "all_tests" diff --git a/nim.cfg b/nim.cfg new file mode 100644 index 0000000..fa0e228 --- /dev/null +++ b/nim.cfg @@ -0,0 +1,51 @@ +# Nim eth compilation flag config + +#### From default nim.cfg, somehow it's not taken into account with a custom nim.cfg +@if release or quick: + obj_checks:off + field_checks:off + range_checks:off + bound_checks:off + overflow_checks:off + assertions:off + stacktrace:off + linetrace:off + debugger:off + line_dir:off + dead_code_elim:on +@end + +@if release: + opt:speed +@end + +# Configuration for GCC compiler: +gcc.options.speed = "-O3 -fno-strict-aliasing" +gcc.options.size = "-Os" +@if windows: + gcc.options.debug = "-g3 -O0 -gdwarf-3" +@else: + gcc.options.debug = "-g3 -O0" +@end +gcc.cpp.options.speed = "-O3 -fno-strict-aliasing" +gcc.cpp.options.size = "-Os" +gcc.cpp.options.debug = "-g3 -O0" + +# Configuration for the LLVM Clang compiler: +clang.options.debug = "-g" +clang.options.always = "-w" +clang.options.speed = "-O3" +clang.options.size = "-Os" +####### + + +@if openmp: + # stackTrace:off # Stack traces are already removed selectively in the code + # Otherwise heap alloc from string will crash the program + @if macosx: # Default compiler on Mac is clang without OpenMP and gcc is an alias to clang. + # Use Homebrew GCC instead for OpenMP support. 
GCC (v7), must be properly linked via `brew link gcc` + cc:"gcc" + gcc.exe:"/usr/local/bin/gcc-7" + gcc.linkerexe:"/usr/local/bin/gcc-7" + @end +@end diff --git a/src/ethash.nim b/src/ethash.nim index 92af342..ab5d321 100644 --- a/src/ethash.nim +++ b/src/ethash.nim @@ -1,10 +1,17 @@ # Copyright (c) 2018 Status Research & Development GmbH # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0). +when defined(openmp): + {.passC: "-fopenmp".} + {.passL: "-fopenmp".} + +when defined(march_native): + {.passC: "-march=native".} + import ./proof_of_work export proof_of_work when defined(ethash_mining): # without mining, we can use the C compilation target import ./mining - export mining \ No newline at end of file + export mining diff --git a/src/mining.nim b/src/mining.nim index 49acac5..b9e398f 100644 --- a/src/mining.nim +++ b/src/mining.nim @@ -120,7 +120,7 @@ proc mine*(full_size: Natural, dataset: seq[Hash[512]], header: Hash[256], diffi randomize() # Start with a completely random seed result = uint64 random(high(int)) # TODO: Nim random does not work on uint64 range. - # Also random is deprecate and do not include the end of the range. + # Also random is deprecated in devel and does not include the end of the range. while not result.isValid(difficulty, full_size, dataset, header): - inc(result) # we rely on uin overflow (mod 2^64) here. + inc(result) # we rely on uint overflow (mod 2^64) here. diff --git a/src/proof_of_work.nim b/src/proof_of_work.nim index 3cbce4c..ef78e04 100644 --- a/src/proof_of_work.nim +++ b/src/proof_of_work.nim @@ -126,18 +126,24 @@ proc calc_dataset_item*(cache: seq[Hash[512]], i: Natural): Hash[512] {.noSideEf result = keccak512 mix[] -proc calc_dataset*(full_size: Natural, cache: seq[Hash[512]]): seq[Hash[512]] {.noSideEffect.} = +when defined(openmp): + # Remove stacktraces when using OpenMP, heap alloc from strings will crash. 
+ {.push stacktrace: off.} +proc calc_dataset*(full_size: Natural, cache: seq[Hash[512]]): seq[Hash[512]] = result = newSeq[Hash[512]](full_size div HASH_BYTES) + for i in `||`(0, result.len - 1, "simd"): + # OpenMP loop + result[i] = calc_dataset_item(cache, i) - for i, hash in result.mpairs: - hash = calc_dataset_item(cache, i) +when defined(openmp): + # Restore the stacktrace setting that was pushed before calc_dataset. + {.pop.} # ############################################################################### # Main loop -type HashimotoHash = tuple[mix_digest: Hash[256], value: Hash[256]] -type DatasetLookup = proc(i: Natural): Hash[512] {.noSideEffect.} +type HashimotoHash = tuple[mix_digest, value: Hash[256]] template hashimoto(header: Hash[256], nonce: uint64, diff --git a/tests/test_mining.nim b/tests/test_mining.nim index 01ca10a..1b5fc6d 100644 --- a/tests/test_mining.nim +++ b/tests/test_mining.nim @@ -19,16 +19,16 @@ suite "Test mining": full_size = get_datasize(blck) echo "\nGenerating dataset" - var start = cpuTime() + var start = epochTime() let dag = calc_dataset(full_size, cache) - echo " Done, time taken: ", $(cpuTime() - start), " seconds" + echo " Done, time taken: ", $(epochTime() - start), " seconds" echo "\nStarting mining" - start = cpuTime() + start = epochTime() let mined_nonce = mine(full_size, dag, header, difficulty) - echo " Done, time taken: ", $(cpuTime() - start), " seconds" + echo " Done, time taken: ", $(epochTime() - start), " seconds" - echo "\nUnfortunately we can't really test Ethereu mining as multiple nonces are valid" + echo "\nUnfortunately we can't really test Ethereum mining as multiple nonces are valid" echo "for a set of parameters, so we only test that there is no exception or out of memory" echo "\nThe nonce mined was:" echo mined_nonce