From 60a143ad0f8137eaa808ff5156d21734dafce56b Mon Sep 17 00:00:00 2001 From: mratsim Date: Sat, 24 Mar 2018 17:35:47 +0100 Subject: [PATCH] Cosmetic change on the conversion proc + keep a copy of keccak_tiny implementation as benchmark --- README.md | 2 +- benchmarks/proof_of_work_keccak_tiny.nim | 230 ++++++++++++++++++++ src/mining.nim | 2 +- src/private/{casting.nim => conversion.nim} | 13 +- src/private/functional.nim | 7 +- src/proof_of_work.nim | 2 +- 6 files changed, 241 insertions(+), 15 deletions(-) create mode 100644 benchmarks/proof_of_work_keccak_tiny.nim rename src/private/{casting.nim => conversion.nim} (83%) diff --git a/README.md b/README.md index ae6b508..89ee592 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Nim Ethash -[![Build Status (Travis)](https://img.shields.io/travis/status-im/nim-ethash/master.svg?label=Linux%20/%20macOS "Linux/macOS build status (Travis)")](https://travis-ci.org/status-im/nim-ethash)[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg) +[![Build Status (Travis)](https://img.shields.io/travis/status-im/nim-ethash/master.svg?label=Linux%20/%20macOS "Linux/macOS build status (Travis)")](https://travis-ci.org/status-im/nim-ethash)[![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg) A pure Nim implementation of Ethash, the Ethereum proof of work diff --git a/benchmarks/proof_of_work_keccak_tiny.nim b/benchmarks/proof_of_work_keccak_tiny.nim new file mode 100644 index 0000000..f608ed0 --- /dev/null +++ b/benchmarks/proof_of_work_keccak_tiny.nim @@ -0,0 +1,230 @@ +# Copyright (c) 2018 Status Research & Development GmbH +# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0). + +import math, endians, + keccak_tiny + +import ../src/private/[primes, conversion, functional, intmath] +import ../src/data_sizes + +# ############################################################################### +# Definitions + +const + REVISION* = 23 # Based on spec revision 23 + WORD_BYTES = 4 # bytes in word - in Nim we use 64 bits words # TODO check that + DATASET_BYTES_INIT* = 2'u^30 # bytes in dataset at genesis + DATASET_BYTES_GROWTH* = 2'u^23 # dataset growth per epoch + CACHE_BYTES_INIT* = 2'u^24 # bytes in cache at genesis + CACHE_BYTES_GROWTH* = 2'u^17 # cache growth per epoch + CACHE_MULTIPLIER = 1024 # Size of the DAG relative to the cache + EPOCH_LENGTH* = 30000 # blocks per epoch + MIX_BYTES* = 128 # width of mix + HASH_BYTES* = 64 # hash length in bytes + DATASET_PARENTS* = 256 # number of parents of each dataset element + CACHE_ROUNDS* = 3 # number of rounds in cache production + ACCESSES* = 64 # number of accesses in hashimoto loop + +# ############################################################################### +# Parameters + +proc get_cache_size*(block_number: uint): uint {.noSideEffect.}= + result = CACHE_BYTES_INIT + CACHE_BYTES_GROWTH * (block_number div EPOCH_LENGTH) + result -= HASH_BYTES + while (let dm = divmod(result, HASH_BYTES); + dm.rem == 0 and not dm.quot.isPrime): + # In a static lang, checking that the result of a division is prime + # Means checking that reminder == 0 and quotient is prime + result -= 2 * HASH_BYTES + +proc get_data_size*(block_number: uint): uint {.noSideEffect.}= + result = DATASET_BYTES_INIT + DATASET_BYTES_GROWTH * (block_number div EPOCH_LENGTH) + result -= MIX_BYTES + while (let dm = divmod(result, MIX_BYTES); + dm.rem == 0 and not dm.quot.isPrime): + result -= 2 * MIX_BYTES + +# ############################################################################### +# Fetch from lookup tables of 2048 epochs of data sizes and cache sizes + +proc get_datasize_lut*(block_number: Natural): uint64 {.noSideEffect, inline.} = + data_sizes[block_number div EPOCH_LENGTH] + +proc get_cachesize_lut*(block_number: Natural): uint64 {.noSideEffect, inline.} = + cache_sizes[block_number div EPOCH_LENGTH] + +# ############################################################################### +# Cache generation + +proc mkcache*(cache_size: uint64, seed: Hash[256]): seq[Hash[512]] {.noSideEffect.}= + + # Cache size + let n = int(cache_size div HASH_BYTES) + + # Sequentially produce the initial dataset + result = newSeq[Hash[512]](n) + result[0] = keccak512 seed.data + + for i in 1 ..< n: + result[i] = keccak512 result[i-1].data + + # Use a low-round version of randmemohash + for _ in 0 ..< CACHE_ROUNDS: + for i in 0 ..< n: + let + v = result[i].as_u32_words[0] mod n.uint32 + a = result[(i-1+n) mod n].data + b = result[v.int].data + result[i] = keccak512 zipMap(a, b, x xor y) + +# ############################################################################### +# Data aggregation function + +const FNV_PRIME = 0x01000193 + +proc fnv*[T: SomeUnsignedInt or Natural](v1, v2: T): uint32 {.inline, noSideEffect.}= + + # Original formula is ((v1 * FNV_PRIME) xor v2) mod 2^32 + # However contrary to Python and depending on the type T, + # in Nim (v1 * FNV_PRIME) can overflow + # We can't do 2^32 with an int (only 2^32-1) + # and in general (a xor b) mod c != (a mod c) xor (b mod c) + # + # Thankfully + # We know that: + # - (a xor b) and c == (a and c) xor (b and c) + # - for powers of 2: a mod 2^p == a and (2^p - 1) + # - 2^32 - 1 == high(uint32) + + # So casting to uint32 should do the modulo and masking just fine + + (v1.uint32 * FNV_PRIME) xor v2.uint32 + +# ############################################################################### +# Full dataset calculation + +proc calc_dataset_item*(cache: seq[Hash[512]], i: Natural): Hash[512] {.noSideEffect, noInit.} = + let n = cache.len + const r: uint32 = HASH_BYTES div WORD_BYTES + + # Alias for the result value. Interpreted as an array of uint32 words + var mix = cast[ptr array[16, uint32]](addr result) + + mix[] = cache[i mod n].as_u32_words + when system.cpuEndian == littleEndian: + mix[0] = mix[0] xor i.uint32 + else: + mix[high(mix)] = mix[high(mix)] xor i.uint32 + result = keccak512 mix[] + + # FNV with a lots of random cache nodes based on i + for j in 0'u32 ..< DATASET_PARENTS: + let cache_index = fnv(i.uint32 xor j, mix[j mod r]) + mix[] = zipMap(mix[], cache[cache_index.int mod n].as_u32_words, fnv(x, y)) + + result = keccak512 mix[] + +when defined(openmp): + # Remove stacktraces when using OpenMP, heap alloc from strings will crash. + {.push stacktrace: off.} +proc calc_dataset*(full_size: Natural, cache: seq[Hash[512]]): seq[Hash[512]] = + + result = newSeq[Hash[512]](full_size div HASH_BYTES) + for i in `||`(0, result.len - 1, "simd"): + # OpenMP loop + result[i] = calc_dataset_item(cache, i) + +when defined(openmp): + # Remove stacktraces when using OpenMP, heap alloc from strings will crash. + {.pop.} + +# ############################################################################### +# Main loop + +type HashimotoHash = tuple[mix_digest, value: Hash[256]] + +template hashimoto(header: Hash[256], + nonce: uint64, + full_size: Natural, + dataset_lookup_p: untyped, + dataset_lookup_p1: untyped, + result: var HashimotoHash + ) = + let + n = uint32 full_size div HASH_BYTES + w = uint32 MIX_BYTES div WORD_BYTES + mixhashes = uint32 MIX_BYTES div HASH_BYTES + + assert full_size mod HASH_BYTES == 0 + assert MIX_BYTES mod HASH_BYTES == 0 + + # combine header+nonce into a 64 byte seed + var s{.noInit.}: Hash[512] + let s_bytes = cast[ptr array[64, byte]](addr s) # Alias for to interpret s as a byte array + let s_words = cast[ptr array[16, uint32]](addr s) # Alias for to interpret s as an uint32 array + + s_bytes[][0..<32] = header.data # We first populate the first 40 bytes of s with the concatenation + # In template we need to dereference first otherwise it's not considered as var + + var nonceLE{.noInit.}: array[8, byte] # the nonce should be concatenated with its LITTLE ENDIAN representation + littleEndian64(addr nonceLE, unsafeAddr nonce) + s_bytes[][32..<40] = cast[array[8,byte]](nonceLE) + + s = keccak_512 s_bytes[][0..<40] # TODO: Does this allocate a seq? + + # start the mix with replicated s + assert MIX_BYTES div HASH_BYTES == 2 + var mix{.noInit.}: array[32, uint32] + mix[0..<16] = s_words[] + mix[16..<32] = s_words[] + + # mix in random dataset nodes + for i in 0'u32 ..< ACCESSES: + let p{.inject.} = fnv(i xor s_words[0], mix[i mod w]) mod (n div mixhashes) * mixhashes + let p1{.inject.} = p + 1 + + # Unrolled: for j in range(MIX_BYTES / HASH_BYTES): => for j in 0 ..< 2 + var newdata{.noInit.}: type mix + newdata[0..<16] = cast[array[16, uint32]](dataset_lookup_p) + newdata[16..<32] = cast[array[16, uint32]](dataset_lookup_p1) + + mix = zipMap(mix, newdata, fnv(x, y)) + + # compress mix + # ⚠⚠ Warning ⚠⚠: Another bigEndian littleEndian issue? + # It doesn't seem like the uint32 in cmix need to be changed to big endian + # cmix is an alias to the result.mix_digest + let cmix = cast[ptr array[8, uint32]](addr result.mix_digest) + for i in countup(0, mix.len - 1, 4): + cmix[i div 4] = mix[i].fnv(mix[i+1]).fnv(mix[i+2]).fnv(mix[i+3]) + + var concat{.noInit.}: array[64 + 32, byte] + concat[0..<64] = s_bytes[] + concat[64..<96] = cast[array[32, byte]](result.mix_digest) + result.value = keccak_256(concat) + +proc hashimoto_light*(full_size:Natural, cache: seq[Hash[512]], + header: Hash[256], nonce: uint64): HashimotoHash {.noSideEffect.} = + + hashimoto(header, + nonce, + full_size, + calc_data_set_item(cache, p), + calc_data_set_item(cache, p1), + result) + +proc hashimoto_full*(full_size:Natural, dataset: seq[Hash[512]], + header: Hash[256], nonce: uint64): HashimotoHash {.noSideEffect.} = + # TODO spec mentions full_size but I don't think we need it (retrieve it from dataset.len) + hashimoto(header, + nonce, + full_size, + dataset[int(p)], + dataset[int(p1)], + result) +# ############################################################################### +# Defining the seed hash + +proc get_seedhash*(block_number: uint64): Hash[256] {.noSideEffect.} = + for i in 0 ..< int(block_number div EPOCH_LENGTH): + result = keccak256 result.data diff --git a/src/mining.nim b/src/mining.nim index b9e398f..d49d816 100644 --- a/src/mining.nim +++ b/src/mining.nim @@ -1,7 +1,7 @@ # Copyright (c) 2018 Status Research & Development GmbH # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0). -import ./proof_of_work, ./private/casting +import ./proof_of_work, ./private/conversion import endians, random, math proc mulCarry(a, b: uint64): tuple[carry, unit: uint64] = diff --git a/src/private/casting.nim b/src/private/conversion.nim similarity index 83% rename from src/private/casting.nim rename to src/private/conversion.nim index 3f46f49..c13e02f 100644 --- a/src/private/casting.nim +++ b/src/private/conversion.nim @@ -7,9 +7,6 @@ proc as_u32_words*[N: static[int]](x: Hash[N]): array[N div 32, uint32] {.inline # Convert an hash to its uint32 representation cast[type result](x) -type ByteArrayBE*[N: static[int]] = array[N, byte] - ## A byte array that stores bytes in big-endian order - proc readHexChar(c: char): byte {.noSideEffect.}= ## Converts an hex char to a byte case c @@ -19,7 +16,7 @@ proc readHexChar(c: char): byte {.noSideEffect.}= else: raise newException(ValueError, $c & "is not a hexademical character") -proc hexToByteArrayBE*[N: static[int]](hexStr: string): ByteArrayBE[N] {.noSideEffect, noInit.}= +proc hexToByteArrayBE*[N: static[int]](hexStr: string): array[N, byte] {.noSideEffect, noInit.}= ## Read an hex string and store it in a Byte Array in Big-Endian order var i = 0 if hexStr[i] == '0' and (hexStr[i+1] == 'x' or hexStr[i+1] == 'X'): @@ -44,7 +41,7 @@ proc hexToSeqBytesBE*(hexStr: string): seq[byte] {.noSideEffect.}= result[i] = hexStr[2*i].readHexChar shl 4 or hexStr[2*i+1].readHexChar inc(i) -proc toHex*[N: static[int]](ba: ByteArrayBE[N]): string {.noSideEffect.}= +proc toHex*[N: static[int]](ba: array[N, byte]): string {.noSideEffect.}= ## Convert a big-endian byte array to its hex representation ## Output is in lowercase @@ -68,7 +65,7 @@ proc toHex*(ba: seq[byte]): string {.noSideEffect, noInit.}= result[2*i] = hexChars[int ba[i] shr 4 and 0xF] result[2*i+1] = hexChars[int ba[i] and 0xF] -proc toByteArrayBE*[T: SomeInteger](num: T): ByteArrayBE[T.sizeof] {.noSideEffect, noInit, inline.}= +proc toByteArrayBE*[T: SomeInteger](num: T): array[T.sizeof, byte] {.noSideEffect, noInit, inline.}= ## Convert an int (in native host endianness) to a big-endian byte array # Note: only works on devel @@ -81,5 +78,5 @@ proc toByteArrayBE*[T: SomeInteger](num: T): ByteArrayBE[T.sizeof] {.noSideEffec for i in 0 ..< N: result[i] = byte(num shr T((N-1-i) * 8)) -proc toByteArrayBE*[N: static[int]](x: Hash[N]): ByteArrayBE[N div 8] {.inline, noSideEffect, noInit.}= - cast[type result](x.data) \ No newline at end of file +proc toByteArrayBE*[N: static[int]](x: Hash[N]): array[N div 8, byte] {.inline, noSideEffect, noInit.}= + cast[type result](x.data) diff --git a/src/private/functional.nim b/src/private/functional.nim index 31dbb48..c82b28d 100644 --- a/src/private/functional.nim +++ b/src/private/functional.nim @@ -4,8 +4,6 @@ # Pending https://github.com/alehander42/zero-functional/issues/6 # A zip + map that avoids heap allocation -import ./casting - iterator enumerateZip[N: static[int], T, U]( a: array[N, T], b: array[N, U] @@ -28,10 +26,11 @@ template zipMap*[N: static[int], T, U]( op )) - var result: array[N, outType] + {.pragma: align64, codegenDecl: "$# $# __attribute__((aligned(64)))".} + var result{.noInit, align64.}: array[N, outType] for i, x {.inject.}, y {.inject.} in enumerateZip(a, b): - {.unroll: 4.} + {.unroll: 4.} # This is a no-op at the moment result[i] = op result diff --git a/src/proof_of_work.nim b/src/proof_of_work.nim index ef78e04..661995d 100644 --- a/src/proof_of_work.nim +++ b/src/proof_of_work.nim @@ -4,7 +4,7 @@ import math, endians, keccak_tiny -import ./private/[primes, casting, functional, intmath] +import ./private/[primes, conversion, functional, intmath] export toHex, hexToByteArrayBE, hexToSeqBytesBE, toByteArrayBE # debug functions export keccak_tiny