From 60a143ad0f8137eaa808ff5156d21734dafce56b Mon Sep 17 00:00:00 2001
From: mratsim <mamy.ratsimbazafy_dev@gadz.org>
Date: Sat, 24 Mar 2018 17:35:47 +0100
Subject: [PATCH] Cosmetic change on the conversion proc + keep a copy of
 keccak_tiny implementation as benchmark

---
 README.md                                   |   2 +-
 benchmarks/proof_of_work_keccak_tiny.nim    | 230 ++++++++++++++++++++
 src/mining.nim                              |   2 +-
 src/private/{casting.nim => conversion.nim} |  13 +-
 src/private/functional.nim                  |   7 +-
 src/proof_of_work.nim                       |   2 +-
 6 files changed, 241 insertions(+), 15 deletions(-)
 create mode 100644 benchmarks/proof_of_work_keccak_tiny.nim
 rename src/private/{casting.nim => conversion.nim} (83%)

diff --git a/README.md b/README.md
index ae6b508..89ee592 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Nim Ethash
 
-[![Build Status (Travis)](https://img.shields.io/travis/status-im/nim-ethash/master.svg?label=Linux%20/%20macOS "Linux/macOS build status (Travis)")](https://travis-ci.org/status-im/nim-ethash)[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg)
+[![Build Status (Travis)](https://img.shields.io/travis/status-im/nim-ethash/master.svg?label=Linux%20/%20macOS "Linux/macOS build status (Travis)")](https://travis-ci.org/status-im/nim-ethash)[![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg)
 
 A pure Nim implementation of Ethash, the Ethereum proof of work
 
diff --git a/benchmarks/proof_of_work_keccak_tiny.nim b/benchmarks/proof_of_work_keccak_tiny.nim
new file mode 100644
index 0000000..f608ed0
--- /dev/null
+++ b/benchmarks/proof_of_work_keccak_tiny.nim
@@ -0,0 +1,230 @@
+# Copyright (c) 2018 Status Research & Development GmbH
+# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
+
+import  math, endians,
+        keccak_tiny
+
+import  ../src/private/[primes, conversion, functional, intmath]
+import ../src/data_sizes
+
+# ###############################################################################
+# Definitions
+
+const
+  REVISION* = 23                     # Based on spec revision 23
+  WORD_BYTES = 4                     # bytes in word - words are handled as uint32 (32 bits) in this file
+  DATASET_BYTES_INIT* = 2'u^30       # bytes in dataset at genesis
+  DATASET_BYTES_GROWTH* = 2'u^23     # dataset growth per epoch
+  CACHE_BYTES_INIT* = 2'u^24         # bytes in cache at genesis
+  CACHE_BYTES_GROWTH* = 2'u^17       # cache growth per epoch
+  CACHE_MULTIPLIER = 1024            # Size of the DAG relative to the cache
+  EPOCH_LENGTH* = 30000              # blocks per epoch
+  MIX_BYTES* = 128                   # width of mix
+  HASH_BYTES* = 64                   # hash length in bytes
+  DATASET_PARENTS* = 256             # number of parents of each dataset element
+  CACHE_ROUNDS* = 3                  # number of rounds in cache production
+  ACCESSES* = 64                     # number of accesses in hashimoto loop
+
+# ###############################################################################
+# Parameters
+
+proc get_cache_size*(block_number: uint): uint {.noSideEffect.}=
+  result = CACHE_BYTES_INIT + CACHE_BYTES_GROWTH * (block_number div EPOCH_LENGTH)
+  result -= HASH_BYTES
+  while (let dm = divmod(result, HASH_BYTES);
+        dm.rem == 0 and not dm.quot.isPrime):
+        # In a statically typed language, checking that the result of a division is prime
+        # means checking that remainder == 0 and quotient is prime
+    result -= 2 * HASH_BYTES
+
+proc get_data_size*(block_number: uint): uint {.noSideEffect.}=
+  result = DATASET_BYTES_INIT + DATASET_BYTES_GROWTH * (block_number div EPOCH_LENGTH)
+  result -= MIX_BYTES
+  while (let dm = divmod(result, MIX_BYTES);
+        dm.rem == 0 and not dm.quot.isPrime):
+    result -= 2 * MIX_BYTES
+
+# ###############################################################################
+# Fetch from lookup tables of 2048 epochs of data sizes and cache sizes
+
+proc get_datasize_lut*(block_number: Natural): uint64 {.noSideEffect, inline.} =
+  data_sizes[block_number div EPOCH_LENGTH]
+
+proc get_cachesize_lut*(block_number: Natural): uint64 {.noSideEffect, inline.} =
+  cache_sizes[block_number div EPOCH_LENGTH]
+
+# ###############################################################################
+# Cache generation
+
+proc mkcache*(cache_size: uint64, seed: Hash[256]): seq[Hash[512]] {.noSideEffect.}=
+
+  # Cache size
+  let n = int(cache_size div HASH_BYTES)
+
+  # Sequentially produce the initial dataset
+  result = newSeq[Hash[512]](n)
+  result[0] = keccak512 seed.data
+
+  for i in 1 ..< n:
+    result[i] = keccak512 result[i-1].data
+
+  # Use a low-round version of randmemohash
+  for _ in 0 ..< CACHE_ROUNDS:
+    for i in 0 ..< n:
+      let
+        v = result[i].as_u32_words[0] mod n.uint32
+        a = result[(i-1+n) mod n].data
+        b = result[v.int].data
+      result[i] = keccak512 zipMap(a, b, x xor y)
+
+# ###############################################################################
+# Data aggregation function
+
+const FNV_PRIME = 0x01000193
+
+proc fnv*[T: SomeUnsignedInt or Natural](v1, v2: T): uint32 {.inline, noSideEffect.}=
+
+  # Original formula is ((v1 * FNV_PRIME) xor v2) mod 2^32
+  # However contrary to Python and depending on the type T,
+  # in Nim (v1 * FNV_PRIME) can overflow
+  # We can't do 2^32 with an int (only 2^32-1)
+  # and in general (a xor b) mod c != (a mod c) xor (b mod c)
+  #
+  # Thankfully
+  # We know that:
+  #   - (a xor b) and c == (a and c) xor (b and c)
+  #   - for powers of 2: a mod 2^p == a and (2^p - 1)
+  #   - 2^32 - 1 == high(uint32)
+
+  # So casting to uint32 should do the modulo and masking just fine
+
+  (v1.uint32 * FNV_PRIME) xor v2.uint32
+
+# ###############################################################################
+# Full dataset calculation
+
+proc calc_dataset_item*(cache: seq[Hash[512]], i: Natural): Hash[512] {.noSideEffect, noInit.} =
+  let n = cache.len
+  const r: uint32 = HASH_BYTES div WORD_BYTES
+
+  # Alias for the result value. Interpreted as an array of uint32 words
+  var mix = cast[ptr array[16, uint32]](addr result)
+
+  mix[] = cache[i mod n].as_u32_words
+  when system.cpuEndian == littleEndian:
+    mix[0] = mix[0] xor i.uint32
+  else:
+    mix[high(mix)] = mix[high(mix)] xor i.uint32
+  result = keccak512 mix[]
+
+  # FNV with a lot of random cache nodes based on i
+  for j in 0'u32 ..< DATASET_PARENTS:
+    let cache_index = fnv(i.uint32 xor j, mix[j mod r])
+    mix[] = zipMap(mix[], cache[cache_index.int mod n].as_u32_words, fnv(x, y))
+
+  result = keccak512 mix[]
+
+when defined(openmp):
+  # Remove stacktraces when using OpenMP, heap alloc from strings will crash.
+  {.push stacktrace: off.}
+proc calc_dataset*(full_size: Natural, cache: seq[Hash[512]]): seq[Hash[512]] =
+
+  result = newSeq[Hash[512]](full_size div HASH_BYTES)
+  for i in `||`(0, result.len - 1, "simd"):
+    # OpenMP loop
+    result[i] = calc_dataset_item(cache, i)
+
+when defined(openmp):
+  # Restore the stacktrace setting that was pushed before calc_dataset.
+  {.pop.}
+
+# ###############################################################################
+# Main loop
+
+type HashimotoHash = tuple[mix_digest, value: Hash[256]]
+
+template hashimoto(header: Hash[256],
+              nonce: uint64,
+              full_size: Natural,
+              dataset_lookup_p: untyped,
+              dataset_lookup_p1: untyped,
+              result: var HashimotoHash
+              ) =
+  let
+    n = uint32 full_size div HASH_BYTES
+    w = uint32 MIX_BYTES div WORD_BYTES
+    mixhashes = uint32 MIX_BYTES div HASH_BYTES
+
+  assert full_size mod HASH_BYTES == 0
+  assert MIX_BYTES mod HASH_BYTES == 0
+
+  # combine header+nonce into a 64 byte seed
+  var s{.noInit.}: Hash[512]
+  let s_bytes = cast[ptr array[64, byte]](addr s)   # Alias to interpret s as a byte array
+  let s_words = cast[ptr array[16, uint32]](addr s) # Alias to interpret s as a uint32 array
+
+  s_bytes[][0..<32] = header.data                   # We first populate the first 40 bytes of s with the concatenation of header and nonce
+                                                    # In template we need to dereference first otherwise it's not considered as var
+
+  var nonceLE{.noInit.}: array[8, byte]             # the nonce is appended to the header in its LITTLE ENDIAN representation
+  littleEndian64(addr nonceLE, unsafeAddr nonce)
+  s_bytes[][32..<40] = cast[array[8,byte]](nonceLE)
+
+  s = keccak_512 s_bytes[][0..<40]                  # TODO: Does this allocate a seq?
+
+  # start the mix with replicated s
+  assert MIX_BYTES div HASH_BYTES == 2
+  var mix{.noInit.}: array[32, uint32]
+  mix[0..<16] = s_words[]
+  mix[16..<32] = s_words[]
+
+  # mix in random dataset nodes
+  for i in 0'u32 ..< ACCESSES:
+    let p{.inject.} = fnv(i xor s_words[0], mix[i mod w]) mod (n div mixhashes) * mixhashes
+    let p1{.inject.} = p + 1
+
+    # Unrolled: for j in range(MIX_BYTES / HASH_BYTES): => for j in 0 ..< 2
+    var newdata{.noInit.}: type mix
+    newdata[0..<16] = cast[array[16, uint32]](dataset_lookup_p)
+    newdata[16..<32] = cast[array[16, uint32]](dataset_lookup_p1)
+
+    mix = zipMap(mix, newdata, fnv(x, y))
+
+  # compress mix
+  # ⚠⚠ Warning ⚠⚠: Another bigEndian littleEndian issue?
+  # It doesn't seem like the uint32 in cmix need to be changed to big endian
+  # cmix is an alias to the result.mix_digest
+  let cmix = cast[ptr array[8, uint32]](addr result.mix_digest)
+  for i in countup(0, mix.len - 1, 4):
+    cmix[i div 4] = mix[i].fnv(mix[i+1]).fnv(mix[i+2]).fnv(mix[i+3])
+
+  var concat{.noInit.}: array[64 + 32, byte]
+  concat[0..<64] = s_bytes[]
+  concat[64..<96] = cast[array[32, byte]](result.mix_digest)
+  result.value = keccak_256(concat)
+
+proc hashimoto_light*(full_size:Natural, cache: seq[Hash[512]],
+                      header: Hash[256], nonce: uint64): HashimotoHash {.noSideEffect.} =
+
+  hashimoto(header,
+            nonce,
+            full_size,
+            calc_data_set_item(cache, p),
+            calc_data_set_item(cache, p1),
+            result)
+
+proc hashimoto_full*(full_size:Natural, dataset: seq[Hash[512]],
+                    header: Hash[256], nonce: uint64): HashimotoHash {.noSideEffect.} =
+  # TODO spec mentions full_size but I don't think we need it (retrieve it from dataset.len)
+  hashimoto(header,
+            nonce,
+            full_size,
+            dataset[int(p)],
+            dataset[int(p1)],
+            result)
+# ###############################################################################
+# Defining the seed hash
+
+proc get_seedhash*(block_number: uint64): Hash[256] {.noSideEffect.} =
+  for i in 0 ..< int(block_number div EPOCH_LENGTH):
+    result = keccak256 result.data
diff --git a/src/mining.nim b/src/mining.nim
index b9e398f..d49d816 100644
--- a/src/mining.nim
+++ b/src/mining.nim
@@ -1,7 +1,7 @@
 # Copyright (c) 2018 Status Research & Development GmbH
 # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
 
-import ./proof_of_work, ./private/casting
+import ./proof_of_work, ./private/conversion
 import endians, random, math
 
 proc mulCarry(a, b: uint64): tuple[carry, unit: uint64] =
diff --git a/src/private/casting.nim b/src/private/conversion.nim
similarity index 83%
rename from src/private/casting.nim
rename to src/private/conversion.nim
index 3f46f49..c13e02f 100644
--- a/src/private/casting.nim
+++ b/src/private/conversion.nim
@@ -7,9 +7,6 @@ proc as_u32_words*[N: static[int]](x: Hash[N]): array[N div 32, uint32] {.inline
   # Convert an hash to its uint32 representation
   cast[type result](x)
 
-type ByteArrayBE*[N: static[int]] = array[N, byte]
-  ## A byte array that stores bytes in big-endian order
-
 proc readHexChar(c: char): byte {.noSideEffect.}=
   ## Converts an hex char to a byte
   case c
@@ -19,7 +16,7 @@ proc readHexChar(c: char): byte {.noSideEffect.}=
   else:
     raise newException(ValueError, $c & "is not a hexademical character")
 
-proc hexToByteArrayBE*[N: static[int]](hexStr: string): ByteArrayBE[N] {.noSideEffect, noInit.}=
+proc hexToByteArrayBE*[N: static[int]](hexStr: string): array[N, byte] {.noSideEffect, noInit.}=
   ## Read an hex string and store it in a Byte Array in Big-Endian order
   var i = 0
   if hexStr[i] == '0' and (hexStr[i+1] == 'x' or hexStr[i+1] == 'X'):
@@ -44,7 +41,7 @@ proc hexToSeqBytesBE*(hexStr: string): seq[byte] {.noSideEffect.}=
     result[i] = hexStr[2*i].readHexChar shl 4 or hexStr[2*i+1].readHexChar
     inc(i)
 
-proc toHex*[N: static[int]](ba: ByteArrayBE[N]): string {.noSideEffect.}=
+proc toHex*[N: static[int]](ba: array[N, byte]): string {.noSideEffect.}=
   ## Convert a big-endian byte array to its hex representation
   ## Output is in lowercase
 
@@ -68,7 +65,7 @@ proc toHex*(ba: seq[byte]): string {.noSideEffect, noInit.}=
     result[2*i] = hexChars[int ba[i] shr 4 and 0xF]
     result[2*i+1] = hexChars[int ba[i] and 0xF]
 
-proc toByteArrayBE*[T: SomeInteger](num: T): ByteArrayBE[T.sizeof] {.noSideEffect, noInit, inline.}=
+proc toByteArrayBE*[T: SomeInteger](num: T): array[T.sizeof, byte] {.noSideEffect, noInit, inline.}=
   ## Convert an int (in native host endianness) to a big-endian byte array
   # Note: only works on devel
 
@@ -81,5 +78,5 @@ proc toByteArrayBE*[T: SomeInteger](num: T): ByteArrayBE[T.sizeof] {.noSideEffec
     for i in 0 ..< N:
       result[i] = byte(num shr T((N-1-i) * 8))
 
-proc toByteArrayBE*[N: static[int]](x: Hash[N]): ByteArrayBE[N div 8] {.inline, noSideEffect, noInit.}=
-  cast[type result](x.data)
\ No newline at end of file
+proc toByteArrayBE*[N: static[int]](x: Hash[N]): array[N div 8, byte] {.inline, noSideEffect, noInit.}=
+  cast[type result](x.data)
diff --git a/src/private/functional.nim b/src/private/functional.nim
index 31dbb48..c82b28d 100644
--- a/src/private/functional.nim
+++ b/src/private/functional.nim
@@ -4,8 +4,6 @@
 
 # Pending https://github.com/alehander42/zero-functional/issues/6
 # A zip + map that avoids heap allocation
-import ./casting
-
 iterator enumerateZip[N: static[int], T, U](
                       a: array[N, T],
                       b: array[N, U]
@@ -28,10 +26,11 @@ template zipMap*[N: static[int], T, U](
       op
   ))
 
-  var result: array[N, outType]
+  {.pragma: align64, codegenDecl: "$# $# __attribute__((aligned(64)))".}
+  var result{.noInit, align64.}: array[N, outType]
 
   for i, x {.inject.}, y {.inject.} in enumerateZip(a, b):
-    {.unroll: 4.}
+    {.unroll: 4.} # This is a no-op at the moment
     result[i] = op
 
   result
diff --git a/src/proof_of_work.nim b/src/proof_of_work.nim
index ef78e04..661995d 100644
--- a/src/proof_of_work.nim
+++ b/src/proof_of_work.nim
@@ -4,7 +4,7 @@
 import  math, endians,
         keccak_tiny
 
-import  ./private/[primes, casting, functional, intmath]
+import  ./private/[primes, conversion, functional, intmath]
 export toHex, hexToByteArrayBE, hexToSeqBytesBE, toByteArrayBE # debug functions
 export keccak_tiny