updated hashimoto (Heiko)

Vitalik Buterin 2014-12-08 15:16:12 -05:00
parent e25467da56
commit d4a36a28c4
2 changed files with 225 additions and 74 deletions

View File

@ -6,8 +6,8 @@ hashpower = [float(x) for x in open('hashpower.csv').readlines()]
target = 12
seconds_in_day = 8640
ema_factor = 0.005
f = 30
threshold = 1.5
f = 80
threshold = 1.3
maxadjust = 0.1

View File

@ -1,15 +1,51 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Requirements:
- I/O bound: cycles spent on I/O ≫ cycles spent in cpu
- no sharding: impossible to implement data locality strategy
- easy verification
Thoughts:
Efficient implementations will not switch context (threading) when waiting for data.
But they would leverage all fill buffers and have concurrent memory accesses.
It can be assumed that code can be written in a way to calculate N (<10)
nonces in parallel (on a single core).
So, after all, memory bandwidth rather than latency may be the actual bottleneck.
Can this be solved in a way that aligns with hashing nonces and allows
for a quick verification? Probably not.
Loop unrolling:
Initially proposed dagger sets offer data locality, which allows scaling the algo
across multiple cores/L2 caches. 320MB / 40 sets = 8MB (< L2 cache)
A solution is to make the accessed mem location dependent on the value of the
previous access.
Partial Memory:
If a user only keeps e.g. one third of each DAG in memory (i.e. so that it
fits in L3 cache), he can still answer ~0.5**k of accesses by substituting
them with previous node lookups.
This can be mitigated by
a) making each node deterministically depend on the value of at
least one close high-memory node. Optionally, for quick validation, select
the 2nd dependency from the lower (cached) memory; see produce_dag_k2dr.
b) for DAG creation, using a hashing function which needs more cycles
than multiple memory lookups would, even for GPUs/FPGAs/ASICs.
"""
try:
shathree = __import__('sha3')
except:
shathree = __import__('python_sha3')
import random
import time
def sha3(x):
return decode_int(shathree.sha3_256(x).digest())
def decode_int(s):
o = 0
for i in range(len(s)):
@ -19,110 +55,225 @@ def decode_int(s):
def encode_int(x):
o = ''
for _ in range(32):
for _ in range(64):
o = chr(x % 256) + o
x //= 256
return o
P = 2**256 - 4294968273
def get_daggerset(params, seedset):
return [produce_dag(params, i) for i in seedset]
def update_daggerset(params, daggerset, seedset, seed):
idx = decode_int(seed) % len(daggerset)
seedset[idx] = seed
daggerset[idx] = produce_dag(params, seed)
P = (2**256 - 4294968273)**2
def produce_dag(params, seed):
o = [sha3(seed)]
k, w, d = params.k, params.w, params.d
o = [sha3(seed)**2]
init = o[0]
picker = 1
for i in range(1, params["n"]):
for i in range(1, params.dag_size):
x = 0
picker = (picker * init) % P
#assert picker == pow(init, i, P)
curpicker = picker
for j in range(k): # can be flattened if params are known
pos = curpicker % i
x |= o[pos]
curpicker >>= 10
o.append(pow(x, w, P)) # use any "hash function" here
return o
def quick_calc(params, seed, pos, known={}):
init = sha3(seed)**2
k, w, d = params.k, params.w, params.d
known[0] = init
def calc(i):
if i not in known:
curpicker = pow(init, i, P)
x = 0
for j in range(k):
pos = curpicker % i
x |= calc(pos)
curpicker >>= 10
known[i] = pow(x, w, P)
return known[i]
o = calc(pos)
return o
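# Illustrative consistency check (assumed helper, not part of the original
# commit): quick_calc is expected to reproduce any single produce_dag entry
# without materialising the whole DAG, which light_verify below relies on.
def _quick_calc_matches_dag_example(params, seed='s33d', pos=5):
    dag = produce_dag(params, seed)                        # full DAG (miner side)
    return quick_calc(params, seed, pos, {}) == dag[pos]   # single entry (light side)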
def produce_dag_k2dr(params, seed):
"""
# k=2 and dependency ranges d [:i/d], [-i/d:]
The idea is to prevent partial memory availability, in
which a significant part of the higher mem accesses
can be substituted by two low mem accesses plus some calc.
"""
w, d = params.w, params.d
o = [sha3(seed)**2]
init = o[0]
picker = 1
for i in range(1, params.dag_size):
x = 0
picker = (picker * init) % P
curpicker = picker
for j in range(params["k"]):
x |= o[curpicker % i]
curpicker >>= 10
o.append((x * x) % P) # use any "hash function" here
# higher end
f = i/d + 1
pos = i - f + curpicker % f
x |= o[pos]
curpicker >>= 10
# lower end
pos = f - curpicker % f - 1
x |= o[pos]
o.append(pow(x, w, P)) # use any "hash function" here
return o
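# Worked example for the two ranges above (assumed numbers, not from the
# commit): with i = 1000 and d = 8 (integer division), f = 1000/8 + 1 = 126,
# so the "higher end" dependency falls in [i - f, i - 1] = [874, 999] and the
# "lower end" dependency falls in [0, f - 1] = [0, 125] -- one forced lookup
# near the top of the DAG, one in the easily cached bottom fraction.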
def quick_calc(params, seed, pos):
init = sha3(seed)
known = {0: init}
def calc(p):
if p not in known:
picker = pow(init, p, P)
def quick_calc_k2dr(params, seed, pos, known={}):
# k=2 and dependency ranges d [:i/d], [-i/d:]
init = sha3(seed) ** 2
k, w, d = params.k, params.w, params.d
known[0] = init
def calc(i):
if i not in known:
curpicker = pow(init, i, P)
x = 0
for j in range(params["k"]):
x |= calc(picker % p)
picker >>= 10
known[p] = (x * x) % P
return known[p]
# higher end
f = i/d + 1
pos = i - f + curpicker % f
x |= calc(pos)
curpicker >>= 10
# lower end
pos = f - curpicker % f - 1
x |= calc(pos)
known[i] = pow(x, w, P)
return known[i]
o = calc(pos)
print 'Calculated pos %d with %d accesses' % (pos, len(known))
return o
produce_dag = produce_dag_k2dr
quick_calc = quick_calc_k2dr
def hashimoto(daggerset, params, header, nonce):
rand = sha3(header+encode_int(nonce))
mix = 0
for i in range(40):
shifted_A = rand >> i
dag = daggerset[shifted_A % params["numdags"]]
mix ^= dag[(shifted_A // params["numdags"]) % params["n"]]
return mix ^ rand
def hashimoto(daggerset, lookups, header, nonce):
"""
Requirements:
- I/O bound: cycles spent on I/O ≫ cycles spent in cpu
- no sharding: impossible to implement data locality strategy
# I/O bound:
e.g. lookups = 16
sha3: 12 * 32 ~384 cycles
lookups: 16 * 160 ~2560 cycles # if zero cache
loop: 16 * 3 ~48 cycles
I/O / cpu = 2560/432 = ~ 6/1
# no sharding
lookups depend on previous lookup results
impossible to route computation/lookups based on the initial sha3
"""
num_dags = len(daggerset)
dag_size = len(daggerset[0])
mix = sha3(header + encode_int(nonce)) ** 2
# loop that cannot be unrolled
# dag and dag[pos] depend on the previous lookup
for i in range(lookups):
dag = daggerset[mix % num_dags] # modulo
pos = mix % dag_size # modulo
mix ^= dag[pos] # xor
return mix
def light_hashimoto(params, seedset, header, nonce):
lookups = params.lookups
dag_size = params.dag_size
known = dict((s, {}) for s in seedset) # cache results for each dag
mix = sha3(header + encode_int(nonce)) ** 2
for i in range(lookups):
seed = seedset[mix % len(seedset)]
pos = mix % dag_size
mix ^= quick_calc(params, seed, pos, known[seed])
num_accesses = sum(len(known[s]) for s in seedset)
print 'Calculated %d lookups with %d accesses' % (lookups, num_accesses)
return mix
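# Sketch of the intended full/light equivalence (assumed helper, not in the
# original commit): assuming daggerset was built from seedset via
# get_daggerset, a full client and a light client should agree on the mix.
def _full_light_agree_example(params, seedset, daggerset, header, nonce):
    full = hashimoto(daggerset, params.lookups, header, nonce)
    light = light_hashimoto(params, seedset, header, nonce)
    return full == light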
def light_verify(params, seedset, header, nonce):
return light_hashimoto(params, seedset, header, nonce) \
<= 2**512 / params.diff
def get_daggerset(params, block):
if block.number == 0:
return [produce_dag(params, i) for i in range(params["numdags"])]
elif block.number % params["epochtime"]:
return get_daggerset(block.parent)
else:
o = get_daggerset(block.parent)
o[sha3(block.parent.nonce) % params["numdags"]] = \
produce_dag(params, sha3(block.parent.nonce))
return o
def mine(daggerset, params, header):
nonce = random.randrange(2**50)
def mine(daggerset, params, header, nonce=0):
orignonce = nonce
origtime = time.time()
while 1:
h = hashimoto(daggerset, params, header, nonce)
if h <= 2**256 / params["diff"]:
h = hashimoto(daggerset, params.lookups, header, nonce)
if h <= 2**512 / params.diff:
noncediff = nonce - orignonce
timediff = time.time() - origtime
print 'Found nonce: %d, tested %d nonces in %f seconds (%f per sec)' % \
print 'Found nonce: %d, tested %d nonces in %.2f seconds (%d per sec)' % \
(nonce, noncediff, timediff, noncediff / timediff)
return nonce
nonce += 1
def verify(daggerset, params, header, nonce):
return hashimoto(daggerset, params, header, nonce) \
<= 2**256 / params["diff"]
class params(object):
"""
=== tuning ===
memory: memory requirements ≫ L2/L3/L4 cache sizes
lookups: hashes_per_sec(lookups=0) ≫ hashes_per_sec(lookups_mem_hard)
k: ?
d: higher values enforce memory availability but require more quick_calcs
numdags: so that a dag can be updated in reasonable time
"""
memory = 512 * 1024**2 # memory usage
numdags = 128 # number of dags
dag_size = memory / numdags / 64 # number of 64-byte values per dag
lookups = 512 # memory lookups per hash
diff = 2**14 # higher is harder
k = 2 # num dependencies of each dag value
d = 8 # max distance of first dependency (1/d=fraction of size)
w = 2
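# Worked numbers for the defaults above: dag_size = 512*1024**2 / 128 / 64
# = 65536 values per dag, and 128 dags * 65536 values * 64 bytes = 512 MB
# in total, i.e. the 'memory' target is met exactly.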
def light_hashimoto(seedset, params, header, nonce):
rand = sha3(header+encode_int(nonce))
mix = 0
for i in range(40):
shifted_A = rand >> i
seed = seedset[shifted_A % params["numdags"]]
# can further optimize with cross-round memoization
mix ^= quick_calc(params, seed,
(shifted_A // params["numdags"]) % params["n"])
return mix ^ rand
if __name__ == '__main__':
print dict((k,v) for k,v in params.__dict__.items() if isinstance(v,int))
# odds of a partial storage attack
missing_mem = 0.01
P_partial_mem_success = (1 - missing_mem) ** params.lookups
print 'P(success) per hash with %d%% mem missing: %.2f%%' % (missing_mem*100, P_partial_mem_success*100)
# which actually only results in slower mining, as more hashes must be tried
slowdown = 1 / P_partial_mem_success
print 'x%.1f speedup required to offset %d%% missing mem' % (slowdown, missing_mem*100)
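# Worked numbers for the defaults above (lookups = 512, 1% of memory missing):
# 0.99**512 is roughly 0.0058, so only ~0.6% of nonces can be evaluated from
# partial memory, and roughly a 172x speedup would be needed to break even.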
# create set of DAGs
st = time.time()
seedset = [str(i) for i in range(params.numdags)]
daggerset = get_daggerset(params, seedset)
print 'daggerset with %d dags' % len(daggerset), 'size:', 64*params.dag_size*params.numdags / 1024**2, 'MB'
print 'creation took %.2fs' % (time.time() - st)
# update DAG
st = time.time()
update_daggerset(params, daggerset, seedset, seed='new')
print 'updating 1 dag took %.2fs' % (time.time() - st)
# Mine
for i in range(10):
header = 'test%d' % i
print '\nmining', header
nonce = mine(daggerset, params, header)
# verify
st = time.time()
assert light_verify(params, seedset, header, nonce)
print 'verification took %.2fs' % (time.time() - st)
def light_verify(seedset, params, header, nonce):
return light_hashimoto(seedset, params, header, nonce) \
<= 2**256 / params["diff"]
params = {
"numdags": 40,
"n": 250000,
"diff": 2**14,
"epochtime": 100,
"k": 3
}