research/mining/hashimoto.py

258 lines
8.4 KiB
Python
Raw Normal View History

2014-12-08 15:16:12 -05:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Requirements:
- I/O bound: cycles spent on I/O ≫ cycles spent in cpu
- no sharding: impossible to implement data locality strategy
- easy verification
Thoughts:
Efficient implementations will not switch context (threading) when waiting for data.
But they would leverage all fill buffers and have concurrent memory accesses.
It can be assumed, that code can be written in a way to calculate N (<10)
nonces in parallel (on a single core).
So, after all maybe memory bandwidth rather than latency is the actual bottleneck.
Can this be solved in a way that aligns with hashing nonces and allows
for a quick verification? Probably not.
Loop unrolling:
Initially proposed dagger sets offer data locality, which allows scaling the algo
across multiple cores/L2 caches. 320MB / 40sets = 8MB (< L2 cache)
A solution is to make the accessed memory location dependent on the value of the
previous access.
Partial Memory:
If a user only keeps e.g. one third of each DAG in memory (i.e. to
have it in L3 cache), he can still answer ~0.5**k of accesses by substituting
them through previous node lookups.
This can be mitigated by
a) making each node deterministically depend on the value of at
least one close high memory node. Optionally for quick validation, select
the 2nd dependency for the lower (cached) memory. see produce_dag_k2dr
b) for DAG creation, using a hashing function which needs more cycles
than multiple memory lookups would - even for GPUs/FPGAs/ASICs.
"""
2014-10-24 03:56:42 -04:00
import time
2014-12-09 09:30:40 -05:00
from pyethereum import utils
2014-10-24 03:56:42 -04:00
def decode_int(s):
    """Interpret the string ``s`` as a big-endian base-256 number.

    Each character contributes ``ord(char)`` as one digit; the empty
    string decodes to 0.
    """
    value = 0
    for char in s:
        # shift the accumulated digits up one base-256 place, then add
        value = value * 256 + ord(char)
    return value
def encode_int(x):
    """Encode ``x`` as a fixed-width, 64-character big-endian string.

    Only the 64 least-significant base-256 digits are kept; smaller
    values are left-padded with ``'\\x00'``.
    """
    digits = []
    for _ in range(64):
        # peel off the least-significant base-256 digit
        x, low = divmod(x, 256)
        digits.append(chr(low))
    # digits were collected least-significant first
    digits.reverse()
    return ''.join(digits)
2014-12-08 15:16:12 -05:00
2014-12-09 09:30:40 -05:00
def sha3(x):
    """Hash ``x`` with ``utils.sha3`` and return the digest as an integer.

    The digest is converted with ``decode_int`` (big-endian, base 256).
    """
    digest = utils.sha3(x)
    return decode_int(digest)
def cantor_pair(x, y, p):
    """Return the Cantor pairing of integers ``x`` and ``y``, reduced mod ``p``.

    Computes ``((x + y) * (x + y + 1) // 2 + y) % p``.

    Floor division is spelled explicitly: the product of two consecutive
    integers is always even, so ``//`` is exact, and the result stays an
    exact integer even when true division is in effect (where ``/`` would
    yield a float and silently lose precision for large operands).
    """
    total = x + y
    return (total * (total + 1) // 2 + y) % p
2014-12-08 15:16:12 -05:00
def get_daggerset(params, seedset):
    """Return a list with one DAG per seed in ``seedset``, in the same order.

    Each DAG is built by ``produce_dag(params, seed)``.
    """
    dags = []
    for seed in seedset:
        dags.append(produce_dag(params, seed))
    return dags
2014-12-09 09:30:40 -05:00
2014-12-08 15:16:12 -05:00
def update_daggerset(params, daggerset, seedset, seed):
    """Replace one DAG (and its recorded seed) in place.

    The slot to overwrite is ``decode_int(seed) % len(daggerset)``.  Both
    ``seedset`` and ``daggerset`` are mutated; nothing is returned.
    """
    slot = decode_int(seed) % len(daggerset)
    # record the seed first, then rebuild the DAG stored at the same slot
    seedset[slot] = seed
    daggerset[slot] = produce_dag(params, seed)
2014-10-24 03:56:42 -04:00
def produce_dag(params, seed):
    """Build one DAG, a list of ``params.dag_size`` integers, from ``seed``.

    Node 0 is ``sha3(seed)``.  Every later node ``i`` XORs together several
    earlier nodes and raises the result to a fixed power modulo
    ``params.p``.  Parent indices are taken from successive 10-bit right
    shifts of ``sha3(seed) ** i mod params.p``:

    - ``i < params.h_threshold``: ``params.k`` parents picked among nodes
      ``[0, i)``, exponent ``params.w``;
    - otherwise: ``params.hk`` parents picked among nodes
      ``[0, params.h_threshold)``, exponent ``params.hw``.

    A one-line size report is printed before the DAG is generated.
    """
    low_parents, high_parents = params.k, params.hk
    low_exp, high_exp = params.w, params.hw
    size, modulus, threshold = params.dag_size, params.p, params.h_threshold
    print('Producing dag of size %d (%d memory)' % (size, size * params.wordsz))
    root = sha3(seed)
    nodes = [root]
    selector = 1
    for index in range(1, size):
        # selector == root ** index (mod modulus), maintained incrementally
        selector = (selector * root) % modulus
        if index < threshold:
            parents, span, exponent = low_parents, index, low_exp
        else:
            parents, span, exponent = high_parents, threshold, high_exp
        acc = 0
        bits = selector
        for _ in range(parents):  # can be flattened if params are known
            acc ^= nodes[bits % span]
            bits >>= 10
        nodes.append(pow(acc, exponent, modulus))  # use any "hash function" here
    return nodes
2014-10-24 03:56:42 -04:00
2014-12-09 09:30:40 -05:00
def quick_calc(params, seed, pos, known=None):
    """Compute the DAG node at index ``pos`` for ``seed`` without the full DAG.

    Only the nodes that ``pos`` transitively depends on are evaluated
    (recursively) and memoised in ``known``, a dict mapping node index to
    value.  A caller-supplied ``known`` is updated in place; entry 0 is
    always (re)set to ``sha3(seed) % params.p``.  The cache is keyed by
    index only, so a dict should not be shared between different seeds.

    Prints how many entries the cache holds afterwards and returns the
    node value.
    """
    modulus, threshold = params.p, params.h_threshold
    low_rule = (params.k, params.w)
    high_rule = (params.hk, params.hw)
    root = sha3(seed) % modulus
    if known is None:
        known = {}
    known[0] = root

    def node(index):
        # memoised: every index is evaluated at most once
        if index in known:
            return known[index]
        bits = pow(root, index, modulus)
        if index < threshold:
            (parents, exponent), span = low_rule, index
        else:
            (parents, exponent), span = high_rule, threshold
        acc = 0
        for _ in range(parents):
            acc ^= node(bits % span)
            bits >>= 10
        known[index] = pow(acc, exponent, modulus)
        return known[index]

    result = node(pos)
    print('Calculated index %d in %d lookups' % (pos, len(known)))
    return result
2014-12-09 09:30:40 -05:00
def hashimoto(params, daggerset, header, nonce):
    """Mix ``params.lookups`` DAG values into a hash of ``header`` and ``nonce``.

    Requirements:
    - I/O bound: cycles spent on I/O >> (much greater than) cycles spent in cpu
    - no sharding: impossible to implement data locality strategy

    # I/O bound:
    e.g. lookups = 16
    sha3:    12 * 32   ~384 cycles
    lookups: 16 * 160  ~2560 cycles  # if zero cache
    loop:    16 * 3    ~48 cycles
    I/O / cpu = 2560/432 = ~ 6/1

    # no sharding
    lookups depend on previous lookup results
    impossible to route computation/lookups based on the initial sha3

    The starting value is ``sha3(header + encode_int(nonce)) % params.p``.
    Each round selects a DAG and a position from ``v`` (the running mix if
    ``params.is_serial``, otherwise the starting value shifted right by the
    round number) and XORs the value found there into the mix.  The header,
    nonce and final mix are printed, and the mix is returned.
    """
    rand = sha3(header + encode_int(nonce)) % params.p
    mix = rand
    # loop that cannot be unrolled:
    # dag and dag[pos] depend on the previous lookup
    for step in range(params.lookups):
        if params.is_serial:
            selector = mix
        else:
            selector = rand >> step
        dag = daggerset[selector % params.num_dags]
        position = selector % params.dag_size
        mix ^= dag[position]
    print(' '.join(str(part) for part in (header, nonce, mix)))
    return mix
2014-10-24 03:56:42 -04:00
2014-12-09 09:30:40 -05:00
2014-12-08 15:16:12 -05:00
def light_hashimoto(params, seedset, header, nonce):
    """Run the same mixing loop as ``hashimoto`` without stored DAGs.

    Each required DAG value is recomputed on demand with ``quick_calc``
    from the seed at ``seedset[v % len(seedset)]``.  Prints the number of
    lookups performed, then the header, nonce and final mix, and returns
    the mix.
    """
    rand = sha3(header + encode_int(nonce)) % params.p
    mix = rand
    for step in range(params.lookups):
        if params.is_serial:
            selector = mix
        else:
            selector = rand >> step
        seed = seedset[selector % len(seedset)]
        position = selector % params.dag_size
        mix ^= quick_calc(params, seed, position)
    print('Calculated %d lookups' % (params.lookups))
    print(' '.join(str(part) for part in (header, nonce, mix)))
    return mix
2014-10-24 03:56:42 -04:00
2014-12-09 09:30:40 -05:00
2014-12-08 15:16:12 -05:00
def light_verify(params, seedset, header, nonce):
    """Return True if ``nonce`` is a valid proof of work for ``header``.

    The hash is recomputed with ``light_hashimoto`` (no stored DAGs needed)
    and accepted when it does not exceed
    ``256 ** params.wordsz // params.diff``.

    The threshold uses explicit floor division so it stays an exact
    integer even under true division, where ``/`` would produce a rounded
    float for a number of this size.
    """
    h = light_hashimoto(params, seedset, header, nonce)
    target = 256 ** params.wordsz // params.diff
    return h <= target
2014-12-08 15:16:12 -05:00
def mine(daggerset, params, header, nonce=0):
    """Search upwards from ``nonce`` for the first nonce that meets the target.

    A nonce is accepted when ``hashimoto(params, daggerset, header, nonce)``
    does not exceed ``256 ** params.wordsz // params.diff``.  On success a
    summary line (nonce, how far the nonce advanced, elapsed time, rate)
    is printed and the nonce is returned.  The loop does not terminate
    until a valid nonce is found.
    """
    # loop-invariant threshold; explicit floor division keeps it an exact
    # integer regardless of the division mode in effect
    target = 256 ** params.wordsz // params.diff
    orignonce = nonce
    origtime = time.time()
    while True:
        if hashimoto(params, daggerset, header, nonce) <= target:
            noncediff = nonce - orignonce
            timediff = time.time() - origtime
            # a coarse clock can report 0.0 elapsed seconds; report a rate
            # of 0 in that case instead of raising ZeroDivisionError
            rate = noncediff / timediff if timediff > 0 else 0
            print('Found nonce: %d, tested %d nonces in %.2f seconds (%d per sec)' %
                  (nonce, noncediff, timediff, rate))
            return nonce
        nonce += 1
2014-12-08 15:16:12 -05:00
class params(object):
    """
    Tuning constants for DAG generation and the hashimoto loop.

    === tuning ===
    memory: memory requirements >> (much greater than) L2/L3/L4 cache sizes
    lookups: hashes_per_sec(lookups=0) >> hashes_per_sec(lookups_mem_hard)
    k: ?
    d: higher values enforce memory availability but require more quick_calcs
    num_dags: so that a dag can be updated in reasonable time

    Derived sizes use explicit floor division (``//``) so that they stay
    integers, as required for ``range()`` and list indexing, regardless of
    the division mode in effect.
    """
    # modulus: the square of 2**256 - 2**32 - 977, hence not itself prime
    p = (2 ** 256 - 4294968273) ** 2
    wordsz = 64                                # word size
    memory = 10 * 1024 ** 2                    # memory usage
    num_dags = 2                               # number of dags
    dag_size = memory // num_dags // wordsz    # num 64byte values per dag
    lookups = 40                               # memory lookups per hash
    diff = 2 ** 14                             # higher is harder
    k = 2                                      # num dependencies of each dag value
    hk = 8                                     # dependencies for final nodes
    d = 8                                      # max distance of first dependency (1/d=fraction of size)
    w = 2                                      # work factor on node generation
    hw = 8                                     # work factor on final node generation
    h_threshold = dag_size * 2 // 5            # cutoff between final and nonfinal nodes
    is_serial = False                          # hashimoto is serial
2014-12-08 15:16:12 -05:00
if __name__ == '__main__':
    # dump every integer-valued class attribute of params
    int_params = {}
    for name, value in params.__dict__.items():
        if isinstance(value, int):
            int_params[name] = value
    print(int_params)

    # odds of a partial storage attack
    missing_mem = 0.01
    success_odds = (1 - missing_mem) ** params.lookups
    print('P success per hash with %d%% mem missing: %d%%' %
          (missing_mem * 100, success_odds * 100))

    # which actually only results in a slower mining,
    # as more hashes must be tried
    slowdown = 1 / success_odds
    print('x%.1f speedup required to offset %d%% missing mem' %
          (slowdown, missing_mem * 100))

    # create set of DAGs
    st = time.time()
    seedset = [str(i) for i in range(params.num_dags)]
    daggerset = get_daggerset(params, seedset)
    total_mb = 64 * params.dag_size * params.num_dags / 1024 ** 2
    print(' '.join(['daggerset with %d dags' % len(daggerset), 'size:',
                    str(total_mb), 'MB']))
    print('creation took %.2fs' % (time.time() - st))

    # update DAG
    st = time.time()
    update_daggerset(params, daggerset, seedset, seed='qwe')
    print('updating 1 dag took %.2fs' % (time.time() - st))

    # Mine
    for i in range(1):
        header = 'test%d' % i
        print(' '.join(['\nmining', header]))
        nonce = mine(daggerset, params, header)
        # verify the mined nonce via the light (DAG-free) path and time it
        st = time.time()
        assert light_verify(params, seedset, header, nonce)
        print('verification took %.2fs' % (time.time() - st))