research/uncle_regressions/tx_and_bytes_regression.py

43 lines
1.8 KiB
Python

# Windowed linear regressions over the rows of block_datadump.csv.
#
# Every CSV row is unpacked as
#     num, uncs, uncrew, uncgas, txs, gas, length, zeroes
# (presumably one row per block; the column meanings are inferred from these
# names and from the printed labels -- confirm against whatever produced the
# dump).
#
# For each window of WINDOW_SIZE rows the script:
#   1. sums the rows in groups of GROUP_SIZE, giving one data point per group;
#   2. fits uncs against gas ("Base m" / "Base b");
#   3. fits what is left of uncs after removing the gas trend against txs,
#      nonzero bytes (length - zeroes) and zero bytes, one at a time
#      ("Excess m ...").
#
# Each print call receives one pre-formatted string so that the output is the
# same under Python 2 and Python 3.

DATA_FILE = 'block_datadump.csv'
TOTAL_BLOCKS = 2283416   # number of rows the analysis walks over
WINDOW_SIZE = 200000     # rows per reported window
GROUP_SIZE = 100         # rows summed into a single regression data point


def _load_rows(path):
    """Parse the ', '-separated dump at *path* into a list of float rows."""
    # The context manager closes the file once it has been read.
    with open(path) as dump:
        return [[float(cell) for cell in line.strip().split(', ')]
                for line in dump]


def _group_totals(data, start, stop):
    """Sum *data* in consecutive GROUP_SIZE-row groups over [start, stop).

    Returns one ``[gas, txs, nonzeroes, zeroes, uncs]`` list per group, where
    ``nonzeroes`` is the summed ``length - zeroes``.
    """
    totals = []
    for j in range(start, stop, GROUP_SIZE):
        gas = txs = nonzeroes = zeroes = uncs = 0
        for (_num, _uncs, _uncrew, _uncgas,
             _txs, _gas, _length, _zeroes) in data[j:j + GROUP_SIZE]:
            txs += _txs
            gas += _gas
            nonzeroes += _length - _zeroes
            zeroes += _zeroes
            uncs += _uncs
        totals.append([gas, txs, nonzeroes, zeroes, uncs])
    return totals


def _fit(xs, ys):
    """Return ``(slope, mean_x, mean_y)`` of the least-squares line of ys on xs.

    Raises ZeroDivisionError when *xs* is empty or has zero variance.
    """
    count = len(xs)
    mean_x = sum(xs) * 1.0 / count
    mean_y = sum(ys) * 1.0 / count
    covar = sum([(x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)])
    var = sum([(x - mean_x) ** 2 for x in xs])
    return covar / var, mean_x, mean_y


def _regress(groups):
    """Run the base and excess regressions over one window of group totals.

    Returns ``(base_m, base_b, m_txs, m_nonzero, m_zero)``.
    """
    gas = [g[0] for g in groups]
    uncs = [g[4] for g in groups]
    base_m, mean_gas, mean_uncs = _fit(gas, uncs)
    base_b = mean_uncs - mean_gas * base_m
    # What remains of uncs once the gas trend is taken out.
    leftover = [u - base_m * (x - mean_gas) for x, u in zip(gas, uncs)]
    # Every column is centred on its OWN mean.  The zero-byte column used to
    # be centred on the nonzero-byte mean, which inflated its variance term
    # and pulled the reported slope toward zero.
    m_txs, m_nonzero, m_zero = [
        _fit([g[col] for g in groups], leftover)[0] for col in (1, 2, 3)
    ]
    return base_m, base_b, m_txs, m_nonzero, m_zero


def main(path=DATA_FILE, total_blocks=TOTAL_BLOCKS):
    """Print the regression coefficients for every window of the dump.

    path         -- CSV file to read.
    total_blocks -- number of rows to analyse; a trailing partial group of
                    fewer than GROUP_SIZE rows is ignored.

    Raises ZeroDivisionError if a window has a column with zero variance.
    """
    data = _load_rows(path)
    # Only whole groups are analysed (2283416 -> 2283400 for the defaults).
    limit = total_blocks - total_blocks % GROUP_SIZE
    for i in range(0, limit, WINDOW_SIZE):
        print('Checking 200k blocks from %d' % i)
        groups = _group_totals(data, i, min(i + WINDOW_SIZE, limit))
        base_m, base_b, m_txs, m_nonzero, m_zero = _regress(groups)
        print('Base m = %s' % base_m)
        print('Base b = %s' % base_b)
        print('Excess m for txs= %s' % m_txs)
        print('Excess m for nonzero bytes= %s' % m_nonzero)
        print('Excess m for zero bytes= %s' % m_zero)


if __name__ == '__main__':
    main()