Added erasure code-based data availability checking code
parent efb1a85848, commit e3db8989b1

@@ -0,0 +1,152 @@
import copy
import poly_utils
import rlp

try:
    from Crypto.Hash import keccak
    sha3 = lambda x: keccak.new(digest_bits=256, data=x).digest()
except ImportError:
    import sha3 as _sha3
    sha3 = lambda x: _sha3.sha3_256(x).digest()

# Every point is an element of GF(2**16), so represents two bytes
POINT_SIZE = 2
# Every chunk contains 128 points
POINTS_IN_CHUNK = 128
# A chunk is 256 bytes
CHUNK_SIZE = POINT_SIZE * POINTS_IN_CHUNK

def bytes_to_num(bytez):
    o = 0
    for b in bytez:
        o = (o * 256) + b
    return o

def num_to_bytes(inp, n):
    o = b''
    for i in range(n):
        o = bytes([inp % 256]) + o
        inp //= 256
    return o

assert bytes_to_num(num_to_bytes(31337, 2)) == 31337

# Returns the smallest power of 2 equal to or greater than a number
def higher_power_of_2(x):
    higher_power_of_2 = 1
    while higher_power_of_2 < x:
        higher_power_of_2 *= 2
    return higher_power_of_2

# Unfortunately, most padding schemes standardized in cryptography seem to only work
# for block sizes strictly less than 256 bytes. So we'll use RLP plus zero-byte padding
# instead (we RLP-encode first, because the RLP encoding records the payload length,
# which makes the padding reversible even when the original data ends in zero bytes)
def pad(data):
    med = rlp.encode(data)
    return med + b'\x00' * (higher_power_of_2(len(med)) - len(med))

def unpad(data):
    # Read the RLP length prefix; everything past the encoded payload is padding
    c, l1, l2 = rlp.codec.consume_length_prefix(data, 0)
    assert c == str
    return data[:l1 + l2]
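
# Illustrative round-trip sketch (editor's addition, not in the original file):
# pad() grows the RLP encoding to a power-of-two length, and unpad() recovers
# the exact RLP encoding even when the payload itself ends in zero bytes.
_msg = b'hello world' + b'\x00' * 3
assert len(pad(_msg)) == higher_power_of_2(len(rlp.encode(_msg)))
assert unpad(pad(_msg)) == rlp.encode(_msg)
assert rlp.decode(unpad(pad(_msg))) == _msg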

# Deserialize a chunk into a list of points in GF(2**16)
def chunk_to_points(chunk):
    return [bytes_to_num(chunk[i: i + POINT_SIZE]) for i in range(0, CHUNK_SIZE, POINT_SIZE)]

# Serialize a list of points into a chunk
def points_to_chunk(points):
    return b''.join([num_to_bytes(p, POINT_SIZE) for p in points])

testdata = sha3(b'cow') * (CHUNK_SIZE // 32)
assert points_to_chunk(chunk_to_points(testdata)) == testdata

# Make a Merkle tree out of a set of chunks. The nodes are stored as a flat
# list in heap order: node i has children 2i and 2i+1, index 0 is a dummy
# entry, and the root sits at index 1
def merklize(chunks):
    # Only accept a list of size which is exactly a power of two
    assert higher_power_of_2(len(chunks)) == len(chunks)
    merkle_nodes = [sha3(x) for x in chunks]
    lower_tier = merkle_nodes[::]
    higher_tier = []
    while len(higher_tier) != 1:
        higher_tier = [sha3(lower_tier[i] + lower_tier[i + 1]) for i in range(0, len(lower_tier), 2)]
        merkle_nodes = higher_tier + merkle_nodes
        lower_tier = higher_tier
    merkle_nodes.insert(0, b'\x00' * 32)
    return merkle_nodes


class Prover():
    def __init__(self, data):
        # Pad data
        pdata = pad(data)
        byte_chunks = [pdata[i: i + CHUNK_SIZE] for i in range(0, len(pdata), CHUNK_SIZE)]
        # Decompose it into chunks, where each chunk is a collection of numbers
        chunks = []
        for byte_chunk in byte_chunks:
            chunks.append(chunk_to_points(byte_chunk))
        # Compute the polynomials representing the ith number in each chunk
        polys = [poly_utils.lagrange_interp([chunk[i] for chunk in chunks], list(range(len(chunks)))) for i in range(POINTS_IN_CHUNK)]
        # Use the polynomials to extend the chunks
        new_chunks = []
        for x in range(len(chunks), len(chunks) * 2):
            new_chunks.append(points_to_chunk([poly_utils.eval_poly_at(poly, x) for poly in polys]))
        # Total length of data including new points
        self.length = len(byte_chunks + new_chunks)
        self.extended_data = byte_chunks + new_chunks
        # Build up the Merkle tree
        self.merkle_nodes = merklize(self.extended_data)
        assert len(self.merkle_nodes) == 2 * self.length
        self.merkle_root = self.merkle_nodes[1]

    # Make a Merkle proof for some index. The first element is the chunk
    # itself; the rest are the sibling hashes along the path to the root
    def prove(self, index):
        assert 0 <= index < self.length
        adjusted_index = self.length + index
        o = [self.extended_data[index]]
        while adjusted_index > 1:
            o.append(self.merkle_nodes[adjusted_index ^ 1])
            adjusted_index >>= 1
        return o

# Verify a Merkle proof of some index (light client friendly)
def verify_proof(merkle_root, proof, index):
    h = sha3(proof[0])
    for p in proof[1:]:
        if index % 2:
            h = sha3(p + h)
        else:
            h = sha3(h + p)
        index //= 2
    return h == merkle_root
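
# Illustrative sketch (editor's addition, not in the original file): check a
# hand-built proof for leaf 1 of a 4-chunk tree against merklize()'s flat
# heap layout, where leaf i lives at node len(chunks) + i.
_chunks = [bytes([i]) * CHUNK_SIZE for i in range(4)]
_nodes = merklize(_chunks)
_proof = [_chunks[1], _nodes[5 ^ 1], _nodes[2 ^ 1]]
assert verify_proof(_nodes[1], _proof, 1)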

# Fill data from partially available proofs
# This method returning False can also be used as a verifier for fraud proofs
def fill(merkle_root, orig_data_length, proofs, indices):
    if len(proofs) < orig_data_length:
        raise Exception("Not enough proofs")
    if len(proofs) > orig_data_length:
        raise Exception("Too many proofs; if original data has n chunks, n chunks suffice")
    for proof, index in zip(proofs, indices):
        if not verify_proof(merkle_root, proof, index):
            raise Exception("Merkle proof for index %d invalid" % index)
    # Convert to points
    coords = [chunk_to_points(p[0]) for p in proofs]
    # Extract polynomials
    polys = [poly_utils.lagrange_interp([c[i] for c in coords], indices) for i in range(POINTS_IN_CHUNK)]
    # Fill in the remaining values
    full_coords = [None] * (orig_data_length * 2)
    for points, index in zip(coords, indices):
        full_coords[index] = points
    for i in range(len(full_coords)):
        if full_coords[i] is None:
            full_coords[i] = [poly_utils.eval_poly_at(poly, i) for poly in polys]
    # Serialize
    full_chunks = [points_to_chunk(points) for points in full_coords]
    # Merklize
    merkle_nodes = merklize(full_chunks)
    # Check equality of the Merkle root
    if merkle_root != merkle_nodes[1]:
        return False
    return full_chunks
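
# Illustrative fraud-proof sketch (editor's addition, not in the original
# file): commit to an extension that does NOT lie on the degree n-1
# polynomial; fill() then recomputes a different root and returns False.
def _make_proof(nodes, chunks, index):
    # Hypothetical helper mirroring Prover.prove() against a raw node list
    adjusted, o = len(chunks) + index, [chunks[index]]
    while adjusted > 1:
        o.append(nodes[adjusted ^ 1])
        adjusted >>= 1
    return o

_p = Prover(b'some data' * 50)                         # 2 original chunks
_bad = _p.extended_data[:-1] + [b'\x01' * CHUNK_SIZE]  # corrupt one extension chunk
_bad_nodes = merklize(_bad)
_idx = [0, _p.length - 1]                              # sample includes the bad chunk
_proofs = [_make_proof(_bad_nodes, _bad, i) for i in _idx]
assert fill(_bad_nodes[1], _p.length // 2, _proofs, _idx) is False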

@@ -0,0 +1,105 @@
# The modulus polynomial for GF(2**16): x**16 + x**15 + x**12 + x**10 + 1
modulus_poly = [1, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 1, 0, 1, 0, 0, 1,
                1]
modulus_poly_as_int = sum([(v << i) for i, v in enumerate(modulus_poly)])
degree = len(modulus_poly) - 1

two_to_the_degree = 2**degree
two_to_the_degree_m1 = 2**degree - 1

def galoistpl(a):
    # 2 is not a primitive root, so we have to use 3 as our logarithm base;
    # this multiplies a by 3 (ie. by x+1), reducing by the modulus on overflow
    if a * 2 < two_to_the_degree:
        return (a * 2) ^ a
    else:
        return (a * 2) ^ a ^ modulus_poly_as_int

# Precomputing a log table for increased speed of addition and multiplication
glogtable = [0] * (two_to_the_degree)
gexptable = []
v = 1
for i in range(two_to_the_degree_m1):
    glogtable[v] = i
    gexptable.append(v)
    v = galoistpl(v)

# Triple the exponent table so that sums of two logs never need a modular reduction
gexptable += gexptable + gexptable

# Add two values in the Galois field
def galois_add(x, y):
    return x ^ y

# In binary fields, addition and subtraction are the same thing
galois_sub = galois_add

# Multiply two values in the Galois field
def galois_mul(x, y):
    return 0 if x*y == 0 else gexptable[glogtable[x] + glogtable[y]]

# Divide two values in the Galois field (assumes y != 0; glogtable[0] is a
# dummy entry, so dividing by zero silently returns a wrong answer)
def galois_div(x, y):
    return 0 if x == 0 else gexptable[(glogtable[x] - glogtable[y]) % two_to_the_degree_m1]

# Evaluate a polynomial at a point
def eval_poly_at(p, x):
    if x == 0:
        return p[0]
    y = 0
    logx = glogtable[x]
    for i, p_coeff in enumerate(p):
        if p_coeff:
            # Add x**i * coeff
            y ^= gexptable[(logx * i + glogtable[p_coeff]) % two_to_the_degree_m1]
    return y


# Given n y values and x values with no errors, recovers the original
# degree n-1 polynomial.
# Lagrange interpolation works roughly in the following way.
# 1. Suppose you have a set of points, eg. x = [1, 2, 3], y = [2, 5, 10]
# 2. For each x, generate a polynomial which equals its corresponding
#    y coordinate at that point and 0 at all other points provided.
# 3. Add these polynomials together.
def lagrange_interp(pieces, xs):
    # Generate master numerator polynomial, eg. (x - x1) * (x - x2) * ... * (x - xn)
    root = [1]
    for x in xs:
        logx = glogtable[x]
        root.insert(0, 0)
        for j in range(len(root)-1):
            if root[j+1] and x:
                root[j] ^= gexptable[glogtable[root[j+1]] + logx]
    assert len(root) == len(pieces) + 1
    # Generate per-value numerator polynomials, eg. for x=x2,
    # (x - x1) * (x - x3) * ... * (x - xn), by dividing the master
    # polynomial back by each x coordinate
    nums = []
    for x in xs:
        output = [0] * (len(root) - 2) + [1]
        logx = glogtable[x]
        for j in range(len(root) - 2, 0, -1):
            if output[j] and x:
                output[j-1] = root[j] ^ gexptable[glogtable[output[j]] + logx]
            else:
                output[j-1] = root[j]
        assert len(output) == len(pieces)
        nums.append(output)
    # Generate denominators by evaluating numerator polys at each x
    denoms = [eval_poly_at(nums[i], xs[i]) for i in range(len(xs))]
    # Generate output polynomial, which is the sum of the per-value numerator
    # polynomials rescaled to have the right y values
    b = [0 for p in pieces]
    for i in range(len(xs)):
        log_yslice = glogtable[pieces[i]] - glogtable[denoms[i]] + two_to_the_degree_m1
        for j in range(len(pieces)):
            if nums[i][j] and pieces[i]:
                b[j] ^= gexptable[glogtable[nums[i][j]] + log_yslice]
    return b
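
# Illustrative sketch (editor's addition, not in the original file): recover
# a known degree-1 polynomial from two of its evaluations in GF(2**16).
_p = [5, 3]                                  # the polynomial 3x + 5
_ys = [eval_poly_at(_p, x) for x in [1, 2]]
assert lagrange_interp(_ys, [1, 2]) == _p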


# Sanity check: multiplication distributes over addition
a = 124
b = 8932
c = 12415

assert galois_mul(galois_add(a, b), c) == galois_add(galois_mul(a, c), galois_mul(b, c))
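
# Illustrative sketch (editor's addition, not in the original file):
# multiplication and division are inverses, and multiplying by 3 matches
# the log-table generator galoistpl
assert galois_div(galois_mul(a, b), b) == a
assert galois_mul(a, 3) == galoistpl(a)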

@@ -0,0 +1,19 @@
import ec65536
import rlp

# 12.8 kilobyte test string
testdata = 'the cow jumped over the moon!!! ' * 400

prover = ec65536.Prover(testdata)
print("Created prover")

assert ec65536.verify_proof(prover.merkle_root, prover.prove(13), 13)

proofs = [prover.prove(i) for i in range(0, prover.length, 2)]
print("Created merkle proofs")

print("Starting to attempt fill")
response = ec65536.fill(prover.merkle_root, prover.length // 2, proofs, list(range(0, prover.length, 2)))
assert response is not False
assert b''.join(response)[:len(rlp.encode(testdata))] == rlp.encode(testdata)
print("Fill successful")
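
# Illustrative sketch (editor's addition, not part of the commit): a tampered
# chunk fails verification, which is what makes random sampling by light
# clients meaningful.
bad_proof = prover.prove(13)
bad_proof[0] = b'\x00' * len(bad_proof[0])
assert not ec65536.verify_proof(prover.merkle_root, bad_proof, 13)
print("Tampered proof rejected")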

@@ -1,695 +0,0 @@
import copy


# Galois field class and logtable
#
# See: https://en.wikipedia.org/wiki/Finite_field
#
# Note that you can substitute "Galois" with "float" in the code, and
# the code will then magically start using the plain old field of rationals
# instead of this spooky modulo polynomial thing. If you are not an expert in
# finite field theory and want to dig deep into how this code works, I
# recommend adding the line "Galois = float" immediately after this class (and
# not using the methods that require serialization)
#
# As a quick intro to finite field theory, the idea is that there exist these
# things called fields, which are basically sets of objects together with
# rules for addition, subtraction, multiplication and division, such that
# algebra within this field is consistent, even if the results look
# nonsensical from a "normal numbers" perspective. For instance, consider the
# field of integers modulo 7. Here, for example, 2 * 5 = 3, 3 * 4 = 5,
# 6 * 6 = 1, 6 + 6 = 5.
# However, all algebra still works; for example, (a^2 - b^2) = (a + b)(a - b)
# works for all a,b. For this reason, we can do secret sharing arithmetic
# "over" any field. The reason why Galois fields are preferable is that all
# elements in the Galois field are values in [0 ... 255] (at least using the
# canonical serialization that we use here); no amount of addition,
# multiplication, subtraction or division will ever get you anything else.
# This guarantees that our secret shares will always be serializable as byte
# arrays. The way the Galois field we use here works is that the elements are
# polynomials with coefficients in the field of integers mod 2, so addition
# and subtraction are xor, multiplication is done modulo
# x^8 + x^4 + x^3 + x + 1, and division is defined by a/b = c iff bc = a and
# b != 0. In practice, we do multiplication and division via a precomputed
# log table using x+1 as a base
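
# Editor's addition (hypothetical, not in the original file): the mod-7
# examples from the comment above, checked with plain integers
assert (2 * 5) % 7 == 3 and (3 * 4) % 7 == 5
assert (6 * 6) % 7 == 1 and (6 + 6) % 7 == 5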

# per-byte 2^8 Galois field
# Note that this imposes a hard limit that the number of extended chunks can
# be at most 256 along each dimension


def galoistpl(a):
    # 2 is not a primitive root, so we have to use 3 as our logarithm base
    unrolla = [a/(2**k) % 2 for k in range(8)]
    res = [0] + unrolla
    for i in range(8):
        res[i] = (res[i] + unrolla[i]) % 2
    if res[-1] == 0:
        res.pop()
    else:
        # AES Polynomial
        for i in range(9):
            res[i] = (res[i] - [1, 1, 0, 1, 1, 0, 0, 0, 1][i]) % 2
        res.pop()
    return sum([res[k] * 2**k for k in range(8)])


# Precomputing a multiplication and XOR table for increased speed
glogtable = [0] * 256
gexptable = []
v = 1
for i in range(255):
    glogtable[v] = i
    gexptable.append(v)
    v = galoistpl(v)


class Galois:
    val = 0

    def __init__(self, val):
        self.val = val.val if isinstance(self.val, Galois) else val

    def __add__(self, other):
        return Galois(self.val ^ other.val)

    def __mul__(self, other):
        if self.val == 0 or other.val == 0:
            return Galois(0)
        return Galois(gexptable[(glogtable[self.val] +
                                 glogtable[other.val]) % 255])

    def __sub__(self, other):
        return Galois(self.val ^ other.val)

    def __div__(self, other):
        if other.val == 0:
            raise ZeroDivisionError
        if self.val == 0:
            return Galois(0)
        return Galois(gexptable[(glogtable[self.val] -
                                 glogtable[other.val]) % 255])

    def __int__(self):
        return self.val

    def __repr__(self):
        return repr(self.val)


# Modular division class

def mkModuloClass(n):

    if pow(2, n, n) != 2:
        raise Exception("n must be prime!")

    class Mod:
        val = 0

        def __init__(self, val):
            self.val = val.val if isinstance(
                self.val, self.__class__) else val

        def __add__(self, other):
            return self.__class__((self.val + other.val) % n)

        def __mul__(self, other):
            return self.__class__((self.val * other.val) % n)

        def __sub__(self, other):
            return self.__class__((self.val - other.val) % n)

        def __div__(self, other):
            return self.__class__((self.val * other.val ** (n-2)) % n)

        def __int__(self):
            return self.val

        def __repr__(self):
            return repr(self.val)
    return Mod


# Evaluates a polynomial in little-endian form, eg. x^2 + 3x + 2 = [2, 3, 1]
# (normally I hate little-endian, but in this case dealing with polynomials
# it's justified, since you get the nice property that p[n] is the nth degree
# term of p) at coordinate x, eg. eval_poly_at([2, 3, 1], 5) = 42 if you are
# using float as your arithmetic


def eval_poly_at(p, x):
    arithmetic = p[0].__class__
    y = arithmetic(0)
    x_to_the_i = arithmetic(1)
    for i in range(len(p)):
        y += x_to_the_i * p[i]
        x_to_the_i *= x
    return y
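
# Editor's check (not in the original file) of the comment's example, using
# float as the arithmetic: 2 + 3*5 + 1*25 = 42
assert eval_poly_at([2.0, 3.0, 1.0], 5.0) == 42.0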


# Given n y values and x values with no errors, recovers the original
# degree n-1 polynomial. For example,
# lagrange_interp([51.0, 59.0, 66.0], [1, 3, 4]) = [50.0, 0, 1.0]
# if you are using float as your arithmetic


def lagrange_interp(pieces, xs):
    arithmetic = pieces[0].__class__
    zero, one = arithmetic(0), arithmetic(1)
    # Generate master numerator polynomial
    root = [one]
    for i in range(len(xs)):
        root.insert(0, zero)
        for j in range(len(root)-1):
            root[j] = root[j] - root[j+1] * xs[i]
    # Generate per-value numerator polynomials by dividing the master
    # polynomial back by each x coordinate
    nums = []
    for i in range(len(xs)):
        output = []
        last = one
        for j in range(2, len(root)+1):
            output.insert(0, last)
            if j != len(root):
                last = root[-j] + last * xs[i]
        nums.append(output)
    # Generate denominators by evaluating numerator polys at their x
    denoms = []
    for i in range(len(xs)):
        denom = zero
        x_to_the_j = one
        for j in range(len(nums[i])):
            denom += x_to_the_j * nums[i][j]
            x_to_the_j *= xs[i]
        denoms.append(denom)
    # Generate output polynomial
    b = [zero for i in range(len(pieces))]
    for i in range(len(xs)):
        yslice = pieces[int(i)] / denoms[int(i)]
        for j in range(len(pieces)):
            b[j] += nums[i][j] * yslice
    return b
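
# Editor's check (not in the original file) of the comment's example; the
# divisions involved are exact in binary floating point
assert lagrange_interp([51.0, 59.0, 66.0], [1.0, 3.0, 4.0]) == [50.0, 0.0, 1.0]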


# Compresses two linear equations of length n into one
# equation of length n-1
# Format:
# 3x + 4y = 80 (ie. 3x + 4y - 80 = 0) -> a = [3,4,-80]
# 5x + 2y = 70 (ie. 5x + 2y - 70 = 0) -> b = [5,2,-70]


def elim(a, b):
    aprime = [x*b[0] for x in a]
    bprime = [x*a[0] for x in b]
    c = [aprime[i] - bprime[i] for i in range(1, len(a))]
    return c


# Linear equation solver
# Format:
# 3x + 4y = 80, y = 5 (ie. 3x + 4y - 80z = 0, y = 5, z = 1)
# -> coeffs = [3,4,-80], vals = [5,1]


def evaluate(coeffs, vals):
    arithmetic = coeffs[0].__class__
    tot = arithmetic(0)
    for i in range(len(vals)):
        tot -= coeffs[i+1] * vals[i]
    if int(coeffs[0]) == 0:
        raise ZeroDivisionError
    return tot / coeffs[0]


# Linear equation system solver
# Format:
# ax + by + c = 0, dx + ey + f = 0
# -> [[a, b, c], [d, e, f]]
# eg.
# [[3.0, 5.0, -13.0], [9.0, 1.0, -11.0]] -> [1.0, 2.0]


def sys_solve(eqs):
    arithmetic = eqs[0][0].__class__
    one = arithmetic(1)
    back_eqs = [eqs[0]]
    while len(eqs) > 1:
        neweqs = []
        for i in range(len(eqs)-1):
            neweqs.append(elim(eqs[i], eqs[i+1]))
        eqs = neweqs
        i = 0
        while i < len(eqs) - 1 and int(eqs[i][0]) == 0:
            i += 1
        back_eqs.insert(0, eqs[i])
    kvals = [one]
    for i in range(len(back_eqs)):
        kvals.insert(0, evaluate(back_eqs[i], kvals))
    return kvals[:-1]
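
# Editor's check (not in the original file) of the example above
assert sys_solve([[3.0, 5.0, -13.0], [9.0, 1.0, -11.0]]) == [1.0, 2.0]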


def polydiv(Q, E):
    qpoly = copy.deepcopy(Q)
    epoly = copy.deepcopy(E)
    div = []
    while len(qpoly) >= len(epoly):
        div.insert(0, qpoly[-1] / epoly[-1])
        for i in range(2, len(epoly)+1):
            qpoly[-i] -= div[0] * epoly[-i]
        qpoly.pop()
    return div


# Given a set of y coordinates and x coordinates, and the degree of the
# original polynomial, determines the original polynomial even if some of
# the y coordinates are wrong. If m is the minimal number of pieces (ie.
# degree + 1), t is the total number of pieces provided, then the algo can
# handle up to (t-m)/2 errors. See:
# http://en.wikipedia.org/wiki/Berlekamp%E2%80%93Welch_algorithm#Example
# (just skip to my example, the rest of the article sucks imo)


def berlekamp_welch_attempt(pieces, xs, master_degree):
    error_locator_degree = (len(pieces) - master_degree - 1) / 2
    arithmetic = pieces[0].__class__
    zero, one = arithmetic(0), arithmetic(1)
    # Set up the equations for y[i]E(x[i]) = Q(x[i])
    # degree(E) = error_locator_degree
    # degree(Q) = master_degree + error_locator_degree
    eqs = []
    for i in range(2 * error_locator_degree + master_degree + 1):
        eqs.append([])
    for i in range(2 * error_locator_degree + master_degree + 1):
        neg_x_to_the_j = zero - one
        for j in range(error_locator_degree + master_degree + 1):
            eqs[i].append(neg_x_to_the_j)
            neg_x_to_the_j *= xs[i]
        x_to_the_j = one
        for j in range(error_locator_degree + 1):
            eqs[i].append(x_to_the_j * pieces[i])
            x_to_the_j *= xs[i]
    # Solve 'em
    # Assume the top error polynomial term to be one
    errors = error_locator_degree
    ones = 1
    while errors >= 0:
        try:
            polys = sys_solve(eqs) + [one] * ones
            qpoly = polys[:errors + master_degree + 1]
            epoly = polys[errors + master_degree + 1:]
            break
        except ZeroDivisionError:
            for eq in eqs:
                eq[-2] += eq[-1]
                eq.pop()
            eqs.pop()
            errors -= 1
            ones += 1
    if errors < 0:
        raise Exception("Not enough data!")
    # Divide the polynomials
    qpoly = polys[:error_locator_degree + master_degree + 1]
    epoly = polys[error_locator_degree + master_degree + 1:]
    div = []
    while len(qpoly) >= len(epoly):
        div.insert(0, qpoly[-1] / epoly[-1])
        for i in range(2, len(epoly)+1):
            qpoly[-i] -= div[0] * epoly[-i]
        qpoly.pop()
    # Check
    corrects = 0
    for i, x in enumerate(xs):
        if int(eval_poly_at(div, x)) == int(pieces[i]):
            corrects += 1
    if corrects < master_degree + errors:
        raise Exception("Answer doesn't match (too many errors)!")
    return div


# Extends a list of integers in [0 ... 255] (if using Galois arithmetic) by
# adding n redundant error-correction values


def extend(data, n, arithmetic=Galois):
    data2 = map(arithmetic, data)
    data3 = data[:]
    poly = berlekamp_welch_attempt(data2,
                                   map(arithmetic, range(len(data))),
                                   len(data) - 1)
    for i in range(n):
        data3.append(int(eval_poly_at(poly, arithmetic(len(data) + i))))
    return data3


# Repairs a list of integers in [0 ... 255]. Some integers can be erroneous,
# and you can put None in place of an integer if you know that a certain
# value is defective or missing. Uses the Berlekamp-Welch algorithm to
# do error-correction


def repair(data, datasize, arithmetic=Galois):
    vs, xs = [], []
    for i, v in enumerate(data):
        if v is not None:
            vs.append(arithmetic(v))
            xs.append(arithmetic(i))
    poly = berlekamp_welch_attempt(vs, xs, datasize - 1)
    return [int(eval_poly_at(poly, arithmetic(i))) for i in range(len(data))]


# Extends a list of bytearrays
# eg. extend_chunks([map(ord, 'hello'), map(ord, 'world')], 2)
# n is the number of redundant error-correction chunks to add


def extend_chunks(data, n, arithmetic=Galois):
    o = []
    for i in range(len(data[0])):
        o.append(extend(map(lambda x: x[i], data), n, arithmetic))
    return map(list, zip(*o))


# Repairs a list of bytearrays. Use None in place of a missing array.
# Individual arrays can contain some missing or erroneous data.


def repair_chunks(data, datasize, arithmetic=Galois):
    first_nonzero = 0
    while not data[first_nonzero]:
        first_nonzero += 1
    for i in range(len(data)):
        if data[i] is None:
            data[i] = [None] * len(data[first_nonzero])
    o = []
    for i in range(len(data[0])):
        o.append(repair(map(lambda x: x[i], data), datasize, arithmetic))
    return map(list, zip(*o))


# Extends either a bytearray or a list of bytearrays or a list of lists...
# Used in the cubify method to expand a cube in all dimensions


def deep_extend_chunks(data, n, arithmetic=Galois):
    if not isinstance(data[0], list):
        return extend(data, n, arithmetic)
    else:
        o = []
        for i in range(len(data[0])):
            o.append(
                deep_extend_chunks(map(lambda x: x[i], data), n, arithmetic))
        return map(list, zip(*o))


# ISO/IEC 7816-4 padding


def pad(data, size):
    data = data[:]
    data.append(128)
    while len(data) % size != 0:
        data.append(0)
    return data


# Removes ISO/IEC 7816-4 padding


def unpad(data):
    data = data[:]
    while data[-1] != 128:
        data.pop()
    data.pop()
    return data


# Splits a bytearray into a given number of chunks with some
# redundant chunks


def split(data, numchunks, redund):
    chunksize = len(data) / numchunks + 1
    data = pad(data, chunksize)
    chunks = []
    for i in range(0, len(data), chunksize):
        chunks.append(data[i: i+chunksize])
    o = extend_chunks(chunks, redund)
    return o


# Recombines chunks into the original bytearray


def recombine(chunks, datalength):
    datasize = datalength / len(chunks[0]) + 1
    c = repair_chunks(chunks, datasize)
    return unpad(sum(c[:datasize], []))


h = '0123456789abcdef'
hexfy = lambda x: h[x//16]+h[x % 16]
unhexfy = lambda x: h.find(x[0]) * 16 + h.find(x[1])
split2 = lambda x: map(lambda a: ''.join(a), zip(x[::2], x[1::2]))


# Canonical serialization. First argument is a bytearray, remaining
# arguments are strings to prepend


def serialize_chunk(*args):
    chunk = args[0]
    if not chunk or chunk[0] is None:
        return None
    metadata = args[1:]
    return '-'.join(map(str, metadata) + [''.join(map(hexfy, chunk))])


def deserialize_chunk(chunk):
    data = chunk.split('-')
    metadata, main = data[:-1], data[-1]
    return metadata, map(unhexfy, split2(main))


# Splits a string into a given number of chunks with some redundant chunks


def split_file(f, numchunks=5, redund=5):
    f = map(ord, f)
    ec = split(f, numchunks, redund)
    o = []
    for i, c in enumerate(ec):
        o.append(
            serialize_chunk(c, *[i, numchunks, numchunks + redund, len(f)]))
    return o


def recombine_file(chunks):
    chunks2 = map(deserialize_chunk, chunks)
    metadata = map(int, chunks2[0][0])
    o = [None] * metadata[2]
    for chunk in chunks2:
        o[int(chunk[0][0])] = chunk[1]
    return ''.join(map(chr, recombine(o, metadata[3])))


outersplitn = lambda x, k: map(lambda i: x[i:i+k], range(len(x)))


# Splits a bytearray into a hypercube with `dim` dimensions with the original
# data being in a sub-cube of width `width` and the expanded cube being of
# width `width+redund`. The cube is self-healing; if any edge in any dimension
# has missing or erroneous pieces, we can use the Berlekamp-Welch algorithm
# to fix this


def cubify(f, width, dim, redund):
    chunksize = len(f) / width**dim + 1
    data = pad(f, width**dim)
    chunks = []
    for i in range(0, len(data), chunksize * width):
        for j in range(width):
            chunks.append(data[i+j*chunksize: i+j*chunksize+chunksize])

    for i in range(dim):
        o = []
        for j in range(0, len(chunks), width):
            e = chunks[j: j + width]
            o.append(
                deep_extend_chunks(e, redund))
        chunks = o

    return chunks[0]


# `pos` is an array of coordinates. Go deep into a nested list


def descend(obj, pos):
    for p in pos:
        obj = obj[p]
    return obj


# Go deep into a nested list and modify the value


def descend_and_set(obj, pos, val):
    immed = descend(obj, pos[:-1])
    immed[pos[-1]] = val
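
# Editor's check (not in the original file): descend/descend_and_set address
# a nested list by a coordinate vector
grid = [[0, 1], [2, 3]]
assert descend(grid, [1, 0]) == 2
descend_and_set(grid, [1, 0], 9)
assert grid[1][0] == 9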


# Use the Berlekamp-Welch algorithm to try to "heal" a particular missing
# or damaged coordinate


def heal_cube(cube, width, dim, pos, datalen):
    for d in range(len(pos)):
        o = []
        for i in range(len(cube)):
            o.append(descend(cube, pos[:d] + [i] + pos[d+1:]))
        try:
            o = repair_chunks(o, width)
            for i in range(len(cube)):
                path = pos[:d] + [i] + pos[d+1:]
                descend_and_set(cube, path, o[i])
        except:
            pass


def pack_metadata(meta):
    return map(str, meta['coords']) + [
        str(meta['base_width']),
        str(meta['extended_width']),
        str(meta['filesize'])
    ]


def unpack_metadata(meta):
    return {
        'coords': map(int, meta[:-3]),
        'base_width': int(meta[-3]),
        'extended_width': int(meta[-2]),
        'filesize': int(meta[-1])
    }


# Helper to serialize the contents of a cube of byte arrays


def _ser(chunk, meta):
    if chunk is None or (not isinstance(chunk[0], list) and
                         chunk[0] is not None):
        u = serialize_chunk(chunk, *pack_metadata(meta))
        return u
    else:
        o = []
        for i, c in enumerate(chunk):
            meta2 = copy.deepcopy(meta)
            meta2['coords'] += [i]
            o.append(_ser(c, meta2))
        return o


# Converts a deep list into a shallow list


def flatten(chunks):
    if not isinstance(chunks, list):
        return [chunks]
    else:
        o = []
        for c in chunks:
            o.extend(flatten(c))
        return o


# Converts a file into a multidimensional set of chunks with
# the desired parameters


def serialize_cubify(f, width, dim, redund):
    f = map(ord, f)
    cube = cubify(f, width, dim, redund)
    metadata = {
        'base_width': width,
        'extended_width': width + redund,
        'coords': [],
        'filesize': len(f)
    }
    cube_of_serialized_chunks = _ser(cube, metadata)
    return flatten(cube_of_serialized_chunks)


# Converts a set of serialized chunks into a partially filled cube


def construct_cube(pieces):
    pieces = map(deserialize_chunk, pieces)
    metadata = unpack_metadata(pieces[0][0])
    dim = len(metadata['coords'])
    cube = None
    for i in range(dim):
        cube = [copy.deepcopy(cube) for i in range(metadata['extended_width'])]
    for p in pieces:
        descend_and_set(cube, unpack_metadata(p[0])['coords'], p[1])
    return cube


# Tries to recreate the chunk at a particular coordinate given a set of
# other chunks


def heal_set(pieces, coords):
    c = construct_cube(pieces)
    metadata, piecezzz = deserialize_chunk(pieces[0])
    metadata = unpack_metadata(metadata)
    heal_cube(c,
              metadata['base_width'],
              len(metadata['coords']),
              coords,
              metadata['filesize'])
    metadata2 = copy.deepcopy(metadata)
    metadata2["coords"] = []
    return filter(lambda x: x, flatten(_ser(c, metadata2)))


def number_to_coords(n, w, dim):
    c = [0] * dim
    for i in range(dim):
        c[i] = n / w**(dim - i - 1)
        n %= w**(dim - i - 1)
    return c
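
# Editor's check (not in the original file): index 5 in a 2x2x2 cube has
# coordinates [1, 0, 1] (binary 101)
assert number_to_coords(5, 2, 3) == [1, 0, 1]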


def full_heal_set(pieces):
    c = construct_cube(pieces)
    metadata, piecezzz = deserialize_chunk(pieces[0])
    metadata = unpack_metadata(metadata)
    while 1:
        done = True
        unfilled = False
        i = 0
        while i < metadata['extended_width'] ** len(metadata['coords']):
            coords = number_to_coords(i,
                                      metadata['extended_width'],
                                      len(metadata['coords']))
            v = descend(c, coords)
            heal_cube(c,
                      metadata['base_width'],
                      len(metadata['coords']),
                      coords,
                      metadata['filesize'])
            v2 = descend(c, coords)
            if v != v2:
                done = False
            if v is None and v2 is None:
                unfilled = True
            i += 1
        if done and not unfilled:
            break
        elif done and unfilled:
            raise Exception("not enough data or too much corrupted data")
    o = []
    for i in range(metadata['base_width'] ** len(metadata['coords'])):
        coords = number_to_coords(i,
                                  metadata['base_width'],
                                  len(metadata['coords']))
        o.extend(descend(c, coords))
    return ''.join(map(chr, unpad(o)))