diff --git a/erasure_code/share.cpp b/erasure_code/ec256/share.cpp
similarity index 100%
rename from erasure_code/share.cpp
rename to erasure_code/ec256/share.cpp
diff --git a/erasure_code/share.go b/erasure_code/ec256/share.go
similarity index 100%
rename from erasure_code/share.go
rename to erasure_code/ec256/share.go
diff --git a/erasure_code/share.h b/erasure_code/ec256/share.h
similarity index 100%
rename from erasure_code/share.h
rename to erasure_code/ec256/share.h
diff --git a/erasure_code/share.js b/erasure_code/ec256/share.js
similarity index 100%
rename from erasure_code/share.js
rename to erasure_code/ec256/share.js
diff --git a/erasure_code/test.py b/erasure_code/ec256/test.py
similarity index 100%
rename from erasure_code/test.py
rename to erasure_code/ec256/test.py
diff --git a/erasure_code/utils.h b/erasure_code/ec256/utils.h
similarity index 100%
rename from erasure_code/utils.h
rename to erasure_code/ec256/utils.h
diff --git a/erasure_code/ec65536/ec65536.py b/erasure_code/ec65536/ec65536.py
new file mode 100644
index 0000000..b93a307
--- /dev/null
+++ b/erasure_code/ec65536/ec65536.py
@@ -0,0 +1,152 @@
+import copy
+import poly_utils
+import rlp
+
+try:
+    from Crypto.Hash import keccak
+    sha3 = lambda x: keccak.new(digest_bits=256, data=x).digest()
+except ImportError:
+    import sha3 as _sha3
+    sha3 = lambda x: _sha3.sha3_256(x).digest()
+
+# Every point is an element of GF(2**16), so represents two bytes
+POINT_SIZE = 2
+# Every chunk contains 128 points
+POINTS_IN_CHUNK = 128
+# A chunk is 256 bytes
+CHUNK_SIZE = POINT_SIZE * POINTS_IN_CHUNK
+
+def bytes_to_num(bytez):
+    o = 0
+    for b in bytez:
+        o = (o * 256) + b
+    return o
+
+def num_to_bytes(inp, n):
+    o = b''
+    for i in range(n):
+        o = bytes([inp % 256]) + o
+        inp //= 256
+    return o
+
+assert bytes_to_num(num_to_bytes(31337, 2)) == 31337
+
+# Returns the smallest power of 2 equal to or greater than a number
+def higher_power_of_2(x):
+    higher_power_of_2 = 1
+    while higher_power_of_2 < x:
+        higher_power_of_2 *= 2
+    return higher_power_of_2
+
+# Unfortunately, most padding schemes standardized in cryptography only work
+# for block sizes strictly less than 256 bytes, so we use RLP plus zero-byte
+# padding instead (we RLP-encode first because the RLP encoding adds length
+# data, making the padding reversible even when the original data ends in
+# zero bytes)
+def pad(data):
+    med = rlp.encode(data)
+    return med + b'\x00' * (higher_power_of_2(len(med)) - len(med))
+
+def unpad(data):
+    # consume_length_prefix takes the serialized data and a start offset
+    c, l1, l2 = rlp.codec.consume_length_prefix(data, 0)
+    assert c == str
+    return data[:l1 + l2]
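+
+# Illustrative check (editor's sketch, not part of the original module): the
+# RLP length prefix, not the zero padding, carries the payload length, so the
+# padding can be stripped unambiguously even if the data ends in zero bytes.
+_padded = pad(b'cow\x00')
+assert _padded != rlp.encode(b'cow\x00')         # zero bytes were appended
+assert unpad(_padded) == rlp.encode(b'cow\x00')  # and can be stripped again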
+
+# Deserialize a chunk into a list of points in GF(2**16)
+def chunk_to_points(chunk):
+    return [bytes_to_num(chunk[i: i + POINT_SIZE]) for i in range(0, CHUNK_SIZE, POINT_SIZE)]
+
+# Serialize a list of points into a chunk
+def points_to_chunk(points):
+    return b''.join([num_to_bytes(p, POINT_SIZE) for p in points])
+
+testdata = sha3(b'cow') * (CHUNK_SIZE // 32)
+assert points_to_chunk(chunk_to_points(testdata)) == testdata
+
+# Make a Merkle tree out of a set of chunks
+def merklize(chunks):
+    # Only accept a list whose length is exactly a power of two
+    assert higher_power_of_2(len(chunks)) == len(chunks)
+    merkle_nodes = [sha3(x) for x in chunks]
+    lower_tier = merkle_nodes[::]
+    higher_tier = []
+    while len(higher_tier) != 1:
+        higher_tier = [sha3(lower_tier[i] + lower_tier[i + 1]) for i in range(0, len(lower_tier), 2)]
+        merkle_nodes = higher_tier + merkle_nodes
+        lower_tier = higher_tier
+    merkle_nodes.insert(0, b'\x00' * 32)
+    return merkle_nodes
+
+
+class Prover():
+    def __init__(self, data):
+        # Pad data
+        pdata = pad(data)
+        byte_chunks = [pdata[i: i + CHUNK_SIZE] for i in range(0, len(pdata), CHUNK_SIZE)]
+        # Decompose it into chunks, where each chunk is a collection of numbers
+        chunks = []
+        for byte_chunk in byte_chunks:
+            chunks.append(chunk_to_points(byte_chunk))
+        # Compute the polynomials representing the ith number in each chunk
+        polys = [poly_utils.lagrange_interp([chunk[i] for chunk in chunks], list(range(len(chunks)))) for i in range(POINTS_IN_CHUNK)]
+        # Use the polynomials to extend the chunks
+        new_chunks = []
+        for x in range(len(chunks), len(chunks) * 2):
+            new_chunks.append(points_to_chunk([poly_utils.eval_poly_at(poly, x) for poly in polys]))
+        # Total length of data including new points
+        self.length = len(byte_chunks + new_chunks)
+        self.extended_data = byte_chunks + new_chunks
+        # Build up the Merkle tree
+        self.merkle_nodes = merklize(self.extended_data)
+        assert len(self.merkle_nodes) == 2 * self.length
+        self.merkle_root = self.merkle_nodes[1]
+
+    # Make a Merkle proof for some index
+    def prove(self, index):
+        assert 0 <= index < self.length
+        adjusted_index = self.length + index
+        o = [self.extended_data[index]]
+        while adjusted_index > 1:
+            o.append(self.merkle_nodes[adjusted_index ^ 1])
+            adjusted_index >>= 1
+        return o
+
+# Verify a Merkle proof of some index (light client friendly)
+def verify_proof(merkle_root, proof, index):
+    h = sha3(proof[0])
+    for p in proof[1:]:
+        if index % 2:
+            h = sha3(p + h)
+        else:
+            h = sha3(h + p)
+        index //= 2
+    return h == merkle_root
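+
+# Layout note (editor's sketch, not part of the original module): merklize()
+# returns the tree as a 1-indexed binary heap, so node i has children 2i and
+# 2i+1 and the root sits at index 1; prove() walks sibling = index ^ 1 upward.
+_chunks = [bytes([i]) * CHUNK_SIZE for i in range(4)]
+_nodes = merklize(_chunks)
+assert _nodes[1] == sha3(_nodes[2] + _nodes[3])
+assert _nodes[4] == sha3(_chunks[0])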
+
+# Fill data from partially available proofs
+# This method returning False can also be used as a verifier for fraud proofs
+def fill(merkle_root, orig_data_length, proofs, indices):
+    if len(proofs) < orig_data_length:
+        raise Exception("Not enough proofs")
+    if len(proofs) > orig_data_length:
+        raise Exception("Too many proofs; if original data has n chunks, n chunks suffice")
+    for proof, index in zip(proofs, indices):
+        if not verify_proof(merkle_root, proof, index):
+            raise Exception("Merkle proof for index %d invalid" % index)
+    # Convert to points
+    coords = [chunk_to_points(p[0]) for p in proofs]
+    # Extract polynomials
+    polys = [poly_utils.lagrange_interp([c[i] for c in coords], indices) for i in range(POINTS_IN_CHUNK)]
+    # Fill in the remaining values
+    full_coords = [None] * orig_data_length * 2
+    for points, index in zip(coords, indices):
+        full_coords[index] = points
+    for i in range(len(full_coords)):
+        if full_coords[i] is None:
+            full_coords[i] = [poly_utils.eval_poly_at(poly, i) for poly in polys]
+    # Serialize
+    full_chunks = [points_to_chunk(points) for points in full_coords]
+    # Merklize
+    merkle_nodes = merklize(full_chunks)
+    # Check equality of the Merkle root
+    if merkle_root != merkle_nodes[1]:
+        return False
+    return full_chunks
diff --git a/erasure_code/ec65536/poly_utils.py b/erasure_code/ec65536/poly_utils.py
new file mode 100644
index 0000000..8469ad9
--- /dev/null
+++ b/erasure_code/ec65536/poly_utils.py
@@ -0,0 +1,105 @@
+modulus_poly = [1, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 1, 0, 1, 0, 0, 1,
+                1]
+modulus_poly_as_int = sum([(v << i) for i, v in enumerate(modulus_poly)])
+degree = len(modulus_poly) - 1
+
+two_to_the_degree = 2**degree
+two_to_the_degree_m1 = 2**degree - 1
+
+def galoistpl(a):
+    # 2 is not a primitive root, so we have to use 3 as our logarithm base
+    if a * 2 < two_to_the_degree:
+        return (a * 2) ^ a
+    else:
+        return (a * 2) ^ a ^ modulus_poly_as_int
+
+# Precomputing a log table for increased speed of multiplication and division
+glogtable = [0] * (two_to_the_degree)
+gexptable = []
+v = 1
+for i in range(two_to_the_degree_m1):
+    glogtable[v] = i
+    gexptable.append(v)
+    v = galoistpl(v)
+
+# Triple the exp table so lookups of summed logs need no explicit modulo
+gexptable += gexptable + gexptable
+
+# Add two values in the Galois field
+def galois_add(x, y):
+    return x ^ y
+
+# In binary fields, addition and subtraction are the same thing
+galois_sub = galois_add
+
+# Multiply two values in the Galois field
+def galois_mul(x, y):
+    return 0 if x*y == 0 else gexptable[glogtable[x] + glogtable[y]]
+
+# Divide two values in the Galois field
+def galois_div(x, y):
+    return 0 if x == 0 else gexptable[(glogtable[x] - glogtable[y]) % two_to_the_degree_m1]
+
+# Evaluate a polynomial at a point
+def eval_poly_at(p, x):
+    if x == 0:
+        return p[0]
+    y = 0
+    logx = glogtable[x]
+    for i, p_coeff in enumerate(p):
+        if p_coeff:
+            # Add x**i * coeff
+            y ^= gexptable[(logx * i + glogtable[p_coeff]) % two_to_the_degree_m1]
+    return y
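+
+# Quick sanity checks (editor's sketch, not part of the original module)
+assert galois_mul(galois_div(12345, 67), 67) == 12345
+assert eval_poly_at([5, 0, 1], 2) == galois_add(5, galois_mul(2, 2))  # 5 + x^2 at x = 2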
+
+# Given d+1 y values and x values with no errors, recovers the original
+# degree-d polynomial.
+# Lagrange interpolation works roughly in the following way.
+# 1. Suppose you have a set of points, eg. x = [1, 2, 3], y = [2, 5, 10]
+# 2. For each x, generate a polynomial which equals its corresponding
+# y coordinate at that point and 0 at all other points provided.
+# 3. Add these polynomials together.
+
+def lagrange_interp(pieces, xs):
+    # Generate master numerator polynomial, eg. (x - x1) * (x - x2) * ... * (x - xn)
+    root = [1]
+    for x in xs:
+        logx = glogtable[x]
+        root.insert(0, 0)
+        for j in range(len(root)-1):
+            if root[j+1] and x:
+                root[j] ^= gexptable[glogtable[root[j+1]] + logx]
+    assert len(root) == len(pieces) + 1
+    # Generate per-value numerator polynomials, eg. for x=x2,
+    # (x - x1) * (x - x3) * ... * (x - xn), by dividing the master
+    # polynomial back by each x coordinate
+    nums = []
+    for x in xs:
+        output = [0] * (len(root) - 2) + [1]
+        logx = glogtable[x]
+        for j in range(len(root) - 2, 0, -1):
+            if output[j] and x:
+                output[j-1] = root[j] ^ gexptable[glogtable[output[j]] + logx]
+            else:
+                output[j-1] = root[j]
+        assert len(output) == len(pieces)
+        nums.append(output)
+    # Generate denominators by evaluating numerator polys at each x
+    denoms = [eval_poly_at(nums[i], xs[i]) for i in range(len(xs))]
+    # Generate output polynomial, which is the sum of the per-value numerator
+    # polynomials rescaled to have the right y values
+    b = [0 for p in pieces]
+    for i in range(len(xs)):
+        log_yslice = glogtable[pieces[i]] - glogtable[denoms[i]] + two_to_the_degree_m1
+        for j in range(len(pieces)):
+            if nums[i][j] and pieces[i]:
+                b[j] ^= gexptable[glogtable[nums[i][j]] + log_yslice]
+    return b
+
+
+a = 124
+b = 8932
+c = 12415
+
+# Check that multiplication distributes over addition in the field
+assert galois_mul(galois_add(a, b), c) == galois_add(galois_mul(a, c), galois_mul(b, c))
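+
+# Round-trip check (editor's sketch, not part of the original module): the
+# interpolated polynomial must reproduce the supplied y values at the
+# supplied x coordinates.
+_xs = [1, 2, 3]
+_ys = [9, 1017, 30000]
+_poly = lagrange_interp(_ys, _xs)
+assert [eval_poly_at(_poly, x) for x in _xs] == _ys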
diff --git a/erasure_code/ec65536/test_ec65536.py b/erasure_code/ec65536/test_ec65536.py
new file mode 100644
index 0000000..0c74729
--- /dev/null
+++ b/erasure_code/ec65536/test_ec65536.py
@@ -0,0 +1,19 @@
+import ec65536
+import rlp
+
+# 12.8 kilobyte test string
+testdata = b'the cow jumped over the moon!!! ' * 400
+
+prover = ec65536.Prover(testdata)
+print("Created prover")
+
+assert ec65536.verify_proof(prover.merkle_root, prover.prove(13), 13)
+
+proofs = [prover.prove(i) for i in range(0, prover.length, 2)]
+print("Created merkle proofs")
+
+print("Starting to attempt fill")
+response = ec65536.fill(prover.merkle_root, prover.length // 2, proofs, list(range(0, prover.length, 2)))
+assert response is not False
+assert b''.join(response)[:len(rlp.encode(testdata))] == rlp.encode(testdata)
+print("Fill successful")
diff --git a/erasure_code/share.py b/erasure_code/share.py
deleted file mode 100644
index 37e154c..0000000
--- a/erasure_code/share.py
+++ /dev/null
@@ -1,695 +0,0 @@
-import copy
-
-
-# Galois field class and logtable
-#
-# See: https://en.wikipedia.org/wiki/Finite_field
-#
-# Note that you can substitute "Galois" with "float" in the code, and
-# the code will then magically start using the plain old field of rationals
-# instead of this spooky modulo polynomial thing. If you are not an expert in
-# finite field theory and want to dig deep into how this code works, I
-# recommend adding the line "Galois = float" immediately after this class (and
-# not using the methods that require serialization)
-#
-# As a quick intro to finite field theory, the idea is that there exist these
-# things called fields, which are basically sets of objects together with
-# rules for addition, subtraction, multiplication, division, such that algebra
-# within this field is consistent, even if the results look nonsensical from
-# a "normal numbers" perspective. For instance, consider the field of integers
-# modulo 7. Here, for example, 2 * 5 = 3, 3 * 4 = 5, 6 * 6 = 1, 6 + 6 = 5.
-# However, all algebra still works; for example, (a^2 - b^2) = (a + b)(a - b)
-# works for all a,b. For this reason, we can do secret sharing arithmetic
-# "over" any field. The reason why Galois fields are preferable is that all
-# elements in the Galois field are values in [0 ... 255] (at least using the
-# canonical serialization that we use here); no amount of addition,
-# multiplication, subtraction or division will ever get you anything else.
-# This guarantees that our secret shares will always be serializable as byte
-# arrays. The way the Galois field we use here works is that the elements are
-# polynomials of elements in the field of integers mod 2, so addition and
-# subtraction are xor, and multiplication is modulo x^8 + x^4 + x^3 + x + 1,
-# and division is defined by a/b = c iff bc = a and b != 0. In practice, we
-# do multiplication and division via a precomputed log table using x+1 as a
-# base
-
-# per-byte 2^8 Galois field
-# Note that this imposes a hard limit that the number of extended chunks can
-# be at most 256 along each dimension
-
-
-def galoistpl(a):
-    # 2 is not a primitive root, so we have to use 3 as our logarithm base
-    unrolla = [a/(2**k) % 2 for k in range(8)]
-    res = [0] + unrolla
-    for i in range(8):
-        res[i] = (res[i] + unrolla[i]) % 2
-    if res[-1] == 0:
-        res.pop()
-    else:
-        # AES Polynomial
-        for i in range(9):
-            res[i] = (res[i] - [1, 1, 0, 1, 1, 0, 0, 0, 1][i]) % 2
-        res.pop()
-    return sum([res[k] * 2**k for k in range(8)])
-
-# Precomputing a multiplication and XOR table for increased speed
-glogtable = [0] * 256
-gexptable = []
-v = 1
-for i in range(255):
-    glogtable[v] = i
-    gexptable.append(v)
-    v = galoistpl(v)
-
-
-class Galois:
-    val = 0
-
-    def __init__(self, val):
-        self.val = val.val if isinstance(self.val, Galois) else val
-
-    def __add__(self, other):
-        return Galois(self.val ^ other.val)
-
-    def __mul__(self, other):
-        if self.val == 0 or other.val == 0:
-            return Galois(0)
-        return Galois(gexptable[(glogtable[self.val] +
-                                 glogtable[other.val]) % 255])
-
-    def __sub__(self, other):
-        return Galois(self.val ^ other.val)
-
-    def __div__(self, other):
-        if other.val == 0:
-            raise ZeroDivisionError
-        if self.val == 0:
-            return Galois(0)
-        return Galois(gexptable[(glogtable[self.val] -
-                                 glogtable[other.val]) % 255])
-
-    def __int__(self):
-        return self.val
-
-    def __repr__(self):
-        return repr(self.val)
-
-
-# Modular division class
-
-def mkModuloClass(n):
-
-    if pow(2, n, n) != 2:
-        raise Exception("n must be prime!")
-
-    class Mod:
-        val = 0
-
-        def __init__(self, val):
-            self.val = val.val if isinstance(
-                self.val, self.__class__) else val
-
-        def __add__(self, other):
-            return self.__class__((self.val + other.val) % n)
-
-        def __mul__(self, other):
-            return self.__class__((self.val * other.val) % n)
-
-        def __sub__(self, other):
-            return self.__class__((self.val - other.val) % n)
-
-        def __div__(self, other):
-            return self.__class__((self.val * other.val ** (n-2)) % n)
-
-        def __int__(self):
-            return self.val
-
-        def __repr__(self):
-            return repr(self.val)
-    return Mod
-
-# Evaluates a polynomial in little-endian form, eg. x^2 + 3x + 2 = [2, 3, 1]
-# (normally I hate little-endian, but in this case dealing with polynomials
-# it's justified, since you get the nice property that p[n] is the nth degree
-# term of p) at coordinate x, eg. eval_poly_at([2, 3, 1], 5) = 42 if you are
-# using float as your arithmetic
-
-
-def eval_poly_at(p, x):
-    arithmetic = p[0].__class__
-    y = arithmetic(0)
-    x_to_the_i = arithmetic(1)
-    for i in range(len(p)):
-        y += x_to_the_i * p[i]
-        x_to_the_i *= x
-    return y
-
-
-# Given p+1 y values and x values with no errors, recovers the original
-# p+1 degree polynomial. For example,
-# lagrange_interp([51.0, 59.0, 66.0], [1, 3, 4]) = [50.0, 0, 1.0]
-# if you are using float as your arithmetic
-
-
-def lagrange_interp(pieces, xs):
-    arithmetic = pieces[0].__class__
-    zero, one = arithmetic(0), arithmetic(1)
-    # Generate master numerator polynomial
-    root = [one]
-    for i in range(len(xs)):
-        root.insert(0, zero)
-        for j in range(len(root)-1):
-            root[j] = root[j] - root[j+1] * xs[i]
-    # Generate per-value numerator polynomials by dividing the master
-    # polynomial back by each x coordinate
-    nums = []
-    for i in range(len(xs)):
-        output = []
-        last = one
-        for j in range(2, len(root)+1):
-            output.insert(0, last)
-            if j != len(root):
-                last = root[-j] + last * xs[i]
-        nums.append(output)
-    # Generate denominators by evaluating numerator polys at their x
-    denoms = []
-    for i in range(len(xs)):
-        denom = zero
-        x_to_the_j = one
-        for j in range(len(nums[i])):
-            denom += x_to_the_j * nums[i][j]
-            x_to_the_j *= xs[i]
-        denoms.append(denom)
-    # Generate output polynomial
-    b = [zero for i in range(len(pieces))]
-    for i in range(len(xs)):
-        yslice = pieces[int(i)] / denoms[int(i)]
-        for j in range(len(pieces)):
-            b[j] += nums[i][j] * yslice
-    return b
-
-
-# Compresses two linear equations of length n into one
-# equation of length n-1
-# Format:
-# 3x + 4y = 80 (ie. 3x + 4y - 80 = 0) -> a = [3,4,-80]
-# 5x + 2y = 70 (ie. 5x + 2y - 70 = 0) -> b = [5,2,-70]
-
-
-def elim(a, b):
-    aprime = [x*b[0] for x in a]
-    bprime = [x*a[0] for x in b]
-    c = [aprime[i] - bprime[i] for i in range(1, len(a))]
-    return c
-
-
-# Linear equation solver
-# Format:
-# 3x + 4y = 80, y = 5 (ie. 3x + 4y - 80z = 0, y = 5, z = 1)
-# -> coeffs = [3,4,-80], vals = [5,1]
-
-
-def evaluate(coeffs, vals):
-    arithmetic = coeffs[0].__class__
-    tot = arithmetic(0)
-    for i in range(len(vals)):
-        tot -= coeffs[i+1] * vals[i]
-    if int(coeffs[0]) == 0:
-        raise ZeroDivisionError
-    return tot / coeffs[0]
-
-
-# Linear equation system solver
-# Format:
-# ax + by + c = 0, dx + ey + f = 0
-# -> [[a, b, c], [d, e, f]]
-# eg.
-# [[3.0, 5.0, -13.0], [9.0, 1.0, -11.0]] -> [1.0, 2.0]
-
-
-def sys_solve(eqs):
-    arithmetic = eqs[0][0].__class__
-    one = arithmetic(1)
-    back_eqs = [eqs[0]]
-    while len(eqs) > 1:
-        neweqs = []
-        for i in range(len(eqs)-1):
-            neweqs.append(elim(eqs[i], eqs[i+1]))
-        eqs = neweqs
-        i = 0
-        while i < len(eqs) - 1 and int(eqs[i][0]) == 0:
-            i += 1
-        back_eqs.insert(0, eqs[i])
-    kvals = [one]
-    for i in range(len(back_eqs)):
-        kvals.insert(0, evaluate(back_eqs[i], kvals))
-    return kvals[:-1]
-
-
-def polydiv(Q, E):
-    qpoly = copy.deepcopy(Q)
-    epoly = copy.deepcopy(E)
-    div = []
-    while len(qpoly) >= len(epoly):
-        div.insert(0, qpoly[-1] / epoly[-1])
-        for i in range(2, len(epoly)+1):
-            qpoly[-i] -= div[0] * epoly[-i]
-        qpoly.pop()
-    return div
-
-
-# Given a set of y coordinates and x coordinates, and the degree of the
-# original polynomial, determines the original polynomial even if some of
-# the y coordinates are wrong. If m is the minimal number of pieces (ie.
-# degree + 1), t is the total number of pieces provided, then the algo can
-# handle up to (t-m)/2 errors. See:
-# http://en.wikipedia.org/wiki/Berlekamp%E2%80%93Welch_algorithm#Example
-# (just skip to my example, the rest of the article sucks imo)
-
-
-def berlekamp_welch_attempt(pieces, xs, master_degree):
-    error_locator_degree = (len(pieces) - master_degree - 1) / 2
-    arithmetic = pieces[0].__class__
-    zero, one = arithmetic(0), arithmetic(1)
-    # Set up the equations for y[i]E(x[i]) = Q(x[i])
-    # degree(E) = error_locator_degree
-    # degree(Q) = master_degree + error_locator_degree - 1
-    eqs = []
-    for i in range(2 * error_locator_degree + master_degree + 1):
-        eqs.append([])
-    for i in range(2 * error_locator_degree + master_degree + 1):
-        neg_x_to_the_j = zero - one
-        for j in range(error_locator_degree + master_degree + 1):
-            eqs[i].append(neg_x_to_the_j)
-            neg_x_to_the_j *= xs[i]
-        x_to_the_j = one
-        for j in range(error_locator_degree + 1):
-            eqs[i].append(x_to_the_j * pieces[i])
-            x_to_the_j *= xs[i]
-    # Solve 'em
-    # Assume the top error polynomial term to be one
-    errors = error_locator_degree
-    ones = 1
-    while errors >= 0:
-        try:
-            polys = sys_solve(eqs) + [one] * ones
-            qpoly = polys[:errors + master_degree + 1]
-            epoly = polys[errors + master_degree + 1:]
-            break
-        except ZeroDivisionError:
-            for eq in eqs:
-                eq[-2] += eq[-1]
-                eq.pop()
-            eqs.pop()
-            errors -= 1
-            ones += 1
-    if errors < 0:
-        raise Exception("Not enough data!")
-    # Divide the polynomials
-    qpoly = polys[:error_locator_degree + master_degree + 1]
-    epoly = polys[error_locator_degree + master_degree + 1:]
-    div = []
-    while len(qpoly) >= len(epoly):
-        div.insert(0, qpoly[-1] / epoly[-1])
-        for i in range(2, len(epoly)+1):
-            qpoly[-i] -= div[0] * epoly[-i]
-        qpoly.pop()
-    # Check
-    corrects = 0
-    for i, x in enumerate(xs):
-        if int(eval_poly_at(div, x)) == int(pieces[i]):
-            corrects += 1
-    if corrects < master_degree + errors:
-        raise Exception("Answer doesn't match (too many errors)!")
-    return div
-
-
-# Extends a list of integers in [0 ... 255] (if using Galois arithmetic) by
-# adding n redundant error-correction values
-
-
-def extend(data, n, arithmetic=Galois):
-    data2 = map(arithmetic, data)
-    data3 = data[:]
-    poly = berlekamp_welch_attempt(data2,
-                                   map(arithmetic, range(len(data))),
-                                   len(data) - 1)
-    for i in range(n):
-        data3.append(int(eval_poly_at(poly, arithmetic(len(data) + i))))
-    return data3
-
-
-# Repairs a list of integers in [0 ... 255]. Some integers can be erroneous,
-# and you can put None in place of an integer if you know that a certain
-# value is defective or missing. Uses the Berlekamp-Welch algorithm to
-# do error-correction
-
-
-def repair(data, datasize, arithmetic=Galois):
-    vs, xs = [], []
-    for i, v in enumerate(data):
-        if v is not None:
-            vs.append(arithmetic(v))
-            xs.append(arithmetic(i))
-    poly = berlekamp_welch_attempt(vs, xs, datasize - 1)
-    return [int(eval_poly_at(poly, arithmetic(i))) for i in range(len(data))]
-
-
-# Extends a list of bytearrays
-# eg. extend_chunks([map(ord, 'hello'), map(ord, 'world')], 2)
-# n is the number of redundant error-correction chunks to add
-
-
-def extend_chunks(data, n, arithmetic=Galois):
-    o = []
-    for i in range(len(data[0])):
-        o.append(extend(map(lambda x: x[i], data), n, arithmetic))
-    return map(list, zip(*o))
-
-
-# Repairs a list of bytearrays. Use None in place of a missing array.
-# Individual arrays can contain some missing or erroneous data.
-
-
-def repair_chunks(data, datasize, arithmetic=Galois):
-    first_nonzero = 0
-    while not data[first_nonzero]:
-        first_nonzero += 1
-    for i in range(len(data)):
-        if data[i] is None:
-            data[i] = [None] * len(data[first_nonzero])
-    o = []
-    for i in range(len(data[0])):
-        o.append(repair(map(lambda x: x[i], data), datasize, arithmetic))
-    return map(list, zip(*o))
-
-
-# Extends either a bytearray or a list of bytearrays or a list of lists...
-# Used in the cubify method to expand a cube in all dimensions
-
-
-def deep_extend_chunks(data, n, arithmetic=Galois):
-    if not isinstance(data[0], list):
-        return extend(data, n, arithmetic)
-    else:
-        o = []
-        for i in range(len(data[0])):
-            o.append(
-                deep_extend_chunks(map(lambda x: x[i], data), n, arithmetic))
-        return map(list, zip(*o))
-
-
-# ISO/IEC 7816-4 padding
-
-
-def pad(data, size):
-    data = data[:]
-    data.append(128)
-    while len(data) % size != 0:
-        data.append(0)
-    return data
-
-
-# Removes ISO/IEC 7816-4 padding
-
-
-def unpad(data):
-    data = data[:]
-    while data[-1] != 128:
-        data.pop()
-    data.pop()
-    return data
-
-
-# Splits a bytearray into a given number of chunks with some
-# redundant chunks
-
-
-def split(data, numchunks, redund):
-    chunksize = len(data) / numchunks + 1
-    data = pad(data, chunksize)
-    chunks = []
-    for i in range(0, len(data), chunksize):
-        chunks.append(data[i: i+chunksize])
-    o = extend_chunks(chunks, redund)
-    return o
-
-
-# Recombines chunks into the original bytearray
-
-
-def recombine(chunks, datalength):
-    datasize = datalength / len(chunks[0]) + 1
-    c = repair_chunks(chunks, datasize)
-    return unpad(sum(c[:datasize], []))
-
-
-h = '0123456789abcdef'
-hexfy = lambda x: h[x//16]+h[x % 16]
-unhexfy = lambda x: h.find(x[0]) * 16 + h.find(x[1])
-split2 = lambda x: map(lambda a: ''.join(a), zip(x[::2], x[1::2]))
-
-
-# Canonical serialization. First argument is a bytearray, remaining
-# arguments are strings to prepend
-
-
-def serialize_chunk(*args):
-    chunk = args[0]
-    if not chunk or chunk[0] is None:
-        return None
-    metadata = args[1:]
-    return '-'.join(map(str, metadata) + [''.join(map(hexfy, chunk))])
-
-
-def deserialize_chunk(chunk):
-    data = chunk.split('-')
-    metadata, main = data[:-1], data[-1]
-    return metadata, map(unhexfy, split2(main))
-
-
-# Splits a string into a given number of chunks with some redundant chunks
-
-
-def split_file(f, numchunks=5, redund=5):
-    f = map(ord, f)
-    ec = split(f, numchunks, redund)
-    o = []
-    for i, c in enumerate(ec):
-        o.append(
-            serialize_chunk(c, *[i, numchunks, numchunks + redund, len(f)]))
-    return o
-
-
-def recombine_file(chunks):
-    chunks2 = map(deserialize_chunk, chunks)
-    metadata = map(int, chunks2[0][0])
-    o = [None] * metadata[2]
-    for chunk in chunks2:
-        o[int(chunk[0][0])] = chunk[1]
-    return ''.join(map(chr, recombine(o, metadata[3])))
-
-outersplitn = lambda x, k: map(lambda i: x[i:i+k], range(len(x)))
-
-
-# Splits a bytearray into a hypercube with `dim` dimensions with the original
-# data being in a sub-cube of width `width` and the expanded cube being of
-# width `width+redund`. The cube is self-healing; if any edge in any dimension
-# has missing or erroneous pieces, we can use the Berlekamp-Welch algorithm
-# to fix this
-
-
-def cubify(f, width, dim, redund):
-    chunksize = len(f) / width**dim + 1
-    data = pad(f, width**dim)
-    chunks = []
-    for i in range(0, len(data), chunksize * width):
-        for j in range(width):
-            chunks.append(data[i+j*chunksize: i+j*chunksize+chunksize])
-
-    for i in range(dim):
-        o = []
-        for j in range(0, len(chunks), width):
-            e = chunks[j: j + width]
-            o.append(
-                deep_extend_chunks(e, redund))
-        chunks = o
-
-    return chunks[0]
-
-
-# `pos` is an array of coordinates. Go deep into a nested list
-
-
-def descend(obj, pos):
-    for p in pos:
-        obj = obj[p]
-    return obj
-
-
-# Go deep into a nested list and modify the value
-
-
-def descend_and_set(obj, pos, val):
-    immed = descend(obj, pos[:-1])
-    immed[pos[-1]] = val
-
-
-# Use the Berlekamp-Welch algorithm to try to "heal" a particular missing
-# or damaged coordinate
-
-
-def heal_cube(cube, width, dim, pos, datalen):
-    for d in range(len(pos)):
-        o = []
-        for i in range(len(cube)):
-            o.append(descend(cube, pos[:d] + [i] + pos[d+1:]))
-        try:
-            o = repair_chunks(o, width)
-            for i in range(len(cube)):
-                path = pos[:d] + [i] + pos[d+1:]
-                descend_and_set(cube, path, o[i])
-        except:
-            pass
-
-
-def pack_metadata(meta):
-    return map(str, meta['coords']) + [
-        str(meta['base_width']),
-        str(meta['extended_width']),
-        str(meta['filesize'])
-    ]
-
-
-def unpack_metadata(meta):
-    return {
-        'coords': map(int, meta[:-3]),
-        'base_width': int(meta[-3]),
-        'extended_width': int(meta[-2]),
-        'filesize': int(meta[-1])
-    }
-
-
-# Helper to serialize the contents of a cube of byte arrays
-
-
-def _ser(chunk, meta):
-    if chunk is None or (not isinstance(chunk[0], list) and
-                         chunk[0] is not None):
-        u = serialize_chunk(chunk, *pack_metadata(meta))
-        return u
-    else:
-        o = []
-        for i, c in enumerate(chunk):
-            meta2 = copy.deepcopy(meta)
-            meta2['coords'] += [i]
-            o.append(_ser(c, meta2))
-        return o
-
-
-# Converts a deep list into a shallow list
-
-
-def flatten(chunks):
-    if not isinstance(chunks, list):
-        return [chunks]
-    else:
-        o = []
-        for c in chunks:
-            o.extend(flatten(c))
-        return o
-
-
-# Converts a file into a multidimensional set of chunks with
-# the desired parameters
-
-
-def serialize_cubify(f, width, dim, redund):
-    f = map(ord, f)
-    cube = cubify(f, width, dim, redund)
-    metadata = {
-        'base_width': width,
-        'extended_width': width + redund,
-        'coords': [],
-        'filesize': len(f)
-    }
-    cube_of_serialized_chunks = _ser(cube, metadata)
-    return flatten(cube_of_serialized_chunks)
-
-
-# Converts a set of serialized chunks into a partially filled cube
-
-
-def construct_cube(pieces):
-    pieces = map(deserialize_chunk, pieces)
-    metadata = unpack_metadata(pieces[0][0])
-    dim = len(metadata['coords'])
-    cube = None
-    for i in range(dim):
-        cube = [copy.deepcopy(cube) for i in range(metadata['extended_width'])]
-    for p in pieces:
-        descend_and_set(cube, unpack_metadata(p[0])['coords'], p[1])
-    return cube
-
-
-# Tries to recreate the chunk at a particular coordinate given a set of
-# other chunks
-
-
-def heal_set(pieces, coords):
-    c = construct_cube(pieces)
-    metadata, piecezzz = deserialize_chunk(pieces[0])
-    metadata = unpack_metadata(metadata)
-    heal_cube(c,
-              metadata['base_width'],
-              len(metadata['coords']),
-              coords,
-              metadata['filesize'])
-    metadata2 = copy.deepcopy(metadata)
-    metadata2["coords"] = []
-    return filter(lambda x: x, flatten(_ser(c, metadata2)))
-
-
-def number_to_coords(n, w, dim):
-    c = [0] * dim
-    for i in range(dim):
-        c[i] = n / w**(dim - i - 1)
-        n %= w**(dim - i - 1)
-    return c
-
-
-def full_heal_set(pieces):
-    c = construct_cube(pieces)
-    metadata, piecezzz = deserialize_chunk(pieces[0])
-    metadata = unpack_metadata(metadata)
-    while 1:
-        done = True
-        unfilled = False
-        i = 0
-        while i < metadata['extended_width'] ** len(metadata['coords']):
-            coords = number_to_coords(i,
-                                      metadata['extended_width'],
-                                      len(metadata['coords']))
-            v = descend(c, coords)
-            heal_cube(c,
-                      metadata['base_width'],
-                      len(metadata['coords']),
-                      coords,
-                      metadata['filesize'])
-            v2 = descend(c, coords)
-            if v != v2:
-                done = False
-            if v is None and v2 is None:
-                unfilled = True
-            i += 1
-        if done and not unfilled:
-            break
-        elif done and unfilled:
-            raise Exception("not enough data or too much corrupted data")
-    o = []
-    for i in range(metadata['base_width'] ** len(metadata['coords'])):
-        coords = number_to_coords(i,
-                                  metadata['base_width'],
-                                  len(metadata['coords']))
-        o.extend(descend(c, coords))
-    return ''.join(map(chr, unpad(o)))