From 8fbea3b3ef0319df014e500ea68a042ab30dcb65 Mon Sep 17 00:00:00 2001 From: Ben Edgington Date: Wed, 3 Mar 2021 11:48:56 +0000 Subject: [PATCH] Handle many missing (step 2) --- src/c_kzg_util.c | 14 ++++++ src/c_kzg_util.h | 2 + src/poly.h | 5 ++ src/zero_poly.c | 114 ++++++++++++++++++------------------------- src/zero_poly.h | 5 +- src/zero_poly_test.c | 59 +++++++++++----------- 6 files changed, 102 insertions(+), 97 deletions(-) diff --git a/src/c_kzg_util.c b/src/c_kzg_util.c index 771f0f4..cfd5a93 100644 --- a/src/c_kzg_util.c +++ b/src/c_kzg_util.c @@ -122,3 +122,17 @@ C_KZG_RET new_g1_array_2(g1_t ***x, size_t n) { C_KZG_RET new_g2_array(g2_t **x, size_t n) { return c_kzg_malloc((void **)x, n * sizeof **x); } + +/** + * Allocate memory for an array of polynomial headers. + * + * @remark Free the space later using `free()`, after freeing the individual polynomials via #free_poly. + * + * @param[out] x Pointer to the allocated space + * @param[in] n The number of polynomial headers to be allocated + * @retval C_KZG_OK All is well + * @retval C_KZG_MALLOC Memory allocation failed + */ +C_KZG_RET new_poly_array(poly **x, size_t n) { + return c_kzg_malloc((void **)x, n * sizeof **x); +} diff --git a/src/c_kzg_util.h b/src/c_kzg_util.h index d057cfb..c2487a1 100644 --- a/src/c_kzg_util.h +++ b/src/c_kzg_util.h @@ -18,6 +18,7 @@ #include <stdlib.h> // free() #include "c_kzg.h" +#include "poly.h" C_KZG_RET c_kzg_malloc(void **p, size_t n); C_KZG_RET new_uint64_array(uint64_t **x, size_t n); @@ -26,3 +27,4 @@ C_KZG_RET new_fr_array_2(fr_t ***x, size_t n); C_KZG_RET new_g1_array(g1_t **x, size_t n); C_KZG_RET new_g1_array_2(g1_t ***x, size_t n); C_KZG_RET new_g2_array(g2_t **x, size_t n); +C_KZG_RET new_poly_array(poly **x, size_t n); \ No newline at end of file diff --git a/src/poly.h b/src/poly.h index 6ed2d92..352fbae 100644 --- a/src/poly.h +++ b/src/poly.h @@ -16,6 +16,9 @@ /** @file poly.h */ +#ifndef POLY_H +#define POLY_H + #include "c_kzg.h" /** @@ -33,3 +36,5 @@ 
C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor C_KZG_RET new_poly(poly *out, uint64_t length); C_KZG_RET new_poly_with_coeffs(poly *out, const fr_t *coeffs, uint64_t length); void free_poly(poly *p); + +#endif // POLY_H diff --git a/src/zero_poly.c b/src/zero_poly.c index e19162b..09ea4e0 100644 --- a/src/zero_poly.c +++ b/src/zero_poly.c @@ -39,6 +39,8 @@ * @param[in] fs The FFT settings previously initialised with #new_fft_settings * @retval C_CZK_OK All is well * @retval C_CZK_BADARGS Invalid parameters were supplied + * + * @todo rework to pass polynomials in and out */ C_KZG_RET do_zero_poly_mul_leaf(fr_t *dst, uint64_t len_dst, const uint64_t *indices, uint64_t len_indices, uint64_t stride, const FFTSettings *fs) { @@ -106,48 +108,38 @@ C_KZG_RET pad_p(fr_t *out, uint64_t out_len, const fr_t *p, uint64_t p_len) { * @retval C_CZK_OK All is well * @retval C_CZK_BADARGS Invalid parameters were supplied * @retval C_CZK_ERROR An internal error occurred - * - * @todo Check if we can make `ps` a proper 2d array rather than an array of pointers to arrays. */ -C_KZG_RET reduce_leaves(fr_t *dst, uint64_t len_dst, fr_t *scratch, uint64_t len_scratch, blst_fr **ps, uint64_t len_ps, - const uint64_t *len_p, const FFTSettings *fs) { - CHECK(is_power_of_two(len_dst)); - CHECK(len_scratch >= 3 * len_dst); - CHECK(len_ps > 0); - // The degree of the output is the sum of the degrees of the input polynomials. 
- // TODO A more relaxed check should be ok: `len_ps * (len_p[0] - 1) < len_dst` (or even sum up the lengths) - // CHECK(len_ps * len_p[0] <= len_dst); - uint64_t total_length = 0; - for (int i = 0; i < len_ps; i++) { - total_length += len_p[i] - 1; +C_KZG_RET reduce_leaves(poly *out, uint64_t len_out, fr_t *scratch, uint64_t len_scratch, const poly *leaves, + uint64_t leaf_count, const FFTSettings *fs) { + CHECK(is_power_of_two(len_out)); + CHECK(len_scratch >= 3 * len_out); + CHECK(leaf_count > 0); + // The degree of the output polynomial is the sum of the degrees of the input polynomials. + uint64_t out_degree = 0; + for (int i = 0; i < leaf_count; i++) { + out_degree += leaves[i].length - 1; } - if (total_length + 1 > len_dst) { - printf("Total length: %lu, len dest: %lu\n", total_length, len_dst); - printf("\n"); - for (int i = 0; i < len_ps; i++) { - printf("Len %d = %lu\n", i, len_p[i]); - } - } - CHECK(total_length + 1 <= len_dst); + CHECK(out_degree + 1 <= len_out); // Split `scratch` up into three equally sized working arrays fr_t *p_padded = scratch; - fr_t *mul_eval_ps = scratch + len_dst; - fr_t *p_eval = scratch + 2 * len_dst; + fr_t *mul_eval_ps = scratch + len_out; + fr_t *p_eval = scratch + 2 * len_out; // Do the last leaf first: it may be shorter than the others and the padding can remain in place for the rest. 
- TRY(pad_p(p_padded, len_dst, ps[len_ps - 1], len_p[len_ps - 1])); - TRY(fft_fr(mul_eval_ps, p_padded, false, len_dst, fs)); + TRY(pad_p(p_padded, len_out, leaves[leaf_count - 1].coeffs, leaves[leaf_count - 1].length)); + TRY(fft_fr(mul_eval_ps, p_padded, false, len_out, fs)); - for (uint64_t i = 0; i < len_ps - 1; i++) { - TRY(pad_p(p_padded, len_p[i], ps[i], len_p[i])); - TRY(fft_fr(p_eval, p_padded, false, len_dst, fs)); - for (uint64_t j = 0; j < len_dst; j++) { + for (uint64_t i = 0; i < leaf_count - 1; i++) { + TRY(pad_p(p_padded, leaves[i].length, leaves[i].coeffs, leaves[i].length)); + TRY(fft_fr(p_eval, p_padded, false, len_out, fs)); + for (uint64_t j = 0; j < len_out; j++) { fr_mul(&mul_eval_ps[j], &mul_eval_ps[j], &p_eval[j]); } } - TRY(fft_fr(dst, mul_eval_ps, true, len_dst, fs)); + TRY(fft_fr(out->coeffs, mul_eval_ps, true, len_out, fs)); + out->length = out_degree + 1; return C_KZG_OK; } @@ -163,7 +155,8 @@ C_KZG_RET reduce_leaves(fr_t *dst, uint64_t len_dst, fr_t *scratch, uint64_t len * * @remark Fails for very high numbers of missing indices. For example, with `fs.max_width = 256` and `length = 256`, * this will fail for len_missing = 253 or more. In this case, `length` (and maybe `fs.max_width`) needs to be doubled. - * But this failure is probably OK for our use case. + * But this failure is probably OK for our use case. TODO: no longer true. But it does fail if the whole domain is + * missing. We know the answer for that case anyway. * * @remark Note that @p zero_poly is used as workspace during calculation. 
* @@ -192,7 +185,7 @@ C_KZG_RET zero_polynomial_via_multiplication(fr_t *zero_eval, fr_t *zero_poly, u } return C_KZG_OK; } - CHECK(len_missing < length); // The output would be larger than length otherwise + CHECK(len_missing < length); // The output would be larger than length otherwise, (TODO describe in docs) CHECK(length <= fs->max_width); CHECK(is_power_of_two(length)); @@ -208,8 +201,8 @@ C_KZG_RET zero_polynomial_via_multiplication(fr_t *zero_eval, fr_t *zero_poly, u TRY(fft_fr(zero_eval, zero_poly, false, length, fs)); *zero_poly_len = len_missing + 1; } else { - // Work space for reducing the leaves - `zero_poly` is large enough due to the above check, so use that. - // fr_t *work = zero_poly; + + // Work space for building and reducing the leaves fr_t *work; TRY(new_fr_array(&work, next_power_of_two(leaf_count * per_leaf_poly))); @@ -217,75 +210,62 @@ C_KZG_RET zero_polynomial_via_multiplication(fr_t *zero_eval, fr_t *zero_poly, u // Just allocate pointers here since we're re-using `work` for the leaf processing // Combining leaves can be done mostly in-place, using a scratchpad. 
- fr_t **leaves, *scratch, *reduced; - uint64_t *leaf_lengths; - TRY(new_fr_array_2(&leaves, leaf_count)); - TRY(new_uint64_array(&leaf_lengths, leaf_count)); + poly *leaves; + TRY(new_poly_array(&leaves, leaf_count)); uint64_t offset = 0, out_offset = 0, max = len_missing; for (int i = 0; i < leaf_count; i++) { uint64_t end = offset + per_leaf; if (end > max) end = max; - leaves[i] = &work[out_offset]; - leaf_lengths[i] = per_leaf_poly; - TRY(do_zero_poly_mul_leaf(leaves[i], per_leaf_poly, &missing_indices[offset], end - offset, domain_stride, - fs)); + leaves[i].coeffs = &work[out_offset]; + leaves[i].length = per_leaf_poly; + TRY(do_zero_poly_mul_leaf(leaves[i].coeffs, per_leaf_poly, &missing_indices[offset], end - offset, + domain_stride, fs)); offset += per_leaf; out_offset += per_leaf_poly; } // Adjust the length of the last leaf // leaf_lengths[leaf_count - 1] = 1 + len_missing % per_leaf; - leaf_lengths[leaf_count - 1] = 1 + len_missing - (leaf_count - 1) * per_leaf; + leaves[leaf_count - 1].length = 1 + len_missing - (leaf_count - 1) * per_leaf; // Now reduce all the leaves to a single poly - int reduction_factor = 4; // must be a power of 2 (why?) + int reduction_factor = 4; // must be a power of 2 (TODO why?) + fr_t *scratch; TRY(new_fr_array(&scratch, n * 3)); while (leaf_count > 1) { uint64_t reduced_count = (leaf_count + reduction_factor - 1) / reduction_factor; - // All the leaves are the same length, except possibly the last leaf, but that's ok. - uint64_t leaf_size = next_power_of_two(leaf_lengths[0]); + uint64_t leaf_size = next_power_of_two(leaves[0].length); for (uint64_t i = 0; i < reduced_count; i++) { uint64_t start = i * reduction_factor; uint64_t end = start + reduction_factor; // E.g. if we *started* with 2 leaves, we won't have more than that since it is already a power // of 2. If we had 3, it would have been rounded up anyway. 
So just pick the end uint64_t out_end = end * leaf_size; - if (out_end > n) { - out_end = n; - } - reduced = work + start * leaf_size; + if (out_end > n) out_end = n; + fr_t *reduced = work + start * leaf_size; uint64_t reduced_len = out_end - start * leaf_size; if (reduced_len > length) reduced_len = length; - if (end > leaf_count) { - end = leaf_count; - } + if (end > leaf_count) end = leaf_count; uint64_t leaves_slice_len = end - start; if (leaves_slice_len > 1) { - TRY(reduce_leaves(reduced, reduced_len, scratch, n * 3, &leaves[start], leaves_slice_len, - &leaf_lengths[start], fs)); - // leaf_lengths[i] = reduced_len; - // } else { - // leaf_lengths[i] = leaf_lengths[start]; + leaves[i].coeffs = reduced; + TRY(reduce_leaves(&leaves[i], reduced_len, scratch, n * 3, &leaves[start], leaves_slice_len, fs)); + } else { + leaves[i].coeffs = reduced; + leaves[i].length = leaves[start].length; } - leaves[i] = reduced; - uint64_t total_length = 0; - for (int j = start; j < end; j++) { - total_length += leaf_lengths[j] - 1; - } - leaf_lengths[i] = total_length + 1; } leaf_count = reduced_count; } - *zero_poly_len = leaf_lengths[0]; + *zero_poly_len = leaves[0].length; for (uint64_t i = 0; i < length; i++) { - zero_poly[i] = i < *zero_poly_len ? leaves[0][i] : fr_zero; + zero_poly[i] = i < *zero_poly_len ? 
leaves[0].coeffs[i] : fr_zero; } TRY(fft_fr(zero_eval, zero_poly, false, length, fs)); free(work); free(leaves); - free(leaf_lengths); free(scratch); } diff --git a/src/zero_poly.h b/src/zero_poly.h index ffc0d85..10bea6b 100644 --- a/src/zero_poly.h +++ b/src/zero_poly.h @@ -22,11 +22,12 @@ #include "c_kzg.h" #include "fft_common.h" +#include "poly.h" C_KZG_RET do_zero_poly_mul_leaf(fr_t *dst, uint64_t len_dst, const uint64_t *indices, uint64_t len_indices, uint64_t stride, const FFTSettings *fs); -C_KZG_RET reduce_leaves(fr_t *dst, uint64_t len_dst, fr_t *scratch, uint64_t len_scratch, blst_fr **ps, uint64_t len_ps, - const uint64_t *len_p, const FFTSettings *fs); +C_KZG_RET reduce_leaves(poly *dst, uint64_t len_dst, fr_t *scratch, uint64_t len_scratch, const poly *leaves, + uint64_t leaf_count, const FFTSettings *fs); C_KZG_RET zero_polynomial_via_multiplication(fr_t *zero_eval, fr_t *zero_poly, uint64_t *zero_poly_len, uint64_t length, const uint64_t *missing_indices, uint64_t len_missing, const FFTSettings *fs); diff --git a/src/zero_poly_test.c b/src/zero_poly_test.c index dada50f..c90e6de 100644 --- a/src/zero_poly_test.c +++ b/src/zero_poly_test.c @@ -18,7 +18,6 @@ #include "c_kzg_util.h" #include "test_util.h" #include "zero_poly.h" -#include "poly.h" #include "fft_fr.h" #include "debug_util.h" @@ -64,22 +63,23 @@ uint64_t expected_poly_u64[16][4] = { void test_reduce_leaves(void) { FFTSettings fs; TEST_CHECK(C_KZG_OK == new_fft_settings(&fs, 4)); - fr_t from_tree_reduction[16], from_direct[9], scratch[48]; + fr_t from_tree_reduction_coeffs[16], from_direct[9], scratch[48]; + poly from_tree_reduction; + from_tree_reduction.coeffs = from_tree_reduction_coeffs; // Via reduce_leaves - fr_t *leaves[4]; + poly leaves[4]; fr_t leaf0[3], leaf1[3], leaf2[3], leaf3[3]; - leaves[0] = leaf0; - leaves[1] = leaf1; - leaves[2] = leaf2; - leaves[3] = leaf3; - uint64_t leaf_lengths[] = {3, 3, 3, 3}; + leaves[0].coeffs = leaf0, leaves[0].length = 3; + leaves[1].coeffs = 
leaf1, leaves[1].length = 3; + leaves[2].coeffs = leaf2, leaves[2].length = 3; + leaves[3].coeffs = leaf3, leaves[3].length = 3; const uint64_t leaf_indices[4][2] = {{1, 3}, {7, 8}, {9, 10}, {12, 13}}; for (int i = 0; i < 4; i++) { - TEST_CHECK(C_KZG_OK == do_zero_poly_mul_leaf(leaves[i], 3, leaf_indices[i], 2, 1, &fs)); + TEST_CHECK(C_KZG_OK == do_zero_poly_mul_leaf(leaves[i].coeffs, 3, leaf_indices[i], 2, 1, &fs)); } - TEST_CHECK(C_KZG_OK == reduce_leaves(from_tree_reduction, 16, scratch, 48, leaves, 4, leaf_lengths, &fs)); + TEST_CHECK(C_KZG_OK == reduce_leaves(&from_tree_reduction, 16, scratch, 48, leaves, 4, &fs)); // Direct uint64_t indices[] = {1, 3, 7, 8, 9, 10, 12, 13}; @@ -87,7 +87,7 @@ void test_reduce_leaves(void) { // Compare for (int i = 0; i < 9; i++) { - TEST_CHECK(fr_equal(&from_tree_reduction[i], &from_direct[i])); + TEST_CHECK(fr_equal(&from_tree_reduction.coeffs[i], &from_direct[i])); } free_fft_settings(&fs); @@ -111,32 +111,32 @@ void reduce_leaves_random(void) { shuffle(missing, point_count); // Build the leaves - fr_t **leaves; + poly *leaves; const int points_per_leaf = 63; uint64_t indices[points_per_leaf]; uint64_t leaf_count = (missing_count + points_per_leaf - 1) / points_per_leaf; - uint64_t *leaf_lengths; - TEST_CHECK(C_KZG_OK == new_uint64_array(&leaf_lengths, leaf_count)); - TEST_CHECK(C_KZG_OK == new_fr_array_2(&leaves, leaf_count)); + TEST_CHECK(C_KZG_OK == new_poly_array(&leaves, leaf_count)); for (uint64_t i = 0; i < leaf_count; i++) { uint64_t start = i * points_per_leaf; uint64_t end = start + points_per_leaf; if (end > missing_count) end = missing_count; uint64_t leaf_size = end - start; - TEST_CHECK(C_KZG_OK == new_fr_array(&leaves[i], leaf_size + 1)); + TEST_CHECK(C_KZG_OK == new_fr_array(&leaves[i].coeffs, leaf_size + 1)); for (int j = 0; j < leaf_size; j++) { indices[j] = missing[i * points_per_leaf + j]; } - leaf_lengths[i] = leaf_size + 1; - TEST_CHECK(C_KZG_OK == do_zero_poly_mul_leaf(leaves[i], leaf_lengths[i], 
indices, leaf_size, 1, &fs)); + leaves[i].length = leaf_size + 1; + TEST_CHECK(C_KZG_OK == + do_zero_poly_mul_leaf(leaves[i].coeffs, leaves[i].length, indices, leaf_size, 1, &fs)); } // From tree reduction - fr_t *from_tree_reduction, *scratch; - TEST_CHECK(C_KZG_OK == new_fr_array(&from_tree_reduction, point_count)); + poly from_tree_reduction; + TEST_CHECK(C_KZG_OK == new_poly(&from_tree_reduction, point_count)); + fr_t *scratch; TEST_CHECK(C_KZG_OK == new_fr_array(&scratch, point_count * 3)); - TEST_CHECK(C_KZG_OK == reduce_leaves(from_tree_reduction, point_count, scratch, point_count * 3, leaves, - leaf_count, leaf_lengths, &fs)); + TEST_CHECK(C_KZG_OK == reduce_leaves(&from_tree_reduction, point_count, scratch, point_count * 3, leaves, + leaf_count, &fs)); // From direct fr_t *from_direct; @@ -145,17 +145,16 @@ void reduce_leaves_random(void) { fs.max_width / point_count, &fs)); for (uint64_t i = 0; i < missing_count + 1; i++) { - TEST_CHECK(fr_equal(&from_tree_reduction[i], &from_direct[i])); + TEST_CHECK(fr_equal(&from_tree_reduction.coeffs[i], &from_direct[i])); } - free(from_tree_reduction); + free_poly(&from_tree_reduction); free(from_direct); free(scratch); for (uint64_t i = 0; i < leaf_count; i++) { - free(leaves[i]); + free_poly(&leaves[i]); } free(leaves); - free(leaf_lengths); free(missing); free_fft_settings(&fs); } @@ -274,6 +273,9 @@ void zero_poly_random(void) { TEST_CHECK(C_KZG_OK == zero_polynomial_via_multiplication(zero_eval, zero_poly, &zero_poly_len, fs.max_width, missing, len_missing, &fs)); + TEST_CHECK(len_missing + 1 == zero_poly_len); + TEST_MSG("ZeroPolyLen: expected %d, got %lu", len_missing + 1, zero_poly_len); + poly p; p.length = zero_poly_len; p.coeffs = zero_poly; @@ -282,9 +284,10 @@ void zero_poly_random(void) { fr_t out; eval_poly(&out, &p, &fs.expanded_roots_of_unity[missing[i]]); ret = TEST_CHECK(fr_is_zero(&out)); + TEST_MSG("Failed for scale = %d, len_missing = %d, zero_poly_len = %lu", scale, len_missing, + 
zero_poly_len); TEST_MSG("Failed for missing[%d] = %lu", i, missing[i]); } - TEST_MSG("Failed for scale = %d, len_missing = %d, zero_poly_len = %lu", scale, len_missing, zero_poly_len); fr_t *zero_eval_fft; TEST_CHECK(C_KZG_OK == new_fr_array(&zero_eval_fft, fs.max_width)); @@ -373,7 +376,7 @@ void zero_poly_252(void) { TEST_CHECK(C_KZG_OK == zero_polynomial_via_multiplication(zero_eval, zero_poly, &zero_poly_len, fs.max_width, missing, len_missing, &fs)); - TEST_CHECK(zero_poly_len == 253); + TEST_CHECK(253 == zero_poly_len); TEST_MSG("ZeroPolyLen: expected %d, got %lu", len_missing + 1, zero_poly_len); poly p;