Implements fast division for long divisors (#7)

2025-02-18 21:16:57 +00:00 · 2021-06-28 12:42:14 +01:00 · 2021-06-28 12:42:14 +01:00 · ba81164330
commit ba81164330
parent 244bfe8740
13 changed files with 1120 additions and 99 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,6 +3,7 @@
 *_test
 *_bench
 *_debug
 *_tune
 *.prof
 *.out
 *.log
--- a/6
+++ b/6
@ -875,7 +875,7 @@ EXCLUDE                = inc \
                         tmp \
                         src/test_util.h \
                         src/bench_util.h \
-                         src/debug_util.h \
+                         src/debug_util.h
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
@ -891,7 +891,9 @@ EXCLUDE_SYMLINKS       = NO
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories for example use the pattern */test/*
-EXCLUDE_PATTERNS       =
+EXCLUDE_PATTERNS       = *_test.c \
                         *_bench.c \
                         *_tune.c
 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 # (namespaces, classes, functions, etc.) that should be excluded from the
--- a/src/Makefile
+++ b/src/Makefile
@ -1,6 +1,7 @@
 TESTS = bls12_381_test das_extension_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
 	fk20_proofs_test kzg_proofs_test poly_test recover_test utility_test zero_poly_test
-BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench
+BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench poly_bench
 TUNE = poly_mul_tune poly_div_tune
 LIB_SRC = bls12_381.c c_kzg_util.c das_extension.c fft_common.c fft_fr.c fft_g1.c fk20_proofs.c kzg_proofs.c poly.c recover.c utility.c zero_poly.c
 LIB_OBJ = $(LIB_SRC:.c=.o)
@ -30,6 +31,12 @@ libckzg.a: $(LIB_OBJ) Makefile
 	clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst
 	./$@
 # Tuning
 %_tune: KZG_CFLAGS += -O
 %_tune: %_tune.c bench_util.o test_util.o $(LIB_OBJ) Makefile
 	clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst
 	./$@
 lib: KZG_CFLAGS += -O
 lib: clean libckzg.a
@ -45,5 +52,6 @@ clean:
 	rm -f libckzg.a
 	rm -f $(TESTS)
 	rm -f $(BENCH)
 	rm -f $(TUNE)
 	rm -f *_debug
 	rm -f a.out
--- a/src/c_kzg_util.h
+++ b/src/c_kzg_util.h
@ -20,6 +20,10 @@
 #include "c_kzg.h"
 #include "poly.h"
 /** @def min_u64
 *
 * Safely find the minimum of two uint_64s.
 */
 #define min_u64(a, b)                                                                                                  \
    ({                                                                                                                 \
        uint64_t _a = (a);                                                                                             \
@ -27,6 +31,17 @@
        _a < _b ? _a : _b;                                                                                             \
    })
 /** @def max_u64
 *
 * Safely find the maximum of two uint_64s.
 */
 #define max_u64(a, b)                                                                                                  \
    ({                                                                                                                 \
        uint64_t _a = (a);                                                                                             \
        uint64_t _b = (b);                                                                                             \
        _a > _b ? _a : _b;                                                                                             \
    })
 C_KZG_RET c_kzg_malloc(void **p, size_t n);
 C_KZG_RET new_uint64_array(uint64_t **x, size_t n);
 C_KZG_RET new_fr_array(fr_t **x, size_t n);
--- a/src/kzg_proofs.c
+++ b/src/kzg_proofs.c
@ -124,7 +124,7 @@ C_KZG_RET compute_proof_multi(g1_t *out, const poly *p, const fr_t *x0, uint64_t
    divisor.coeffs[n] = fr_one;
    // Calculate q = p / (x^n - x0^n)
-    TRY(new_poly_long_div(&q, p, &divisor));
+    TRY(new_poly_div(&q, p, &divisor));
    TRY(commit_to_poly(out, &q, ks));
--- a/src/poly.c
+++ b/src/poly.c
@ -15,12 +15,13 @@
 */
 /**
- *  @file poly.c
+ * @file poly.c
 *
 * Operations on polynomials defined over the finite field.
 */
 #include "c_kzg_util.h"
 #include "utility.h"
 #include "poly.h"
 /**
@ -34,6 +35,23 @@ static uint64_t poly_quotient_length(const poly *dividend, const poly *divisor)
    return dividend->length >= divisor->length ? dividend->length - divisor->length + 1 : 0;
 }
 /**
 * Return a copy of a polynomial ensuring that the order is correct.
 *
 * @param[in] p A pointer to the polynomial to be normalised
 * @return The normalised polynomial
 */
 static poly poly_norm(const poly *p) {
    poly ret = *p;
    while (ret.length > 0 && fr_is_zero(&ret.coeffs[ret.length - 1])) {
        ret.length--;
    }
    if (ret.length == 0) {
        ret.coeffs = NULL;
    }
    return ret;
 }
 /**
 * Evaluate a polynomial over the finite field at a point.
 *
@ -67,35 +85,46 @@ void eval_poly(fr_t *out, const poly *p, const fr_t *x) {
 }
 /**
- * Polynomial division in the finite field.
+ * Polynomial division in the finite field via long division.
 *
 * Returns the polynomial resulting from dividing @p dividend by @p divisor.
 *
- * @remark @p out must be an uninitialised #poly. Space is allocated for it here, which
+ * Should be O(m.n) where m is the length of the dividend, and n the length of the divisor.
 * must be later reclaimed by calling #free_poly().
 *
- * @param[out] out      An uninitialised poly type that will contain the result of the division
+ * @remark @p out must be sized large enough for the resulting polynomial.
 *
 * @remark For some ranges of @p dividend and @p divisor, #poly_fast_div is much, much faster.
 *
 * @param[out] out      An appropriately sized poly type that will contain the result of the division
 * @param[in]  dividend The dividend polynomial
 * @param[in]  divisor  The divisor polynomial
 * @retval C_CZK_OK      All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_MALLOC  Memory allocation failed
 */
-C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor) {
+C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor) {
    uint64_t a_pos = dividend->length - 1;
    uint64_t b_pos = divisor->length - 1;
    uint64_t diff = a_pos - b_pos;
-    fr_t a[dividend->length];
+    fr_t *a;
    // Dividing by zero is undefined
    CHECK(divisor->length > 0);
-    // Initialise the output polynomial
+    // The divisor's highest coefficient must be non-zero
-    TRY(new_poly(out, poly_quotient_length(dividend, divisor)));
+    CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
    // Deal with the size of the output polynomial
    uint64_t out_length = poly_quotient_length(dividend, divisor);
    CHECK(out->length >= out_length);
    out->length = out_length;
    // If the divisor is larger than the dividend, the result is zero-length
-    if (out->length == 0) return C_KZG_OK;
+    if (out_length == 0) {
        return C_KZG_OK;
    }
    TRY(new_fr_array(&a, dividend->length));
    for (uint64_t i = 0; i < dividend->length; i++) {
        a[i] = dividend->coeffs[i];
    }
@ -113,9 +142,393 @@ C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor
    }
    fr_div(&out->coeffs[0], &a[a_pos], &divisor->coeffs[b_pos]);
    free(a);
    return C_KZG_OK;
 }
 /**
 * Calculate the (possibly truncated) product of two polynomials.
 *
 * The size of the output polynomial determines the number of coefficients returned.
 *
 * @param[in,out] out The result of the division - its size determines the number of coefficients returned
 * @param[in]     a   The muliplicand polynomial
 * @param[in]     b   The multiplier polynomial
 * @retval C_CZK_OK   All is well
 */
 C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b) {
    uint64_t a_degree = a->length - 1;
    uint64_t b_degree = b->length - 1;
    for (uint64_t k = 0; k < out->length; k++) {
        out->coeffs[k] = fr_zero;
    }
    // Truncate the output to the length of the output polynomial
    for (uint64_t i = 0; i <= a_degree; i++) {
        for (uint64_t j = 0; j <= b_degree && i + j < out->length; j++) {
            fr_t tmp;
            fr_mul(&tmp, &a->coeffs[i], &b->coeffs[j]);
            fr_add(&out->coeffs[i + j], &out->coeffs[i + j], &tmp);
        }
    }
    return C_KZG_OK;
 }
 /**
 * Pad with zeros or truncate an array of field elements to a specific size.
 *
 * @param[out] out   The padded/truncated array
 * @param[in]  in    The original array to be padded
 * @param[in]  n_in  The number of elements of @p in to take
 * @param[in]  n_out The length of @p out
 */
 void pad(fr_t *out, const fr_t *in, uint64_t n_in, uint64_t n_out) {
    uint64_t num = min_u64(n_in, n_out);
    for (int i = 0; i < num; i++) {
        out[i] = in[i];
    }
    for (int i = num; i < n_out; i++) {
        out[i] = fr_zero;
    }
 }
 /**
 * Calculate the (possibly truncated) product of two polynomials.
 *
 * The size of the output polynomial determines the number of coefficients returned.
 *
 * @remark This version uses FFTs to calculate the product via convolution, and is very efficient for large
 * calculations. If @p fs is supplied as NULL, then the FFTSettings are allocated internally, otherwise the supplied
 * settings are used, which must be sufficiently sized for the calculation.
 *
 * @param[in,out] out The result of the division - its size determines the number of coefficients returned
 * @param[in]     a   The muliplicand polynomial
 * @param[in]     b   The multiplier polynomial
 * @param[in]     fs_ Either NULL or a sufficiently sized FFTSettings structure
 * @retval C_CZK_OK   All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 */
 C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs_) {
    // Truncate a and b so as not to do excess work for the number of coefficients required.
    uint64_t a_len = min_u64(a->length, out->length);
    uint64_t b_len = min_u64(b->length, out->length);
    uint64_t length = next_power_of_two(a_len + b_len - 1);
    // If the FFT settings are NULL then make a local set, otherwise use the ones passed in.
    FFTSettings fs, *fs_p;
    if (fs_ != NULL) {
        fs_p = fs_;
    } else {
        fs_p = &fs;
        int scale = log2_pow2(length); // TODO only good up to length < 32 bits
        TRY(new_fft_settings(fs_p, scale));
    }
    CHECK(length <= fs_p->max_width);
    fr_t *a_pad, *b_pad, *a_fft, *b_fft;
    TRY(new_fr_array(&a_pad, length));
    TRY(new_fr_array(&b_pad, length));
    pad(a_pad, a->coeffs, a_len, length);
    pad(b_pad, b->coeffs, b_len, length);
    TRY(new_fr_array(&a_fft, length));
    TRY(new_fr_array(&b_fft, length));
    TRY(fft_fr(a_fft, a_pad, false, length, fs_p));
    TRY(fft_fr(b_fft, b_pad, false, length, fs_p));
    fr_t *ab_fft = a_pad; // reuse the a_pad array
    fr_t *ab = b_pad;     // reuse the b_pad array
    for (uint64_t i = 0; i < length; i++) {
        fr_mul(&ab_fft[i], &a_fft[i], &b_fft[i]);
    }
    TRY(fft_fr(ab, ab_fft, true, length, fs_p));
    // Copy result to output
    uint64_t data_len = min_u64(out->length, length);
    for (uint64_t i = 0; i < data_len; i++) {
        out->coeffs[i] = ab[i];
    }
    for (uint64_t i = data_len; i < out->length; i++) {
        out->coeffs[i] = fr_zero;
    }
    free(a_pad);
    free(b_pad);
    free(a_fft);
    free(b_fft);
    if (fs_p == &fs) {
        free_fft_settings(fs_p);
    }
    return C_KZG_OK;
 }
 /**
 * Calculate terms in the inverse of a polynomial.
 *
 * Returns terms in the expansion of `1 / b(x)` (aka the Maclaurin series).
 *
 * The size of @p out determines the number of terms returned.
 *
 * This is a non-recursive version of the algorithm in https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.4.
 *
 * @remark The constant term of @p b must be nonzero.
 *
 * @param[in, out] out A poly whose length determines the number of terms returned
 * @param[in]      b   The polynomial to be inverted
 * @retval C_CZK_OK      All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 */
 C_KZG_RET poly_inverse(poly *out, poly *b) {
    poly tmp0, tmp1;
    fr_t fr_two;
    CHECK(out->length > 0);
    CHECK(b->length > 0);
    CHECK(!fr_is_zero(&b->coeffs[0]));
    // If the input polynomial is constant, the remainder of the series is zero
    if (b->length == 1) {
        fr_inv(&out->coeffs[0], &b->coeffs[0]);
        for (uint64_t i = 1; i < out->length; i++) {
            out->coeffs[i] = fr_zero;
        }
        return C_KZG_OK;
    }
    uint64_t length = out->length;
    uint64_t maxd = length - 1;
    uint64_t d = 0;
    // Max space for multiplications is (2 * length - 1)
    int scale = log2_pow2(next_power_of_two(2 * length - 1));
    FFTSettings fs;
    TRY(new_fft_settings(&fs, scale));
    // To store intermediate results
    TRY(new_poly(&tmp0, length));
    TRY(new_poly(&tmp1, length));
    // Base case for d == 0
    fr_inv(&out->coeffs[0], &b->coeffs[0]);
    out->length = 1;
    uint64_t mask = (uint64_t)1 << log2_u64(maxd);
    while (mask) {
        d = 2 * d + ((maxd & mask) != 0);
        mask >>= 1;
        // b.c -> tmp0 (we're using out for c)
        tmp0.length = min_u64(d + 1, b->length + out->length - 1);
        TRY(poly_mul_(&tmp0, b, out, &fs));
        // 2 - b.c -> tmp0
        for (int i = 0; i < tmp0.length; i++) {
            fr_negate(&tmp0.coeffs[i], &tmp0.coeffs[i]);
        }
        fr_from_uint64(&fr_two, 2);
        fr_add(&tmp0.coeffs[0], &tmp0.coeffs[0], &fr_two);
        // c.(2 - b.c) -> tmp1;
        tmp1.length = d + 1;
        TRY(poly_mul_(&tmp1, out, &tmp0, &fs));
        // tmp1 -> c
        out->length = tmp1.length;
        for (uint64_t i = 0; i < out->length; i++) {
            out->coeffs[i] = tmp1.coeffs[i];
        }
    }
    ASSERT(d + 1 == length);
    free_poly(&tmp0);
    free_poly(&tmp1);
    free_fft_settings(&fs);
    return C_KZG_OK;
 }
 /**
 * Reverse the order of the coefficients of a polynomial.
 *
 * Corresponds to returning x^n.p(1/x).
 *
 * @param[out] out The flipped polynomial. Its size must be the same as the size of @p in
 * @param[in]  in  The polynomial to be flipped
 * @retval C_CZK_OK      All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 */
 C_KZG_RET poly_flip(poly *out, const poly *in) {
    CHECK(out->length == in->length);
    for (uint64_t i = 0; i < in->length; i++) {
        out->coeffs[out->length - i - 1] = in->coeffs[i];
    }
    return C_KZG_OK;
 }
 /**
 * Fast polynomial division in the finite field.
 *
 * Returns the polynomial resulting from dividing @p dividend by @p divisor.
 *
 * Implements https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.5.
 *
 * Should be O(m.log(m)) where m is the length of the dividend.
 *
 * @remark @p out must be sized large enough for the resulting polynomial.
 *
 * @remark For some ranges of @p dividend and @p divisor, #poly_long_div may be a little faster.
 *
 * @param[out] out      An appropriately sized poly type that will contain the result of the division
 * @param[in]  dividend The dividend polynomial
 * @param[in]  divisor  The divisor polynomial
 * @retval C_CZK_OK      All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 */
 C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor) {
    // Dividing by zero is undefined
    CHECK(divisor->length > 0);
    // The divisor's highest coefficient must be non-zero
    CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
    uint64_t m = dividend->length - 1;
    uint64_t n = divisor->length - 1;
    // If the divisor is larger than the dividend, the result is zero-length
    if (n > m) {
        out->length = 0;
        return C_KZG_OK;
    }
    // Ensure the output poly has enough space allocated
    CHECK(out->length >= m - n + 1);
    // Ensure that the divisor is well-formed for the inverse operation
    CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
    // Special case for divisor.length == 1 (it's a constant)
    if (divisor->length == 1) {
        out->length = dividend->length;
        for (uint64_t i = 0; i < out->length; i++) {
            fr_div(&out->coeffs[i], &dividend->coeffs[i], &divisor->coeffs[0]);
        }
        return C_KZG_OK;
    }
    poly a_flip, b_flip;
    TRY(new_poly(&a_flip, dividend->length));
    TRY(new_poly(&b_flip, divisor->length));
    TRY(poly_flip(&a_flip, dividend));
    TRY(poly_flip(&b_flip, divisor));
    poly inv_b_flip;
    TRY(new_poly(&inv_b_flip, m - n + 1));
    TRY(poly_inverse(&inv_b_flip, &b_flip));
    poly q_flip;
    // We need only m - n + 1 coefficients of q_flip
    TRY(new_poly(&q_flip, m - n + 1));
    TRY(poly_mul(&q_flip, &a_flip, &inv_b_flip));
    out->length = m - n + 1;
    TRY(poly_flip(out, &q_flip));
    free_poly(&a_flip);
    free_poly(&b_flip);
    free_poly(&inv_b_flip);
    free_poly(&q_flip);
    return C_KZG_OK;
 }
 /**
 * Calculate the (possibly truncated) product of two polynomials.
 *
 * This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the
 * problem.
 *
 * @param[in,out] out The result of the division - its size determines the number of coefficients returned
 * @param[in]     a   The muliplicand polynomial
 * @param[in]     b   The multiplier polynomial
 * @param[in]     fs  Either NULL or a sufficiently sized FFTSettings structure
 * @retval C_CZK_OK   All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 *
 * @todo Implement routine selection
 */
 C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs) {
    if (a->length < 64 || b->length < 64 || out->length < 128) { // Tunable parameter
        return poly_mul_direct(out, a, b);
    } else {
        return poly_mul_fft(out, a, b, fs);
    }
 }
 /**
 * Calculate the (possibly truncated) product of two polynomials.
 *
 * This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the
 * problem.
 *
 * @param[in,out] out The result of the division - its size determines the number of coefficients returned
 * @param[in]     a   The muliplicand polynomial
 * @param[in]     b   The multiplier polynomial
 * @retval C_CZK_OK   All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 *
 * @todo Implement routine selection
 */
 C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b) {
    return poly_mul_(out, a, b, NULL);
 }
 /**
 * Polynomial division in the finite field.
 *
 * Returns the polynomial resulting from dividing @p dividend_ by @p divisor_.
 *
 * This is a wrapper around #poly_long_div and #poly_fast_div that chooses the fastest based on problem size.
 *
 * @remark @p out must be an uninitialised #poly. Space is allocated for it here, which
 * must be later reclaimed by calling #free_poly().
 *
 * @param[out] out       An uninitialised poly type that will contain the result of the division
 * @param[in]  dividend_ The dividend polynomial
 * @param[in]  divisor_  The divisor polynomial
 * @retval C_CZK_OK      All is well
 * @retval C_CZK_BADARGS Invalid parameters were supplied
 * @retval C_CZK_ERROR   An internal error occurred
 * @retval C_CZK_MALLOC  Memory allocation failed
 */
 C_KZG_RET new_poly_div(poly *out, const poly *dividend_, const poly *divisor_) {
    poly dividend = poly_norm(dividend_);
    poly divisor = poly_norm(divisor_);
    TRY(new_poly(out, poly_quotient_length(&dividend, &divisor)));
    if (divisor.length >= dividend.length || divisor.length < 128) { // Tunable paramter
        return poly_long_div(out, &dividend, &divisor);
    } else {
        return poly_fast_div(out, &dividend, &divisor);
    }
 }
 /**
 * Initialise an empty polynomial of the given size.
 *
--- a/src/poly.h
+++ b/src/poly.h
@ -20,6 +20,7 @@
 #define POLY_H
 #include "c_kzg.h"
 #include "fft_fr.h"
 /**
 * Defines a polynomial whose coefficients are members of the finite field F_r.
@ -32,7 +33,14 @@ typedef struct {
 } poly;
 void eval_poly(fr_t *out, const poly *p, const fr_t *x);
-C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor);
+C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor);
 C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b);
 C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs);
 C_KZG_RET poly_inverse(poly *out, poly *b);
 C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor);
 C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b);
 C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs);
 C_KZG_RET new_poly_div(poly *out, const poly *dividend, const poly *divisor);
 C_KZG_RET new_poly(poly *out, uint64_t length);
 C_KZG_RET new_poly_with_coeffs(poly *out, const fr_t *coeffs, uint64_t length);
 void free_poly(poly *p);
--- a/src/poly_bench.c
+++ b/src/poly_bench.c
@ -0,0 +1,97 @@
 /*
 * Copyright 2021 Benjamin Edgington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include <stdlib.h> // malloc(), free(), atoi()
 #include <stdio.h>  // printf()
 #include <assert.h> // assert()
 #include <unistd.h> // EXIT_SUCCESS/FAILURE
 #include "bench_util.h"
 #include "test_util.h"
 #include "poly.h"
 // Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
 long run_bench(int scale, int max_seconds) {
    timespec_t t0, t1;
    unsigned long total_time = 0, nits = 0;
    uint64_t width = (uint64_t)1 << scale;
    int dividend_length = width;
    int divisor_length = width / 2; // What would be a relevant value of kzg multi-proofs?
    poly dividend, divisor, q;
    new_poly(&dividend, dividend_length);
    new_poly(&divisor, divisor_length);
    for (int i = 0; i < dividend_length; i++) {
        dividend.coeffs[i] = rand_fr();
    }
    for (int i = 0; i < divisor_length; i++) {
        divisor.coeffs[i] = rand_fr();
    }
    // Ensure that the polynomials' orders corresponds to their lengths
    if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
        dividend.coeffs[dividend.length - 1] = fr_one;
    }
    if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
        divisor.coeffs[divisor.length - 1] = fr_one;
    }
    while (total_time < max_seconds * NANO) {
        clock_gettime(CLOCK_REALTIME, &t0);
        assert(C_KZG_OK == new_poly_div(&q, &dividend, &divisor));
        clock_gettime(CLOCK_REALTIME, &t1);
        nits++;
        total_time += tdiff(t0, t1);
        free_poly(&q);
    }
    free_poly(&dividend);
    free_poly(&divisor);
    return total_time / nits;
 }
 int main(int argc, char *argv[]) {
    int nsec = 0;
    switch (argc) {
    case 1:
        nsec = NSEC;
        break;
    case 2:
        nsec = atoi(argv[1]);
        break;
    default:
        break;
    };
    if (nsec == 0) {
        printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    printf("*** Benchmarking Polynomial Division, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
    for (int scale = 6; scale <= 15; scale++) {
        printf("new_poly_div/scale_%d %lu ns/op\n", scale, run_bench(scale, nsec));
    }
    return EXIT_SUCCESS;
 }
--- a/src/poly_div_tune.c
+++ b/src/poly_div_tune.c
@ -0,0 +1,119 @@
 /*
 * Copyright 2021 Benjamin Edgington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include <stdlib.h> // malloc(), free(), atoi()
 #include <stdio.h>  // printf()
 #include <assert.h> // assert()
 #include <unistd.h> // EXIT_SUCCESS/FAILURE
 #include "bench_util.h"
 #include "test_util.h"
 #include "poly.h"
 // Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
 long run_bench(int scale_0, int scale_1, int max_seconds) {
    timespec_t t0, t1;
    unsigned long total_time = 0, nits = 0;
    uint64_t width_0 = (uint64_t)1 << scale_0;
    // uint64_t width_1 = (uint64_t)1 << scale_1;
    uint64_t width_1 = width_0 - ((uint64_t)1 << scale_1);
    int dividend_length = width_0;
    int divisor_length = width_1;
    poly dividend, divisor, q;
    new_poly(&dividend, dividend_length);
    new_poly(&divisor, divisor_length);
    for (int i = 0; i < dividend_length; i++) {
        dividend.coeffs[i] = rand_fr();
    }
    for (int i = 0; i < divisor_length; i++) {
        divisor.coeffs[i] = rand_fr();
    }
    // Ensure that the polynomials' orders corresponds to their lengths
    if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
        dividend.coeffs[dividend.length - 1] = fr_one;
    }
    if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
        divisor.coeffs[divisor.length - 1] = fr_one;
    }
    new_poly(&q, dividend.length - divisor.length + 1);
    while (total_time < max_seconds * NANO) {
        clock_gettime(CLOCK_REALTIME, &t0);
 #if 0
        assert(C_KZG_OK == poly_long_div(&q, &dividend, &divisor));
 #else
        assert(C_KZG_OK == poly_fast_div(&q, &dividend, &divisor));
 #endif
        clock_gettime(CLOCK_REALTIME, &t1);
        nits++;
        total_time += tdiff(t0, t1);
    }
    free_poly(&dividend);
    free_poly(&divisor);
    free_poly(&q);
    return total_time / nits;
 }
 int main(int argc, char *argv[]) {
    int nsec = 0;
    switch (argc) {
    case 1:
        nsec = NSEC;
        break;
    case 2:
        nsec = atoi(argv[1]);
        break;
    default:
        break;
    };
    if (nsec == 0) {
        printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    int scale_min = 5;
    int scale_max = 14;
 #if 0
    printf("*** Benchmarking poly_long_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
 #else
    printf("*** Benchmarking poly_fast_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
 #endif
    printf(",");
    for (int i = /* scale_min */ 0; i <= scale_max; i++) {
        printf("%d,", i);
    }
    printf("\n");
    for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) {
        printf("%d,", scale_0);
        for (int scale_1 = /* scale_min */ 0; scale_1 < scale_0; scale_1++) {
            printf("%lu,", run_bench(scale_0, scale_1, nsec));
        }
        printf("\n");
    }
    return EXIT_SUCCESS;
 }
--- a/src/poly_mul_tune.c
+++ b/src/poly_mul_tune.c
@ -0,0 +1,116 @@
 /*
 * Copyright 2021 Benjamin Edgington
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include <stdlib.h> // malloc(), free(), atoi()
 #include <stdio.h>  // printf()
 #include <assert.h> // assert()
 #include <unistd.h> // EXIT_SUCCESS/FAILURE
 #include "bench_util.h"
 #include "test_util.h"
 #include "poly.h"
 // Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
 long run_bench(int scale_0, int scale_1, int max_seconds) {
    timespec_t t0, t1;
    unsigned long total_time = 0, nits = 0;
    uint64_t width_0 = (uint64_t)1 << scale_0;
    uint64_t width_1 = (uint64_t)1 << scale_1;
    poly multiplicand, multiplier, r;
    new_poly(&multiplicand, width_0);
    new_poly(&multiplier, width_1);
    for (int i = 0; i < width_0; i++) {
        multiplicand.coeffs[i] = rand_fr();
    }
    for (int i = 0; i < width_1; i++) {
        multiplier.coeffs[i] = rand_fr();
    }
    // Ensure that the polynomials' orders corresponds to their lengths
    if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) {
        multiplicand.coeffs[multiplicand.length - 1] = fr_one;
    }
    if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) {
        multiplier.coeffs[multiplier.length - 1] = fr_one;
    }
    new_poly(&r, multiplicand.length + multiplier.length - 1);
    while (total_time < max_seconds * NANO) {
        clock_gettime(CLOCK_REALTIME, &t0);
 #if 0
        assert(C_KZG_OK == poly_mul_fft(&r, &multiplicand, &multiplier, NULL));
 #else
        assert(C_KZG_OK == poly_mul(&r, &multiplicand, &multiplier));
 #endif
        clock_gettime(CLOCK_REALTIME, &t1);
        nits++;
        total_time += tdiff(t0, t1);
    }
    free_poly(&multiplicand);
    free_poly(&multiplier);
    free_poly(&r);
    return total_time / nits;
 }
 int main(int argc, char *argv[]) {
    int nsec = 0;
    switch (argc) {
    case 1:
        nsec = NSEC;
        break;
    case 2:
        nsec = atoi(argv[1]);
        break;
    default:
        break;
    };
    if (nsec == 0) {
        printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    int scale_min = 5;
    int scale_max = 12;
 #if 1
    printf("*** Benchmarking poly_mul_fft() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
 #else
    printf("*** Benchmarking poly_mul_direct() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
 #endif
    printf(",");
    for (int i = scale_min; i <= scale_max; i++) {
        printf("%d,", i);
    }
    printf("\n");
    for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) {
        printf("%d,", scale_0);
        for (int scale_1 = scale_min; scale_1 <= scale_max; scale_1++) {
            printf("%lu,", run_bench(scale_0, scale_1, nsec));
        }
        printf("\n");
    }
    return EXIT_SUCCESS;
 }
--- a/src/poly_test.c
+++ b/src/poly_test.c
@ -18,97 +18,93 @@
 #include "test_util.h"
 #include "poly.h"
-void poly_div_0(void) {
+// If anyone has a nicer way to initialise these test data, I'd love to hear it.
-    fr_t a[3], b[2], expected[2];
+typedef struct polydata {
-    poly dividend, divisor, actual;
+    int length;
    int coeffs[];
 } polydata;
-    // Calculate (x^2 - 1) / (x + 1) = x - 1
+// (x^2 - 1) / (x + 1) = x - 1
 polydata test_0_0 = {3, {-1, 0, 1}};
 polydata test_0_1 = {2, {1, 1}};
 polydata test_0_2 = {2, {-1, 1}};
-    // Dividend
+// (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6
-    fr_from_uint64(&a[0], 1);
+polydata test_1_0 = {4, {18, 9, -11, 12}};
-    fr_negate(&a[0], &a[0]);
+polydata test_1_1 = {2, {3, 4}};
-    fr_from_uint64(&a[1], 0);
+polydata test_1_2 = {3, {6, -5, 3}};
    fr_from_uint64(&a[2], 1);
    dividend.length = 3;
    dividend.coeffs = a;
-    // Divisor
+// (x + 1) / (x^2 - 1) = nil
-    fr_from_uint64(&b[0], 1);
+polydata test_2_0 = {2, {1, 1}};
-    fr_from_uint64(&b[1], 1);
+polydata test_2_1 = {3, {-1, 0, 2}};
-    divisor.length = 2;
+polydata test_2_2 = {0, {}};
    divisor.coeffs = b;
-    // Known result
+// (10x^2 + 20x + 30) / 10 = x^2 + 2x + 3
-    fr_from_uint64(&expected[0], 1);
+polydata test_3_0 = {3, {30, 20, 10}};
-    fr_negate(&expected[0], &expected[0]);
+polydata test_3_1 = {1, {10}};
-    fr_from_uint64(&expected[1], 1);
+polydata test_3_2 = {3, {3, 2, 1}};
-    TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
+// (x^2 + x) / (x + 1) = x
-    TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0]));
+polydata test_4_0 = {3, {0, 1, 1}};
-    TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1]));
+polydata test_4_1 = {2, {1, 1}};
 polydata test_4_2 = {2, {0, 1}};
-    free_poly(&actual);
+// (x^2 + x + 1) / 1 = x^2 + x + 1
 polydata test_5_0 = {3, {1, 1, 1}};
 polydata test_5_1 = {1, {1}};
 polydata test_5_2 = {3, {1, 1, 1}};
 // (x^2 + x + 1) / (0x + 1) = x^2 + x + 1
 polydata test_6_0 = {3, {1, 1, 1}};
 polydata test_6_1 = {2, {1, 0}}; // The highest coefficient is zero
 polydata test_6_2 = {3, {1, 1, 1}};
 polydata *test[][3] = {{&test_0_0, &test_0_1, &test_0_2}, {&test_1_0, &test_1_1, &test_1_2},
                       {&test_2_0, &test_2_1, &test_2_2}, {&test_3_0, &test_3_1, &test_3_2},
                       {&test_4_0, &test_4_1, &test_4_2}, {&test_5_0, &test_5_1, &test_5_2},
                       {&test_6_0, &test_6_1, &test_6_2}};
 /* Internal utility function */
 void new_test_poly(poly *p, polydata *data) {
    new_poly(p, data->length);
    for (int i = 0; i < p->length; i++) {
        int coeff = data->coeffs[i];
        if (coeff >= 0) {
            fr_from_uint64(&p->coeffs[i], coeff);
        } else {
            fr_from_uint64(&p->coeffs[i], -coeff);
            fr_negate(&p->coeffs[i], &p->coeffs[i]);
        }
    }
 }
-void poly_div_1(void) {
+void poly_test_div(void) {
-    fr_t a[4], b[2], expected[3];
+    poly dividend, divisor, expected, actual;
-    poly dividend, divisor, actual;
+    int ntest = sizeof test / sizeof test[0];
-    // Calculate (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6
+    for (int i = 0; i < ntest; i++) {
        new_test_poly(&dividend, test[i][0]);
        new_test_poly(&divisor, test[i][1]);
        new_test_poly(&expected, test[i][2]);
-    // Dividend
+        if (TEST_CHECK(C_KZG_OK == new_poly_div(&actual, &dividend, &divisor))) {
-    fr_from_uint64(&a[0], 18);
+            if (TEST_CHECK(actual.length == expected.length)) {
-    fr_from_uint64(&a[1], 9);
+                for (int j = 0; j < actual.length; j++) {
-    fr_from_uint64(&a[2], 11);
+                    TEST_CHECK(fr_equal(&actual.coeffs[j], &expected.coeffs[j]));
-    fr_negate(&a[2], &a[2]);
+                    TEST_MSG("Failed test %d with incorrect value", i);
-    fr_from_uint64(&a[3], 12);
+                }
-    dividend.length = 4;
+            } else {
-    dividend.coeffs = a;
+                TEST_MSG("Failed test %d with incorrect length.", i);
            }
        } else {
            TEST_MSG("Failed test %d with bad return value.", i);
        }
-    // Divisor
+        free_poly(&dividend);
-    fr_from_uint64(&b[0], 3);
+        free_poly(&divisor);
-    fr_from_uint64(&b[1], 4);
+        free_poly(&expected);
-    divisor.length = 2;
+        free_poly(&actual);
-    divisor.coeffs = b;
+    }
    // Known result
    fr_from_uint64(&expected[0], 6);
    fr_from_uint64(&expected[1], 5);
    fr_negate(&expected[1], &expected[1]);
    fr_from_uint64(&expected[2], 3);
    TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
    TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0]));
    TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1]));
    TEST_CHECK(fr_equal(&expected[2], &actual.coeffs[2]));
    free_poly(&actual);
 }
 void poly_div_2(void) {
    fr_t a[2], b[3];
    poly dividend, divisor, actual;
    // Calculate (x + 1) / (x^2 - 1) = nil
    // Dividend
    fr_from_uint64(&a[0], 1);
    fr_from_uint64(&a[1], 1);
    dividend.length = 2;
    dividend.coeffs = a;
    // Divisor
    fr_from_uint64(&b[0], 1);
    fr_negate(&b[0], &b[0]);
    fr_from_uint64(&b[1], 0);
    fr_from_uint64(&b[2], 1);
    divisor.length = 3;
    divisor.coeffs = b;
    TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
    TEST_CHECK(NULL == actual.coeffs);
    free_poly(&actual);
 }
 void poly_div_by_zero(void) {
@ -126,7 +122,10 @@ void poly_div_by_zero(void) {
    // Divisor
    new_poly(&divisor, 0);
-    TEST_CHECK(C_KZG_BADARGS == new_poly_long_div(&dummy, &dividend, &divisor));
+    TEST_CHECK(C_KZG_BADARGS == new_poly_div(&dummy, &dividend, &divisor));
    free_poly(&divisor);
    free_poly(&dummy);
 }
 void poly_eval_check(void) {
@ -176,14 +175,242 @@ void poly_eval_nil_check(void) {
    free_poly(&p);
 }
 void poly_mul_direct_test(void) {
    // Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18
    static polydata multiplier_data = {3, {6, -5, 3}};
    static polydata multiplicand_data = {2, {3, 4}};
    static polydata expected_data = {4, {18, 9, -11, 12}};
    poly multiplicand, multiplier, expected, actual0, actual1;
    new_test_poly(&multiplicand, &multiplicand_data);
    new_test_poly(&multiplier, &multiplier_data);
    new_test_poly(&expected, &expected_data);
    new_poly(&actual0, 4);
    TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual0, &multiplicand, &multiplier));
    TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0]));
    TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1]));
    TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2]));
    TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3]));
    // Check commutativity
    new_poly(&actual1, 4);
    TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual1, &multiplier, &multiplicand));
    TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0]));
    TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1]));
    TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2]));
    TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3]));
    free_poly(&multiplicand);
    free_poly(&multiplier);
    free_poly(&expected);
    free_poly(&actual0);
    free_poly(&actual1);
 }
 void poly_mul_fft_test(void) {
    // Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18
    static polydata multiplier_data = {3, {6, -5, 3}};
    static polydata multiplicand_data = {2, {3, 4}};
    static polydata expected_data = {4, {18, 9, -11, 12}};
    poly multiplicand, multiplier, expected, actual0, actual1;
    new_test_poly(&multiplicand, &multiplicand_data);
    new_test_poly(&multiplier, &multiplier_data);
    new_test_poly(&expected, &expected_data);
    new_poly(&actual0, 4);
    TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual0, &multiplicand, &multiplier, NULL));
    TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0]));
    TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1]));
    TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2]));
    TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3]));
    // Check commutativity
    new_poly(&actual1, 4);
    TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual1, &multiplier, &multiplicand, NULL));
    TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0]));
    TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1]));
    TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2]));
    TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3]));
    free_poly(&multiplicand);
    free_poly(&multiplier);
    free_poly(&expected);
    free_poly(&actual0);
    free_poly(&actual1);
 }
 void poly_inverse_simple_0(void) {
    // 1 / (1 - x) = 1 + x + x^2 + ...
    poly p, q;
    int d = 16; // The number of terms to take
    new_poly(&p, 2);
    p.coeffs[0] = fr_one;
    p.coeffs[1] = fr_one;
    fr_negate(&p.coeffs[1], &p.coeffs[1]);
    new_poly(&q, d);
    TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p));
    for (int i = 0; i < d; i++) {
        TEST_CHECK(fr_is_one(&q.coeffs[i]));
    }
    free_poly(&p);
    free_poly(&q);
 }
 void poly_inverse_simple_1(void) {
    // 1 / (1 + x) = 1 - x + x^2 - ...
    poly p, q;
    int d = 16; // The number of terms to take
    new_poly(&p, 2);
    p.coeffs[0] = fr_one;
    p.coeffs[1] = fr_one;
    new_poly(&q, d);
    TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p));
    for (int i = 0; i < d; i++) {
        fr_t tmp = q.coeffs[i];
        if (i & 1) {
            fr_negate(&tmp, &tmp);
        }
        TEST_CHECK(fr_is_one(&tmp));
    }
    free_poly(&p);
    free_poly(&q);
 }
 void poly_mul_random(void) {
    // Compare the output of poly_mul_direct() and poly_mul_fft()
    poly multiplicand, multiplier;
    poly q0, q1;
    for (int k = 0; k < 256; k++) {
        int multiplicand_length = 1 + rand() % 1000;
        int multiplier_length = 1 + rand() % 1000;
        int out_length = 1 + rand() % 1000;
        new_poly(&multiplicand, multiplicand_length);
        new_poly(&multiplier, multiplier_length);
        for (int i = 0; i < multiplicand_length; i++) {
            multiplicand.coeffs[i] = rand_fr();
        }
        for (int i = 0; i < multiplier_length; i++) {
            multiplier.coeffs[i] = rand_fr();
        }
        // Ensure that the polynomials' orders corresponds to their lengths
        if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) {
            multiplicand.coeffs[multiplicand.length - 1] = fr_one;
        }
        if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) {
            multiplier.coeffs[multiplier.length - 1] = fr_one;
        }
        new_poly(&q0, out_length); // Truncate the result
        TEST_CHECK(C_KZG_OK == poly_mul_direct(&q0, &multiplicand, &multiplier));
        new_poly(&q1, out_length);
        TEST_CHECK(C_KZG_OK == poly_mul_fft(&q1, &multiplicand, &multiplier, NULL));
        TEST_CHECK(q1.length == q0.length);
        for (int i = 0; i < q0.length; i++) {
            if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) {
                TEST_MSG("round = %d, i = %d, multiplicand_length = %lu, multiplier_length = %lu, out_length = %lu", k,
                         i, multiplicand.length, multiplier.length, q0.length);
            }
        }
        free_poly(&multiplicand);
        free_poly(&multiplier);
        free_poly(&q0);
        free_poly(&q1);
    }
 }
 void poly_div_random(void) {
    // Compare the output of poly_fast_div() and poly_long_div()
    poly dividend, divisor;
    poly q0, q1;
    for (int k = 0; k < 256; k++) {
        int dividend_length = 2 + rand() % 1000;
        int divisor_length = 1 + rand() % dividend_length;
        new_poly(&dividend, dividend_length);
        new_poly(&divisor, divisor_length);
        for (int i = 0; i < dividend_length; i++) {
            dividend.coeffs[i] = rand_fr();
        }
        for (int i = 0; i < divisor_length; i++) {
            divisor.coeffs[i] = rand_fr();
        }
        // Ensure that the polynomials' orders corresponds to their lengths
        if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
            dividend.coeffs[dividend.length - 1] = fr_one;
        }
        if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
            divisor.coeffs[divisor.length - 1] = fr_one;
        }
        new_poly(&q0, dividend.length - divisor.length + 1);
        TEST_CHECK(C_KZG_OK == poly_long_div(&q0, &dividend, &divisor));
        new_poly(&q1, dividend.length - divisor.length + 1);
        TEST_CHECK(C_KZG_OK == poly_fast_div(&q1, &dividend, &divisor));
        TEST_CHECK(q1.length == q0.length);
        for (int i = 0; i < q0.length; i++) {
            if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) {
                TEST_MSG("round = %d, dividend_length = %lu, divisor_length = %lu, i = %d", k, dividend.length,
                         divisor.length, i);
            }
        }
        free_poly(&dividend);
        free_poly(&divisor);
        free_poly(&q0);
        free_poly(&q1);
    }
 }
 TEST_LIST = {
    {"POLY_TEST", title},
-    {"poly_div_0", poly_div_0},
+    {"poly_test_div", poly_test_div},
-    {"poly_div_1", poly_div_1},
+#ifndef DEBUG
    {"poly_div_2", poly_div_2},
    {"poly_div_by_zero", poly_div_by_zero},
 #endif
    {"poly_eval_check", poly_eval_check},
    {"poly_eval_0_check", poly_eval_0_check},
    {"poly_eval_nil_check", poly_eval_nil_check},
    {"poly_mul_direct_test", poly_mul_direct_test},
    {"poly_mul_fft_test", poly_mul_fft_test},
    {"poly_inverse_simple_0", poly_inverse_simple_0},
    {"poly_inverse_simple_1", poly_inverse_simple_1},
    {"poly_mul_random", poly_mul_random},
    {"poly_div_random", poly_div_random},
    {NULL, NULL} /* zero record marks the end of the list */
 };
--- a/src/utility.c
+++ b/src/utility.c
@ -58,6 +58,20 @@ int log2_pow2(uint32_t n) {
    return r;
 }
 /**
 * Calculate log base two of a power of two.
 *
 * In other words, the bit index of the highest one bit.
 *
 * @param[in] n The 64 bit unsigned integer to take the logarithm of
 * @return the log base two of n
 */
 int log2_u64(uint64_t n) {
    int r = 0;
    while (n >>= 1) r++;
    return r;
 }
 /**
 * Return the next highest power of two.
 *
--- a/src/utility.h
+++ b/src/utility.h
@ -44,6 +44,7 @@
 bool is_power_of_two(uint64_t n);
 int log2_pow2(uint32_t n);
 int log2_u64(uint64_t n);
 uint64_t next_power_of_two(uint64_t v);
 uint32_t reverse_bits(uint32_t a);
 uint32_t reverse_bits_limited(uint32_t n, uint32_t value);