diff --git a/.gitignore b/.gitignore index bb68a04..56d45d7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *_test *_bench *_debug +*_tune *.prof *.out *.log diff --git a/Doxyfile b/Doxyfile index 4d6e30a..ea76be2 100644 --- a/Doxyfile +++ b/Doxyfile @@ -875,7 +875,7 @@ EXCLUDE = inc \ tmp \ src/test_util.h \ src/bench_util.h \ - src/debug_util.h \ + src/debug_util.h # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded @@ -891,7 +891,9 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = *_test.c \ + *_bench.c \ + *_tune.c # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the diff --git a/src/Makefile b/src/Makefile index b7492db..3817959 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,7 @@ TESTS = bls12_381_test das_extension_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \ fk20_proofs_test kzg_proofs_test poly_test recover_test utility_test zero_poly_test -BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench +BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench poly_bench +TUNE = poly_mul_tune poly_div_tune LIB_SRC = bls12_381.c c_kzg_util.c das_extension.c fft_common.c fft_fr.c fft_g1.c fk20_proofs.c kzg_proofs.c poly.c recover.c utility.c zero_poly.c LIB_OBJ = $(LIB_SRC:.c=.o) @@ -30,6 +31,12 @@ libckzg.a: $(LIB_OBJ) Makefile clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst ./$@ +# Tuning +%_tune: KZG_CFLAGS += -O +%_tune: %_tune.c bench_util.o test_util.o $(LIB_OBJ) Makefile + clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst + ./$@ + lib: KZG_CFLAGS += -O lib: clean libckzg.a @@ -45,5 +52,6 @@ clean: rm -f libckzg.a rm -f $(TESTS) rm -f $(BENCH) + rm -f $(TUNE) rm -f *_debug rm -f a.out diff --git a/src/c_kzg_util.h b/src/c_kzg_util.h index b49b2a2..d6230cc 100644 --- a/src/c_kzg_util.h +++ b/src/c_kzg_util.h @@ -20,6 +20,10 @@ #include "c_kzg.h" #include "poly.h" +/** @def min_u64 + * + * Safely find the minimum of two uint_64s. + */ #define min_u64(a, b) \ ({ \ uint64_t _a = (a); \ @@ -27,6 +31,17 @@ _a < _b ? _a : _b; \ }) +/** @def max_u64 + * + * Safely find the maximum of two uint_64s. + */ +#define max_u64(a, b) \ + ({ \ + uint64_t _a = (a); \ + uint64_t _b = (b); \ + _a > _b ? _a : _b; \ + }) + C_KZG_RET c_kzg_malloc(void **p, size_t n); C_KZG_RET new_uint64_array(uint64_t **x, size_t n); C_KZG_RET new_fr_array(fr_t **x, size_t n); diff --git a/src/kzg_proofs.c b/src/kzg_proofs.c index a600be8..7216e0b 100644 --- a/src/kzg_proofs.c +++ b/src/kzg_proofs.c @@ -124,7 +124,7 @@ C_KZG_RET compute_proof_multi(g1_t *out, const poly *p, const fr_t *x0, uint64_t divisor.coeffs[n] = fr_one; // Calculate q = p / (x^n - x0^n) - TRY(new_poly_long_div(&q, p, &divisor)); + TRY(new_poly_div(&q, p, &divisor)); TRY(commit_to_poly(out, &q, ks)); diff --git a/src/poly.c b/src/poly.c index 0b1d902..73d0dc4 100644 --- a/src/poly.c +++ b/src/poly.c @@ -15,12 +15,13 @@ */ /** - * @file poly.c + * @file poly.c * * Operations on polynomials defined over the finite field. */ #include "c_kzg_util.h" +#include "utility.h" #include "poly.h" /** @@ -34,6 +35,23 @@ static uint64_t poly_quotient_length(const poly *dividend, const poly *divisor) return dividend->length >= divisor->length ? dividend->length - divisor->length + 1 : 0; } +/** + * Return a copy of a polynomial ensuring that the order is correct. + * + * @param[in] p A pointer to the polynomial to be normalised + * @return The normalised polynomial + */ +static poly poly_norm(const poly *p) { + poly ret = *p; + while (ret.length > 0 && fr_is_zero(&ret.coeffs[ret.length - 1])) { + ret.length--; + } + if (ret.length == 0) { + ret.coeffs = NULL; + } + return ret; +} + /** * Evaluate a polynomial over the finite field at a point. * @@ -67,35 +85,46 @@ void eval_poly(fr_t *out, const poly *p, const fr_t *x) { } /** - * Polynomial division in the finite field. + * Polynomial division in the finite field via long division. * * Returns the polynomial resulting from dividing @p dividend by @p divisor. * - * @remark @p out must be an uninitialised #poly. Space is allocated for it here, which - * must be later reclaimed by calling #free_poly(). + * Should be O(m.n) where m is the length of the dividend, and n the length of the divisor. * - * @param[out] out An uninitialised poly type that will contain the result of the division + * @remark @p out must be sized large enough for the resulting polynomial. + * + * @remark For some ranges of @p dividend and @p divisor, #poly_fast_div is much, much faster. + * + * @param[out] out An appropriately sized poly type that will contain the result of the division * @param[in] dividend The dividend polynomial * @param[in] divisor The divisor polynomial * @retval C_CZK_OK All is well * @retval C_CZK_BADARGS Invalid parameters were supplied * @retval C_CZK_MALLOC Memory allocation failed */ -C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor) { +C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor) { uint64_t a_pos = dividend->length - 1; uint64_t b_pos = divisor->length - 1; uint64_t diff = a_pos - b_pos; - fr_t a[dividend->length]; + fr_t *a; // Dividing by zero is undefined CHECK(divisor->length > 0); - // Initialise the output polynomial - TRY(new_poly(out, poly_quotient_length(dividend, divisor))); + // The divisor's highest coefficient must be non-zero + CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1])); + + // Deal with the size of the output polynomial + uint64_t out_length = poly_quotient_length(dividend, divisor); + CHECK(out->length >= out_length); + out->length = out_length; // If the divisor is larger than the dividend, the result is zero-length - if (out->length == 0) return C_KZG_OK; + if (out_length == 0) { + return C_KZG_OK; + } + TRY(new_fr_array(&a, dividend->length)); for (uint64_t i = 0; i < dividend->length; i++) { a[i] = dividend->coeffs[i]; } @@ -113,9 +142,393 @@ C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor } fr_div(&out->coeffs[0], &a[a_pos], &divisor->coeffs[b_pos]); + free(a); return C_KZG_OK; } +/** + * Calculate the (possibly truncated) product of two polynomials. + * + * The size of the output polynomial determines the number of coefficients returned. + * + * @param[in,out] out The result of the division - its size determines the number of coefficients returned + * @param[in] a The muliplicand polynomial + * @param[in] b The multiplier polynomial + * @retval C_CZK_OK All is well + */ +C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b) { + + uint64_t a_degree = a->length - 1; + uint64_t b_degree = b->length - 1; + + for (uint64_t k = 0; k < out->length; k++) { + out->coeffs[k] = fr_zero; + } + + // Truncate the output to the length of the output polynomial + for (uint64_t i = 0; i <= a_degree; i++) { + for (uint64_t j = 0; j <= b_degree && i + j < out->length; j++) { + fr_t tmp; + fr_mul(&tmp, &a->coeffs[i], &b->coeffs[j]); + fr_add(&out->coeffs[i + j], &out->coeffs[i + j], &tmp); + } + } + + return C_KZG_OK; +} +/** + * Pad with zeros or truncate an array of field elements to a specific size. + * + * @param[out] out The padded/truncated array + * @param[in] in The original array to be padded + * @param[in] n_in The number of elements of @p in to take + * @param[in] n_out The length of @p out + */ +void pad(fr_t *out, const fr_t *in, uint64_t n_in, uint64_t n_out) { + uint64_t num = min_u64(n_in, n_out); + for (int i = 0; i < num; i++) { + out[i] = in[i]; + } + for (int i = num; i < n_out; i++) { + out[i] = fr_zero; + } +} + +/** + * Calculate the (possibly truncated) product of two polynomials. + * + * The size of the output polynomial determines the number of coefficients returned. + * + * @remark This version uses FFTs to calculate the product via convolution, and is very efficient for large + * calculations. If @p fs is supplied as NULL, then the FFTSettings are allocated internally, otherwise the supplied + * settings are used, which must be sufficiently sized for the calculation. + * + * @param[in,out] out The result of the division - its size determines the number of coefficients returned + * @param[in] a The muliplicand polynomial + * @param[in] b The multiplier polynomial + * @param[in] fs_ Either NULL or a sufficiently sized FFTSettings structure + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + */ +C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs_) { + + // Truncate a and b so as not to do excess work for the number of coefficients required. + uint64_t a_len = min_u64(a->length, out->length); + uint64_t b_len = min_u64(b->length, out->length); + uint64_t length = next_power_of_two(a_len + b_len - 1); + + // If the FFT settings are NULL then make a local set, otherwise use the ones passed in. + FFTSettings fs, *fs_p; + if (fs_ != NULL) { + fs_p = fs_; + } else { + fs_p = &fs; + int scale = log2_pow2(length); // TODO only good up to length < 32 bits + TRY(new_fft_settings(fs_p, scale)); + } + CHECK(length <= fs_p->max_width); + + fr_t *a_pad, *b_pad, *a_fft, *b_fft; + TRY(new_fr_array(&a_pad, length)); + TRY(new_fr_array(&b_pad, length)); + pad(a_pad, a->coeffs, a_len, length); + pad(b_pad, b->coeffs, b_len, length); + + TRY(new_fr_array(&a_fft, length)); + TRY(new_fr_array(&b_fft, length)); + TRY(fft_fr(a_fft, a_pad, false, length, fs_p)); + TRY(fft_fr(b_fft, b_pad, false, length, fs_p)); + + fr_t *ab_fft = a_pad; // reuse the a_pad array + fr_t *ab = b_pad; // reuse the b_pad array + for (uint64_t i = 0; i < length; i++) { + fr_mul(&ab_fft[i], &a_fft[i], &b_fft[i]); + } + TRY(fft_fr(ab, ab_fft, true, length, fs_p)); + + // Copy result to output + uint64_t data_len = min_u64(out->length, length); + for (uint64_t i = 0; i < data_len; i++) { + out->coeffs[i] = ab[i]; + } + for (uint64_t i = data_len; i < out->length; i++) { + out->coeffs[i] = fr_zero; + } + + free(a_pad); + free(b_pad); + free(a_fft); + free(b_fft); + if (fs_p == &fs) { + free_fft_settings(fs_p); + } + + return C_KZG_OK; +} + +/** + * Calculate terms in the inverse of a polynomial. + * + * Returns terms in the expansion of `1 / b(x)` (aka the Maclaurin series). + * + * The size of @p out determines the number of terms returned. + * + * This is a non-recursive version of the algorithm in https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.4. + * + * @remark The constant term of @p b must be nonzero. + * + * @param[in, out] out A poly whose length determines the number of terms returned + * @param[in] b The polynomial to be inverted + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + */ +C_KZG_RET poly_inverse(poly *out, poly *b) { + poly tmp0, tmp1; + fr_t fr_two; + + CHECK(out->length > 0); + CHECK(b->length > 0); + CHECK(!fr_is_zero(&b->coeffs[0])); + + // If the input polynomial is constant, the remainder of the series is zero + if (b->length == 1) { + fr_inv(&out->coeffs[0], &b->coeffs[0]); + for (uint64_t i = 1; i < out->length; i++) { + out->coeffs[i] = fr_zero; + } + return C_KZG_OK; + } + + uint64_t length = out->length; + uint64_t maxd = length - 1; + uint64_t d = 0; + + // Max space for multiplications is (2 * length - 1) + int scale = log2_pow2(next_power_of_two(2 * length - 1)); + FFTSettings fs; + TRY(new_fft_settings(&fs, scale)); + + // To store intermediate results + TRY(new_poly(&tmp0, length)); + TRY(new_poly(&tmp1, length)); + + // Base case for d == 0 + fr_inv(&out->coeffs[0], &b->coeffs[0]); + out->length = 1; + + uint64_t mask = (uint64_t)1 << log2_u64(maxd); + while (mask) { + + d = 2 * d + ((maxd & mask) != 0); + mask >>= 1; + + // b.c -> tmp0 (we're using out for c) + tmp0.length = min_u64(d + 1, b->length + out->length - 1); + TRY(poly_mul_(&tmp0, b, out, &fs)); + + // 2 - b.c -> tmp0 + for (int i = 0; i < tmp0.length; i++) { + fr_negate(&tmp0.coeffs[i], &tmp0.coeffs[i]); + } + fr_from_uint64(&fr_two, 2); + fr_add(&tmp0.coeffs[0], &tmp0.coeffs[0], &fr_two); + + // c.(2 - b.c) -> tmp1; + tmp1.length = d + 1; + TRY(poly_mul_(&tmp1, out, &tmp0, &fs)); + + // tmp1 -> c + out->length = tmp1.length; + for (uint64_t i = 0; i < out->length; i++) { + out->coeffs[i] = tmp1.coeffs[i]; + } + } + ASSERT(d + 1 == length); + + free_poly(&tmp0); + free_poly(&tmp1); + free_fft_settings(&fs); + + return C_KZG_OK; +} + +/** + * Reverse the order of the coefficients of a polynomial. + * + * Corresponds to returning x^n.p(1/x). + * + * @param[out] out The flipped polynomial. Its size must be the same as the size of @p in + * @param[in] in The polynomial to be flipped + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + */ +C_KZG_RET poly_flip(poly *out, const poly *in) { + CHECK(out->length == in->length); + for (uint64_t i = 0; i < in->length; i++) { + out->coeffs[out->length - i - 1] = in->coeffs[i]; + } + return C_KZG_OK; +} + +/** + * Fast polynomial division in the finite field. + * + * Returns the polynomial resulting from dividing @p dividend by @p divisor. + * + * Implements https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.5. + * + * Should be O(m.log(m)) where m is the length of the dividend. + * + * @remark @p out must be sized large enough for the resulting polynomial. + * + * @remark For some ranges of @p dividend and @p divisor, #poly_long_div may be a little faster. + * + * @param[out] out An appropriately sized poly type that will contain the result of the division + * @param[in] dividend The dividend polynomial + * @param[in] divisor The divisor polynomial + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + */ +C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor) { + + // Dividing by zero is undefined + CHECK(divisor->length > 0); + + // The divisor's highest coefficient must be non-zero + CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1])); + + uint64_t m = dividend->length - 1; + uint64_t n = divisor->length - 1; + + // If the divisor is larger than the dividend, the result is zero-length + if (n > m) { + out->length = 0; + return C_KZG_OK; + } + + // Ensure the output poly has enough space allocated + CHECK(out->length >= m - n + 1); + + // Ensure that the divisor is well-formed for the inverse operation + CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1])); + + // Special case for divisor.length == 1 (it's a constant) + if (divisor->length == 1) { + out->length = dividend->length; + for (uint64_t i = 0; i < out->length; i++) { + fr_div(&out->coeffs[i], ÷nd->coeffs[i], &divisor->coeffs[0]); + } + return C_KZG_OK; + } + + poly a_flip, b_flip; + TRY(new_poly(&a_flip, dividend->length)); + TRY(new_poly(&b_flip, divisor->length)); + TRY(poly_flip(&a_flip, dividend)); + TRY(poly_flip(&b_flip, divisor)); + + poly inv_b_flip; + TRY(new_poly(&inv_b_flip, m - n + 1)); + TRY(poly_inverse(&inv_b_flip, &b_flip)); + + poly q_flip; + // We need only m - n + 1 coefficients of q_flip + TRY(new_poly(&q_flip, m - n + 1)); + TRY(poly_mul(&q_flip, &a_flip, &inv_b_flip)); + + out->length = m - n + 1; + TRY(poly_flip(out, &q_flip)); + + free_poly(&a_flip); + free_poly(&b_flip); + free_poly(&inv_b_flip); + free_poly(&q_flip); + + return C_KZG_OK; +} + +/** + * Calculate the (possibly truncated) product of two polynomials. + * + * This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the + * problem. + * + * @param[in,out] out The result of the division - its size determines the number of coefficients returned + * @param[in] a The muliplicand polynomial + * @param[in] b The multiplier polynomial + * @param[in] fs Either NULL or a sufficiently sized FFTSettings structure + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + * + * @todo Implement routine selection + */ +C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs) { + if (a->length < 64 || b->length < 64 || out->length < 128) { // Tunable parameter + return poly_mul_direct(out, a, b); + } else { + return poly_mul_fft(out, a, b, fs); + } +} + +/** + * Calculate the (possibly truncated) product of two polynomials. + * + * This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the + * problem. + * + * @param[in,out] out The result of the division - its size determines the number of coefficients returned + * @param[in] a The muliplicand polynomial + * @param[in] b The multiplier polynomial + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + * + * @todo Implement routine selection + */ +C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b) { + return poly_mul_(out, a, b, NULL); +} + +/** + * Polynomial division in the finite field. + * + * Returns the polynomial resulting from dividing @p dividend_ by @p divisor_. + * + * This is a wrapper around #poly_long_div and #poly_fast_div that chooses the fastest based on problem size. + * + * @remark @p out must be an uninitialised #poly. Space is allocated for it here, which + * must be later reclaimed by calling #free_poly(). + * + * @param[out] out An uninitialised poly type that will contain the result of the division + * @param[in] dividend_ The dividend polynomial + * @param[in] divisor_ The divisor polynomial + * @retval C_CZK_OK All is well + * @retval C_CZK_BADARGS Invalid parameters were supplied + * @retval C_CZK_ERROR An internal error occurred + * @retval C_CZK_MALLOC Memory allocation failed + */ +C_KZG_RET new_poly_div(poly *out, const poly *dividend_, const poly *divisor_) { + + poly dividend = poly_norm(dividend_); + poly divisor = poly_norm(divisor_); + + TRY(new_poly(out, poly_quotient_length(÷nd, &divisor))); + if (divisor.length >= dividend.length || divisor.length < 128) { // Tunable paramter + return poly_long_div(out, ÷nd, &divisor); + } else { + return poly_fast_div(out, ÷nd, &divisor); + } +} + /** * Initialise an empty polynomial of the given size. * diff --git a/src/poly.h b/src/poly.h index 352fbae..6d8b26a 100644 --- a/src/poly.h +++ b/src/poly.h @@ -20,6 +20,7 @@ #define POLY_H #include "c_kzg.h" +#include "fft_fr.h" /** * Defines a polynomial whose coefficients are members of the finite field F_r. @@ -32,7 +33,14 @@ typedef struct { } poly; void eval_poly(fr_t *out, const poly *p, const fr_t *x); -C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor); +C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor); +C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b); +C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs); +C_KZG_RET poly_inverse(poly *out, poly *b); +C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor); +C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b); +C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs); +C_KZG_RET new_poly_div(poly *out, const poly *dividend, const poly *divisor); C_KZG_RET new_poly(poly *out, uint64_t length); C_KZG_RET new_poly_with_coeffs(poly *out, const fr_t *coeffs, uint64_t length); void free_poly(poly *p); diff --git a/src/poly_bench.c b/src/poly_bench.c new file mode 100644 index 0000000..5c2dafd --- /dev/null +++ b/src/poly_bench.c @@ -0,0 +1,97 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // malloc(), free(), atoi() +#include // printf() +#include // assert() +#include // EXIT_SUCCESS/FAILURE +#include "bench_util.h" +#include "test_util.h" +#include "poly.h" + +// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds. +long run_bench(int scale, int max_seconds) { + timespec_t t0, t1; + unsigned long total_time = 0, nits = 0; + + uint64_t width = (uint64_t)1 << scale; + + int dividend_length = width; + int divisor_length = width / 2; // What would be a relevant value of kzg multi-proofs? + + poly dividend, divisor, q; + new_poly(÷nd, dividend_length); + new_poly(&divisor, divisor_length); + + for (int i = 0; i < dividend_length; i++) { + dividend.coeffs[i] = rand_fr(); + } + for (int i = 0; i < divisor_length; i++) { + divisor.coeffs[i] = rand_fr(); + } + + // Ensure that the polynomials' orders corresponds to their lengths + if (fr_is_zero(÷nd.coeffs[dividend.length - 1])) { + dividend.coeffs[dividend.length - 1] = fr_one; + } + if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) { + divisor.coeffs[divisor.length - 1] = fr_one; + } + + while (total_time < max_seconds * NANO) { + clock_gettime(CLOCK_REALTIME, &t0); + + assert(C_KZG_OK == new_poly_div(&q, ÷nd, &divisor)); + + clock_gettime(CLOCK_REALTIME, &t1); + nits++; + total_time += tdiff(t0, t1); + + free_poly(&q); + } + + free_poly(÷nd); + free_poly(&divisor); + + return total_time / nits; +} + +int main(int argc, char *argv[]) { + int nsec = 0; + + switch (argc) { + case 1: + nsec = NSEC; + break; + case 2: + nsec = atoi(argv[1]); + break; + default: + break; + }; + + if (nsec == 0) { + printf("Usage: %s [test time in seconds > 0]\n", argv[0]); + exit(EXIT_FAILURE); + } + + printf("*** Benchmarking Polynomial Division, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); + for (int scale = 6; scale <= 15; scale++) { + printf("new_poly_div/scale_%d %lu ns/op\n", scale, run_bench(scale, nsec)); + } + + return EXIT_SUCCESS; +} diff --git a/src/poly_div_tune.c b/src/poly_div_tune.c new file mode 100644 index 0000000..14fc27e --- /dev/null +++ b/src/poly_div_tune.c @@ -0,0 +1,119 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // malloc(), free(), atoi() +#include // printf() +#include // assert() +#include // EXIT_SUCCESS/FAILURE +#include "bench_util.h" +#include "test_util.h" +#include "poly.h" + +// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds. +long run_bench(int scale_0, int scale_1, int max_seconds) { + timespec_t t0, t1; + unsigned long total_time = 0, nits = 0; + + uint64_t width_0 = (uint64_t)1 << scale_0; + // uint64_t width_1 = (uint64_t)1 << scale_1; + uint64_t width_1 = width_0 - ((uint64_t)1 << scale_1); + + int dividend_length = width_0; + int divisor_length = width_1; + + poly dividend, divisor, q; + new_poly(÷nd, dividend_length); + new_poly(&divisor, divisor_length); + + for (int i = 0; i < dividend_length; i++) { + dividend.coeffs[i] = rand_fr(); + } + for (int i = 0; i < divisor_length; i++) { + divisor.coeffs[i] = rand_fr(); + } + + // Ensure that the polynomials' orders corresponds to their lengths + if (fr_is_zero(÷nd.coeffs[dividend.length - 1])) { + dividend.coeffs[dividend.length - 1] = fr_one; + } + if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) { + divisor.coeffs[divisor.length - 1] = fr_one; + } + + new_poly(&q, dividend.length - divisor.length + 1); + + while (total_time < max_seconds * NANO) { + clock_gettime(CLOCK_REALTIME, &t0); + +#if 0 + assert(C_KZG_OK == poly_long_div(&q, ÷nd, &divisor)); +#else + assert(C_KZG_OK == poly_fast_div(&q, ÷nd, &divisor)); +#endif + clock_gettime(CLOCK_REALTIME, &t1); + nits++; + total_time += tdiff(t0, t1); + } + + free_poly(÷nd); + free_poly(&divisor); + free_poly(&q); + + return total_time / nits; +} + +int main(int argc, char *argv[]) { + int nsec = 0; + + switch (argc) { + case 1: + nsec = NSEC; + break; + case 2: + nsec = atoi(argv[1]); + break; + default: + break; + }; + + if (nsec == 0) { + printf("Usage: %s [test time in seconds > 0]\n", argv[0]); + exit(EXIT_FAILURE); + } + + int scale_min = 5; + int scale_max = 14; + +#if 0 + printf("*** Benchmarking poly_long_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); +#else + printf("*** Benchmarking poly_fast_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); +#endif + printf(","); + for (int i = /* scale_min */ 0; i <= scale_max; i++) { + printf("%d,", i); + } + printf("\n"); + for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) { + printf("%d,", scale_0); + for (int scale_1 = /* scale_min */ 0; scale_1 < scale_0; scale_1++) { + printf("%lu,", run_bench(scale_0, scale_1, nsec)); + } + printf("\n"); + } + + return EXIT_SUCCESS; +} diff --git a/src/poly_mul_tune.c b/src/poly_mul_tune.c new file mode 100644 index 0000000..ca9725d --- /dev/null +++ b/src/poly_mul_tune.c @@ -0,0 +1,116 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // malloc(), free(), atoi() +#include // printf() +#include // assert() +#include // EXIT_SUCCESS/FAILURE +#include "bench_util.h" +#include "test_util.h" +#include "poly.h" + +// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds. +long run_bench(int scale_0, int scale_1, int max_seconds) { + timespec_t t0, t1; + unsigned long total_time = 0, nits = 0; + + uint64_t width_0 = (uint64_t)1 << scale_0; + uint64_t width_1 = (uint64_t)1 << scale_1; + + poly multiplicand, multiplier, r; + new_poly(&multiplicand, width_0); + new_poly(&multiplier, width_1); + + for (int i = 0; i < width_0; i++) { + multiplicand.coeffs[i] = rand_fr(); + } + for (int i = 0; i < width_1; i++) { + multiplier.coeffs[i] = rand_fr(); + } + + // Ensure that the polynomials' orders corresponds to their lengths + if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) { + multiplicand.coeffs[multiplicand.length - 1] = fr_one; + } + if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) { + multiplier.coeffs[multiplier.length - 1] = fr_one; + } + + new_poly(&r, multiplicand.length + multiplier.length - 1); + + while (total_time < max_seconds * NANO) { + clock_gettime(CLOCK_REALTIME, &t0); + +#if 0 + assert(C_KZG_OK == poly_mul_fft(&r, &multiplicand, &multiplier, NULL)); +#else + assert(C_KZG_OK == poly_mul(&r, &multiplicand, &multiplier)); +#endif + + clock_gettime(CLOCK_REALTIME, &t1); + nits++; + total_time += tdiff(t0, t1); + } + + free_poly(&multiplicand); + free_poly(&multiplier); + free_poly(&r); + + return total_time / nits; +} + +int main(int argc, char *argv[]) { + int nsec = 0; + + switch (argc) { + case 1: + nsec = NSEC; + break; + case 2: + nsec = atoi(argv[1]); + break; + default: + break; + }; + + if (nsec == 0) { + printf("Usage: %s [test time in seconds > 0]\n", argv[0]); + exit(EXIT_FAILURE); + } + + int scale_min = 5; + int scale_max = 12; + +#if 1 + printf("*** Benchmarking poly_mul_fft() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); +#else + printf("*** Benchmarking poly_mul_direct() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); +#endif + printf(","); + for (int i = scale_min; i <= scale_max; i++) { + printf("%d,", i); + } + printf("\n"); + for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) { + printf("%d,", scale_0); + for (int scale_1 = scale_min; scale_1 <= scale_max; scale_1++) { + printf("%lu,", run_bench(scale_0, scale_1, nsec)); + } + printf("\n"); + } + + return EXIT_SUCCESS; +} diff --git a/src/poly_test.c b/src/poly_test.c index d54d84e..91dd1c1 100644 --- a/src/poly_test.c +++ b/src/poly_test.c @@ -18,97 +18,93 @@ #include "test_util.h" #include "poly.h" -void poly_div_0(void) { - fr_t a[3], b[2], expected[2]; - poly dividend, divisor, actual; +// If anyone has a nicer way to initialise these test data, I'd love to hear it. +typedef struct polydata { + int length; + int coeffs[]; +} polydata; - // Calculate (x^2 - 1) / (x + 1) = x - 1 +// (x^2 - 1) / (x + 1) = x - 1 +polydata test_0_0 = {3, {-1, 0, 1}}; +polydata test_0_1 = {2, {1, 1}}; +polydata test_0_2 = {2, {-1, 1}}; - // Dividend - fr_from_uint64(&a[0], 1); - fr_negate(&a[0], &a[0]); - fr_from_uint64(&a[1], 0); - fr_from_uint64(&a[2], 1); - dividend.length = 3; - dividend.coeffs = a; +// (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6 +polydata test_1_0 = {4, {18, 9, -11, 12}}; +polydata test_1_1 = {2, {3, 4}}; +polydata test_1_2 = {3, {6, -5, 3}}; - // Divisor - fr_from_uint64(&b[0], 1); - fr_from_uint64(&b[1], 1); - divisor.length = 2; - divisor.coeffs = b; +// (x + 1) / (x^2 - 1) = nil +polydata test_2_0 = {2, {1, 1}}; +polydata test_2_1 = {3, {-1, 0, 2}}; +polydata test_2_2 = {0, {}}; - // Known result - fr_from_uint64(&expected[0], 1); - fr_negate(&expected[0], &expected[0]); - fr_from_uint64(&expected[1], 1); +// (10x^2 + 20x + 30) / 10 = x^2 + 2x + 3 +polydata test_3_0 = {3, {30, 20, 10}}; +polydata test_3_1 = {1, {10}}; +polydata test_3_2 = {3, {3, 2, 1}}; - TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, ÷nd, &divisor)); - TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0])); - TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1])); +// (x^2 + x) / (x + 1) = x +polydata test_4_0 = {3, {0, 1, 1}}; +polydata test_4_1 = {2, {1, 1}}; +polydata test_4_2 = {2, {0, 1}}; - free_poly(&actual); +// (x^2 + x + 1) / 1 = x^2 + x + 1 +polydata test_5_0 = {3, {1, 1, 1}}; +polydata test_5_1 = {1, {1}}; +polydata test_5_2 = {3, {1, 1, 1}}; + +// (x^2 + x + 1) / (0x + 1) = x^2 + x + 1 +polydata test_6_0 = {3, {1, 1, 1}}; +polydata test_6_1 = {2, {1, 0}}; // The highest coefficient is zero +polydata test_6_2 = {3, {1, 1, 1}}; + +polydata *test[][3] = {{&test_0_0, &test_0_1, &test_0_2}, {&test_1_0, &test_1_1, &test_1_2}, + {&test_2_0, &test_2_1, &test_2_2}, {&test_3_0, &test_3_1, &test_3_2}, + {&test_4_0, &test_4_1, &test_4_2}, {&test_5_0, &test_5_1, &test_5_2}, + {&test_6_0, &test_6_1, &test_6_2}}; + +/* Internal utility function */ +void new_test_poly(poly *p, polydata *data) { + new_poly(p, data->length); + for (int i = 0; i < p->length; i++) { + int coeff = data->coeffs[i]; + if (coeff >= 0) { + fr_from_uint64(&p->coeffs[i], coeff); + } else { + fr_from_uint64(&p->coeffs[i], -coeff); + fr_negate(&p->coeffs[i], &p->coeffs[i]); + } + } } -void poly_div_1(void) { - fr_t a[4], b[2], expected[3]; - poly dividend, divisor, actual; +void poly_test_div(void) { + poly dividend, divisor, expected, actual; + int ntest = sizeof test / sizeof test[0]; - // Calculate (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6 + for (int i = 0; i < ntest; i++) { + new_test_poly(÷nd, test[i][0]); + new_test_poly(&divisor, test[i][1]); + new_test_poly(&expected, test[i][2]); - // Dividend - fr_from_uint64(&a[0], 18); - fr_from_uint64(&a[1], 9); - fr_from_uint64(&a[2], 11); - fr_negate(&a[2], &a[2]); - fr_from_uint64(&a[3], 12); - dividend.length = 4; - dividend.coeffs = a; + if (TEST_CHECK(C_KZG_OK == new_poly_div(&actual, ÷nd, &divisor))) { + if (TEST_CHECK(actual.length == expected.length)) { + for (int j = 0; j < actual.length; j++) { + TEST_CHECK(fr_equal(&actual.coeffs[j], &expected.coeffs[j])); + TEST_MSG("Failed test %d with incorrect value", i); + } + } else { + TEST_MSG("Failed test %d with incorrect length.", i); + } + } else { + TEST_MSG("Failed test %d with bad return value.", i); + } - // Divisor - fr_from_uint64(&b[0], 3); - fr_from_uint64(&b[1], 4); - divisor.length = 2; - divisor.coeffs = b; - - // Known result - fr_from_uint64(&expected[0], 6); - fr_from_uint64(&expected[1], 5); - fr_negate(&expected[1], &expected[1]); - fr_from_uint64(&expected[2], 3); - - TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, ÷nd, &divisor)); - TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0])); - TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1])); - TEST_CHECK(fr_equal(&expected[2], &actual.coeffs[2])); - - free_poly(&actual); -} - -void poly_div_2(void) { - fr_t a[2], b[3]; - poly dividend, divisor, actual; - - // Calculate (x + 1) / (x^2 - 1) = nil - - // Dividend - fr_from_uint64(&a[0], 1); - fr_from_uint64(&a[1], 1); - dividend.length = 2; - dividend.coeffs = a; - - // Divisor - fr_from_uint64(&b[0], 1); - fr_negate(&b[0], &b[0]); - fr_from_uint64(&b[1], 0); - fr_from_uint64(&b[2], 1); - divisor.length = 3; - divisor.coeffs = b; - - TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, ÷nd, &divisor)); - TEST_CHECK(NULL == actual.coeffs); - - free_poly(&actual); + free_poly(÷nd); + free_poly(&divisor); + free_poly(&expected); + free_poly(&actual); + } } void poly_div_by_zero(void) { @@ -126,7 +122,10 @@ void poly_div_by_zero(void) { // Divisor new_poly(&divisor, 0); - TEST_CHECK(C_KZG_BADARGS == new_poly_long_div(&dummy, ÷nd, &divisor)); + TEST_CHECK(C_KZG_BADARGS == new_poly_div(&dummy, ÷nd, &divisor)); + + free_poly(&divisor); + free_poly(&dummy); } void poly_eval_check(void) { @@ -176,14 +175,242 @@ void poly_eval_nil_check(void) { free_poly(&p); } +void poly_mul_direct_test(void) { + + // Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18 + static polydata multiplier_data = {3, {6, -5, 3}}; + static polydata multiplicand_data = {2, {3, 4}}; + static polydata expected_data = {4, {18, 9, -11, 12}}; + + poly multiplicand, multiplier, expected, actual0, actual1; + + new_test_poly(&multiplicand, &multiplicand_data); + new_test_poly(&multiplier, &multiplier_data); + new_test_poly(&expected, &expected_data); + + new_poly(&actual0, 4); + TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual0, &multiplicand, &multiplier)); + TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0])); + TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1])); + TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2])); + TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3])); + + // Check commutativity + new_poly(&actual1, 4); + TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual1, &multiplier, &multiplicand)); + TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0])); + TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1])); + TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2])); + TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3])); + + free_poly(&multiplicand); + free_poly(&multiplier); + free_poly(&expected); + free_poly(&actual0); + free_poly(&actual1); +} + +void poly_mul_fft_test(void) { + + // Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18 + static polydata multiplier_data = {3, {6, -5, 3}}; + static polydata multiplicand_data = {2, {3, 4}}; + static polydata expected_data = {4, {18, 9, -11, 12}}; + + poly multiplicand, multiplier, expected, actual0, actual1; + + new_test_poly(&multiplicand, &multiplicand_data); + new_test_poly(&multiplier, &multiplier_data); + new_test_poly(&expected, &expected_data); + + new_poly(&actual0, 4); + TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual0, &multiplicand, &multiplier, NULL)); + TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0])); + TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1])); + TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2])); + TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3])); + + // Check commutativity + new_poly(&actual1, 4); + TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual1, &multiplier, &multiplicand, NULL)); + TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0])); + TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1])); + TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2])); + TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3])); + + free_poly(&multiplicand); + free_poly(&multiplier); + free_poly(&expected); + free_poly(&actual0); + free_poly(&actual1); +} + +void poly_inverse_simple_0(void) { + + // 1 / (1 - x) = 1 + x + x^2 + ... + + poly p, q; + int d = 16; // The number of terms to take + + new_poly(&p, 2); + p.coeffs[0] = fr_one; + p.coeffs[1] = fr_one; + fr_negate(&p.coeffs[1], &p.coeffs[1]); + + new_poly(&q, d); + TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p)); + + for (int i = 0; i < d; i++) { + TEST_CHECK(fr_is_one(&q.coeffs[i])); + } + + free_poly(&p); + free_poly(&q); +} + +void poly_inverse_simple_1(void) { + + // 1 / (1 + x) = 1 - x + x^2 - ... + + poly p, q; + int d = 16; // The number of terms to take + + new_poly(&p, 2); + p.coeffs[0] = fr_one; + p.coeffs[1] = fr_one; + + new_poly(&q, d); + TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p)); + + for (int i = 0; i < d; i++) { + fr_t tmp = q.coeffs[i]; + if (i & 1) { + fr_negate(&tmp, &tmp); + } + TEST_CHECK(fr_is_one(&tmp)); + } + + free_poly(&p); + free_poly(&q); +} + +void poly_mul_random(void) { + + // Compare the output of poly_mul_direct() and poly_mul_fft() + + poly multiplicand, multiplier; + poly q0, q1; + + for (int k = 0; k < 256; k++) { + + int multiplicand_length = 1 + rand() % 1000; + int multiplier_length = 1 + rand() % 1000; + int out_length = 1 + rand() % 1000; + + new_poly(&multiplicand, multiplicand_length); + new_poly(&multiplier, multiplier_length); + + for (int i = 0; i < multiplicand_length; i++) { + multiplicand.coeffs[i] = rand_fr(); + } + for (int i = 0; i < multiplier_length; i++) { + multiplier.coeffs[i] = rand_fr(); + } + + // Ensure that the polynomials' orders corresponds to their lengths + if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) { + multiplicand.coeffs[multiplicand.length - 1] = fr_one; + } + if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) { + multiplier.coeffs[multiplier.length - 1] = fr_one; + } + + new_poly(&q0, out_length); // Truncate the result + TEST_CHECK(C_KZG_OK == poly_mul_direct(&q0, &multiplicand, &multiplier)); + + new_poly(&q1, out_length); + TEST_CHECK(C_KZG_OK == poly_mul_fft(&q1, &multiplicand, &multiplier, NULL)); + + TEST_CHECK(q1.length == q0.length); + for (int i = 0; i < q0.length; i++) { + if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) { + TEST_MSG("round = %d, i = %d, multiplicand_length = %lu, multiplier_length = %lu, out_length = %lu", k, + i, multiplicand.length, multiplier.length, q0.length); + } + } + + free_poly(&multiplicand); + free_poly(&multiplier); + free_poly(&q0); + free_poly(&q1); + } +} + +void poly_div_random(void) { + + // Compare the output of poly_fast_div() and poly_long_div() + + poly dividend, divisor; + poly q0, q1; + + for (int k = 0; k < 256; k++) { + + int dividend_length = 2 + rand() % 1000; + int divisor_length = 1 + rand() % dividend_length; + + new_poly(÷nd, dividend_length); + new_poly(&divisor, divisor_length); + + for (int i = 0; i < dividend_length; i++) { + dividend.coeffs[i] = rand_fr(); + } + for (int i = 0; i < divisor_length; i++) { + divisor.coeffs[i] = rand_fr(); + } + + // Ensure that the polynomials' orders corresponds to their lengths + if (fr_is_zero(÷nd.coeffs[dividend.length - 1])) { + dividend.coeffs[dividend.length - 1] = fr_one; + } + if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) { + divisor.coeffs[divisor.length - 1] = fr_one; + } + + new_poly(&q0, dividend.length - divisor.length + 1); + TEST_CHECK(C_KZG_OK == poly_long_div(&q0, ÷nd, &divisor)); + + new_poly(&q1, dividend.length - divisor.length + 1); + TEST_CHECK(C_KZG_OK == poly_fast_div(&q1, ÷nd, &divisor)); + + TEST_CHECK(q1.length == q0.length); + for (int i = 0; i < q0.length; i++) { + if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) { + TEST_MSG("round = %d, dividend_length = %lu, divisor_length = %lu, i = %d", k, dividend.length, + divisor.length, i); + } + } + + free_poly(÷nd); + free_poly(&divisor); + free_poly(&q0); + free_poly(&q1); + } +} + TEST_LIST = { {"POLY_TEST", title}, - {"poly_div_0", poly_div_0}, - {"poly_div_1", poly_div_1}, - {"poly_div_2", poly_div_2}, + {"poly_test_div", poly_test_div}, +#ifndef DEBUG {"poly_div_by_zero", poly_div_by_zero}, +#endif {"poly_eval_check", poly_eval_check}, {"poly_eval_0_check", poly_eval_0_check}, {"poly_eval_nil_check", poly_eval_nil_check}, + {"poly_mul_direct_test", poly_mul_direct_test}, + {"poly_mul_fft_test", poly_mul_fft_test}, + {"poly_inverse_simple_0", poly_inverse_simple_0}, + {"poly_inverse_simple_1", poly_inverse_simple_1}, + {"poly_mul_random", poly_mul_random}, + {"poly_div_random", poly_div_random}, {NULL, NULL} /* zero record marks the end of the list */ }; diff --git a/src/utility.c b/src/utility.c index 03c090a..64a5c2a 100644 --- a/src/utility.c +++ b/src/utility.c @@ -58,6 +58,20 @@ int log2_pow2(uint32_t n) { return r; } +/** + * Calculate log base two of a power of two. + * + * In other words, the bit index of the highest one bit. + * + * @param[in] n The 64 bit unsigned integer to take the logarithm of + * @return the log base two of n + */ +int log2_u64(uint64_t n) { + int r = 0; + while (n >>= 1) r++; + return r; +} + /** * Return the next highest power of two. * diff --git a/src/utility.h b/src/utility.h index 5aaaf7e..e5af837 100644 --- a/src/utility.h +++ b/src/utility.h @@ -44,6 +44,7 @@ bool is_power_of_two(uint64_t n); int log2_pow2(uint32_t n); +int log2_u64(uint64_t n); uint64_t next_power_of_two(uint64_t v); uint32_t reverse_bits(uint32_t a); uint32_t reverse_bits_limited(uint32_t n, uint32_t value);