Implements fast division for long divisors (#7)

This commit is contained in:
Ben Edgington 2021-06-28 12:42:14 +01:00 committed by GitHub
parent 244bfe8740
commit ba81164330
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 1120 additions and 99 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*_test
*_bench
*_debug
*_tune
*.prof
*.out
*.log

View File

@ -875,7 +875,7 @@ EXCLUDE = inc \
tmp \
src/test_util.h \
src/bench_util.h \
src/debug_util.h \
src/debug_util.h
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
@ -891,7 +891,9 @@ EXCLUDE_SYMLINKS = NO
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories for example use the pattern */test/*
EXCLUDE_PATTERNS =
EXCLUDE_PATTERNS = *_test.c \
*_bench.c \
*_tune.c
# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
# (namespaces, classes, functions, etc.) that should be excluded from the

View File

@ -1,6 +1,7 @@
TESTS = bls12_381_test das_extension_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
fk20_proofs_test kzg_proofs_test poly_test recover_test utility_test zero_poly_test
BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench
BENCH = fft_fr_bench fft_g1_bench recover_bench zero_poly_bench kzg_proofs_bench poly_bench
TUNE = poly_mul_tune poly_div_tune
LIB_SRC = bls12_381.c c_kzg_util.c das_extension.c fft_common.c fft_fr.c fft_g1.c fk20_proofs.c kzg_proofs.c poly.c recover.c utility.c zero_poly.c
LIB_OBJ = $(LIB_SRC:.c=.o)
@ -30,6 +31,12 @@ libckzg.a: $(LIB_OBJ) Makefile
clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst
./$@
# Tuning
%_tune: KZG_CFLAGS += -O
%_tune: %_tune.c bench_util.o test_util.o $(LIB_OBJ) Makefile
clang -Wall -I$(INCLUDE_DIRS) $(CFLAGS) $(KZG_CFLAGS) -o $@ $@.c bench_util.o test_util.o $(LIB_OBJ) -L../lib -lblst
./$@
lib: KZG_CFLAGS += -O
lib: clean libckzg.a
@ -45,5 +52,6 @@ clean:
rm -f libckzg.a
rm -f $(TESTS)
rm -f $(BENCH)
rm -f $(TUNE)
rm -f *_debug
rm -f a.out

View File

@ -20,6 +20,10 @@
#include "c_kzg.h"
#include "poly.h"
/** @def min_u64
*
* Safely find the minimum of two uint_64s.
*/
#define min_u64(a, b) \
({ \
uint64_t _a = (a); \
@ -27,6 +31,17 @@
_a < _b ? _a : _b; \
})
/** @def max_u64
*
* Safely find the maximum of two uint_64s.
*/
#define max_u64(a, b) \
({ \
uint64_t _a = (a); \
uint64_t _b = (b); \
_a > _b ? _a : _b; \
})
C_KZG_RET c_kzg_malloc(void **p, size_t n);
C_KZG_RET new_uint64_array(uint64_t **x, size_t n);
C_KZG_RET new_fr_array(fr_t **x, size_t n);

View File

@ -124,7 +124,7 @@ C_KZG_RET compute_proof_multi(g1_t *out, const poly *p, const fr_t *x0, uint64_t
divisor.coeffs[n] = fr_one;
// Calculate q = p / (x^n - x0^n)
TRY(new_poly_long_div(&q, p, &divisor));
TRY(new_poly_div(&q, p, &divisor));
TRY(commit_to_poly(out, &q, ks));

View File

@ -15,12 +15,13 @@
*/
/**
* @file poly.c
* @file poly.c
*
* Operations on polynomials defined over the finite field.
*/
#include "c_kzg_util.h"
#include "utility.h"
#include "poly.h"
/**
@ -34,6 +35,23 @@ static uint64_t poly_quotient_length(const poly *dividend, const poly *divisor)
return dividend->length >= divisor->length ? dividend->length - divisor->length + 1 : 0;
}
/**
* Return a copy of a polynomial ensuring that the order is correct.
*
* @param[in] p A pointer to the polynomial to be normalised
* @return The normalised polynomial
*/
static poly poly_norm(const poly *p) {
poly ret = *p;
while (ret.length > 0 && fr_is_zero(&ret.coeffs[ret.length - 1])) {
ret.length--;
}
if (ret.length == 0) {
ret.coeffs = NULL;
}
return ret;
}
/**
* Evaluate a polynomial over the finite field at a point.
*
@ -67,35 +85,46 @@ void eval_poly(fr_t *out, const poly *p, const fr_t *x) {
}
/**
* Polynomial division in the finite field.
* Polynomial division in the finite field via long division.
*
* Returns the polynomial resulting from dividing @p dividend by @p divisor.
*
* @remark @p out must be an uninitialised #poly. Space is allocated for it here, which
* must be later reclaimed by calling #free_poly().
* Should be O(m.n) where m is the length of the dividend, and n the length of the divisor.
*
* @param[out] out An uninitialised poly type that will contain the result of the division
* @remark @p out must be sized large enough for the resulting polynomial.
*
* @remark For some ranges of @p dividend and @p divisor, #poly_fast_div is much, much faster.
*
* @param[out] out An appropriately sized poly type that will contain the result of the division
* @param[in] dividend The dividend polynomial
* @param[in] divisor The divisor polynomial
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_MALLOC Memory allocation failed
*/
C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor) {
C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor) {
uint64_t a_pos = dividend->length - 1;
uint64_t b_pos = divisor->length - 1;
uint64_t diff = a_pos - b_pos;
fr_t a[dividend->length];
fr_t *a;
// Dividing by zero is undefined
CHECK(divisor->length > 0);
// Initialise the output polynomial
TRY(new_poly(out, poly_quotient_length(dividend, divisor)));
// The divisor's highest coefficient must be non-zero
CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
// Deal with the size of the output polynomial
uint64_t out_length = poly_quotient_length(dividend, divisor);
CHECK(out->length >= out_length);
out->length = out_length;
// If the divisor is larger than the dividend, the result is zero-length
if (out->length == 0) return C_KZG_OK;
if (out_length == 0) {
return C_KZG_OK;
}
TRY(new_fr_array(&a, dividend->length));
for (uint64_t i = 0; i < dividend->length; i++) {
a[i] = dividend->coeffs[i];
}
@ -113,9 +142,393 @@ C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor
}
fr_div(&out->coeffs[0], &a[a_pos], &divisor->coeffs[b_pos]);
free(a);
return C_KZG_OK;
}
/**
* Calculate the (possibly truncated) product of two polynomials.
*
* The size of the output polynomial determines the number of coefficients returned.
*
* @param[in,out] out The result of the division - its size determines the number of coefficients returned
* @param[in] a The muliplicand polynomial
* @param[in] b The multiplier polynomial
* @retval C_CZK_OK All is well
*/
C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b) {
uint64_t a_degree = a->length - 1;
uint64_t b_degree = b->length - 1;
for (uint64_t k = 0; k < out->length; k++) {
out->coeffs[k] = fr_zero;
}
// Truncate the output to the length of the output polynomial
for (uint64_t i = 0; i <= a_degree; i++) {
for (uint64_t j = 0; j <= b_degree && i + j < out->length; j++) {
fr_t tmp;
fr_mul(&tmp, &a->coeffs[i], &b->coeffs[j]);
fr_add(&out->coeffs[i + j], &out->coeffs[i + j], &tmp);
}
}
return C_KZG_OK;
}
/**
* Pad with zeros or truncate an array of field elements to a specific size.
*
* @param[out] out The padded/truncated array
* @param[in] in The original array to be padded
* @param[in] n_in The number of elements of @p in to take
* @param[in] n_out The length of @p out
*/
void pad(fr_t *out, const fr_t *in, uint64_t n_in, uint64_t n_out) {
uint64_t num = min_u64(n_in, n_out);
for (int i = 0; i < num; i++) {
out[i] = in[i];
}
for (int i = num; i < n_out; i++) {
out[i] = fr_zero;
}
}
/**
* Calculate the (possibly truncated) product of two polynomials.
*
* The size of the output polynomial determines the number of coefficients returned.
*
* @remark This version uses FFTs to calculate the product via convolution, and is very efficient for large
* calculations. If @p fs is supplied as NULL, then the FFTSettings are allocated internally, otherwise the supplied
* settings are used, which must be sufficiently sized for the calculation.
*
* @param[in,out] out The result of the division - its size determines the number of coefficients returned
* @param[in] a The muliplicand polynomial
* @param[in] b The multiplier polynomial
* @param[in] fs_ Either NULL or a sufficiently sized FFTSettings structure
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*/
C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs_) {
// Truncate a and b so as not to do excess work for the number of coefficients required.
uint64_t a_len = min_u64(a->length, out->length);
uint64_t b_len = min_u64(b->length, out->length);
uint64_t length = next_power_of_two(a_len + b_len - 1);
// If the FFT settings are NULL then make a local set, otherwise use the ones passed in.
FFTSettings fs, *fs_p;
if (fs_ != NULL) {
fs_p = fs_;
} else {
fs_p = &fs;
int scale = log2_pow2(length); // TODO only good up to length < 32 bits
TRY(new_fft_settings(fs_p, scale));
}
CHECK(length <= fs_p->max_width);
fr_t *a_pad, *b_pad, *a_fft, *b_fft;
TRY(new_fr_array(&a_pad, length));
TRY(new_fr_array(&b_pad, length));
pad(a_pad, a->coeffs, a_len, length);
pad(b_pad, b->coeffs, b_len, length);
TRY(new_fr_array(&a_fft, length));
TRY(new_fr_array(&b_fft, length));
TRY(fft_fr(a_fft, a_pad, false, length, fs_p));
TRY(fft_fr(b_fft, b_pad, false, length, fs_p));
fr_t *ab_fft = a_pad; // reuse the a_pad array
fr_t *ab = b_pad; // reuse the b_pad array
for (uint64_t i = 0; i < length; i++) {
fr_mul(&ab_fft[i], &a_fft[i], &b_fft[i]);
}
TRY(fft_fr(ab, ab_fft, true, length, fs_p));
// Copy result to output
uint64_t data_len = min_u64(out->length, length);
for (uint64_t i = 0; i < data_len; i++) {
out->coeffs[i] = ab[i];
}
for (uint64_t i = data_len; i < out->length; i++) {
out->coeffs[i] = fr_zero;
}
free(a_pad);
free(b_pad);
free(a_fft);
free(b_fft);
if (fs_p == &fs) {
free_fft_settings(fs_p);
}
return C_KZG_OK;
}
/**
* Calculate terms in the inverse of a polynomial.
*
* Returns terms in the expansion of `1 / b(x)` (aka the Maclaurin series).
*
* The size of @p out determines the number of terms returned.
*
* This is a non-recursive version of the algorithm in https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.4.
*
* @remark The constant term of @p b must be nonzero.
*
* @param[in, out] out A poly whose length determines the number of terms returned
* @param[in] b The polynomial to be inverted
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*/
C_KZG_RET poly_inverse(poly *out, poly *b) {
poly tmp0, tmp1;
fr_t fr_two;
CHECK(out->length > 0);
CHECK(b->length > 0);
CHECK(!fr_is_zero(&b->coeffs[0]));
// If the input polynomial is constant, the remainder of the series is zero
if (b->length == 1) {
fr_inv(&out->coeffs[0], &b->coeffs[0]);
for (uint64_t i = 1; i < out->length; i++) {
out->coeffs[i] = fr_zero;
}
return C_KZG_OK;
}
uint64_t length = out->length;
uint64_t maxd = length - 1;
uint64_t d = 0;
// Max space for multiplications is (2 * length - 1)
int scale = log2_pow2(next_power_of_two(2 * length - 1));
FFTSettings fs;
TRY(new_fft_settings(&fs, scale));
// To store intermediate results
TRY(new_poly(&tmp0, length));
TRY(new_poly(&tmp1, length));
// Base case for d == 0
fr_inv(&out->coeffs[0], &b->coeffs[0]);
out->length = 1;
uint64_t mask = (uint64_t)1 << log2_u64(maxd);
while (mask) {
d = 2 * d + ((maxd & mask) != 0);
mask >>= 1;
// b.c -> tmp0 (we're using out for c)
tmp0.length = min_u64(d + 1, b->length + out->length - 1);
TRY(poly_mul_(&tmp0, b, out, &fs));
// 2 - b.c -> tmp0
for (int i = 0; i < tmp0.length; i++) {
fr_negate(&tmp0.coeffs[i], &tmp0.coeffs[i]);
}
fr_from_uint64(&fr_two, 2);
fr_add(&tmp0.coeffs[0], &tmp0.coeffs[0], &fr_two);
// c.(2 - b.c) -> tmp1;
tmp1.length = d + 1;
TRY(poly_mul_(&tmp1, out, &tmp0, &fs));
// tmp1 -> c
out->length = tmp1.length;
for (uint64_t i = 0; i < out->length; i++) {
out->coeffs[i] = tmp1.coeffs[i];
}
}
ASSERT(d + 1 == length);
free_poly(&tmp0);
free_poly(&tmp1);
free_fft_settings(&fs);
return C_KZG_OK;
}
/**
* Reverse the order of the coefficients of a polynomial.
*
* Corresponds to returning x^n.p(1/x).
*
* @param[out] out The flipped polynomial. Its size must be the same as the size of @p in
* @param[in] in The polynomial to be flipped
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
*/
C_KZG_RET poly_flip(poly *out, const poly *in) {
CHECK(out->length == in->length);
for (uint64_t i = 0; i < in->length; i++) {
out->coeffs[out->length - i - 1] = in->coeffs[i];
}
return C_KZG_OK;
}
/**
* Fast polynomial division in the finite field.
*
* Returns the polynomial resulting from dividing @p dividend by @p divisor.
*
* Implements https://tc-arg.tk/pdfs/2020/fft.pdf theorem 3.5.
*
* Should be O(m.log(m)) where m is the length of the dividend.
*
* @remark @p out must be sized large enough for the resulting polynomial.
*
* @remark For some ranges of @p dividend and @p divisor, #poly_long_div may be a little faster.
*
* @param[out] out An appropriately sized poly type that will contain the result of the division
* @param[in] dividend The dividend polynomial
* @param[in] divisor The divisor polynomial
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*/
C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor) {
// Dividing by zero is undefined
CHECK(divisor->length > 0);
// The divisor's highest coefficient must be non-zero
CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
uint64_t m = dividend->length - 1;
uint64_t n = divisor->length - 1;
// If the divisor is larger than the dividend, the result is zero-length
if (n > m) {
out->length = 0;
return C_KZG_OK;
}
// Ensure the output poly has enough space allocated
CHECK(out->length >= m - n + 1);
// Ensure that the divisor is well-formed for the inverse operation
CHECK(!fr_is_zero(&divisor->coeffs[divisor->length - 1]));
// Special case for divisor.length == 1 (it's a constant)
if (divisor->length == 1) {
out->length = dividend->length;
for (uint64_t i = 0; i < out->length; i++) {
fr_div(&out->coeffs[i], &dividend->coeffs[i], &divisor->coeffs[0]);
}
return C_KZG_OK;
}
poly a_flip, b_flip;
TRY(new_poly(&a_flip, dividend->length));
TRY(new_poly(&b_flip, divisor->length));
TRY(poly_flip(&a_flip, dividend));
TRY(poly_flip(&b_flip, divisor));
poly inv_b_flip;
TRY(new_poly(&inv_b_flip, m - n + 1));
TRY(poly_inverse(&inv_b_flip, &b_flip));
poly q_flip;
// We need only m - n + 1 coefficients of q_flip
TRY(new_poly(&q_flip, m - n + 1));
TRY(poly_mul(&q_flip, &a_flip, &inv_b_flip));
out->length = m - n + 1;
TRY(poly_flip(out, &q_flip));
free_poly(&a_flip);
free_poly(&b_flip);
free_poly(&inv_b_flip);
free_poly(&q_flip);
return C_KZG_OK;
}
/**
* Calculate the (possibly truncated) product of two polynomials.
*
* This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the
* problem.
*
* @param[in,out] out The result of the division - its size determines the number of coefficients returned
* @param[in] a The muliplicand polynomial
* @param[in] b The multiplier polynomial
* @param[in] fs Either NULL or a sufficiently sized FFTSettings structure
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*
* @todo Implement routine selection
*/
C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs) {
if (a->length < 64 || b->length < 64 || out->length < 128) { // Tunable parameter
return poly_mul_direct(out, a, b);
} else {
return poly_mul_fft(out, a, b, fs);
}
}
/**
* Calculate the (possibly truncated) product of two polynomials.
*
* This is just a wrapper around #poly_mul_direct and #poly_mul_fft that selects the faster based on the size of the
* problem.
*
* @param[in,out] out The result of the division - its size determines the number of coefficients returned
* @param[in] a The muliplicand polynomial
* @param[in] b The multiplier polynomial
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*
* @todo Implement routine selection
*/
C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b) {
return poly_mul_(out, a, b, NULL);
}
/**
* Polynomial division in the finite field.
*
* Returns the polynomial resulting from dividing @p dividend_ by @p divisor_.
*
* This is a wrapper around #poly_long_div and #poly_fast_div that chooses the fastest based on problem size.
*
* @remark @p out must be an uninitialised #poly. Space is allocated for it here, which
* must be later reclaimed by calling #free_poly().
*
* @param[out] out An uninitialised poly type that will contain the result of the division
* @param[in] dividend_ The dividend polynomial
* @param[in] divisor_ The divisor polynomial
* @retval C_CZK_OK All is well
* @retval C_CZK_BADARGS Invalid parameters were supplied
* @retval C_CZK_ERROR An internal error occurred
* @retval C_CZK_MALLOC Memory allocation failed
*/
C_KZG_RET new_poly_div(poly *out, const poly *dividend_, const poly *divisor_) {
poly dividend = poly_norm(dividend_);
poly divisor = poly_norm(divisor_);
TRY(new_poly(out, poly_quotient_length(&dividend, &divisor)));
if (divisor.length >= dividend.length || divisor.length < 128) { // Tunable paramter
return poly_long_div(out, &dividend, &divisor);
} else {
return poly_fast_div(out, &dividend, &divisor);
}
}
/**
* Initialise an empty polynomial of the given size.
*

View File

@ -20,6 +20,7 @@
#define POLY_H
#include "c_kzg.h"
#include "fft_fr.h"
/**
* Defines a polynomial whose coefficients are members of the finite field F_r.
@ -32,7 +33,14 @@ typedef struct {
} poly;
void eval_poly(fr_t *out, const poly *p, const fr_t *x);
C_KZG_RET new_poly_long_div(poly *out, const poly *dividend, const poly *divisor);
C_KZG_RET poly_long_div(poly *out, const poly *dividend, const poly *divisor);
C_KZG_RET poly_mul_direct(poly *out, const poly *a, const poly *b);
C_KZG_RET poly_mul_fft(poly *out, const poly *a, const poly *b, FFTSettings *fs);
C_KZG_RET poly_inverse(poly *out, poly *b);
C_KZG_RET poly_fast_div(poly *out, const poly *dividend, const poly *divisor);
C_KZG_RET poly_mul(poly *out, const poly *a, const poly *b);
C_KZG_RET poly_mul_(poly *out, const poly *a, const poly *b, FFTSettings *fs);
C_KZG_RET new_poly_div(poly *out, const poly *dividend, const poly *divisor);
C_KZG_RET new_poly(poly *out, uint64_t length);
C_KZG_RET new_poly_with_coeffs(poly *out, const fr_t *coeffs, uint64_t length);
void free_poly(poly *p);

97
src/poly_bench.c Normal file
View File

@ -0,0 +1,97 @@
/*
* Copyright 2021 Benjamin Edgington
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h> // malloc(), free(), atoi()
#include <stdio.h> // printf()
#include <assert.h> // assert()
#include <unistd.h> // EXIT_SUCCESS/FAILURE
#include "bench_util.h"
#include "test_util.h"
#include "poly.h"
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
long run_bench(int scale, int max_seconds) {
timespec_t t0, t1;
unsigned long total_time = 0, nits = 0;
uint64_t width = (uint64_t)1 << scale;
int dividend_length = width;
int divisor_length = width / 2; // What would be a relevant value of kzg multi-proofs?
poly dividend, divisor, q;
new_poly(&dividend, dividend_length);
new_poly(&divisor, divisor_length);
for (int i = 0; i < dividend_length; i++) {
dividend.coeffs[i] = rand_fr();
}
for (int i = 0; i < divisor_length; i++) {
divisor.coeffs[i] = rand_fr();
}
// Ensure that the polynomials' orders corresponds to their lengths
if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
dividend.coeffs[dividend.length - 1] = fr_one;
}
if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
divisor.coeffs[divisor.length - 1] = fr_one;
}
while (total_time < max_seconds * NANO) {
clock_gettime(CLOCK_REALTIME, &t0);
assert(C_KZG_OK == new_poly_div(&q, &dividend, &divisor));
clock_gettime(CLOCK_REALTIME, &t1);
nits++;
total_time += tdiff(t0, t1);
free_poly(&q);
}
free_poly(&dividend);
free_poly(&divisor);
return total_time / nits;
}
int main(int argc, char *argv[]) {
int nsec = 0;
switch (argc) {
case 1:
nsec = NSEC;
break;
case 2:
nsec = atoi(argv[1]);
break;
default:
break;
};
if (nsec == 0) {
printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
exit(EXIT_FAILURE);
}
printf("*** Benchmarking Polynomial Division, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
for (int scale = 6; scale <= 15; scale++) {
printf("new_poly_div/scale_%d %lu ns/op\n", scale, run_bench(scale, nsec));
}
return EXIT_SUCCESS;
}

119
src/poly_div_tune.c Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright 2021 Benjamin Edgington
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h> // malloc(), free(), atoi()
#include <stdio.h> // printf()
#include <assert.h> // assert()
#include <unistd.h> // EXIT_SUCCESS/FAILURE
#include "bench_util.h"
#include "test_util.h"
#include "poly.h"
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
long run_bench(int scale_0, int scale_1, int max_seconds) {
timespec_t t0, t1;
unsigned long total_time = 0, nits = 0;
uint64_t width_0 = (uint64_t)1 << scale_0;
// uint64_t width_1 = (uint64_t)1 << scale_1;
uint64_t width_1 = width_0 - ((uint64_t)1 << scale_1);
int dividend_length = width_0;
int divisor_length = width_1;
poly dividend, divisor, q;
new_poly(&dividend, dividend_length);
new_poly(&divisor, divisor_length);
for (int i = 0; i < dividend_length; i++) {
dividend.coeffs[i] = rand_fr();
}
for (int i = 0; i < divisor_length; i++) {
divisor.coeffs[i] = rand_fr();
}
// Ensure that the polynomials' orders corresponds to their lengths
if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
dividend.coeffs[dividend.length - 1] = fr_one;
}
if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
divisor.coeffs[divisor.length - 1] = fr_one;
}
new_poly(&q, dividend.length - divisor.length + 1);
while (total_time < max_seconds * NANO) {
clock_gettime(CLOCK_REALTIME, &t0);
#if 0
assert(C_KZG_OK == poly_long_div(&q, &dividend, &divisor));
#else
assert(C_KZG_OK == poly_fast_div(&q, &dividend, &divisor));
#endif
clock_gettime(CLOCK_REALTIME, &t1);
nits++;
total_time += tdiff(t0, t1);
}
free_poly(&dividend);
free_poly(&divisor);
free_poly(&q);
return total_time / nits;
}
int main(int argc, char *argv[]) {
int nsec = 0;
switch (argc) {
case 1:
nsec = NSEC;
break;
case 2:
nsec = atoi(argv[1]);
break;
default:
break;
};
if (nsec == 0) {
printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
exit(EXIT_FAILURE);
}
int scale_min = 5;
int scale_max = 14;
#if 0
printf("*** Benchmarking poly_long_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
#else
printf("*** Benchmarking poly_fast_div() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
#endif
printf(",");
for (int i = /* scale_min */ 0; i <= scale_max; i++) {
printf("%d,", i);
}
printf("\n");
for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) {
printf("%d,", scale_0);
for (int scale_1 = /* scale_min */ 0; scale_1 < scale_0; scale_1++) {
printf("%lu,", run_bench(scale_0, scale_1, nsec));
}
printf("\n");
}
return EXIT_SUCCESS;
}

116
src/poly_mul_tune.c Normal file
View File

@ -0,0 +1,116 @@
/*
* Copyright 2021 Benjamin Edgington
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h> // malloc(), free(), atoi()
#include <stdio.h> // printf()
#include <assert.h> // assert()
#include <unistd.h> // EXIT_SUCCESS/FAILURE
#include "bench_util.h"
#include "test_util.h"
#include "poly.h"
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
long run_bench(int scale_0, int scale_1, int max_seconds) {
timespec_t t0, t1;
unsigned long total_time = 0, nits = 0;
uint64_t width_0 = (uint64_t)1 << scale_0;
uint64_t width_1 = (uint64_t)1 << scale_1;
poly multiplicand, multiplier, r;
new_poly(&multiplicand, width_0);
new_poly(&multiplier, width_1);
for (int i = 0; i < width_0; i++) {
multiplicand.coeffs[i] = rand_fr();
}
for (int i = 0; i < width_1; i++) {
multiplier.coeffs[i] = rand_fr();
}
// Ensure that the polynomials' orders corresponds to their lengths
if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) {
multiplicand.coeffs[multiplicand.length - 1] = fr_one;
}
if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) {
multiplier.coeffs[multiplier.length - 1] = fr_one;
}
new_poly(&r, multiplicand.length + multiplier.length - 1);
while (total_time < max_seconds * NANO) {
clock_gettime(CLOCK_REALTIME, &t0);
#if 0
assert(C_KZG_OK == poly_mul_fft(&r, &multiplicand, &multiplier, NULL));
#else
assert(C_KZG_OK == poly_mul(&r, &multiplicand, &multiplier));
#endif
clock_gettime(CLOCK_REALTIME, &t1);
nits++;
total_time += tdiff(t0, t1);
}
free_poly(&multiplicand);
free_poly(&multiplier);
free_poly(&r);
return total_time / nits;
}
int main(int argc, char *argv[]) {
int nsec = 0;
switch (argc) {
case 1:
nsec = NSEC;
break;
case 2:
nsec = atoi(argv[1]);
break;
default:
break;
};
if (nsec == 0) {
printf("Usage: %s [test time in seconds > 0]\n", argv[0]);
exit(EXIT_FAILURE);
}
int scale_min = 5;
int scale_max = 12;
#if 1
printf("*** Benchmarking poly_mul_fft() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
#else
printf("*** Benchmarking poly_mul_direct() %d second%s per test.\n", nsec, nsec == 1 ? "" : "s");
#endif
printf(",");
for (int i = scale_min; i <= scale_max; i++) {
printf("%d,", i);
}
printf("\n");
for (int scale_0 = scale_min; scale_0 <= scale_max; scale_0++) {
printf("%d,", scale_0);
for (int scale_1 = scale_min; scale_1 <= scale_max; scale_1++) {
printf("%lu,", run_bench(scale_0, scale_1, nsec));
}
printf("\n");
}
return EXIT_SUCCESS;
}

View File

@ -18,97 +18,93 @@
#include "test_util.h"
#include "poly.h"
void poly_div_0(void) {
fr_t a[3], b[2], expected[2];
poly dividend, divisor, actual;
// If anyone has a nicer way to initialise these test data, I'd love to hear it.
typedef struct polydata {
int length;
int coeffs[];
} polydata;
// Calculate (x^2 - 1) / (x + 1) = x - 1
// (x^2 - 1) / (x + 1) = x - 1
polydata test_0_0 = {3, {-1, 0, 1}};
polydata test_0_1 = {2, {1, 1}};
polydata test_0_2 = {2, {-1, 1}};
// Dividend
fr_from_uint64(&a[0], 1);
fr_negate(&a[0], &a[0]);
fr_from_uint64(&a[1], 0);
fr_from_uint64(&a[2], 1);
dividend.length = 3;
dividend.coeffs = a;
// (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6
polydata test_1_0 = {4, {18, 9, -11, 12}};
polydata test_1_1 = {2, {3, 4}};
polydata test_1_2 = {3, {6, -5, 3}};
// Divisor
fr_from_uint64(&b[0], 1);
fr_from_uint64(&b[1], 1);
divisor.length = 2;
divisor.coeffs = b;
// (x + 1) / (x^2 - 1) = nil
polydata test_2_0 = {2, {1, 1}};
polydata test_2_1 = {3, {-1, 0, 2}};
polydata test_2_2 = {0, {}};
// Known result
fr_from_uint64(&expected[0], 1);
fr_negate(&expected[0], &expected[0]);
fr_from_uint64(&expected[1], 1);
// (10x^2 + 20x + 30) / 10 = x^2 + 2x + 3
polydata test_3_0 = {3, {30, 20, 10}};
polydata test_3_1 = {1, {10}};
polydata test_3_2 = {3, {3, 2, 1}};
TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0]));
TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1]));
// (x^2 + x) / (x + 1) = x
polydata test_4_0 = {3, {0, 1, 1}};
polydata test_4_1 = {2, {1, 1}};
polydata test_4_2 = {2, {0, 1}};
free_poly(&actual);
// (x^2 + x + 1) / 1 = x^2 + x + 1
polydata test_5_0 = {3, {1, 1, 1}};
polydata test_5_1 = {1, {1}};
polydata test_5_2 = {3, {1, 1, 1}};
// (x^2 + x + 1) / (0x + 1) = x^2 + x + 1
polydata test_6_0 = {3, {1, 1, 1}};
polydata test_6_1 = {2, {1, 0}}; // The highest coefficient is zero
polydata test_6_2 = {3, {1, 1, 1}};
polydata *test[][3] = {{&test_0_0, &test_0_1, &test_0_2}, {&test_1_0, &test_1_1, &test_1_2},
{&test_2_0, &test_2_1, &test_2_2}, {&test_3_0, &test_3_1, &test_3_2},
{&test_4_0, &test_4_1, &test_4_2}, {&test_5_0, &test_5_1, &test_5_2},
{&test_6_0, &test_6_1, &test_6_2}};
/* Internal utility function */
void new_test_poly(poly *p, polydata *data) {
new_poly(p, data->length);
for (int i = 0; i < p->length; i++) {
int coeff = data->coeffs[i];
if (coeff >= 0) {
fr_from_uint64(&p->coeffs[i], coeff);
} else {
fr_from_uint64(&p->coeffs[i], -coeff);
fr_negate(&p->coeffs[i], &p->coeffs[i]);
}
}
}
void poly_div_1(void) {
fr_t a[4], b[2], expected[3];
poly dividend, divisor, actual;
void poly_test_div(void) {
poly dividend, divisor, expected, actual;
int ntest = sizeof test / sizeof test[0];
// Calculate (12x^3 - 11x^2 + 9x + 18) / (4x + 3) = 3x^2 - 5x + 6
for (int i = 0; i < ntest; i++) {
new_test_poly(&dividend, test[i][0]);
new_test_poly(&divisor, test[i][1]);
new_test_poly(&expected, test[i][2]);
// Dividend
fr_from_uint64(&a[0], 18);
fr_from_uint64(&a[1], 9);
fr_from_uint64(&a[2], 11);
fr_negate(&a[2], &a[2]);
fr_from_uint64(&a[3], 12);
dividend.length = 4;
dividend.coeffs = a;
if (TEST_CHECK(C_KZG_OK == new_poly_div(&actual, &dividend, &divisor))) {
if (TEST_CHECK(actual.length == expected.length)) {
for (int j = 0; j < actual.length; j++) {
TEST_CHECK(fr_equal(&actual.coeffs[j], &expected.coeffs[j]));
TEST_MSG("Failed test %d with incorrect value", i);
}
} else {
TEST_MSG("Failed test %d with incorrect length.", i);
}
} else {
TEST_MSG("Failed test %d with bad return value.", i);
}
// Divisor
fr_from_uint64(&b[0], 3);
fr_from_uint64(&b[1], 4);
divisor.length = 2;
divisor.coeffs = b;
// Known result
fr_from_uint64(&expected[0], 6);
fr_from_uint64(&expected[1], 5);
fr_negate(&expected[1], &expected[1]);
fr_from_uint64(&expected[2], 3);
TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
TEST_CHECK(fr_equal(&expected[0], &actual.coeffs[0]));
TEST_CHECK(fr_equal(&expected[1], &actual.coeffs[1]));
TEST_CHECK(fr_equal(&expected[2], &actual.coeffs[2]));
free_poly(&actual);
}
void poly_div_2(void) {
fr_t a[2], b[3];
poly dividend, divisor, actual;
// Calculate (x + 1) / (x^2 - 1) = nil
// Dividend
fr_from_uint64(&a[0], 1);
fr_from_uint64(&a[1], 1);
dividend.length = 2;
dividend.coeffs = a;
// Divisor
fr_from_uint64(&b[0], 1);
fr_negate(&b[0], &b[0]);
fr_from_uint64(&b[1], 0);
fr_from_uint64(&b[2], 1);
divisor.length = 3;
divisor.coeffs = b;
TEST_CHECK(C_KZG_OK == new_poly_long_div(&actual, &dividend, &divisor));
TEST_CHECK(NULL == actual.coeffs);
free_poly(&actual);
free_poly(&dividend);
free_poly(&divisor);
free_poly(&expected);
free_poly(&actual);
}
}
void poly_div_by_zero(void) {
@ -126,7 +122,10 @@ void poly_div_by_zero(void) {
// Divisor
new_poly(&divisor, 0);
TEST_CHECK(C_KZG_BADARGS == new_poly_long_div(&dummy, &dividend, &divisor));
TEST_CHECK(C_KZG_BADARGS == new_poly_div(&dummy, &dividend, &divisor));
free_poly(&divisor);
free_poly(&dummy);
}
void poly_eval_check(void) {
@ -176,14 +175,242 @@ void poly_eval_nil_check(void) {
free_poly(&p);
}
void poly_mul_direct_test(void) {
// Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18
static polydata multiplier_data = {3, {6, -5, 3}};
static polydata multiplicand_data = {2, {3, 4}};
static polydata expected_data = {4, {18, 9, -11, 12}};
poly multiplicand, multiplier, expected, actual0, actual1;
new_test_poly(&multiplicand, &multiplicand_data);
new_test_poly(&multiplier, &multiplier_data);
new_test_poly(&expected, &expected_data);
new_poly(&actual0, 4);
TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual0, &multiplicand, &multiplier));
TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0]));
TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1]));
TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2]));
TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3]));
// Check commutativity
new_poly(&actual1, 4);
TEST_CHECK(C_KZG_OK == poly_mul_direct(&actual1, &multiplier, &multiplicand));
TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0]));
TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1]));
TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2]));
TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3]));
free_poly(&multiplicand);
free_poly(&multiplier);
free_poly(&expected);
free_poly(&actual0);
free_poly(&actual1);
}
void poly_mul_fft_test(void) {
// Calculate (3x^2 - 5x + 6) * (4x + 3) = 12x^3 - 11x^2 + 9x + 18
static polydata multiplier_data = {3, {6, -5, 3}};
static polydata multiplicand_data = {2, {3, 4}};
static polydata expected_data = {4, {18, 9, -11, 12}};
poly multiplicand, multiplier, expected, actual0, actual1;
new_test_poly(&multiplicand, &multiplicand_data);
new_test_poly(&multiplier, &multiplier_data);
new_test_poly(&expected, &expected_data);
new_poly(&actual0, 4);
TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual0, &multiplicand, &multiplier, NULL));
TEST_CHECK(fr_equal(&expected.coeffs[0], &actual0.coeffs[0]));
TEST_CHECK(fr_equal(&expected.coeffs[1], &actual0.coeffs[1]));
TEST_CHECK(fr_equal(&expected.coeffs[2], &actual0.coeffs[2]));
TEST_CHECK(fr_equal(&expected.coeffs[3], &actual0.coeffs[3]));
// Check commutativity
new_poly(&actual1, 4);
TEST_CHECK(C_KZG_OK == poly_mul_fft(&actual1, &multiplier, &multiplicand, NULL));
TEST_CHECK(fr_equal(&expected.coeffs[0], &actual1.coeffs[0]));
TEST_CHECK(fr_equal(&expected.coeffs[1], &actual1.coeffs[1]));
TEST_CHECK(fr_equal(&expected.coeffs[2], &actual1.coeffs[2]));
TEST_CHECK(fr_equal(&expected.coeffs[3], &actual1.coeffs[3]));
free_poly(&multiplicand);
free_poly(&multiplier);
free_poly(&expected);
free_poly(&actual0);
free_poly(&actual1);
}
void poly_inverse_simple_0(void) {
// 1 / (1 - x) = 1 + x + x^2 + ...
poly p, q;
int d = 16; // The number of terms to take
new_poly(&p, 2);
p.coeffs[0] = fr_one;
p.coeffs[1] = fr_one;
fr_negate(&p.coeffs[1], &p.coeffs[1]);
new_poly(&q, d);
TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p));
for (int i = 0; i < d; i++) {
TEST_CHECK(fr_is_one(&q.coeffs[i]));
}
free_poly(&p);
free_poly(&q);
}
void poly_inverse_simple_1(void) {
// 1 / (1 + x) = 1 - x + x^2 - ...
poly p, q;
int d = 16; // The number of terms to take
new_poly(&p, 2);
p.coeffs[0] = fr_one;
p.coeffs[1] = fr_one;
new_poly(&q, d);
TEST_CHECK(C_KZG_OK == poly_inverse(&q, &p));
for (int i = 0; i < d; i++) {
fr_t tmp = q.coeffs[i];
if (i & 1) {
fr_negate(&tmp, &tmp);
}
TEST_CHECK(fr_is_one(&tmp));
}
free_poly(&p);
free_poly(&q);
}
void poly_mul_random(void) {
// Compare the output of poly_mul_direct() and poly_mul_fft()
poly multiplicand, multiplier;
poly q0, q1;
for (int k = 0; k < 256; k++) {
int multiplicand_length = 1 + rand() % 1000;
int multiplier_length = 1 + rand() % 1000;
int out_length = 1 + rand() % 1000;
new_poly(&multiplicand, multiplicand_length);
new_poly(&multiplier, multiplier_length);
for (int i = 0; i < multiplicand_length; i++) {
multiplicand.coeffs[i] = rand_fr();
}
for (int i = 0; i < multiplier_length; i++) {
multiplier.coeffs[i] = rand_fr();
}
// Ensure that the polynomials' orders corresponds to their lengths
if (fr_is_zero(&multiplicand.coeffs[multiplicand.length - 1])) {
multiplicand.coeffs[multiplicand.length - 1] = fr_one;
}
if (fr_is_zero(&multiplier.coeffs[multiplier.length - 1])) {
multiplier.coeffs[multiplier.length - 1] = fr_one;
}
new_poly(&q0, out_length); // Truncate the result
TEST_CHECK(C_KZG_OK == poly_mul_direct(&q0, &multiplicand, &multiplier));
new_poly(&q1, out_length);
TEST_CHECK(C_KZG_OK == poly_mul_fft(&q1, &multiplicand, &multiplier, NULL));
TEST_CHECK(q1.length == q0.length);
for (int i = 0; i < q0.length; i++) {
if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) {
TEST_MSG("round = %d, i = %d, multiplicand_length = %lu, multiplier_length = %lu, out_length = %lu", k,
i, multiplicand.length, multiplier.length, q0.length);
}
}
free_poly(&multiplicand);
free_poly(&multiplier);
free_poly(&q0);
free_poly(&q1);
}
}
void poly_div_random(void) {
// Compare the output of poly_fast_div() and poly_long_div()
poly dividend, divisor;
poly q0, q1;
for (int k = 0; k < 256; k++) {
int dividend_length = 2 + rand() % 1000;
int divisor_length = 1 + rand() % dividend_length;
new_poly(&dividend, dividend_length);
new_poly(&divisor, divisor_length);
for (int i = 0; i < dividend_length; i++) {
dividend.coeffs[i] = rand_fr();
}
for (int i = 0; i < divisor_length; i++) {
divisor.coeffs[i] = rand_fr();
}
// Ensure that the polynomials' orders corresponds to their lengths
if (fr_is_zero(&dividend.coeffs[dividend.length - 1])) {
dividend.coeffs[dividend.length - 1] = fr_one;
}
if (fr_is_zero(&divisor.coeffs[divisor.length - 1])) {
divisor.coeffs[divisor.length - 1] = fr_one;
}
new_poly(&q0, dividend.length - divisor.length + 1);
TEST_CHECK(C_KZG_OK == poly_long_div(&q0, &dividend, &divisor));
new_poly(&q1, dividend.length - divisor.length + 1);
TEST_CHECK(C_KZG_OK == poly_fast_div(&q1, &dividend, &divisor));
TEST_CHECK(q1.length == q0.length);
for (int i = 0; i < q0.length; i++) {
if (!TEST_CHECK(fr_equal(&q0.coeffs[i], &q1.coeffs[i]))) {
TEST_MSG("round = %d, dividend_length = %lu, divisor_length = %lu, i = %d", k, dividend.length,
divisor.length, i);
}
}
free_poly(&dividend);
free_poly(&divisor);
free_poly(&q0);
free_poly(&q1);
}
}
TEST_LIST = {
{"POLY_TEST", title},
{"poly_div_0", poly_div_0},
{"poly_div_1", poly_div_1},
{"poly_div_2", poly_div_2},
{"poly_test_div", poly_test_div},
#ifndef DEBUG
{"poly_div_by_zero", poly_div_by_zero},
#endif
{"poly_eval_check", poly_eval_check},
{"poly_eval_0_check", poly_eval_0_check},
{"poly_eval_nil_check", poly_eval_nil_check},
{"poly_mul_direct_test", poly_mul_direct_test},
{"poly_mul_fft_test", poly_mul_fft_test},
{"poly_inverse_simple_0", poly_inverse_simple_0},
{"poly_inverse_simple_1", poly_inverse_simple_1},
{"poly_mul_random", poly_mul_random},
{"poly_div_random", poly_div_random},
{NULL, NULL} /* zero record marks the end of the list */
};

View File

@ -58,6 +58,20 @@ int log2_pow2(uint32_t n) {
return r;
}
/**
* Calculate log base two of a power of two.
*
* In other words, the bit index of the highest one bit.
*
* @param[in] n The 64 bit unsigned integer to take the logarithm of
* @return the log base two of n
*/
int log2_u64(uint64_t n) {
int r = 0;
while (n >>= 1) r++;
return r;
}
/**
* Return the next highest power of two.
*

View File

@ -44,6 +44,7 @@
bool is_power_of_two(uint64_t n);
int log2_pow2(uint32_t n);
int log2_u64(uint64_t n);
uint64_t next_power_of_two(uint64_t v);
uint32_t reverse_bits(uint32_t a);
uint32_t reverse_bits_limited(uint32_t n, uint32_t value);