diff --git a/src/Makefile b/src/Makefile index fc3bb5c..33a2ec5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,6 +1,6 @@ TESTS = bls12_381_test das_extension_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \ fk20_proofs_test kzg_proofs_test poly_test utility_test zero_poly_test -BENCH = fft_fr_bench fft_g1_bench fft_fr_in_place_0_bench fft_fr_in_place_1_bench +BENCH = fft_fr_bench fft_g1_bench LIB_SRC = bls12_381.c c_kzg_util.c das_extension.c fft_common.c fft_fr.c fft_g1.c fk20_proofs.c kzg_proofs.c poly.c utility.c zero_poly.c LIB_OBJ = $(LIB_SRC:.c=.o) diff --git a/src/fft_fr.c b/src/fft_fr.c index 97482ce..07f5f63 100644 --- a/src/fft_fr.c +++ b/src/fft_fr.c @@ -111,77 +111,3 @@ C_KZG_RET fft_fr(fr_t *out, const fr_t *in, bool inverse, uint64_t n, const FFTS } return C_KZG_OK; } - -/** - * Perform an in-place FFT by copying the results over the input. - * - * @remark This is almost as fast as #fft_fr. It is slower than #fft_fr_in_place_lomem, but, unlike that routine, - * allocates a extra array the same size as the input data to copy the results. - * - * @param[in,out] data The input data and output results (array of length @p n) - * @param[in] inverse `false` for forward transform, `true` for inverse transform - * @param[in] n Length of the FFT, must be a power of two - * @param[in] fs Pointer to previously initialised FFTSettings structure with `max_width` at least @p n. - * @retval C_CZK_OK All is well - * @retval C_CZK_BADARGS Invalid parameters were supplied - * @retval C_CZK_MALLOC Memory allocation failed - */ -C_KZG_RET fft_fr_in_place(fr_t *data, bool inverse, uint64_t n, const FFTSettings *fs) { - fr_t *out; - CHECK(n <= fs->max_width); - CHECK(is_power_of_two(n)); - TRY(new_fr_array(&out, fs->max_width)); - TRY(fft_fr(out, data, inverse, n, fs)); - for (uint64_t i = 0; i < n; i++) { - data[i] = out[i]; - } - free(out); - return C_KZG_OK; -} - -/** - * Perform an in-place FFT without allocating any extra memory. - * - * @remark This is about 25% slower than #fft_fr_in_place, but does not allocate any extra memory, whereas that routine - * has a memory overhead the same size as the input array. - * - * @param[in,out] data The input data and output results (array of length @p n) - * @param[in] inverse `false` for forward transform, `true` for inverse transform - * @param[in] n Length of the FFT, must be a power of two - * @param[in] fs Pointer to previously initialised FFTSettings structure with `max_width` at least @p n. - * @retval C_CZK_OK All is well - * @retval C_CZK_BADARGS Invalid parameters were supplied - */ -C_KZG_RET fft_fr_in_place_lomem(fr_t *data, bool inverse, uint64_t n, const FFTSettings *fs) { - uint64_t stride = fs->max_width / n; - CHECK(n <= fs->max_width); - CHECK(is_power_of_two(n)); - - fr_t *roots = inverse ? fs->reverse_roots_of_unity : fs->expanded_roots_of_unity; - - reverse_bit_order(data, sizeof data[0], n); - - uint64_t m = 1; - while (m < n) { - m <<= 1; - for (uint64_t k = 0; k < n; k += m) { - for (uint64_t j = 0; j < m / 2; j++) { - fr_t t, w = roots[j * stride * n / m]; - fr_mul(&t, &w, &data[k + j + m / 2]); - fr_sub(&data[k + j + m / 2], &data[k + j], &t); - fr_add(&data[k + j], &data[k + j], &t); - } - } - } - - if (inverse) { - fr_t inv_len; - fr_from_uint64(&inv_len, n); - fr_inv(&inv_len, &inv_len); - for (uint64_t i = 0; i < n; i++) { - fr_mul(&data[i], &data[i], &inv_len); - } - } - - return C_KZG_OK; -} diff --git a/src/fft_fr.h b/src/fft_fr.h index 3af2ac1..90648f2 100644 --- a/src/fft_fr.h +++ b/src/fft_fr.h @@ -21,5 +21,3 @@ void fft_fr_slow(fr_t *out, const fr_t *in, uint64_t stride, const fr_t *roots, uint64_t roots_stride, uint64_t n); void fft_fr_fast(fr_t *out, const fr_t *in, uint64_t stride, const fr_t *roots, uint64_t roots_stride, uint64_t n); C_KZG_RET fft_fr(fr_t *out, const fr_t *in, bool inverse, uint64_t n, const FFTSettings *fs); -C_KZG_RET fft_fr_in_place(fr_t *data, bool inverse, uint64_t n, const FFTSettings *fs); -C_KZG_RET fft_fr_in_place_lomem(fr_t *data, bool inverse, uint64_t n, const FFTSettings *fs); diff --git a/src/fft_fr_in_place_0_bench.c b/src/fft_fr_in_place_0_bench.c deleted file mode 100644 index 10cdc3e..0000000 --- a/src/fft_fr_in_place_0_bench.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2021 Benjamin Edgington - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include // malloc(), free(), atoi() -#include // printf() -#include // assert() -#include // EXIT_SUCCESS/FAILURE -#include "bench_util.h" -#include "test_util.h" -#include "fft_fr.h" - -// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds. -long run_bench(int scale, int max_seconds) { - timespec_t t0, t1; - unsigned long total_time = 0, nits = 0; - FFTSettings fs; - - assert(C_KZG_OK == new_fft_settings(&fs, scale)); - - // Allocate on the heap to avoid stack overflow for large sizes - fr_t *data; - data = malloc(fs.max_width * sizeof(fr_t)); - - // Fill with randomness - for (uint64_t i = 0; i < fs.max_width; i++) { - data[i] = rand_fr(); - } - - while (total_time < max_seconds * NANO) { - clock_gettime(CLOCK_REALTIME, &t0); - assert(C_KZG_OK == fft_fr_in_place_lomem(data, false, fs.max_width, &fs)); - clock_gettime(CLOCK_REALTIME, &t1); - nits++; - total_time += tdiff(t0, t1); - } - - free(data); - - return total_time / nits; -} - -int main(int argc, char *argv[]) { - int nsec = 0; - - switch (argc) { - case 1: - nsec = NSEC; - break; - case 2: - nsec = atoi(argv[1]); - break; - default: - break; - }; - - if (nsec == 0) { - printf("Usage: %s [test time in seconds > 0]\n", argv[0]); - exit(EXIT_FAILURE); - } - - printf("*** Benchmarking FFT_fr in-place, low memory, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); - for (int scale = 4; scale <= 15; scale++) { - printf("fft_fr/scale_%d %lu ns/op\n", scale, run_bench(scale, nsec)); - } - - return EXIT_SUCCESS; -} diff --git a/src/fft_fr_in_place_1_bench.c b/src/fft_fr_in_place_1_bench.c deleted file mode 100644 index 9e9c1da..0000000 --- a/src/fft_fr_in_place_1_bench.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2021 Benjamin Edgington - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include // malloc(), free(), atoi() -#include // printf() -#include // assert() -#include // EXIT_SUCCESS/FAILURE -#include "bench_util.h" -#include "test_util.h" -#include "fft_fr.h" - -// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds. -long run_bench(int scale, int max_seconds) { - timespec_t t0, t1; - unsigned long total_time = 0, nits = 0; - FFTSettings fs; - - assert(C_KZG_OK == new_fft_settings(&fs, scale)); - - // Allocate on the heap to avoid stack overflow for large sizes - fr_t *data; - data = malloc(fs.max_width * sizeof(fr_t)); - - // Fill with randomness - for (uint64_t i = 0; i < fs.max_width; i++) { - data[i] = rand_fr(); - } - - while (total_time < max_seconds * NANO) { - clock_gettime(CLOCK_REALTIME, &t0); - assert(C_KZG_OK == fft_fr_in_place(data, false, fs.max_width, &fs)); - clock_gettime(CLOCK_REALTIME, &t1); - nits++; - total_time += tdiff(t0, t1); - } - - free(data); - - return total_time / nits; -} - -int main(int argc, char *argv[]) { - int nsec = 0; - - switch (argc) { - case 1: - nsec = NSEC; - break; - case 2: - nsec = atoi(argv[1]); - break; - default: - break; - }; - - if (nsec == 0) { - printf("Usage: %s [test time in seconds > 0]\n", argv[0]); - exit(EXIT_FAILURE); - } - - printf("*** Benchmarking FFT_fr in-place, %d second%s per test.\n", nsec, nsec == 1 ? "" : "s"); - for (int scale = 4; scale <= 15; scale++) { - printf("fft_fr/scale_%d %lu ns/op\n", scale, run_bench(scale, nsec)); - } - - return EXIT_SUCCESS; -} diff --git a/src/fft_fr_test.c b/src/fft_fr_test.c index 9ca02ff..608d8ea 100644 --- a/src/fft_fr_test.c +++ b/src/fft_fr_test.c @@ -128,86 +128,12 @@ void stride_fft(void) { free_fft_settings(&fs2); } -void in_place_fft(void) { - // Initialise: ascending values of i, and arbitrary size - uint64_t size = 10; - uint64_t width = (uint64_t)1 << size; - FFTSettings fs; - TEST_CHECK(new_fft_settings(&fs, size) == C_KZG_OK); - - fr_t data[width], coeffs[width]; - for (int i = 0; i < width; i++) { - fr_from_uint64(data + i, i); - } - - // Output is separate from input - TEST_CHECK(fft_fr(coeffs, data, false, width, &fs) == C_KZG_OK); - // Output and input array is the same - TEST_CHECK(fft_fr_in_place(data, false, width, &fs) == C_KZG_OK); - - for (int i = 0; i < width; i++) { - TEST_CHECK(fr_equal(&coeffs[i], &data[i])); - } - - free_fft_settings(&fs); -} - -void in_place_inverse_fft(void) { - // Initialise: ascending values of i, and arbitrary size - uint64_t size = 11; - uint64_t width = (uint64_t)1 << size; - FFTSettings fs; - TEST_CHECK(new_fft_settings(&fs, size + 1) == C_KZG_OK); - - fr_t data[width], coeffs[width]; - for (int i = 0; i < width; i++) { - fr_from_uint64(data + i, i); - } - - // Output is separate from input - TEST_CHECK(fft_fr(coeffs, data, true, width, &fs) == C_KZG_OK); - // Output and input array is the same - TEST_CHECK(fft_fr_in_place(data, true, width, &fs) == C_KZG_OK); - - for (int i = 0; i < width; i++) { - TEST_CHECK(fr_equal(&coeffs[i], &data[i])); - } - - free_fft_settings(&fs); -} - -void in_place_fft_lomem(void) { - // Initialise: ascending values of i, and arbitrary size - uint64_t size = 13; - uint64_t width = (uint64_t)1 << size; - FFTSettings fs; - // Make this one strided for good measure - TEST_CHECK(new_fft_settings(&fs, size + 2) == C_KZG_OK); - - fr_t data0[width], data1[width]; - for (int i = 0; i < width; i++) { - fr_from_uint64(data0 + i, i); - data1[i] = data0[i]; - } - - TEST_CHECK(fft_fr_in_place_lomem(data0, false, width, &fs) == C_KZG_OK); - TEST_CHECK(fft_fr_in_place(data1, false, width, &fs) == C_KZG_OK); - - for (int i = 0; i < width; i++) { - TEST_CHECK(fr_equal(&data0[i], &data1[i])); - } - - free_fft_settings(&fs); -} - TEST_LIST = { {"FFT_FR_TEST", title}, {"compare_sft_fft", compare_sft_fft}, {"roundtrip_fft", roundtrip_fft}, {"inverse_fft", inverse_fft}, {"stride_fft", stride_fft}, - {"in_place_fft", in_place_fft}, - {"in_place_inverse_fft", in_place_inverse_fft}, - {"in_place_fft_lomem", in_place_fft_lomem}, + {NULL, NULL} /* zero record marks the end of the list */ };