diff --git a/.gitignore b/.gitignore index f15fe4b..5fe2876 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ *.o *.a *_test -a.out +*_bench +*.prof +*.out +*.log tmp/ inc/blst.h inc/blst_aux.h diff --git a/src/Makefile b/src/Makefile index f63c535..fa2de1a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,6 @@ TESTS = blst_util_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \ kzg_proofs_test poly_test +BENCH = fft_fr_bench fft_g1_bench LIB_SRC = blst_util.c c_kzg_util.c fft_common.c fft_fr.c fft_g1.c kzg_proofs.c poly.c LIB_OBJ = $(LIB_SRC:.c=.o) @@ -17,13 +18,25 @@ libckzg.a: $(LIB_OBJ) Makefile clang -Wall $(CFLAGS) -o $@ $@.c debug_util.o libckzg.a -L../lib -lblst ./$@ +%_bench: %_bench.c bench_util.o $(LIB_OBJ) Makefile + clang -Wall $(CFLAGS) -o $@ $@.c bench_util.o $(LIB_OBJ) -L../lib -lblst + ./$@ + lib: clean libckzg.a -debuglib: CFLAGS += -g -DDEBUG +debuglib: CFLAGS += -O1 -DDEBUG debuglib: clean libckzg.a +optlib: CFLAGS += -O2 +optlib: clean libckzg.a + +profilelib: CFLAGS += -fprofile-instr-generate -fcoverage-mapping +profilelib: clean libckzg.a + test: $(TESTS) +bench: $(BENCH) + clean: rm -f *.o rm -f libckzg.a diff --git a/src/bench_util.c b/src/bench_util.c new file mode 100644 index 0000000..cb03b5e --- /dev/null +++ b/src/bench_util.c @@ -0,0 +1,47 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <stdlib.h> // rand()
+#include "bench_util.h"
+#include "blst_util.h"
+
+unsigned long tdiff(timespec start, timespec end) {
+    return (end.tv_sec - start.tv_sec) * NANO + (end.tv_nsec - start.tv_nsec);
+}
+
+uint64_t rand_uint64(void) {
+    uint64_t r = 0; // rand() only guarantees 15 random bits per call, so stitch five 15-bit chunks together
+    for (int i = 0; i < 5; i++) { r = (r << 15) | ((uint64_t)rand() & 0x7fff); }
+    return r;
+}
+
+blst_fr rand_fr(void) {
+    blst_fr ret;
+    uint64_t a[4];
+    a[0] = rand_uint64();
+    a[1] = rand_uint64();
+    a[2] = rand_uint64();
+    a[3] = rand_uint64();
+    blst_fr_from_uint64(&ret, a);
+    return ret;
+}
+
+blst_p1 rand_g1(void) {
+    blst_p1 ret;
+    blst_fr random = rand_fr();
+    p1_mul(&ret, blst_p1_generator(), &random);
+    return ret;
+}
diff --git a/src/bench_util.h b/src/bench_util.h
new file mode 100644
index 0000000..6c0e26e
--- /dev/null
+++ b/src/bench_util.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 Benjamin Edgington
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BENCH_UTIL_H
+#define BENCH_UTIL_H
+
+#include <time.h> // CLOCK_REALTIME, clock_gettime(), timespec
+#include "c_kzg.h"
+
+typedef struct timespec timespec;
+
+#define NANO 1000000000L // nanoseconds per second
+
+// Nanoseconds elapsed from `start` to `end`; `end` must not be earlier than `start` (the result is unsigned).
+unsigned long tdiff(timespec start, timespec end);
+// A 64-bit value assembled from successive rand() calls.
+uint64_t rand_uint64(void);
+// A field element built from four rand_uint64() values via blst_fr_from_uint64().
+blst_fr rand_fr(void);
+// blst_p1_generator() multiplied by a rand_fr() scalar via p1_mul().
+blst_p1 rand_g1(void);
+
+#endif // BENCH_UTIL_H
diff --git a/src/blst_util.c b/src/blst_util.c
index 7127554..4daceda 100644
--- a/src/blst_util.c
+++ b/src/blst_util.c
@@ -60,7 +60,6 @@ void fr_pow(blst_fr *out, const blst_fr *a, uint64_t n) {
     }
 }
 
-// TODO: Is there really no better way to do this?
void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b) { blst_scalar s; blst_scalar_from_fr(&s, b); @@ -73,7 +72,6 @@ void p1_sub(blst_p1 *out, const blst_p1 *a, const blst_p1 *b) { blst_p1_add_or_double(out, a, &bneg); } -// TODO: Is there really no better way to do this? void p2_mul(blst_p2 *out, const blst_p2 *a, const blst_fr *b) { blst_scalar s; blst_scalar_from_fr(&s, b); diff --git a/src/fft_fr.c b/src/fft_fr.c index c0548f4..fe2da51 100644 --- a/src/fft_fr.c +++ b/src/fft_fr.c @@ -37,7 +37,7 @@ void fft_fr_slow(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride, uint64_t l) { uint64_t half = l / 2; - if (half > 2) { // TODO: Tunable parameter + if (half > 0) { // TODO: Tunable parameter fft_fr_fast(out, in, stride * 2, roots, roots_stride * 2, half); fft_fr_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half); for (uint64_t i = 0; i < half; i++) { diff --git a/src/fft_fr_bench.c b/src/fft_fr_bench.c new file mode 100644 index 0000000..3677ad2 --- /dev/null +++ b/src/fft_fr_bench.c @@ -0,0 +1,61 @@ +/* + * Copyright 2021 Benjamin Edgington + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> // malloc(), free() +#include <stdio.h> // printf() +#include <assert.h> // assert() +#include "bench_util.h" +#include "fft_fr.h" + +// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
+long run_bench(int scale, int max_seconds) {
+    timespec t0, t1;
+    unsigned long total_time = 0, nits = 0;
+    FFTSettings fs;
+
+    if (new_fft_settings(&fs, scale) != C_KZG_OK) { abort(); } // not assert(): the call must survive NDEBUG
+    // Allocate on the heap to avoid stack overflow for large sizes
+    blst_fr *data = malloc(fs.max_width * sizeof *data);
+    blst_fr *out = malloc(fs.max_width * sizeof *out);
+    if (data == NULL || out == NULL) { abort(); }
+
+    // Fill with randomness
+    for (uint64_t i = 0; i < fs.max_width; i++) {
+        data[i] = rand_fr();
+    }
+
+    while (total_time < max_seconds * NANO) {
+        clock_gettime(CLOCK_MONOTONIC, &t0); // monotonic clock: interval timing must not see wall-clock jumps
+        if (fft_fr(out, data, &fs, false, fs.max_width) != C_KZG_OK) { abort(); }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        nits++;
+        total_time += tdiff(t0, t1);
+    }
+
+    free(out);
+    free(data);
+
+    return nits ? total_time / nits : 0; // nits is 0 when max_seconds is 0; avoid dividing by zero
+}
+
+#define NSEC 1
+
+int main(void) {
+    printf("*** Benchmarking FFT_fr, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
+    for (int scale = 4; scale < 16; scale++) {
+        printf("fft_fr/scale_%d %ld ns/op\n", scale, run_bench(scale, NSEC));
+    }
+}
diff --git a/src/fft_g1.c b/src/fft_g1.c
index 4b9639b..4ccafda 100644
--- a/src/fft_g1.c
+++ b/src/fft_g1.c
@@ -36,7 +36,7 @@ void fft_g1_slow(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uin
 // Fast Fourier Transform
 void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) {
     uint64_t half = l / 2;
-    if (half > 2) { // TODO: Tunable parameter
+    if (half > 0) { // Tunable parameter
         fft_g1_fast(out, in, stride * 2, roots, roots_stride * 2, half);
         fft_g1_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
         for (uint64_t i = 0; i < half; i++) {
diff --git a/src/fft_g1_bench.c b/src/fft_g1_bench.c
new file mode 100644
index 0000000..ef6e7b9
--- /dev/null
+++ b/src/fft_g1_bench.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2021 Benjamin Edgington
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h> // malloc(), free(), abort()
+#include <stdio.h> // printf()
+#include <assert.h> // assert()
+#include "bench_util.h"
+#include "fft_g1.h"
+
+// Run fft_g1() repeatedly for about `max_seconds` seconds; return the mean time per iteration in nanoseconds.
+long run_bench(int scale, int max_seconds) {
+    timespec t0, t1;
+    unsigned long total_time = 0, nits = 0;
+    FFTSettings fs;
+
+    if (new_fft_settings(&fs, scale) != C_KZG_OK) { abort(); } // not assert(): the call must survive NDEBUG
+    // Allocate on the heap to avoid stack overflow for large sizes
+    blst_p1 *data = malloc(fs.max_width * sizeof *data);
+    blst_p1 *out = malloc(fs.max_width * sizeof *out);
+    if (data == NULL || out == NULL) { abort(); }
+
+    // Fill with randomness
+    for (uint64_t i = 0; i < fs.max_width; i++) {
+        data[i] = rand_g1();
+    }
+
+    while (total_time < max_seconds * NANO) {
+        clock_gettime(CLOCK_MONOTONIC, &t0); // monotonic clock: interval timing must not see wall-clock jumps
+        if (fft_g1(out, data, &fs, false, fs.max_width) != C_KZG_OK) { abort(); }
+        clock_gettime(CLOCK_MONOTONIC, &t1);
+        nits++;
+        total_time += tdiff(t0, t1);
+    }
+
+    free(out);
+    free(data);
+
+    return nits ? total_time / nits : 0; // nits is 0 when max_seconds is 0; avoid dividing by zero
+}
+
+#define NSEC 1
+
+int main(void) {
+    printf("*** Benchmarking FFT_g1, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
+    for (int scale = 4; scale < 16; scale++) {
+        printf("fft_g1/scale_%d %ld ns/op\n", scale, run_bench(scale, NSEC));
+    }
+}