Add benchmarking for FFTs
This commit is contained in:
parent
80c513f653
commit
b9bc4bb496
|
@ -1,7 +1,10 @@
|
|||
*.o
|
||||
*.a
|
||||
*_test
|
||||
a.out
|
||||
*_bench
|
||||
*.prof
|
||||
*.out
|
||||
*.log
|
||||
tmp/
|
||||
inc/blst.h
|
||||
inc/blst_aux.h
|
||||
|
|
15
src/Makefile
15
src/Makefile
|
@ -1,5 +1,6 @@
|
|||
TESTS = blst_util_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
|
||||
kzg_proofs_test poly_test
|
||||
BENCH = fft_fr_bench fft_g1_bench
|
||||
LIB_SRC = blst_util.c c_kzg_util.c fft_common.c fft_fr.c fft_g1.c kzg_proofs.c poly.c
|
||||
LIB_OBJ = $(LIB_SRC:.c=.o)
|
||||
|
||||
|
@ -17,13 +18,25 @@ libckzg.a: $(LIB_OBJ) Makefile
|
|||
clang -Wall $(CFLAGS) -o $@ $@.c debug_util.o libckzg.a -L../lib -lblst
|
||||
./$@
|
||||
|
||||
%_bench: %_bench.c bench_util.o $(LIB_OBJ) Makefile
|
||||
clang -Wall $(CFLAGS) -o $@ $@.c bench_util.o $(LIB_OBJ) -L../lib -lblst
|
||||
./$@
|
||||
|
||||
lib: clean libckzg.a
|
||||
|
||||
debuglib: CFLAGS += -g -DDEBUG
|
||||
debuglib: CFLAGS += -O1 -DDEBUG
|
||||
debuglib: clean libckzg.a
|
||||
|
||||
optlib: CFLAGS += -O2
|
||||
optlib: clean libckzg.a
|
||||
|
||||
profilelib: CFLAGS += -fprofile-instr-generate -fcoverage-mapping
|
||||
profilelib: clean libckzg.a
|
||||
|
||||
test: $(TESTS)
|
||||
|
||||
bench: $(BENCH)
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f libckzg.a
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright 2021 Benjamin Edgington
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // rand()
|
||||
#include "bench_util.h"
|
||||
#include "blst_util.h"
|
||||
|
||||
/**
 * Elapsed time between two timestamps.
 *
 * @param start The earlier timestamp
 * @param end   The later timestamp
 * @return `end - start` expressed in nanoseconds
 */
unsigned long tdiff(timespec start, timespec end) {
    // Whole seconds and the sub-second remainder are differenced separately;
    // the remainder may be negative, which the final sum absorbs.
    time_t whole_seconds = end.tv_sec - start.tv_sec;
    long leftover_nanos = end.tv_nsec - start.tv_nsec;
    return whole_seconds * NANO + leftover_nanos;
}
|
||||
|
||||
/**
 * Build a pseudo-random 64-bit value from the C library's rand().
 *
 * rand() is only guaranteed to deliver 15 random bits per call (RAND_MAX >= 32767), and
 * commonly delivers 31. Gluing two calls together at bit 32 therefore leaves bits 31 and 63
 * permanently zero. Instead, take 15 bits per call until all 64 bits have been filled.
 *
 * Not cryptographically secure: intended for generating benchmark inputs only.
 *
 * @return A value in which every one of the 64 bits can be set
 */
uint64_t rand_uint64(void) {
    uint64_t ret = 0;
    // Five rounds of 15 bits = 75 bits; the surplus high bits are shifted out harmlessly.
    for (int filled = 0; filled < 64; filled += 15) {
        ret = (ret << 15) | ((uint64_t)rand() & 0x7fff);
    }
    return ret;
}
|
||||
|
||||
/**
 * Produce a field element from four successive rand_uint64() values.
 *
 * The four 64-bit limbs are handed to blst_fr_from_uint64() in the order they were drawn.
 *
 * @return The resulting blst_fr value
 */
blst_fr rand_fr() {
    uint64_t limbs[4];
    blst_fr result;
    for (int i = 0; i < 4; i++) {
        limbs[i] = rand_uint64();
    }
    blst_fr_from_uint64(&result, limbs);
    return result;
}
|
||||
|
||||
blst_p1 rand_g1() {
|
||||
blst_p1 ret;
|
||||
blst_fr random = rand_fr();
|
||||
p1_mul(&ret, blst_p1_generator(), &random);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Copyright 2021 Benjamin Edgington
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef BENCH_UTIL_H
#define BENCH_UTIL_H

#include <time.h> // clock_gettime(), clock IDs, timespec
#include "c_kzg.h"

// Lets `struct timespec` be written as plain `timespec`
typedef struct timespec timespec;

// Number of nanoseconds in one second
#define NANO 1000000000L

// Returns `end - start` in nanoseconds
unsigned long tdiff(timespec start, timespec end);

// Returns a 64-bit value assembled from calls to the C library's rand()
uint64_t rand_uint64(void);

// Returns a field element built from four rand_uint64() limbs
blst_fr rand_fr(void);

// Returns the G1 generator multiplied by a rand_fr() scalar
blst_p1 rand_g1(void);

#endif // BENCH_UTIL_H
|
|
@ -60,7 +60,6 @@ void fr_pow(blst_fr *out, const blst_fr *a, uint64_t n) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Is there really no better way to do this?
|
||||
void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b) {
|
||||
blst_scalar s;
|
||||
blst_scalar_from_fr(&s, b);
|
||||
|
@ -73,7 +72,6 @@ void p1_sub(blst_p1 *out, const blst_p1 *a, const blst_p1 *b) {
|
|||
blst_p1_add_or_double(out, a, &bneg);
|
||||
}
|
||||
|
||||
// TODO: Is there really no better way to do this?
|
||||
void p2_mul(blst_p2 *out, const blst_p2 *a, const blst_fr *b) {
|
||||
blst_scalar s;
|
||||
blst_scalar_from_fr(&s, b);
|
||||
|
|
|
@ -37,7 +37,7 @@ void fft_fr_slow(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr
|
|||
void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride,
|
||||
uint64_t l) {
|
||||
uint64_t half = l / 2;
|
||||
if (half > 2) { // TODO: Tunable parameter
|
||||
if (half > 0) { // TODO: Tunable parameter
|
||||
fft_fr_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
||||
fft_fr_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
||||
for (uint64_t i = 0; i < half; i++) {
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2021 Benjamin Edgington
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc(), free()
|
||||
#include <stdio.h> // printf()
|
||||
#include <assert.h> // assert()
|
||||
#include "bench_util.h"
|
||||
#include "fft_fr.h"
|
||||
|
||||
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
|
||||
long run_bench(int scale, int max_seconds) {
|
||||
timespec t0, t1;
|
||||
unsigned long total_time = 0, nits = 0;
|
||||
FFTSettings fs;
|
||||
|
||||
assert(C_KZG_OK == new_fft_settings(&fs, scale));
|
||||
// Allocate on the heap to avoid stack overflow for large sizes
|
||||
blst_fr *data, *out;
|
||||
data = malloc(fs.max_width * sizeof(blst_fr));
|
||||
out = malloc(fs.max_width * sizeof(blst_fr));
|
||||
|
||||
// Fill with randomness
|
||||
for (uint64_t i = 0; i < fs.max_width; i++) {
|
||||
data[i] = rand_fr();
|
||||
}
|
||||
|
||||
while (total_time < max_seconds * NANO) {
|
||||
clock_gettime(CLOCK_REALTIME, &t0);
|
||||
assert(C_KZG_OK == fft_fr(out, data, &fs, false, fs.max_width));
|
||||
clock_gettime(CLOCK_REALTIME, &t1);
|
||||
nits++;
|
||||
total_time += tdiff(t0, t1);
|
||||
}
|
||||
|
||||
free(out);
|
||||
free(data);
|
||||
|
||||
return total_time / nits;
|
||||
}
|
||||
|
||||
#define NSEC 1
|
||||
|
||||
int main(void) {
|
||||
printf("*** Benchmarking FFT_fr, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
|
||||
for (int scale = 4; scale < 16; scale++) {
|
||||
printf("fft_fr/scale_%d %lu ns/op\n", scale, run_bench(scale, 1));
|
||||
}
|
||||
}
|
|
@ -36,7 +36,7 @@ void fft_g1_slow(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uin
|
|||
// Fast Fourier Transform
|
||||
void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) {
|
||||
uint64_t half = l / 2;
|
||||
if (half > 2) { // TODO: Tunable parameter
|
||||
if (half > 0) { // Tunable parameter
|
||||
fft_g1_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
||||
fft_g1_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
||||
for (uint64_t i = 0; i < half; i++) {
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright 2021 Benjamin Edgington
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h> // malloc(), free()
|
||||
#include <stdio.h> // printf()
|
||||
#include <assert.h> // assert()
|
||||
#include "bench_util.h"
|
||||
#include "fft_g1.h"
|
||||
|
||||
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
|
||||
long run_bench(int scale, int max_seconds) {
|
||||
timespec t0, t1;
|
||||
unsigned long total_time = 0, nits = 0;
|
||||
FFTSettings fs;
|
||||
|
||||
assert(C_KZG_OK == new_fft_settings(&fs, scale));
|
||||
// Allocate on the heap to avoid stack overflow for large sizes
|
||||
blst_p1 *data, *out;
|
||||
data = malloc(fs.max_width * sizeof(blst_p1));
|
||||
out = malloc(fs.max_width * sizeof(blst_p1));
|
||||
|
||||
// Fill with randomness
|
||||
for (uint64_t i = 0; i < fs.max_width; i++) {
|
||||
data[i] = rand_g1();
|
||||
}
|
||||
|
||||
while (total_time < max_seconds * NANO) {
|
||||
clock_gettime(CLOCK_REALTIME, &t0);
|
||||
assert(C_KZG_OK == fft_g1(out, data, &fs, false, fs.max_width));
|
||||
clock_gettime(CLOCK_REALTIME, &t1);
|
||||
nits++;
|
||||
total_time += tdiff(t0, t1);
|
||||
}
|
||||
|
||||
free(out);
|
||||
free(data);
|
||||
|
||||
return total_time / nits;
|
||||
}
|
||||
|
||||
#define NSEC 1
|
||||
|
||||
int main(void) {
|
||||
printf("*** Benchmarking FFT_g1, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
|
||||
for (int scale = 4; scale < 16; scale++) {
|
||||
printf("fft_g1/scale_%d %lu ns/op\n", scale, run_bench(scale, 1));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue