Add benchmarking for FFTs
This commit is contained in:
parent
80c513f653
commit
b9bc4bb496
|
@ -1,7 +1,10 @@
|
||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
*_test
|
*_test
|
||||||
a.out
|
*_bench
|
||||||
|
*.prof
|
||||||
|
*.out
|
||||||
|
*.log
|
||||||
tmp/
|
tmp/
|
||||||
inc/blst.h
|
inc/blst.h
|
||||||
inc/blst_aux.h
|
inc/blst_aux.h
|
||||||
|
|
15
src/Makefile
15
src/Makefile
|
@ -1,5 +1,6 @@
|
||||||
TESTS = blst_util_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
|
TESTS = blst_util_test c_kzg_util_test fft_common_test fft_fr_test fft_g1_test \
|
||||||
kzg_proofs_test poly_test
|
kzg_proofs_test poly_test
|
||||||
|
BENCH = fft_fr_bench fft_g1_bench
|
||||||
LIB_SRC = blst_util.c c_kzg_util.c fft_common.c fft_fr.c fft_g1.c kzg_proofs.c poly.c
|
LIB_SRC = blst_util.c c_kzg_util.c fft_common.c fft_fr.c fft_g1.c kzg_proofs.c poly.c
|
||||||
LIB_OBJ = $(LIB_SRC:.c=.o)
|
LIB_OBJ = $(LIB_SRC:.c=.o)
|
||||||
|
|
||||||
|
@ -17,13 +18,25 @@ libckzg.a: $(LIB_OBJ) Makefile
|
||||||
clang -Wall $(CFLAGS) -o $@ $@.c debug_util.o libckzg.a -L../lib -lblst
|
clang -Wall $(CFLAGS) -o $@ $@.c debug_util.o libckzg.a -L../lib -lblst
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
|
%_bench: %_bench.c bench_util.o $(LIB_OBJ) Makefile
|
||||||
|
clang -Wall $(CFLAGS) -o $@ $@.c bench_util.o $(LIB_OBJ) -L../lib -lblst
|
||||||
|
./$@
|
||||||
|
|
||||||
lib: clean libckzg.a
|
lib: clean libckzg.a
|
||||||
|
|
||||||
debuglib: CFLAGS += -g -DDEBUG
|
debuglib: CFLAGS += -O1 -DDEBUG
|
||||||
debuglib: clean libckzg.a
|
debuglib: clean libckzg.a
|
||||||
|
|
||||||
|
optlib: CFLAGS += -O2
|
||||||
|
optlib: clean libckzg.a
|
||||||
|
|
||||||
|
profilelib: CFLAGS += -fprofile-instr-generate -fcoverage-mapping
|
||||||
|
profilelib: clean libckzg.a
|
||||||
|
|
||||||
test: $(TESTS)
|
test: $(TESTS)
|
||||||
|
|
||||||
|
bench: $(BENCH)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
rm -f libckzg.a
|
rm -f libckzg.a
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 Benjamin Edgington
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h> // rand()
|
||||||
|
#include "bench_util.h"
|
||||||
|
#include "blst_util.h"
|
||||||
|
|
||||||
|
// Time elapsed from `start` to `end`, expressed in nanoseconds.
// The seconds difference is scaled by NANO and the nanoseconds difference (which may be negative) is added on.
unsigned long tdiff(timespec start, timespec end) {
    time_t whole_seconds = end.tv_sec - start.tv_sec;
    long leftover_nanos = end.tv_nsec - start.tv_nsec;
    return whole_seconds * NANO + leftover_nanos;
}
|
||||||
|
|
||||||
|
uint64_t rand_uint64() {
|
||||||
|
uint64_t a = (uint64_t)rand();
|
||||||
|
uint64_t b = (uint64_t)rand();
|
||||||
|
return a << 32 | b;
|
||||||
|
}
|
||||||
|
|
||||||
|
blst_fr rand_fr() {
|
||||||
|
blst_fr ret;
|
||||||
|
uint64_t a[4];
|
||||||
|
a[0] = rand_uint64();
|
||||||
|
a[1] = rand_uint64();
|
||||||
|
a[2] = rand_uint64();
|
||||||
|
a[3] = rand_uint64();
|
||||||
|
blst_fr_from_uint64(&ret, a);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
blst_p1 rand_g1() {
|
||||||
|
blst_p1 ret;
|
||||||
|
blst_fr random = rand_fr();
|
||||||
|
p1_mul(&ret, blst_p1_generator(), &random);
|
||||||
|
return ret;
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 Benjamin Edgington
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Include guard: lets this header be included more than once in a translation unit
#ifndef BENCH_UTIL_H
#define BENCH_UTIL_H

#include <time.h> // CLOCK_REALTIME, clock_gettime(), timespec
#include "c_kzg.h"

// Shorthand so that callers can write `timespec` instead of `struct timespec`
typedef struct timespec timespec;

// Number of nanoseconds in one second
#define NANO 1000000000L

// Time elapsed from `start` to `end`, in nanoseconds
unsigned long tdiff(timespec start, timespec end);

// Pseudo-random 64-bit value built from rand()
uint64_t rand_uint64(void);

// Pseudo-random `blst_fr` built from rand_uint64() output
blst_fr rand_fr(void);

// Pseudo-random `blst_p1` built from rand_fr() output
blst_p1 rand_g1(void);

#endif // BENCH_UTIL_H
|
|
@ -60,7 +60,6 @@ void fr_pow(blst_fr *out, const blst_fr *a, uint64_t n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Is there really no better way to do this?
|
|
||||||
void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b) {
|
void p1_mul(blst_p1 *out, const blst_p1 *a, const blst_fr *b) {
|
||||||
blst_scalar s;
|
blst_scalar s;
|
||||||
blst_scalar_from_fr(&s, b);
|
blst_scalar_from_fr(&s, b);
|
||||||
|
@ -73,7 +72,6 @@ void p1_sub(blst_p1 *out, const blst_p1 *a, const blst_p1 *b) {
|
||||||
blst_p1_add_or_double(out, a, &bneg);
|
blst_p1_add_or_double(out, a, &bneg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Is there really no better way to do this?
|
|
||||||
void p2_mul(blst_p2 *out, const blst_p2 *a, const blst_fr *b) {
|
void p2_mul(blst_p2 *out, const blst_p2 *a, const blst_fr *b) {
|
||||||
blst_scalar s;
|
blst_scalar s;
|
||||||
blst_scalar_from_fr(&s, b);
|
blst_scalar_from_fr(&s, b);
|
||||||
|
|
|
@ -37,7 +37,7 @@ void fft_fr_slow(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr
|
||||||
void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride,
|
void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr *roots, uint64_t roots_stride,
|
||||||
uint64_t l) {
|
uint64_t l) {
|
||||||
uint64_t half = l / 2;
|
uint64_t half = l / 2;
|
||||||
if (half > 2) { // TODO: Tunable parameter
|
if (half > 0) { // TODO: Tunable parameter
|
||||||
fft_fr_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
fft_fr_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
||||||
fft_fr_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
fft_fr_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
||||||
for (uint64_t i = 0; i < half; i++) {
|
for (uint64_t i = 0; i < half; i++) {
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 Benjamin Edgington
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h> // malloc(), free()
|
||||||
|
#include <stdio.h> // printf()
|
||||||
|
#include <assert.h> // assert()
|
||||||
|
#include "bench_util.h"
|
||||||
|
#include "fft_fr.h"
|
||||||
|
|
||||||
|
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
|
||||||
|
long run_bench(int scale, int max_seconds) {
|
||||||
|
timespec t0, t1;
|
||||||
|
unsigned long total_time = 0, nits = 0;
|
||||||
|
FFTSettings fs;
|
||||||
|
|
||||||
|
assert(C_KZG_OK == new_fft_settings(&fs, scale));
|
||||||
|
// Allocate on the heap to avoid stack overflow for large sizes
|
||||||
|
blst_fr *data, *out;
|
||||||
|
data = malloc(fs.max_width * sizeof(blst_fr));
|
||||||
|
out = malloc(fs.max_width * sizeof(blst_fr));
|
||||||
|
|
||||||
|
// Fill with randomness
|
||||||
|
for (uint64_t i = 0; i < fs.max_width; i++) {
|
||||||
|
data[i] = rand_fr();
|
||||||
|
}
|
||||||
|
|
||||||
|
while (total_time < max_seconds * NANO) {
|
||||||
|
clock_gettime(CLOCK_REALTIME, &t0);
|
||||||
|
assert(C_KZG_OK == fft_fr(out, data, &fs, false, fs.max_width));
|
||||||
|
clock_gettime(CLOCK_REALTIME, &t1);
|
||||||
|
nits++;
|
||||||
|
total_time += tdiff(t0, t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(out);
|
||||||
|
free(data);
|
||||||
|
|
||||||
|
return total_time / nits;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NSEC 1
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("*** Benchmarking FFT_fr, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
|
||||||
|
for (int scale = 4; scale < 16; scale++) {
|
||||||
|
printf("fft_fr/scale_%d %lu ns/op\n", scale, run_bench(scale, 1));
|
||||||
|
}
|
||||||
|
}
|
|
@ -36,7 +36,7 @@ void fft_g1_slow(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uin
|
||||||
// Fast Fourier Transform
|
// Fast Fourier Transform
|
||||||
void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) {
|
void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uint64_t roots_stride, uint64_t l) {
|
||||||
uint64_t half = l / 2;
|
uint64_t half = l / 2;
|
||||||
if (half > 2) { // TODO: Tunable parameter
|
if (half > 0) { // Tunable parameter
|
||||||
fft_g1_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
fft_g1_fast(out, in, stride * 2, roots, roots_stride * 2, half);
|
||||||
fft_g1_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
fft_g1_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
|
||||||
for (uint64_t i = 0; i < half; i++) {
|
for (uint64_t i = 0; i < half; i++) {
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2021 Benjamin Edgington
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h> // malloc(), free()
|
||||||
|
#include <stdio.h> // printf()
|
||||||
|
#include <assert.h> // assert()
|
||||||
|
#include "bench_util.h"
|
||||||
|
#include "fft_g1.h"
|
||||||
|
|
||||||
|
// Run the benchmark for `max_seconds` and return the time per iteration in nanoseconds.
|
||||||
|
long run_bench(int scale, int max_seconds) {
|
||||||
|
timespec t0, t1;
|
||||||
|
unsigned long total_time = 0, nits = 0;
|
||||||
|
FFTSettings fs;
|
||||||
|
|
||||||
|
assert(C_KZG_OK == new_fft_settings(&fs, scale));
|
||||||
|
// Allocate on the heap to avoid stack overflow for large sizes
|
||||||
|
blst_p1 *data, *out;
|
||||||
|
data = malloc(fs.max_width * sizeof(blst_p1));
|
||||||
|
out = malloc(fs.max_width * sizeof(blst_p1));
|
||||||
|
|
||||||
|
// Fill with randomness
|
||||||
|
for (uint64_t i = 0; i < fs.max_width; i++) {
|
||||||
|
data[i] = rand_g1();
|
||||||
|
}
|
||||||
|
|
||||||
|
while (total_time < max_seconds * NANO) {
|
||||||
|
clock_gettime(CLOCK_REALTIME, &t0);
|
||||||
|
assert(C_KZG_OK == fft_g1(out, data, &fs, false, fs.max_width));
|
||||||
|
clock_gettime(CLOCK_REALTIME, &t1);
|
||||||
|
nits++;
|
||||||
|
total_time += tdiff(t0, t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(out);
|
||||||
|
free(data);
|
||||||
|
|
||||||
|
return total_time / nits;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NSEC 1
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
printf("*** Benchmarking FFT_g1, %d second%s per test.\n", NSEC, NSEC == 1 ? "" : "s");
|
||||||
|
for (int scale = 4; scale < 16; scale++) {
|
||||||
|
printf("fft_g1/scale_%d %lu ns/op\n", scale, run_bench(scale, 1));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue