Merge #678: Preventing compiler optimizations in benchmarks without a memory fence

362bb25608dbcd724a07dd5170c4ebe081c3dd84 Modified bench_scalar_split so it won't get optimized out (Elichai Turkel)
73a30c6b58f078b42a03a222c55bfe8b4dd86a2b Added accumulators and checks on benchmarks so they won't get optimized out (Elichai Turkel)

Pull request description:

  As asked https://github.com/bitcoin-core/secp256k1/pull/667#issuecomment-546885951 this is the parts of #667 that don't require an assembly memory fence.

  I splitted them to 2 commits, one with obvious easy ones. and another that changes the logic a bit to achieve this (See https://github.com/bitcoin-core/secp256k1/pull/667#discussion_r337248398 )

ACKs for top commit:
  jonasnick:
    ACK 362bb256
  real-or-random:
    ACK 362bb25608dbcd724a07dd5170c4ebe081c3dd84 I read the diff and I ran the benchmarks

Tree-SHA512: d5e47f5d64c3b035155276f057671ceb7f5852f24c7102fee4d0141aabebf882039f3eae0d152bae89d0603bc09fa6ad9f7bc6b8c0f74a668ee252c727517804
This commit is contained in:
Jonas Nick 2019-11-18 20:09:05 +00:00
commit 544002c008
No known key found for this signature in database
GPG Key ID: 4861DBF262123605

View File

@ -57,12 +57,13 @@ void bench_setup(void* arg) {
}
void bench_scalar_add(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 2000000; i++) {
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(j <= 2000000);
}
void bench_scalar_negate(void* arg) {
@ -94,35 +95,37 @@ void bench_scalar_mul(void* arg) {
#ifdef USE_ENDOMORPHISM
void bench_scalar_split(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 20000; i++) {
secp256k1_scalar l, r;
secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x);
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(j <= 20000);
}
#endif
void bench_scalar_inverse(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 2000; i++) {
secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(j <= 2000);
}
void bench_scalar_inverse_var(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 2000; i++) {
secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(j <= 2000);
}
void bench_field_normalize(void* arg) {
@ -182,15 +185,16 @@ void bench_field_inverse_var(void* arg) {
}
void bench_field_sqrt(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
secp256k1_fe t;
for (i = 0; i < 20000; i++) {
t = data->fe_x;
secp256k1_fe_sqrt(&data->fe_x, &t);
j += secp256k1_fe_sqrt(&data->fe_x, &t);
secp256k1_fe_add(&data->fe_x, &data->fe_y);
}
CHECK(j <= 20000);
}
void bench_group_double_var(void* arg) {
@ -230,32 +234,37 @@ void bench_group_add_affine_var(void* arg) {
}
void bench_group_jacobi_var(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 20000; i++) {
secp256k1_gej_has_quad_y_var(&data->gej_x);
j += secp256k1_gej_has_quad_y_var(&data->gej_x);
}
CHECK(j == 20000);
}
void bench_ecmult_wnaf(void* arg) {
int i;
int i, bits = 0, overflow = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 20000; i++) {
secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(overflow >= 0);
CHECK(bits <= 256*20000);
}
void bench_wnaf_const(void* arg) {
int i;
int i, bits = 0, overflow = 0;
bench_inv *data = (bench_inv*)arg;
for (i = 0; i < 20000; i++) {
secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
}
CHECK(overflow >= 0);
CHECK(bits <= 256*20000);
}
@ -312,7 +321,7 @@ void bench_context_sign(void* arg) {
#ifndef USE_NUM_NONE
void bench_num_jacobi(void* arg) {
int i;
int i, j = 0;
bench_inv *data = (bench_inv*)arg;
secp256k1_num nx, norder;
@ -321,8 +330,9 @@ void bench_num_jacobi(void* arg) {
secp256k1_scalar_get_num(&norder, &data->scalar_y);
for (i = 0; i < 200000; i++) {
secp256k1_num_jacobi(&nx, &norder);
j += secp256k1_num_jacobi(&nx, &norder);
}
CHECK(j <= 200000);
}
#endif