Implement optimisation suggestion from Mamy

This commit is contained in:
Ben Edgington 2021-02-06 19:18:53 +00:00
parent a1659645af
commit c9dee6b54c
2 changed files with 4 additions and 6 deletions

View File

@@ -40,10 +40,9 @@ void fft_fr_fast(blst_fr *out, const blst_fr *in, uint64_t stride, const blst_fr
fft_fr_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
for (uint64_t i = 0; i < half; i++) {
blst_fr y_times_root;
blst_fr x = out[i];
blst_fr_mul(&y_times_root, &out[i + half], &roots[i * roots_stride]);
blst_fr_add(&out[i], &x, &y_times_root);
blst_fr_sub(&out[i + half], &x, &y_times_root);
blst_fr_sub(&out[i + half], &out[i], &y_times_root);
blst_fr_add(&out[i], &out[i], &y_times_root);
}
} else {
fft_fr_slow(out, in, stride, roots, roots_stride, l);

View File

@@ -41,10 +41,9 @@ void fft_g1_fast(blst_p1 *out, blst_p1 *in, uint64_t stride, blst_fr *roots, uin
fft_g1_fast(out + half, in + stride, stride * 2, roots, roots_stride * 2, half);
for (uint64_t i = 0; i < half; i++) {
blst_p1 y_times_root;
blst_p1 x = out[i];
p1_mul(&y_times_root, &out[i + half], &roots[i * roots_stride]);
blst_p1_add_or_double(&out[i], &x, &y_times_root);
p1_sub(&out[i + half], &x, &y_times_root);
p1_sub(&out[i + half], &out[i], &y_times_root);
blst_p1_add_or_double(&out[i], &out[i], &y_times_root);
}
} else {
fft_g1_slow(out, in, stride, roots, roots_stride, l);