Improve G1 multiplication time with Blst
This commit is contained in:
parent
c10e1f40f1
commit
f557f32ed1
|
@ -24,6 +24,25 @@
|
|||
|
||||
#ifdef BLST
|
||||
|
||||
/**
|
||||
* Fast log base 2 of a byte.
|
||||
*
|
||||
* Corresponds to the index of the highest bit set in the byte. Adapted from
|
||||
* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog.
|
||||
*
|
||||
* @param[in] b A non-zero byte
|
||||
* @return The index of the highest set bit
|
||||
*/
|
||||
int log_2_byte(byte b) {
    int r, shift;

    // Step 1: if any bit in the high nibble is set, the result is at least 4.
    // Shift that nibble down so the remaining steps only inspect four bits.
    r = (b > 0xF) << 2;
    b >>= r;

    // Step 2: within the nibble, if either of the upper two bits is set, the
    // result grows by a further 2.
    shift = (b > 0x3) << 1;

    // Step 3: shifting by one extra position leaves b equal to 1 exactly when
    // the upper bit of the remaining pair is set, which supplies the low bit
    // of the result.
    b >>= (shift + 1);
    r |= shift | b;

    // Note: a zero byte falls through every step and yields 0, the same value
    // as for 0x01. Callers are expected to pass a non-zero byte.
    return r;
}
|
||||
|
||||
/**
|
||||
* Check whether the operand is zero in the finite field.
|
||||
*
|
||||
|
@ -250,6 +269,8 @@ bool g1_equal(const g1_t *a, const g1_t *b) {
|
|||
/**
|
||||
* Multiply a G1 group element by a field element.
|
||||
*
|
||||
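* This deliberately bypasses Blst's constant-time scalar multiplication: the running time depends on the value of the multiplier, so it must not be used with secret scalars. FFTs perform many multiplications by one, for which constant-time multiplication is needlessly slow.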
|
||||
*
|
||||
* @param[out] out [@p b]@p a
|
||||
* @param[in] a The G1 group element
|
||||
* @param[in] b The multiplier
|
||||
|
@ -257,7 +278,18 @@ bool g1_equal(const g1_t *a, const g1_t *b) {
|
|||
void g1_mul(g1_t *out, const g1_t *a, const fr_t *b) {
    blst_scalar s;
    blst_scalar_from_fr(&s, b);

    // Count the number of bytes to be multiplied: drop zero bytes from the
    // high-index end of the scalar so that small multipliers are cheap.
    // NOTE: this makes the running time depend on the multiplier's value.
    size_t len = sizeof(blst_scalar);
    while (len > 0 && s.b[len - 1] == 0) {
        --len;
    }

    if (len == 0) {
        // Multiplying by zero gives the identity element.
        *out = g1_identity;
    } else if (len == 1 && s.b[0] == 1) {
        // Multiplying by one is a plain copy; no group operations are needed.
        *out = *a;
    } else {
        // Count the number of bits to be multiplied: eight for every byte
        // below the top one, plus the position of the highest set bit in the
        // top byte (log_2_byte is zero-based, hence the + 1).
        size_t nbits = 8 * (len - 1) + 1 + (size_t)log_2_byte(s.b[len - 1]);
        blst_p1_mult(out, a, s.b, nbits);
    }
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -94,6 +94,7 @@ static const g2_t g2_negative_generator = {{{{0xf5f28fa202940a10L, 0xb3f5fb2687b
|
|||
#endif // BLST
|
||||
|
||||
// All the functions in the interface
|
||||
int log_2_byte(byte b);
|
||||
bool fr_is_zero(const fr_t *p);
|
||||
bool fr_is_one(const fr_t *p);
|
||||
void fr_from_scalar(fr_t *out, const scalar_t *a);
|
||||
|
|
|
@ -20,6 +20,14 @@
|
|||
// This is -1 (the second root of unity)
|
||||
uint64_t m1[] = {0xffffffff00000000L, 0x53bda402fffe5bfeL, 0x3339d80809a1d805L, 0x73eda753299d7d48L};
|
||||
|
||||
void log_2_byte_works(void) {
    // Zero is deliberately not checked: log_2_byte() is documented as taking
    // a non-zero byte, so its result for 0x00 is not part of the contract.

    // One check per bit position (lowest value with that highest bit set).
    TEST_CHECK(0 == log_2_byte(0x01));
    TEST_CHECK(1 == log_2_byte(0x02));
    TEST_CHECK(2 == log_2_byte(0x04));
    TEST_CHECK(3 == log_2_byte(0x08));
    TEST_CHECK(4 == log_2_byte(0x10));
    TEST_CHECK(5 == log_2_byte(0x20));
    TEST_CHECK(6 == log_2_byte(0x40));
    TEST_CHECK(7 == log_2_byte(0x80));

    // Lower bits must not disturb the result (highest value per position),
    // including both sides of the internal 0x3 and 0xF thresholds.
    TEST_CHECK(1 == log_2_byte(0x03));
    TEST_CHECK(2 == log_2_byte(0x07));
    TEST_CHECK(3 == log_2_byte(0x0f));
    TEST_CHECK(4 == log_2_byte(0x1f));
    TEST_CHECK(6 == log_2_byte(0x7f));
    TEST_CHECK(7 == log_2_byte(0xff));
}
|
||||
|
||||
void fr_is_zero_works(void) {
|
||||
fr_t zero;
|
||||
fr_from_uint64(&zero, 0);
|
||||
|
@ -176,6 +184,7 @@ void pairings_work(void) {
|
|||
|
||||
TEST_LIST = {
|
||||
{"BLS12_384_TEST", title},
|
||||
{"log_2_byte_works", log_2_byte_works},
|
||||
{"fr_is_zero_works", fr_is_zero_works},
|
||||
{"fr_is_one_works", fr_is_one_works},
|
||||
{"fr_from_uint64_works", fr_from_uint64_works},
|
||||
|
|
|
@ -78,7 +78,7 @@ void fft_fr_fast(fr_t *out, const fr_t *in, uint64_t stride, const fr_t *roots,
|
|||
fr_add(&out[i], &out[i], &y_times_root);
|
||||
}
|
||||
} else {
|
||||
fft_fr_slow(out, in, stride, roots, roots_stride, n);
|
||||
*out = *in;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ void fft_g1_fast(g1_t *out, const g1_t *in, uint64_t stride, const fr_t *roots,
|
|||
g1_add_or_dbl(&out[i], &out[i], &y_times_root);
|
||||
}
|
||||
} else {
|
||||
fft_g1_slow(out, in, stride, roots, roots_stride, n);
|
||||
*out = *in;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue