Merge bitcoin-core/secp256k1#1033: Add _fe_half and use in _gej_add_ge and _gej_double
e848c3799c Update sage files for new formulae (Peter Dettman)
d64bb5d4f3 Add fe_half tests for worst-case inputs (Peter Dettman)
4eb8b932ff Further improve doubling formula using fe_half (Peter Dettman)
557b31fac3 Doubling formula using fe_half (Pieter Wuille)
2cbb4b1a42 Run more iterations of run_field_misc (Pieter Wuille)
9cc5c257ed Add test for secp256k1_fe_half (Pieter Wuille)
925f78d55e Add _fe_half and use in _gej_add_ge (Peter Dettman)

Pull request description:

  - Trades 1 _half for 3 _mul_int and 2 _normalize_weak

  Gives around 2-3% faster signing and ECDH, depending on compiler/platform.

ACKs for top commit:
  sipa:
    utACK e848c3799c
  jonasnick:
    ACK e848c3799c
  real-or-random:
    ACK e848c3799c
Tree-SHA512: 81a6c93b3d983f1b48ec8e8b6f262ba914215045a95415147f41ee6e85296aa4d0cbbad9f370cdf475571447baad861d2cc8e0b04a71202d48959cb8a098f584
This commit is contained in:
commit
1253a27756
|
@ -8,25 +8,20 @@ load("weierstrass_prover.sage")
|
|||
def formula_secp256k1_gej_double_var(a):
|
||||
"""libsecp256k1's secp256k1_gej_double_var, used by various addition functions"""
|
||||
rz = a.Z * a.Y
|
||||
rz = rz * 2
|
||||
t1 = a.X^2
|
||||
t1 = t1 * 3
|
||||
t2 = t1^2
|
||||
t3 = a.Y^2
|
||||
t3 = t3 * 2
|
||||
t4 = t3^2
|
||||
t4 = t4 * 2
|
||||
t3 = t3 * a.X
|
||||
rx = t3
|
||||
rx = rx * 4
|
||||
rx = -rx
|
||||
rx = rx + t2
|
||||
t2 = -t2
|
||||
t3 = t3 * 6
|
||||
t3 = t3 + t2
|
||||
ry = t1 * t3
|
||||
t2 = -t4
|
||||
ry = ry + t2
|
||||
s = a.Y^2
|
||||
l = a.X^2
|
||||
l = l * 3
|
||||
l = l / 2
|
||||
t = -s
|
||||
t = t * a.X
|
||||
rx = l^2
|
||||
rx = rx + t
|
||||
rx = rx + t
|
||||
s = s^2
|
||||
t = t + rx
|
||||
ry = t * l
|
||||
ry = ry + s
|
||||
ry = -ry
|
||||
return jacobianpoint(rx, ry, rz)
|
||||
|
||||
def formula_secp256k1_gej_add_var(branch, a, b):
|
||||
|
@ -197,7 +192,8 @@ def formula_secp256k1_gej_add_ge(branch, a, b):
|
|||
rr_alt = rr
|
||||
m_alt = m
|
||||
n = m_alt^2
|
||||
q = n * t
|
||||
q = -t
|
||||
q = q * n
|
||||
n = n^2
|
||||
if degenerate:
|
||||
n = m
|
||||
|
@ -210,8 +206,6 @@ def formula_secp256k1_gej_add_ge(branch, a, b):
|
|||
zeroes.update({rz : 'r.z=0'})
|
||||
else:
|
||||
nonzeroes.update({rz : 'r.z!=0'})
|
||||
rz = rz * 2
|
||||
q = -q
|
||||
t = t + q
|
||||
rx = t
|
||||
t = t * 2
|
||||
|
@ -219,8 +213,7 @@ def formula_secp256k1_gej_add_ge(branch, a, b):
|
|||
t = t * rr_alt
|
||||
t = t + n
|
||||
ry = -t
|
||||
rx = rx * 4
|
||||
ry = ry * 4
|
||||
ry = ry / 2
|
||||
if a_infinity:
|
||||
rx = b.X
|
||||
ry = b.Y
|
||||
|
|
|
@ -140,6 +140,15 @@ void bench_scalar_inverse_var(void* arg, int iters) {
|
|||
CHECK(j <= iters);
|
||||
}
|
||||
|
||||
void bench_field_half(void* arg, int iters) {
|
||||
int i;
|
||||
bench_inv *data = (bench_inv*)arg;
|
||||
|
||||
for (i = 0; i < iters; i++) {
|
||||
secp256k1_fe_half(&data->fe[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void bench_field_normalize(void* arg, int iters) {
|
||||
int i;
|
||||
bench_inv *data = (bench_inv*)arg;
|
||||
|
@ -354,6 +363,7 @@ int main(int argc, char **argv) {
|
|||
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);
|
||||
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters);
|
||||
|
||||
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "half")) run_benchmark("field_half", bench_field_half, bench_setup, NULL, &data, 10, iters*100);
|
||||
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100);
|
||||
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100);
|
||||
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, iters*10);
|
||||
|
|
|
@ -130,4 +130,13 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f
|
|||
/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/
|
||||
static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);
|
||||
|
||||
/** Halves the value of a field element modulo the field prime. Constant-time.
|
||||
* For an input magnitude 'm', the output magnitude is set to 'floor(m/2) + 1'.
|
||||
* The output is not guaranteed to be normalized, regardless of the input.
* The field implementations assert (VERIFY builds only) that the input magnitude is below 32.
* The element is modified in place. */
|
||||
static void secp256k1_fe_half(secp256k1_fe *r);
|
||||
|
||||
/** Sets each limb of 'r' to its upper bound at magnitude 'm'. The output will also have its
|
||||
* magnitude set to 'm' and is normalized if (and only if) 'm' is zero.
* 'm' must not be negative; the implementations VERIFY_CHECK this together with an upper limit. */
|
||||
static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m);
|
||||
|
||||
#endif /* SECP256K1_FIELD_H */
|
||||
|
|
|
@ -49,6 +49,26 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) {
|
|||
}
|
||||
#endif
|
||||
|
||||
/** Set every limb of 'r' to the largest value allowed at magnitude 'm' (10x26 representation).
 *
 *  In:   m: requested magnitude, 0 <= m <= 32.
 *  Out:  r: n[0..8] = 0x3FFFFFF * 2 * m and n[9] = 0x03FFFFF * 2 * m.
 *           In VERIFY builds the magnitude is recorded as 'm' and the element is flagged
 *           as normalized only when m == 0, after which the element is verified.
 */
static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
    VERIFY_CHECK(m >= 0);
    /* This implementation handles its limbs as uint32_t values. The largest per-limb
     * bound, 0x3FFFFFF * 2 * m, fits in 32 bits only while m <= 32
     * (0x7FFFFFE * 32 == 0xFFFFFFC0). The previous limit of 2048 accepted magnitudes
     * whose bounds would be silently truncated when stored into a limb. */
    VERIFY_CHECK(m <= 32);
    /* Limbs 0..8 carry 26 bits each at magnitude 1. */
    r->n[0] = 0x3FFFFFFUL * 2 * m;
    r->n[1] = 0x3FFFFFFUL * 2 * m;
    r->n[2] = 0x3FFFFFFUL * 2 * m;
    r->n[3] = 0x3FFFFFFUL * 2 * m;
    r->n[4] = 0x3FFFFFFUL * 2 * m;
    r->n[5] = 0x3FFFFFFUL * 2 * m;
    r->n[6] = 0x3FFFFFFUL * 2 * m;
    r->n[7] = 0x3FFFFFFUL * 2 * m;
    r->n[8] = 0x3FFFFFFUL * 2 * m;
    /* The top limb carries only 22 bits at magnitude 1. */
    r->n[9] = 0x03FFFFFUL * 2 * m;
#ifdef VERIFY
    r->magnitude = m;
    r->normalized = (m == 0);
    secp256k1_fe_verify(r);
#endif
}
|
||||
|
||||
static void secp256k1_fe_normalize(secp256k1_fe *r) {
|
||||
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
|
||||
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
|
||||
|
@ -1133,6 +1153,82 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Halve *r in place modulo the field prime P (10x26 limb representation), without branching
 * on the value: when limb 0 is odd, P is first added limb by limb so the total becomes even,
 * then the ten limbs are shifted right by one bit as a single multi-limb number.
 * VERIFY builds require an input magnitude below 32 and record an output magnitude of
 * floor(m/2) + 1, with the normalized flag cleared. */
static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
|
||||
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
|
||||
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
|
||||
uint32_t one = (uint32_t)1;
|
||||
/* mask is 0x3FFFFFF (26 one-bits) when t0 is odd and 0 when t0 is even. */
uint32_t mask = -(t0 & one) >> 6;
|
||||
|
||||
#ifdef VERIFY
|
||||
secp256k1_fe_verify(r);
|
||||
VERIFY_CHECK(r->magnitude < 32);
|
||||
#endif
|
||||
|
||||
/* Bounds analysis (over the rationals).
|
||||
*
|
||||
* Let m = r->magnitude
|
||||
* C = 0x3FFFFFFUL * 2
|
||||
* D = 0x03FFFFFUL * 2
|
||||
*
|
||||
* Initial bounds: t0..t8 <= C * m
|
||||
* t9 <= D * m
|
||||
*/
|
||||
|
||||
/* Add P (selected by mask, so nothing is added for even input). Limbs 2..8 of the
 * addend are all-ones 26-bit words, i.e. the mask itself; the top limb is 22 bits wide. */
t0 += 0x3FFFC2FUL & mask;
|
||||
t1 += 0x3FFFFBFUL & mask;
|
||||
t2 += mask;
|
||||
t3 += mask;
|
||||
t4 += mask;
|
||||
t5 += mask;
|
||||
t6 += mask;
|
||||
t7 += mask;
|
||||
t8 += mask;
|
||||
t9 += mask >> 4;
|
||||
|
||||
/* Either t0 was even already, or an odd addend was added to an odd t0. */
VERIFY_CHECK((t0 & one) == 0);
|
||||
|
||||
/* t0..t8: added <= C/2
|
||||
* t9: added <= D/2
|
||||
*
|
||||
* Current bounds: t0..t8 <= C * (m + 1/2)
|
||||
* t9 <= D * (m + 1/2)
|
||||
*/
|
||||
|
||||
/* Shift right by one across limbs: bit 0 of each higher limb becomes bit 25 of the
 * limb below it. */
r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
|
||||
r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
|
||||
r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
|
||||
r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
|
||||
r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
|
||||
r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
|
||||
r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
|
||||
r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
|
||||
r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
|
||||
r->n[9] = (t9 >> 1);
|
||||
|
||||
/* t0..t8: shifted right and added <= C/4 + 1/2
|
||||
* t9: shifted right
|
||||
*
|
||||
* Current bounds: t0..t8 <= C * (m/2 + 1/2)
|
||||
* t9 <= D * (m/2 + 1/4)
|
||||
*/
|
||||
|
||||
#ifdef VERIFY
|
||||
/* Therefore the output magnitude (M) has to be set such that:
|
||||
* t0..t8: C * M >= C * (m/2 + 1/2)
|
||||
* t9: D * M >= D * (m/2 + 1/4)
|
||||
*
|
||||
* It suffices for all limbs that, for any input magnitude m:
|
||||
* M >= m/2 + 1/2
|
||||
*
|
||||
* and since we want the smallest such integer value for M:
|
||||
* M == floor(m/2) + 1
|
||||
*/
|
||||
r->magnitude = (r->magnitude >> 1) + 1;
|
||||
r->normalized = 0;
|
||||
secp256k1_fe_verify(r);
|
||||
#endif
|
||||
}
|
||||
|
||||
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
|
||||
uint32_t mask0, mask1;
|
||||
VG_CHECK_VERIFY(r->n, sizeof(r->n));
|
||||
|
|
|
@ -58,6 +58,21 @@ static void secp256k1_fe_verify(const secp256k1_fe *a) {
|
|||
}
|
||||
#endif
|
||||
|
||||
/** Set every limb of 'r' to the largest value allowed at magnitude 'm' (5x52 representation).
 *
 *  In:   m: requested magnitude, 0 <= m <= 2048.
 *  Out:  r: n[0..3] = 0xFFFFFFFFFFFFF * 2 * m and n[4] = 0x0FFFFFFFFFFFF * 2 * m.
 *           In VERIFY builds the magnitude is recorded as 'm' and the element is flagged
 *           as normalized only when m == 0, after which the element is verified.
 */
static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m) {
    int limb;
    VERIFY_CHECK(m >= 0);
    VERIFY_CHECK(m <= 2048);
    /* The four low limbs share the same 52-bit bound. */
    for (limb = 0; limb < 4; limb++) {
        r->n[limb] = 0xFFFFFFFFFFFFFULL * 2 * m;
    }
    /* The top limb carries only 48 bits at magnitude 1. */
    r->n[4] = 0x0FFFFFFFFFFFFULL * 2 * m;
#ifdef VERIFY
    r->magnitude = m;
    r->normalized = (m == 0);
    secp256k1_fe_verify(r);
#endif
}
|
||||
|
||||
static void secp256k1_fe_normalize(secp256k1_fe *r) {
|
||||
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
|
||||
|
||||
|
@ -477,6 +492,71 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Halve *r in place modulo the field prime P (5x52 limb representation), without branching
 * on the value: when limb 0 is odd, P is first added limb by limb so the total becomes even,
 * then the five limbs are shifted right by one bit as a single multi-limb number.
 * VERIFY builds require an input magnitude below 32 and record an output magnitude of
 * floor(m/2) + 1, with the normalized flag cleared. */
static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
|
||||
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
|
||||
uint64_t one = (uint64_t)1;
|
||||
/* mask is 0xFFFFFFFFFFFFF (52 one-bits) when t0 is odd and 0 when t0 is even. */
uint64_t mask = -(t0 & one) >> 12;
|
||||
|
||||
#ifdef VERIFY
|
||||
secp256k1_fe_verify(r);
|
||||
VERIFY_CHECK(r->magnitude < 32);
|
||||
#endif
|
||||
|
||||
/* Bounds analysis (over the rationals).
|
||||
*
|
||||
* Let m = r->magnitude
|
||||
* C = 0xFFFFFFFFFFFFFULL * 2
|
||||
* D = 0x0FFFFFFFFFFFFULL * 2
|
||||
*
|
||||
* Initial bounds: t0..t3 <= C * m
|
||||
* t4 <= D * m
|
||||
*/
|
||||
|
||||
/* Add P (selected by mask, so nothing is added for even input). Limbs 1..3 of the
 * addend are all-ones 52-bit words, i.e. the mask itself; the top limb is 48 bits wide. */
t0 += 0xFFFFEFFFFFC2FULL & mask;
|
||||
t1 += mask;
|
||||
t2 += mask;
|
||||
t3 += mask;
|
||||
t4 += mask >> 4;
|
||||
|
||||
/* Either t0 was even already, or an odd addend was added to an odd t0. */
VERIFY_CHECK((t0 & one) == 0);
|
||||
|
||||
/* t0..t3: added <= C/2
|
||||
* t4: added <= D/2
|
||||
*
|
||||
* Current bounds: t0..t3 <= C * (m + 1/2)
|
||||
* t4 <= D * (m + 1/2)
|
||||
*/
|
||||
|
||||
/* Shift right by one across limbs: bit 0 of each higher limb becomes bit 51 of the
 * limb below it. */
r->n[0] = (t0 >> 1) + ((t1 & one) << 51);
|
||||
r->n[1] = (t1 >> 1) + ((t2 & one) << 51);
|
||||
r->n[2] = (t2 >> 1) + ((t3 & one) << 51);
|
||||
r->n[3] = (t3 >> 1) + ((t4 & one) << 51);
|
||||
r->n[4] = (t4 >> 1);
|
||||
|
||||
/* t0..t3: shifted right and added <= C/4 + 1/2
|
||||
* t4: shifted right
|
||||
*
|
||||
* Current bounds: t0..t3 <= C * (m/2 + 1/2)
|
||||
* t4 <= D * (m/2 + 1/4)
|
||||
*/
|
||||
|
||||
#ifdef VERIFY
|
||||
/* Therefore the output magnitude (M) has to be set such that:
|
||||
* t0..t3: C * M >= C * (m/2 + 1/2)
|
||||
* t4: D * M >= D * (m/2 + 1/4)
|
||||
*
|
||||
* It suffices for all limbs that, for any input magnitude m:
|
||||
* M >= m/2 + 1/2
|
||||
*
|
||||
* and since we want the smallest such integer value for M:
|
||||
* M == floor(m/2) + 1
|
||||
*/
|
||||
r->magnitude = (r->magnitude >> 1) + 1;
|
||||
r->normalized = 0;
|
||||
secp256k1_fe_verify(r);
|
||||
#endif
|
||||
}
|
||||
|
||||
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
|
||||
uint64_t mask0, mask1;
|
||||
VG_CHECK_VERIFY(r->n, sizeof(r->n));
|
||||
|
|
|
@ -271,37 +271,35 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) {
|
|||
}
|
||||
|
||||
static SECP256K1_INLINE void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) {
|
||||
/* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate.
|
||||
*
|
||||
* Note that there is an implementation described at
|
||||
* https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
|
||||
* which trades a multiply for a square, but in practice this is actually slower,
|
||||
* mainly because it requires more normalizations.
|
||||
*/
|
||||
secp256k1_fe t1,t2,t3,t4;
|
||||
/* Operations: 3 mul, 4 sqr, 8 add/half/mul_int/negate */
|
||||
secp256k1_fe l, s, t;
|
||||
|
||||
r->infinity = a->infinity;
|
||||
|
||||
secp256k1_fe_mul(&r->z, &a->z, &a->y);
|
||||
secp256k1_fe_mul_int(&r->z, 2); /* Z' = 2*Y*Z (2) */
|
||||
secp256k1_fe_sqr(&t1, &a->x);
|
||||
secp256k1_fe_mul_int(&t1, 3); /* T1 = 3*X^2 (3) */
|
||||
secp256k1_fe_sqr(&t2, &t1); /* T2 = 9*X^4 (1) */
|
||||
secp256k1_fe_sqr(&t3, &a->y);
|
||||
secp256k1_fe_mul_int(&t3, 2); /* T3 = 2*Y^2 (2) */
|
||||
secp256k1_fe_sqr(&t4, &t3);
|
||||
secp256k1_fe_mul_int(&t4, 2); /* T4 = 8*Y^4 (2) */
|
||||
secp256k1_fe_mul(&t3, &t3, &a->x); /* T3 = 2*X*Y^2 (1) */
|
||||
r->x = t3;
|
||||
secp256k1_fe_mul_int(&r->x, 4); /* X' = 8*X*Y^2 (4) */
|
||||
secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */
|
||||
secp256k1_fe_add(&r->x, &t2); /* X' = 9*X^4 - 8*X*Y^2 (6) */
|
||||
secp256k1_fe_negate(&t2, &t2, 1); /* T2 = -9*X^4 (2) */
|
||||
secp256k1_fe_mul_int(&t3, 6); /* T3 = 12*X*Y^2 (6) */
|
||||
secp256k1_fe_add(&t3, &t2); /* T3 = 12*X*Y^2 - 9*X^4 (8) */
|
||||
secp256k1_fe_mul(&r->y, &t1, &t3); /* Y' = 36*X^3*Y^2 - 27*X^6 (1) */
|
||||
secp256k1_fe_negate(&t2, &t4, 2); /* T2 = -8*Y^4 (3) */
|
||||
secp256k1_fe_add(&r->y, &t2); /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
|
||||
/* Formula used:
|
||||
* L = (3/2) * X1^2
|
||||
* S = Y1^2
|
||||
* T = -X1*S
|
||||
* X3 = L^2 + 2*T
|
||||
* Y3 = -(L*(X3 + T) + S^2)
|
||||
* Z3 = Y1*Z1
|
||||
*/
|
||||
|
||||
secp256k1_fe_mul(&r->z, &a->z, &a->y); /* Z3 = Y1*Z1 (1) */
|
||||
secp256k1_fe_sqr(&s, &a->y); /* S = Y1^2 (1) */
|
||||
secp256k1_fe_sqr(&l, &a->x); /* L = X1^2 (1) */
|
||||
secp256k1_fe_mul_int(&l, 3); /* L = 3*X1^2 (3) */
|
||||
secp256k1_fe_half(&l); /* L = 3/2*X1^2 (2) */
|
||||
secp256k1_fe_negate(&t, &s, 1); /* T = -S (2) */
|
||||
secp256k1_fe_mul(&t, &t, &a->x); /* T = -X1*S (1) */
|
||||
secp256k1_fe_sqr(&r->x, &l); /* X3 = L^2 (1) */
|
||||
secp256k1_fe_add(&r->x, &t); /* X3 = L^2 + T (2) */
|
||||
secp256k1_fe_add(&r->x, &t); /* X3 = L^2 + 2*T (3) */
|
||||
secp256k1_fe_sqr(&s, &s); /* S' = S^2 (1) */
|
||||
secp256k1_fe_add(&t, &r->x); /* T' = X3 + T (4) */
|
||||
secp256k1_fe_mul(&r->y, &t, &l); /* Y3 = L*(X3 + T) (1) */
|
||||
secp256k1_fe_add(&r->y, &s); /* Y3 = L*(X3 + T) + S^2 (2) */
|
||||
secp256k1_fe_negate(&r->y, &r->y, 2); /* Y3 = -(L*(X3 + T) + S^2) (3) */
|
||||
}
|
||||
|
||||
static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {
|
||||
|
@ -326,7 +324,6 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s
|
|||
if (rzr != NULL) {
|
||||
*rzr = a->y;
|
||||
secp256k1_fe_normalize_weak(rzr);
|
||||
secp256k1_fe_mul_int(rzr, 2);
|
||||
}
|
||||
|
||||
secp256k1_gej_double(r, a);
|
||||
|
@ -492,7 +489,7 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a,
|
|||
|
||||
|
||||
static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b) {
|
||||
/* Operations: 7 mul, 5 sqr, 4 normalize, 21 mul_int/add/negate/cmov */
|
||||
/* Operations: 7 mul, 5 sqr, 24 add/cmov/half/mul_int/negate/normalize_weak/normalizes_to_zero */
|
||||
secp256k1_fe zz, u1, u2, s1, s2, t, tt, m, n, q, rr;
|
||||
secp256k1_fe m_alt, rr_alt;
|
||||
int infinity, degenerate;
|
||||
|
@ -513,11 +510,11 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const
|
|||
* Z = Z1*Z2
|
||||
* T = U1+U2
|
||||
* M = S1+S2
|
||||
* Q = T*M^2
|
||||
* Q = -T*M^2
|
||||
* R = T^2-U1*U2
|
||||
* X3 = 4*(R^2-Q)
|
||||
* Y3 = 4*(R*(3*Q-2*R^2)-M^4)
|
||||
* Z3 = 2*M*Z
|
||||
* X3 = R^2+Q
|
||||
* Y3 = -(R*(2*X3+Q)+M^4)/2
|
||||
* Z3 = M*Z
|
||||
* (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
|
||||
*
|
||||
* This formula has the benefit of being the same for both addition
|
||||
|
@ -581,7 +578,8 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const
|
|||
* and denominator of lambda; R and M represent the explicit
|
||||
* expressions x1^2 + x2^2 + x1x2 and y1 + y2. */
|
||||
secp256k1_fe_sqr(&n, &m_alt); /* n = Malt^2 (1) */
|
||||
secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*Malt^2 (1) */
|
||||
secp256k1_fe_negate(&q, &t, 2); /* q = -T (3) */
|
||||
secp256k1_fe_mul(&q, &q, &n); /* q = Q = -T*Malt^2 (1) */
|
||||
/* These two lines use the observation that either M == Malt or M == 0,
|
||||
* so M^3 * Malt is either Malt^4 (which is computed by squaring), or
|
||||
* zero (which is "computed" by cmov). So the cost is one squaring
|
||||
|
@ -589,21 +587,16 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const
|
|||
secp256k1_fe_sqr(&n, &n);
|
||||
secp256k1_fe_cmov(&n, &m, degenerate); /* n = M^3 * Malt (2) */
|
||||
secp256k1_fe_sqr(&t, &rr_alt); /* t = Ralt^2 (1) */
|
||||
secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Malt*Z (1) */
|
||||
secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Z3 = Malt*Z (1) */
|
||||
infinity = secp256k1_fe_normalizes_to_zero(&r->z) & ~a->infinity;
|
||||
secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*Malt*Z (2) */
|
||||
secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */
|
||||
secp256k1_fe_add(&t, &q); /* t = Ralt^2-Q (3) */
|
||||
secp256k1_fe_normalize_weak(&t);
|
||||
r->x = t; /* r->x = Ralt^2-Q (1) */
|
||||
secp256k1_fe_mul_int(&t, 2); /* t = 2*x3 (2) */
|
||||
secp256k1_fe_add(&t, &q); /* t = 2*x3 - Q: (4) */
|
||||
secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*x3 - Q) (1) */
|
||||
secp256k1_fe_add(&t, &n); /* t = Ralt*(2*x3 - Q) + M^3*Malt (3) */
|
||||
secp256k1_fe_negate(&r->y, &t, 3); /* r->y = Ralt*(Q - 2x3) - M^3*Malt (4) */
|
||||
secp256k1_fe_normalize_weak(&r->y);
|
||||
secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(Ralt^2-Q) */
|
||||
secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*Ralt*(Q - 2x3) - 4*M^3*Malt (4) */
|
||||
secp256k1_fe_add(&t, &q); /* t = Ralt^2 + Q (2) */
|
||||
r->x = t; /* r->x = X3 = Ralt^2 + Q (2) */
|
||||
secp256k1_fe_mul_int(&t, 2); /* t = 2*X3 (4) */
|
||||
secp256k1_fe_add(&t, &q); /* t = 2*X3 + Q (5) */
|
||||
secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*X3 + Q) (1) */
|
||||
secp256k1_fe_add(&t, &n); /* t = Ralt*(2*X3 + Q) + M^3*Malt (3) */
|
||||
secp256k1_fe_negate(&r->y, &t, 3); /* r->y = -(Ralt*(2*X3 + Q) + M^3*Malt) (4) */
|
||||
secp256k1_fe_half(&r->y); /* r->y = Y3 = -(Ralt*(2*X3 + Q) + M^3*Malt)/2 (3) */
|
||||
|
||||
/** In case a->infinity == 1, replace r with (b->x, b->y, 1). */
|
||||
secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
|
||||
|
|
64
src/tests.c
64
src/tests.c
|
@ -2471,6 +2471,55 @@ int fe_identical(const secp256k1_fe *a, const secp256k1_fe *b) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
void run_field_half(void) {
|
||||
secp256k1_fe t, u;
|
||||
int m;
|
||||
|
||||
/* Check magnitude 0 input */
|
||||
secp256k1_fe_get_bounds(&t, 0);
|
||||
secp256k1_fe_half(&t);
|
||||
#ifdef VERIFY
|
||||
CHECK(t.magnitude == 1);
|
||||
CHECK(t.normalized == 0);
|
||||
#endif
|
||||
CHECK(secp256k1_fe_normalizes_to_zero(&t));
|
||||
|
||||
/* Check non-zero magnitudes in the supported range */
|
||||
for (m = 1; m < 32; m++) {
|
||||
/* Check max-value input */
|
||||
secp256k1_fe_get_bounds(&t, m);
|
||||
|
||||
u = t;
|
||||
secp256k1_fe_half(&u);
|
||||
#ifdef VERIFY
|
||||
CHECK(u.magnitude == (m >> 1) + 1);
|
||||
CHECK(u.normalized == 0);
|
||||
#endif
|
||||
secp256k1_fe_normalize_weak(&u);
|
||||
secp256k1_fe_add(&u, &u);
|
||||
CHECK(check_fe_equal(&t, &u));
|
||||
|
||||
/* Check worst-case input: ensure the LSB is 1 so that P will be added,
|
||||
* which will also cause all carries to be 1, since all limbs that can
|
||||
* generate a carry are initially even and all limbs of P are odd in
|
||||
* every existing field implementation. */
|
||||
secp256k1_fe_get_bounds(&t, m);
|
||||
CHECK(t.n[0] > 0);
|
||||
CHECK((t.n[0] & 1) == 0);
|
||||
--t.n[0];
|
||||
|
||||
u = t;
|
||||
secp256k1_fe_half(&u);
|
||||
#ifdef VERIFY
|
||||
CHECK(u.magnitude == (m >> 1) + 1);
|
||||
CHECK(u.normalized == 0);
|
||||
#endif
|
||||
secp256k1_fe_normalize_weak(&u);
|
||||
secp256k1_fe_add(&u, &u);
|
||||
CHECK(check_fe_equal(&t, &u));
|
||||
}
|
||||
}
|
||||
|
||||
void run_field_misc(void) {
|
||||
secp256k1_fe x;
|
||||
secp256k1_fe y;
|
||||
|
@ -2478,9 +2527,13 @@ void run_field_misc(void) {
|
|||
secp256k1_fe q;
|
||||
secp256k1_fe fe5 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 5);
|
||||
int i, j;
|
||||
for (i = 0; i < 5*count; i++) {
|
||||
for (i = 0; i < 1000 * count; i++) {
|
||||
secp256k1_fe_storage xs, ys, zs;
|
||||
if (i & 1) {
|
||||
random_fe(&x);
|
||||
} else {
|
||||
random_fe_test(&x);
|
||||
}
|
||||
random_fe_non_zero(&y);
|
||||
/* Test the fe equality and comparison operations. */
|
||||
CHECK(secp256k1_fe_cmp_var(&x, &x) == 0);
|
||||
|
@ -2548,6 +2601,14 @@ void run_field_misc(void) {
|
|||
secp256k1_fe_add(&q, &x);
|
||||
CHECK(check_fe_equal(&y, &z));
|
||||
CHECK(check_fe_equal(&q, &y));
|
||||
/* Check secp256k1_fe_half. */
|
||||
z = x;
|
||||
secp256k1_fe_half(&z);
|
||||
secp256k1_fe_add(&z, &z);
|
||||
CHECK(check_fe_equal(&x, &z));
|
||||
secp256k1_fe_add(&z, &z);
|
||||
secp256k1_fe_half(&z);
|
||||
CHECK(check_fe_equal(&x, &z));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6912,6 +6973,7 @@ int main(int argc, char **argv) {
|
|||
run_scalar_tests();
|
||||
|
||||
/* field tests */
|
||||
run_field_half();
|
||||
run_field_misc();
|
||||
run_field_convert();
|
||||
run_fe_mul();
|
||||
|
|
Loading…
Reference in New Issue