diff --git a/sage/prove_group_implementations.sage b/sage/prove_group_implementations.sage index 96ce335..652bd87 100644 --- a/sage/prove_group_implementations.sage +++ b/sage/prove_group_implementations.sage @@ -40,29 +40,26 @@ def formula_secp256k1_gej_add_var(branch, a, b): s2 = s2 * a.Z h = -u1 h = h + u2 - i = -s1 - i = i + s2 + i = -s2 + i = i + s1 if branch == 2: r = formula_secp256k1_gej_double_var(a) return (constraints(), constraints(zero={h : 'h=0', i : 'i=0', a.Infinity : 'a_finite', b.Infinity : 'b_finite'}), r) if branch == 3: return (constraints(), constraints(zero={h : 'h=0', a.Infinity : 'a_finite', b.Infinity : 'b_finite'}, nonzero={i : 'i!=0'}), point_at_infinity()) - i2 = i^2 + t = h * b.Z + rz = a.Z * t h2 = h^2 + h2 = -h2 h3 = h2 * h - h = h * b.Z - rz = a.Z * h t = u1 * h2 - rx = t - rx = rx * 2 + rx = i^2 rx = rx + h3 - rx = -rx - rx = rx + i2 - ry = -rx - ry = ry + t - ry = ry * i + rx = rx + t + rx = rx + t + t = t + rx + ry = t * i h3 = h3 * s1 - h3 = -h3 ry = ry + h3 return (constraints(), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite'}, nonzero={h : 'h!=0'}), jacobianpoint(rx, ry, rz)) @@ -80,28 +77,25 @@ def formula_secp256k1_gej_add_ge_var(branch, a, b): s2 = s2 * a.Z h = -u1 h = h + u2 - i = -s1 - i = i + s2 + i = -s2 + i = i + s1 if (branch == 2): r = formula_secp256k1_gej_double_var(a) return (constraints(zero={b.Z - 1 : 'b.z=1'}), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite', h : 'h=0', i : 'i=0'}), r) if (branch == 3): return (constraints(zero={b.Z - 1 : 'b.z=1'}), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite', h : 'h=0'}, nonzero={i : 'i!=0'}), point_at_infinity()) - i2 = i^2 - h2 = h^2 - h3 = h * h2 rz = a.Z * h + h2 = h^2 + h2 = -h2 + h3 = h2 * h t = u1 * h2 - rx = t - rx = rx * 2 + rx = i^2 rx = rx + h3 - rx = -rx - rx = rx + i2 - ry = -rx - ry = ry + t - ry = ry * i + rx = rx + t + rx = rx + t + t = t + rx + ry = t * i h3 = h3 * s1 - h3 = -h3 ry = ry + h3 return (constraints(zero={b.Z - 1 : 'b.z=1'}), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite'}, nonzero={h : 'h!=0'}), jacobianpoint(rx, ry, rz)) @@ -109,14 +103,15 @@ def formula_secp256k1_gej_add_zinv_var(branch, a, b): """libsecp256k1's secp256k1_gej_add_zinv_var""" bzinv = b.Z^(-1) if branch == 0: - return (constraints(), constraints(nonzero={b.Infinity : 'b_infinite'}), a) - if branch == 1: + rinf = b.Infinity bzinv2 = bzinv^2 bzinv3 = bzinv2 * bzinv rx = b.X * bzinv2 ry = b.Y * bzinv3 rz = 1 - return (constraints(), constraints(zero={b.Infinity : 'b_finite'}, nonzero={a.Infinity : 'a_infinite'}), jacobianpoint(rx, ry, rz)) + return (constraints(), constraints(nonzero={a.Infinity : 'a_infinite'}), jacobianpoint(rx, ry, rz, rinf)) + if branch == 1: + return (constraints(), constraints(zero={a.Infinity : 'a_finite'}, nonzero={b.Infinity : 'b_infinite'}), a) azz = a.Z * bzinv z12 = azz^2 u1 = a.X @@ -126,29 +121,25 @@ def formula_secp256k1_gej_add_zinv_var(branch, a, b): s2 = s2 * azz h = -u1 h = h + u2 - i = -s1 - i = i + s2 + i = -s2 + i = i + s1 if branch == 2: r = formula_secp256k1_gej_double_var(a) return (constraints(), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite', h : 'h=0', i : 'i=0'}), r) if branch == 3: return (constraints(), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite', h : 'h=0'}, nonzero={i : 'i!=0'}), point_at_infinity()) - i2 = i^2 + rz = a.Z * h h2 = h^2 - h3 = h * h2 - rz = a.Z - rz = rz * h + h2 = -h2 + h3 = h2 * h t = u1 * h2 - rx = t - rx = rx * 2 + rx = i^2 rx = rx + h3 - rx = -rx - rx = rx + i2 - ry = -rx - ry = ry + t - ry = ry * i + rx = rx + t + rx = rx + t + t = t + rx + ry = t * i h3 = h3 * s1 - h3 = -h3 ry = ry + h3 return (constraints(), constraints(zero={a.Infinity : 'a_finite', b.Infinity : 'b_finite'}, nonzero={h : 'h!=0'}), jacobianpoint(rx, ry, rz)) diff --git a/src/bench_internal.c b/src/bench_internal.c index 3c145f3..7eb3af2 100644 --- a/src/bench_internal.c +++ b/src/bench_internal.c @@ -254,6 +254,15 @@ void bench_group_add_affine_var(void* arg, int iters) { } } +void bench_group_add_zinv_var(void* arg, int iters) { + int i; + bench_inv *data = (bench_inv*)arg; + + for (i = 0; i < iters; i++) { + secp256k1_gej_add_zinv_var(&data->gej[0], &data->gej[0], &data->ge[1], &data->gej[0].y); + } +} + void bench_group_to_affine_var(void* arg, int iters) { int i; bench_inv *data = (bench_inv*)arg; @@ -376,6 +385,7 @@ int main(int argc, char **argv) { if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_var", bench_group_add_var, bench_setup, NULL, &data, 10, iters*10); if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10); if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10); + if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_zinv_var", bench_group_add_zinv_var, bench_setup, NULL, &data, 10, iters*10); if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters); if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters); diff --git a/src/group_impl.h b/src/group_impl.h index b19b02a..63735ab 100644 --- a/src/group_impl.h +++ b/src/group_impl.h @@ -330,15 +330,14 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s } static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_gej *b, secp256k1_fe *rzr) { - /* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */ - secp256k1_fe z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + /* 12 mul, 4 sqr, 11 add/negate/normalizes_to_zero (ignoring special cases) */ + secp256k1_fe z22, z12, u1, u2, s1, s2, h, i, h2, h3, t; if (a->infinity) { VERIFY_CHECK(rzr == NULL); *r = *b; return; } - if (b->infinity) { if (rzr != NULL) { secp256k1_fe_set_int(rzr, 1); @@ -347,7 +346,6 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons return; } - r->infinity = 0; secp256k1_fe_sqr(&z22, &b->z); secp256k1_fe_sqr(&z12, &a->z); secp256k1_fe_mul(&u1, &a->x, &z22); @@ -355,7 +353,7 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons secp256k1_fe_mul(&s1, &a->y, &z22); secp256k1_fe_mul(&s1, &s1, &b->z); secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); - secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); if (secp256k1_fe_normalizes_to_zero_var(&h)) { if (secp256k1_fe_normalizes_to_zero_var(&i)) { secp256k1_gej_double_var(r, a, rzr); @@ -367,24 +365,33 @@ static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, cons } return; } - secp256k1_fe_sqr(&i2, &i); - secp256k1_fe_sqr(&h2, &h); - secp256k1_fe_mul(&h3, &h, &h2); - secp256k1_fe_mul(&h, &h, &b->z); + + r->infinity = 0; + secp256k1_fe_mul(&t, &h, &b->z); if (rzr != NULL) { - *rzr = h; + *rzr = t; } - secp256k1_fe_mul(&r->z, &a->z, &h); + secp256k1_fe_mul(&r->z, &a->z, &t); + + secp256k1_fe_sqr(&h2, &h); + secp256k1_fe_negate(&h2, &h2, 1); + secp256k1_fe_mul(&h3, &h2, &h); secp256k1_fe_mul(&t, &u1, &h2); - r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); - secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); - secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + + secp256k1_fe_sqr(&r->x, &i); + secp256k1_fe_add(&r->x, &h3); + secp256k1_fe_add(&r->x, &t); + secp256k1_fe_add(&r->x, &t); + + secp256k1_fe_add(&t, &r->x); + secp256k1_fe_mul(&r->y, &t, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_add(&r->y, &h3); } static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, secp256k1_fe *rzr) { - /* 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */ - secp256k1_fe z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + /* 8 mul, 3 sqr, 13 add/negate/normalize_weak/normalizes_to_zero (ignoring special cases) */ + secp256k1_fe z12, u1, u2, s1, s2, h, i, h2, h3, t; if (a->infinity) { VERIFY_CHECK(rzr == NULL); secp256k1_gej_set_ge(r, b); @@ -397,7 +404,6 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c *r = *a; return; } - r->infinity = 0; secp256k1_fe_sqr(&z12, &a->z); u1 = a->x; secp256k1_fe_normalize_weak(&u1); @@ -405,7 +411,7 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c s1 = a->y; secp256k1_fe_normalize_weak(&s1); secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &a->z); secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); - secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); if (secp256k1_fe_normalizes_to_zero_var(&h)) { if (secp256k1_fe_normalizes_to_zero_var(&i)) { secp256k1_gej_double_var(r, a, rzr); @@ -417,28 +423,33 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej *r, const secp256k1_gej *a, c } return; } - secp256k1_fe_sqr(&i2, &i); - secp256k1_fe_sqr(&h2, &h); - secp256k1_fe_mul(&h3, &h, &h2); + + r->infinity = 0; if (rzr != NULL) { *rzr = h; } secp256k1_fe_mul(&r->z, &a->z, &h); + + secp256k1_fe_sqr(&h2, &h); + secp256k1_fe_negate(&h2, &h2, 1); + secp256k1_fe_mul(&h3, &h2, &h); secp256k1_fe_mul(&t, &u1, &h2); - r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); - secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); - secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + + secp256k1_fe_sqr(&r->x, &i); + secp256k1_fe_add(&r->x, &h3); + secp256k1_fe_add(&r->x, &t); + secp256k1_fe_add(&r->x, &t); + + secp256k1_fe_add(&t, &r->x); + secp256k1_fe_mul(&r->y, &t, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_add(&r->y, &h3); } static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b, const secp256k1_fe *bzinv) { - /* 9 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */ - secp256k1_fe az, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t; + /* 9 mul, 3 sqr, 13 add/negate/normalize_weak/normalizes_to_zero (ignoring special cases) */ + secp256k1_fe az, z12, u1, u2, s1, s2, h, i, h2, h3, t; - if (b->infinity) { - *r = *a; - return; - } if (a->infinity) { secp256k1_fe bzinv2, bzinv3; r->infinity = b->infinity; @@ -449,7 +460,10 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe_set_int(&r->z, 1); return; } - r->infinity = 0; + if (b->infinity) { + *r = *a; + return; + } /** We need to calculate (rx,ry,rz) = (ax,ay,az) + (bx,by,1/bzinv). Due to * secp256k1's isomorphism we can multiply the Z coordinates on both sides @@ -467,7 +481,7 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, s1 = a->y; secp256k1_fe_normalize_weak(&s1); secp256k1_fe_mul(&s2, &b->y, &z12); secp256k1_fe_mul(&s2, &s2, &az); secp256k1_fe_negate(&h, &u1, 1); secp256k1_fe_add(&h, &u2); - secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2); + secp256k1_fe_negate(&i, &s2, 1); secp256k1_fe_add(&i, &s1); if (secp256k1_fe_normalizes_to_zero_var(&h)) { if (secp256k1_fe_normalizes_to_zero_var(&i)) { secp256k1_gej_double_var(r, a, NULL); @@ -476,14 +490,23 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a, } return; } - secp256k1_fe_sqr(&i2, &i); + + r->infinity = 0; + secp256k1_fe_mul(&r->z, &a->z, &h); + secp256k1_fe_sqr(&h2, &h); - secp256k1_fe_mul(&h3, &h, &h2); - r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h); + secp256k1_fe_negate(&h2, &h2, 1); + secp256k1_fe_mul(&h3, &h2, &h); secp256k1_fe_mul(&t, &u1, &h2); - r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2); - secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i); - secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_negate(&h3, &h3, 1); + + secp256k1_fe_sqr(&r->x, &i); + secp256k1_fe_add(&r->x, &h3); + secp256k1_fe_add(&r->x, &t); + secp256k1_fe_add(&r->x, &t); + + secp256k1_fe_add(&t, &r->x); + secp256k1_fe_mul(&r->y, &t, &i); + secp256k1_fe_mul(&h3, &h3, &s1); secp256k1_fe_add(&r->y, &h3); }