Replace set/add with cmov in secp256k1_gej_add_ge.

Use a conditional move of the same kind we use for the affine points in the storage type, instead of multiplying by the infinity flag and adding. This results in fewer constructions to worry about for side-channel behavior. It also might be faster: at least, it doesn't appear to benchmark as slower for me. I think the cmov is faster than the mul_int + add, but slower than the set_int + add, making it a wash overall.
2015-04-22 00:20:54 +00:00 · 2015-04-22 00:20:54 +00:00 · bb0ea50de8
parent f3d3519ce0
commit bb0ea50de8
4 changed files with 44 additions and 8 deletions
--- a/src/field.h
+++ b/src/field.h
@ -113,4 +113,7 @@ static void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_stor
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
 static void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag);

+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag);
+
 #endif
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@ -1068,6 +1068,26 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }

+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { /* Branch-free select over the ten 32-bit limbs: copies *a into *r when flag is 1, leaves *r as is when flag is 0. */
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0); /* flag == 0: all ones (keep r); flag == 1: wraps around to 0 (drop r). Any other flag value would yield a partial mask. */
+    mask1 = ~mask0; /* Complement of mask0: all ones exactly when a must be taken. */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); /* Every limb is read and written regardless of flag; no branch depends on it. */
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
+    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
+    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
+    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
+    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1); /* VERIFY-only bookkeeping fields are selected with the same masks as the limbs. */
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
    uint32_t mask0, mask1;
    mask0 = flag + ~((uint32_t)0);
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@ -404,6 +404,21 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }

+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) { /* Branch-free select over the five 64-bit limbs: copies *a into *r when flag is 1, leaves *r as is when flag is 0. */
+    uint64_t mask0, mask1;
+    mask0 = flag + ~((uint64_t)0); /* flag == 0: all ones (keep r); flag == 1: wraps around to 0 (drop r). Any other flag value would yield a partial mask. */
+    mask1 = ~mask0; /* Complement of mask0: all ones exactly when a must be taken. */
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); /* Every limb is read and written regardless of flag; no branch depends on it. */
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1); /* VERIFY-only bookkeeping fields are selected with the same masks as the limbs. */
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
    uint64_t mask0, mask1;
    mask0 = flag + ~((uint64_t)0);
--- a/src/group_impl.h
+++ b/src/group_impl.h
@ -325,7 +325,8 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
 }

 static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
-    /* Operations: 7 mul, 5 sqr, 5 normalize, 19 mul_int/add/negate */
+    /* Operations: 7 mul, 5 sqr, 5 normalize, 17 mul_int/add/negate/cmov */
+    static const secp256k1_fe_t fe_1 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
    secp256k1_fe_t zz, u1, u2, s1, s2, z, t, m, n, q, rr;
    int infinity;
    VERIFY_CHECK(!b->infinity);
@ -387,14 +388,11 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
    secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity)); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */

    /** In case a->infinity == 1, the above code results in r->x, r->y, and r->z all equal to 0.
-     *  Add b->x to x, b->y to y, and 1 to z in that case.
+     *  Replace r with b->x, b->y, 1 in that case.
     */
-    t = b->x; secp256k1_fe_mul_int(&t, a->infinity);
-    secp256k1_fe_add(&r->x, &t);
-    t = b->y; secp256k1_fe_mul_int(&t, a->infinity);
-    secp256k1_fe_add(&r->y, &t);
-    secp256k1_fe_set_int(&t, a->infinity);
-    secp256k1_fe_add(&r->z, &t);
+    secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
+    secp256k1_fe_cmov(&r->y, &b->y, a->infinity);
+    secp256k1_fe_cmov(&r->z, &fe_1, a->infinity);
    r->infinity = infinity;
 }