Require that r and b are different for field multiplication.

Suggested by Peter Dettman, this prepares for a slightly faster multiplication which writes results immediately to r before it has finished reading b.
2014-11-30 19:27:24 +01:00 · 2014-11-30 19:27:24 +01:00 · be82e92fc4
parent 0af5b47133
commit be82e92fc4
9 changed files with 30 additions and 21 deletions
--- a/include/secp256k1.h
+++ b/include/secp256k1.h
@ -14,18 +14,6 @@ extern "C" {
 #  endif
 # endif

-# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
-#  if SECP256K1_GNUC_PREREQ(3,0)
-#   define SECP256K1_RESTRICT __restrict__
-#  elif (defined(_MSC_VER) && _MSC_VER >= 1400)
-#   define SECP256K1_RESTRICT __restrict
-#  else
-#   define SECP256K1_RESTRICT
-#  endif
-# else
-#  define SECP256K1_RESTRICT restrict
-# endif
-
 # if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
 #  if SECP256K1_GNUC_PREREQ(2,7)
 #   define SECP256K1_INLINE __inline__
--- a/src/field.h
+++ b/src/field.h
@ -82,7 +82,7 @@ static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a);

 /** Sets a field element to be the product of two others. Requires the inputs' magnitudes to be at most 8.
 *  The output magnitude is 1 (but not guaranteed to be normalized). */
-static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b);
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b);

 /** Sets a field element to be the square of another. Requires the input's magnitude to be at most 8.
 *  The output magnitude is 1 (but not guaranteed to be normalized). */
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@ -271,7 +271,7 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1
 #define VERIFY_BITS(x, n) do { } while(0)
 #endif

-SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t *b, uint32_t *r) {
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b, uint32_t *r) {
    VERIFY_BITS(a[0], 30);
    VERIFY_BITS(a[1], 30);
    VERIFY_BITS(a[2], 30);
@ -871,12 +871,13 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t
 }


-static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) {
 #ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    VERIFY_CHECK(b->magnitude <= 8);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
+    VERIFY_CHECK(r != b);
 #endif
    secp256k1_fe_mul_inner(a->n, b->n, r->n);
 #ifdef VERIFY
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@ -247,12 +247,13 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1
 #endif
 }

-static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) {
 #ifdef VERIFY
    VERIFY_CHECK(a->magnitude <= 8);
    VERIFY_CHECK(b->magnitude <= 8);
    secp256k1_fe_verify(a);
    secp256k1_fe_verify(b);
+    VERIFY_CHECK(r != b);
 #endif
    secp256k1_fe_mul_inner(a->n, b->n, r->n);
 #ifdef VERIFY
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@ -15,7 +15,7 @@
 #define VERIFY_BITS(x, n) do { } while(0)
 #endif

-SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
+SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t * SECP256K1_RESTRICT r) {
    VERIFY_BITS(a[0], 56);
    VERIFY_BITS(a[1], 56);
    VERIFY_BITS(a[2], 56);
@ -26,6 +26,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint64_t *a, const uin
    VERIFY_BITS(b[2], 56);
    VERIFY_BITS(b[3], 56);
    VERIFY_BITS(b[4], 52);
+    VERIFY_CHECK(r != b);

    const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
    /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
--- a/src/field_gmp_impl.h
+++ b/src/field_gmp_impl.h
@ -151,7 +151,8 @@ static void secp256k1_fe_reduce(secp256k1_fe_t *r, mp_limb_t *tmp) {
    r->n[FIELD_LIMBS] = mpn_add(r->n, tmp, FIELD_LIMBS, q, 1+(33+GMP_NUMB_BITS-1)/GMP_NUMB_BITS);
 }

-static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
+static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const secp256k1_fe_t * SECP256K1_RESTRICT b) {
+    VERIFY_CHECK(r != b);
    secp256k1_fe_t ac = *a;
    secp256k1_fe_t bc = *b;
    secp256k1_fe_normalize(&ac);
--- a/src/field_impl.h
+++ b/src/field_impl.h
@ -197,7 +197,7 @@ static void secp256k1_fe_inv(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
    for (int j=0; j<3; j++) secp256k1_fe_sqr(&t1, &t1);
    secp256k1_fe_mul(&t1, &t1, &x2);
    for (int j=0; j<2; j++) secp256k1_fe_sqr(&t1, &t1);
-    secp256k1_fe_mul(r, &t1, a);
+    secp256k1_fe_mul(r, a, &t1);
 }

 static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
--- a/src/group_impl.h
+++ b/src/group_impl.h
@ -217,7 +217,7 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *
    }

    secp256k1_fe_t t1,t2,t3,t4;
-    secp256k1_fe_mul(&r->z, &a->y, &a->z);
+    secp256k1_fe_mul(&r->z, &a->z, &a->y);
    secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
    secp256k1_fe_sqr(&t1, &a->x);
    secp256k1_fe_mul_int(&t1, 3);         /* T1 = 3*X^2 (3) */
@ -226,7 +226,7 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *
    secp256k1_fe_mul_int(&t3, 2);         /* T3 = 2*Y^2 (2) */
    secp256k1_fe_sqr(&t4, &t3);
    secp256k1_fe_mul_int(&t4, 2);         /* T4 = 8*Y^4 (2) */
-    secp256k1_fe_mul(&t3, &a->x, &t3);    /* T3 = 2*X*Y^2 (1) */
+    secp256k1_fe_mul(&t3, &t3, &a->x);    /* T3 = 2*X*Y^2 (1) */
    r->x = t3;
    secp256k1_fe_mul_int(&r->x, 4);       /* X' = 8*X*Y^2 (4) */
    secp256k1_fe_negate(&r->x, &r->x, 4); /* X' = -8*X*Y^2 (5) */
--- a/src/util.h
+++ b/src/util.h
@ -61,4 +61,21 @@
 #define VERIFY_CHECK(cond) do { (void)(cond); } while(0)
 #endif

+/* Macro for restrict, when available and not in a VERIFY build. */
+#if defined(SECP256K1_BUILD) && defined(VERIFY)
+# define SECP256K1_RESTRICT
+#else
+# if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) )
+#  if SECP256K1_GNUC_PREREQ(3,0)
+#   define SECP256K1_RESTRICT __restrict__
+#  elif (defined(_MSC_VER) && _MSC_VER >= 1400)
+#   define SECP256K1_RESTRICT __restrict
+#  else
+#   define SECP256K1_RESTRICT
+#  endif
+# else
+#  define SECP256K1_RESTRICT restrict
+# endif
+#endif
+
 #endif