From 0295f0a33d0bfec8ce712e849253c71a579651c3 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter.wuille@gmail.com>
Date: Wed, 10 Dec 2014 14:34:25 +0100
Subject: [PATCH] weak normalization

---
 src/ecmult_gen_impl.h  |  2 +-
 src/ecmult_impl.h      |  4 ++--
 src/field.h            |  3 +++
 src/field_10x26_impl.h | 28 ++++++++++++++++++++++++++++
 src/field_5x52_impl.h  | 24 ++++++++++++++++++++++++
 src/group.h            |  3 +--
 src/group_impl.h       | 18 ++++++------------
 7 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 5a5b16c..4843631 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -73,7 +73,7 @@ static void secp256k1_ecmult_gen_start(void) {
             secp256k1_gej_double_var(&numsbase, &numsbase);
             if (j == 62) {
                 /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
-                secp256k1_gej_neg_var(&numsbase, &numsbase);
+                secp256k1_gej_neg(&numsbase, &numsbase);
                 secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
             }
         }
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 6536771..345cfae 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -70,8 +70,8 @@ static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const sec
         (neg)((r), &(pre)[(-(n)-1)/2]); \
 } while(0)
 
-#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg_var)
-#define ECMULT_TABLE_GET_GE(r,pre,n,w)  ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg_var)
+#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg)
+#define ECMULT_TABLE_GET_GE(r,pre,n,w)  ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg)
 
 typedef struct {
     /* For accelerating the computation of a*P + b*G: */
diff --git a/src/field.h b/src/field.h
index 9d5466a..391d97a 100644
--- a/src/field.h
+++ b/src/field.h
@@ -48,6 +48,9 @@ static void secp256k1_fe_stop(void);
 /** Normalize a field element. */
 static void secp256k1_fe_normalize(secp256k1_fe_t *r);
 
+/** Weakly normalize a field element: reduce it magnitude to 1, but don't fully normalize. */
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r);
+
 /** Normalize a field element, without constant-time guarantee. */
 static void secp256k1_fe_normalize_var(secp256k1_fe_t *r);
 
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index d20229c..368e851 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -103,6 +103,34 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
 #endif
 }
 
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) {
+    uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
+             t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
+
+    /* Reduce t9 at the start so there will be at most a single carry from the first pass */
+    uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x3D1UL; t1 += (x << 6);
+    t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
+    t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
+    t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
+    t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
+    t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
+    t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
+    t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
+    t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
+    t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+    r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
 static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index a045db3..74e5ab2 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -100,6 +100,30 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
 #endif
 }
 
+static void secp256k1_fe_normalize_weak(secp256k1_fe_t *r) {
+    uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
+
+    /* Reduce t4 at the start so there will be at most a single carry from the first pass */
+    uint64_t x = t4 >> 48; t4 &= 0x0FFFFFFFFFFFFULL;
+
+    /* The first pass ensures the magnitude is 1, ... */
+    t0 += x * 0x1000003D1ULL;
+    t1 += (t0 >> 52); t0 &= 0xFFFFFFFFFFFFFULL;
+    t2 += (t1 >> 52); t1 &= 0xFFFFFFFFFFFFFULL;
+    t3 += (t2 >> 52); t2 &= 0xFFFFFFFFFFFFFULL;
+    t4 += (t3 >> 52); t3 &= 0xFFFFFFFFFFFFFULL;
+
+    /* ... except for a possible carry at bit 48 of t4 (i.e. bit 256 of the field element) */
+    VERIFY_CHECK(t4 >> 49 == 0);
+
+    r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
+
+#ifdef VERIFY
+    r->magnitude = 1;
+    secp256k1_fe_verify(r);
+#endif
+}
+
 static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
     uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
 
diff --git a/src/group.h b/src/group.h
index 330e069..6dea6bb 100644
--- a/src/group.h
+++ b/src/group.h
@@ -60,7 +60,6 @@ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a);
 static int secp256k1_ge_is_valid_var(const secp256k1_ge_t *a);
 
 static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a);
-static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a);
 
 /** Get a hex representation of a point. *rlen will be overwritten with the real length. */
 static void secp256k1_ge_get_hex(char *r, int *rlen, const secp256k1_ge_t *a);
@@ -85,7 +84,7 @@ static void secp256k1_gej_set_ge(secp256k1_gej_t *r, const secp256k1_ge_t *a);
 static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t *a);
 
 /** Set r equal to the inverse of a (i.e., mirrored around the X axis) */
-static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a);
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
 
 /** Check whether a group element is the point at infinity. */
 static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
diff --git a/src/group_impl.h b/src/group_impl.h
index e6d4c34..82bfd37 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -29,13 +29,7 @@ static int secp256k1_ge_is_infinity(const secp256k1_ge_t *a) {
 
 static void secp256k1_ge_neg(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
     *r = *a;
-    secp256k1_fe_normalize(&r->y);
-    secp256k1_fe_negate(&r->y, &r->y, 1);
-}
-
-static void secp256k1_ge_neg_var(secp256k1_ge_t *r, const secp256k1_ge_t *a) {
-    *r = *a;
-    secp256k1_fe_normalize_var(&r->y);
+    secp256k1_fe_normalize_weak(&r->y);
     secp256k1_fe_negate(&r->y, &r->y, 1);
 }
 
@@ -172,12 +166,12 @@ static int secp256k1_gej_eq_x_var(const secp256k1_fe_t *x, const secp256k1_gej_t
     return secp256k1_fe_equal(&r, &r2);
 }
 
-static void secp256k1_gej_neg_var(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
+static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a) {
     r->infinity = a->infinity;
     r->x = a->x;
     r->y = a->y;
     r->z = a->z;
-    secp256k1_fe_normalize_var(&r->y);
+    secp256k1_fe_normalize_weak(&r->y);
     secp256k1_fe_negate(&r->y, &r->y, 1);
 }
 
@@ -359,9 +353,9 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
      */
 
     secp256k1_fe_t zz; secp256k1_fe_sqr(&zz, &a->z);                /* z = Z1^2 */
-    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize(&u1);          /* u1 = U1 = X1*Z2^2 (1) */
+    secp256k1_fe_t u1 = a->x; secp256k1_fe_normalize_weak(&u1);     /* u1 = U1 = X1*Z2^2 (1) */
     secp256k1_fe_t u2; secp256k1_fe_mul(&u2, &b->x, &zz);           /* u2 = U2 = X2*Z1^2 (1) */
-    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize(&s1);          /* s1 = S1 = Y1*Z2^3 (1) */
+    secp256k1_fe_t s1 = a->y; secp256k1_fe_normalize_weak(&s1);     /* s1 = S1 = Y1*Z2^3 (1) */
     secp256k1_fe_t s2; secp256k1_fe_mul(&s2, &b->y, &zz);           /* s2 = Y2*Z2^2 (1) */
     secp256k1_fe_mul(&s2, &s2, &a->z);                              /* s2 = S2 = Y2*Z1^3 (1) */
     secp256k1_fe_t z = a->z;                                        /* z = Z = Z1*Z2 (8) */
@@ -388,7 +382,7 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
     secp256k1_fe_mul(&t, &t, &rr);                      /* t = R*(2*R^2-3*Q) (1) */
     secp256k1_fe_add(&t, &n);                           /* t = R*(2*R^2-3*Q)+M^4 (2) */
     secp256k1_fe_negate(&r->y, &t, 2);                  /* r->y = R*(3*Q-2*R^2)-M^4 (3) */
-    secp256k1_fe_normalize(&r->y);
+    secp256k1_fe_normalize_weak(&r->y);
     secp256k1_fe_mul_int(&r->x, 4 * (1 - a->infinity)); /* r->x = X3 = 4*(R^2-Q) */
     secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity)); /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */