From acab934d24ff26289ab9930587c3fc51c30c6a2f Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Tue, 22 Sep 2020 11:01:47 -0400
Subject: [PATCH] Detailed comments for secp256k1_scalar_split_lambda

---
 src/scalar.h           |   9 +--
 src/scalar_4x64_impl.h |  10 +--
 src/scalar_8x32_impl.h |  18 ++---
 src/scalar_impl.h      | 152 ++++++++++++++++++++++++++++++++++++-----
 4 files changed, 154 insertions(+), 35 deletions(-)

diff --git a/src/scalar.h b/src/scalar.h
index 95d3e32..ed01c7e 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -103,10 +103,11 @@ static void secp256k1_scalar_order_get_num(secp256k1_num *r);
 static int secp256k1_scalar_eq(const secp256k1_scalar *a, const secp256k1_scalar *b);
 
 #ifdef USE_ENDOMORPHISM
-/** Find r1 and r2 such that r1+r2*2^128 = a. */
-static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a);
-/** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */
-static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a);
+/** Find r1 and r2 such that r1+r2*2^128 = k. */
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k);
+/** Find r1 and r2 such that r1+r2*lambda = k,
+ * where r1 and r2 or their negations are maximum 128 bits long (see secp256k1_ge_mul_lambda). */
+static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k);
 #endif
 
 /** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 7f39927..3d8d2a5 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -913,13 +913,13 @@ static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a)
 }
 
 #ifdef USE_ENDOMORPHISM
-static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
-    r1->d[0] = a->d[0];
-    r1->d[1] = a->d[1];
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
+    r1->d[0] = k->d[0];
+    r1->d[1] = k->d[1];
     r1->d[2] = 0;
     r1->d[3] = 0;
-    r2->d[0] = a->d[2];
-    r2->d[1] = a->d[3];
+    r2->d[0] = k->d[2];
+    r2->d[1] = k->d[3];
     r2->d[2] = 0;
     r2->d[3] = 0;
 }
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index f8c7fa7..44561c8 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -673,19 +673,19 @@ static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a)
 }
 
 #ifdef USE_ENDOMORPHISM
-static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
-    r1->d[0] = a->d[0];
-    r1->d[1] = a->d[1];
-    r1->d[2] = a->d[2];
-    r1->d[3] = a->d[3];
+static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
+    r1->d[0] = k->d[0];
+    r1->d[1] = k->d[1];
+    r1->d[2] = k->d[2];
+    r1->d[3] = k->d[3];
     r1->d[4] = 0;
     r1->d[5] = 0;
     r1->d[6] = 0;
     r1->d[7] = 0;
-    r2->d[0] = a->d[4];
-    r2->d[1] = a->d[5];
-    r2->d[2] = a->d[6];
-    r2->d[3] = a->d[7];
+    r2->d[0] = k->d[4];
+    r2->d[1] = k->d[5];
+    r2->d[2] = k->d[6];
+    r2->d[3] = k->d[7];
     r2->d[4] = 0;
     r2->d[5] = 0;
     r2->d[6] = 0;
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index 36c279d..e2a0363 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -279,19 +279,31 @@ static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar
  * lambda is {0x53,0x63,0xad,0x4c,0xc0,0x5c,0x30,0xe0,0xa5,0x26,0x1c,0x02,0x88,0x12,0x64,0x5a,
  *            0x12,0x2e,0x22,0xea,0x20,0x81,0x66,0x78,0xdf,0x02,0x96,0x7c,0x1b,0x23,0xbd,0x72}
  *
- * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
- * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
- * and k2 have a small size.
- * It relies on constants a1, b1, a2, b2. These constants for the value of lambda above are:
+ * Both lambda and beta are primitive cube roots of unity.  That is lambda^3 == 1 mod n and
+ * beta^3 == 1 mod p, where n is the curve order and p is the field order.
+ *
+ * Furthermore, because (X^3 - 1) = (X - 1)(X^2 + X + 1), the primitive cube roots of unity are
+ * roots of X^2 + X + 1.  Therefore lambda^2 + lambda == -1 mod n and beta^2 + beta == -1 mod p.
+ * (The other primitive cube roots of unity are lambda^2 and beta^2 respectively.)
+ *
+ * Let l = -1/2 + i*sqrt(3)/2, a complex root of X^2 + X + 1. We can define a ring
+ * homomorphism phi : Z[l] -> Z_n where phi(a + b*l) == a + b*lambda mod n. The kernel of phi
+ * is a lattice over Z[l] (considering Z[l] as a Z-module). This lattice is generated by a
+ * reduced basis {a1 + b1*l, a2 + b2*l} where
  *
  * - a1 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
  * - b1 =     -{0xe4,0x43,0x7e,0xd6,0x01,0x0e,0x88,0x28,0x6f,0x54,0x7f,0xa9,0x0a,0xbf,0xe4,0xc3}
  * - a2 = {0x01,0x14,0xca,0x50,0xf7,0xa8,0xe2,0xf3,0xf6,0x57,0xc1,0x10,0x8d,0x9d,0x44,0xcf,0xd8}
  * - b2 =      {0x30,0x86,0xd2,0x21,0xa7,0xd4,0x6b,0xcd,0xe8,0x6c,0x90,0xe4,0x92,0x84,0xeb,0x15}
  *
- * The algorithm then computes c1 = round(b1 * k / n) and c2 = round(b2 * k / n), and gives
+ * "Guide to Elliptic Curve Cryptography" (Hankerson, Menezes, Vanstone) gives an algorithm
+ * (algorithm 3.74) to find k1 and k2 given k, such that k1 + k2 * lambda == k mod n, and k1
+ * and k2 have a small size.
+ *
+ * The algorithm computes c1 = round(b2 * k / n) and c2 = round((-b1) * k / n), and gives
  * k1 = k - (c1*a1 + c2*a2) and k2 = -(c1*b1 + c2*b2). Instead, we use modular arithmetic, and
- * compute k1 as k - k2 * lambda, avoiding the need for constants a1 and a2.
+ * compute k - k2 * lambda (mod n) which is equivalent to k1 (mod n), avoiding the need for
+ * the constants a1 and a2.
  *
  * g1, g2 are precomputed constants used to replace division with a rounded multiplication
  * when decomposing the scalar for an endomorphism-based point multiplication.
@@ -303,16 +315,122 @@ static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar
  * Cryptography on Sensor Networks Using the MSP430X Microcontroller" (Gouvea, Oliveira, Lopez),
  * Section 4.3 (here we use a somewhat higher-precision estimate):
  * d = a1*b2 - b1*a2
- * g1 = round((2^384)*b2/d)
- * g2 = round((2^384)*(-b1)/d)
+ * g1 = round(2^384 * b2/d)
+ * g2 = round(2^384 * (-b1)/d)
  *
- * (Note that 'd' is also equal to the curve order here because [a1,b1] and [a2,b2] are found
- * as outputs of the Extended Euclidean Algorithm on inputs 'order' and 'lambda').
+ * (Note that d is also equal to the curve order, n, here because [a1,b1] and [a2,b2]
+ * can be found as outputs of the Extended Euclidean Algorithm on inputs n and lambda).
  *
- * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order).
+ * The function below splits k into r1 and r2, such that
+ * - r1 + lambda * r2 == k (mod n)
+ * - either r1 < 2^128 or -r1 mod n < 2^128
+ * - either r2 < 2^128 or -r2 mod n < 2^128
+ *
+ * Proof.
+ *
+ * Let
+ *  - epsilon1 = 2^256 * |g1/2^384 - b2/d|
+ *  - epsilon2 = 2^256 * |g2/2^384 - (-b1)/d|
+ *  - c1 = round(k*g1/2^384)
+ *  - c2 = round(k*g2/2^384)
+ *
+ * Lemma 1: |c1 - k*b2/d| < 2^-1 + epsilon1
+ *
+ *    |c1 - k*b2/d|
+ *  =
+ *    |c1 - k*g1/2^384 + k*g1/2^384 - k*b2/d|
+ * <=   {triangle inequality}
+ *    |c1 - k*g1/2^384| + |k*g1/2^384 - k*b2/d|
+ *  =
+ *    |c1 - k*g1/2^384| + k*|g1/2^384 - b2/d|
+ * <    {rounding in c1 and 0 <= k < 2^256}
+ *    2^-1 + 2^256 * |g1/2^384 - b2/d|
+ *  =   {definition of epsilon1}
+ *    2^-1 + epsilon1
+ *
+ * Lemma 2: |c2 - k*(-b1)/d| < 2^-1 + epsilon2
+ *
+ *    |c2 - k*(-b1)/d|
+ *  =
+ *    |c2 - k*g2/2^384 + k*g2/2^384 - k*(-b1)/d|
+ * <=   {triangle inequality}
+ *    |c2 - k*g2/2^384| + |k*g2/2^384 - k*(-b1)/d|
+ *  =
+ *    |c2 - k*g2/2^384| + k*|g2/2^384 - (-b1)/d|
+ * <    {rounding in c2 and 0 <= k < 2^256}
+ *    2^-1 + 2^256 * |g2/2^384 - (-b1)/d|
+ *  =   {definition of epsilon2}
+ *    2^-1 + epsilon2
+ *
+ * Let
+ *  - k1 = k - c1*a1 - c2*a2
+ *  - k2 = - c1*b1 - c2*b2
+ *
+ * Lemma 3: |k1| < (a1 + a2 + 1)/2 < 2^128
+ *
+ *    |k1|
+ *  =   {definition of k1}
+ *    |k - c1*a1 - c2*a2|
+ *  =   {(a1*b2 - b1*a2)/n = 1}
+ *    |k*(a1*b2 - b1*a2)/n - c1*a1 - c2*a2|
+ *  =
+ *    |a1*(k*b2/n - c1) + a2*(k*(-b1)/n - c2)|
+ * <=   {triangle inequality}
+ *    a1*|k*b2/n - c1| + a2*|k*(-b1)/n - c2|
+ * <    {Lemma 1 and Lemma 2}
+ *    a1*(2^-1 + epsilon1) + a2*(2^-1 + epsilon2)
+ * <    {rounding up to an integer}
+ *    (a1 + a2 + 1)/2
+ * <    {rounding up to a power of 2}
+ *    2^128
+ *
+ * Lemma 4: |k2| < (-b1 + b2)/2 + 1 < 2^128
+ *
+ *    |k2|
+ *  =   {definition of k2}
+ *    |- c1*b1 - c2*b2|
+ *  =   {(b1*b2 - b1*b2)/n = 0}
+ *    |k*(b1*b2 - b1*b2)/n - c1*b1 - c2*b2|
+ *  =
+ *    |b1*(k*b2/n - c1) + b2*(k*(-b1)/n - c2)|
+ * <=   {triangle inequality}
+ *    (-b1)*|k*b2/n - c1| + b2*|k*(-b1)/n - c2|
+ * <    {Lemma 1 and Lemma 2}
+ *    (-b1)*(2^-1 + epsilon1) + b2*(2^-1 + epsilon2)
+ * <    {rounding up to an integer}
+ *    (-b1 + b2)/2 + 1
+ * <    {rounding up to a power of 2}
+ *    2^128
+ *
+ * Let
+ *  - r2 = k2 mod n
+ *  - r1 = k - r2*lambda mod n.
+ *
+ * Notice that r1 is defined such that r1 + r2 * lambda == k (mod n).
+ *
+ * Lemma 5: r1 == k1 mod n.
+ *
+ *    r1
+ * ==   {definition of r1 and r2}
+ *    k - k2*lambda
+ * ==   {definition of k2}
+ *    k - (- c1*b1 - c2*b2)*lambda
+ * ==
+ *    k + c1*b1*lambda + c2*b2*lambda
+ * ==  {a1 + b1*lambda == 0 mod n and a2 + b2*lambda == 0 mod n}
+ *    k - c1*a1 - c2*a2
+ * ==  {definition of k1}
+ *    k1
+ *
+ * From Lemma 3, Lemma 4, Lemma 5 and the definition of r2, we can conclude that
+ *
+ *  - either r1 < 2^128 or -r1 mod n < 2^128
+ *  - either r2 < 2^128 or -r2 mod n < 2^128.
+ *
+ * Q.E.D.
  */
 
-static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
+static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
     secp256k1_scalar c1, c2;
     static const secp256k1_scalar minus_lambda = SECP256K1_SCALAR_CONST(
         0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL,
@@ -334,16 +452,16 @@ static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar
         0xE4437ED6UL, 0x010E8828UL, 0x6F547FA9UL, 0x0ABFE4C4UL,
         0x221208ACUL, 0x9DF506C6UL, 0x1571B4AEUL, 0x8AC47F71UL
     );
-    VERIFY_CHECK(r1 != a);
-    VERIFY_CHECK(r2 != a);
+    VERIFY_CHECK(r1 != k);
+    VERIFY_CHECK(r2 != k);
     /* these _var calls are constant time since the shift amount is constant */
-    secp256k1_scalar_mul_shift_var(&c1, a, &g1, 384);
-    secp256k1_scalar_mul_shift_var(&c2, a, &g2, 384);
+    secp256k1_scalar_mul_shift_var(&c1, k, &g1, 384);
+    secp256k1_scalar_mul_shift_var(&c2, k, &g2, 384);
     secp256k1_scalar_mul(&c1, &c1, &minus_b1);
     secp256k1_scalar_mul(&c2, &c2, &minus_b2);
     secp256k1_scalar_add(r2, &c1, &c2);
     secp256k1_scalar_mul(r1, r2, &minus_lambda);
-    secp256k1_scalar_add(r1, r1, a);
+    secp256k1_scalar_add(r1, r1, k);
 }
 #endif
 #endif