Simulated int128 type.
This commit is contained in:
parent
694ce8fb2d
commit
2914bccbc0
|
@ -48,6 +48,12 @@ noinst_HEADERS += src/precomputed_ecmult.h
|
|||
noinst_HEADERS += src/precomputed_ecmult_gen.h
|
||||
noinst_HEADERS += src/assumptions.h
|
||||
noinst_HEADERS += src/util.h
|
||||
noinst_HEADERS += src/int128.h
|
||||
noinst_HEADERS += src/int128_impl.h
|
||||
noinst_HEADERS += src/int128_native.h
|
||||
noinst_HEADERS += src/int128_native_impl.h
|
||||
noinst_HEADERS += src/int128_struct.h
|
||||
noinst_HEADERS += src/int128_struct_impl.h
|
||||
noinst_HEADERS += src/scratch.h
|
||||
noinst_HEADERS += src/scratch_impl.h
|
||||
noinst_HEADERS += src/selftest.h
|
||||
|
|
|
@ -175,7 +175,11 @@ AC_ARG_ENABLE(external_default_callbacks,
|
|||
[SECP_SET_DEFAULT([enable_external_default_callbacks], [no], [no])])
|
||||
|
||||
# Test-only override of the (autodetected by the C code) "widemul" setting.
|
||||
# Legal values are int64 (for [u]int64_t), int128 (for [unsigned] __int128), and auto (the default).
|
||||
# Legal values are:
|
||||
# * int64 (for [u]int64_t),
|
||||
# * int128 (for [unsigned] __int128),
|
||||
# * int128_struct (for int128 implemented as a structure),
|
||||
# * and auto (the default).
|
||||
AC_ARG_WITH([test-override-wide-multiply], [] ,[set_widemul=$withval], [set_widemul=auto])
|
||||
|
||||
AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto],
|
||||
|
@ -285,6 +289,9 @@ fi
|
|||
|
||||
# Select wide multiplication implementation
|
||||
case $set_widemul in
|
||||
int128_struct)
|
||||
AC_DEFINE(USE_FORCE_WIDEMUL_INT128_STRUCT, 1, [Define this symbol to force the use of the structure for simulating (unsigned) int128 based wide multiplication])
|
||||
;;
|
||||
int128)
|
||||
AC_DEFINE(USE_FORCE_WIDEMUL_INT128, 1, [Define this symbol to force the use of the (unsigned) __int128 based wide multiplication implementation])
|
||||
;;
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
#include <limits.h>
|
||||
|
||||
#include "util.h"
|
||||
#if defined(SECP256K1_INT128_NATIVE)
|
||||
#include "int128_native.h"
|
||||
#endif
|
||||
|
||||
/* This library, like most software, relies on a number of compiler implementation defined (but not undefined)
|
||||
behaviours. Although the behaviours we require are essentially universal we test them specifically here to
|
||||
|
@ -55,7 +58,7 @@ struct secp256k1_assumption_checker {
|
|||
|
||||
/* To int64_t. */
|
||||
((int64_t)(uint64_t)0xB123C456D789E012ULL == (int64_t)-(int64_t)0x4EDC3BA928761FEEULL) &&
|
||||
#if defined(SECP256K1_WIDEMUL_INT128)
|
||||
#if defined(SECP256K1_INT128_NATIVE)
|
||||
((int64_t)(((uint128_t)0xA1234567B8901234ULL << 64) + 0xC5678901D2345678ULL) == (int64_t)-(int64_t)0x3A9876FE2DCBA988ULL) &&
|
||||
(((int64_t)(int128_t)(((uint128_t)0xB1C2D3E4F5A6B7C8ULL << 64) + 0xD9E0F1A2B3C4D5E6ULL)) == (int64_t)(uint64_t)0xD9E0F1A2B3C4D5E6ULL) &&
|
||||
(((int64_t)(int128_t)(((uint128_t)0xABCDEF0123456789ULL << 64) + 0x0123456789ABCDEFULL)) == (int64_t)(uint64_t)0x0123456789ABCDEFULL) &&
|
||||
|
@ -71,7 +74,7 @@ struct secp256k1_assumption_checker {
|
|||
((((int16_t)0xE9AC) >> 4) == (int16_t)(uint16_t)0xFE9A) &&
|
||||
((((int32_t)0x937C918A) >> 9) == (int32_t)(uint32_t)0xFFC9BE48) &&
|
||||
((((int64_t)0xA8B72231DF9CF4B9ULL) >> 19) == (int64_t)(uint64_t)0xFFFFF516E4463BF3ULL) &&
|
||||
#if defined(SECP256K1_WIDEMUL_INT128)
|
||||
#if defined(SECP256K1_INT128_NATIVE)
|
||||
((((int128_t)(((uint128_t)0xCD833A65684A0DBCULL << 64) + 0xB349312F71EA7637ULL)) >> 39) == (int128_t)(((uint128_t)0xFFFFFFFFFF9B0674ULL << 64) + 0xCAD0941B79669262ULL)) &&
|
||||
#endif
|
||||
1) * 2 - 1];
|
||||
|
|
|
@ -11,12 +11,14 @@
|
|||
|
||||
/* Bit-width assertions.
 * VERIFY_BITS(x, n) takes a plain integer x; VERIFY_BITS_128(x, n) takes a pointer
 * to a secp256k1_uint128. Both assert that no bit at position n or above is set.
 * Without VERIFY they expand to an empty statement. */
#ifndef VERIFY
#define VERIFY_BITS(x, n) do { } while(0)
#define VERIFY_BITS_128(x, n) do { } while(0)
#else
#define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
#define VERIFY_BITS_128(x, n) VERIFY_CHECK(secp256k1_u128_check_bits((x), (n)))
#endif
|
||||
|
||||
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
|
||||
uint128_t c, d;
|
||||
secp256k1_uint128 c, d;
|
||||
uint64_t t3, t4, tx, u0;
|
||||
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
|
||||
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
|
||||
|
@ -40,121 +42,119 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
|
|||
* Note that [x 0 0 0 0 0] = [x*R].
|
||||
*/
|
||||
|
||||
d = (uint128_t)a0 * b[3]
|
||||
+ (uint128_t)a1 * b[2]
|
||||
+ (uint128_t)a2 * b[1]
|
||||
+ (uint128_t)a3 * b[0];
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_mul(&d, a0, b[3]);
|
||||
secp256k1_u128_accum_mul(&d, a1, b[2]);
|
||||
secp256k1_u128_accum_mul(&d, a2, b[1]);
|
||||
secp256k1_u128_accum_mul(&d, a3, b[0]);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 0 0] = [p3 0 0 0] */
|
||||
c = (uint128_t)a4 * b[4];
|
||||
VERIFY_BITS(c, 112);
|
||||
secp256k1_u128_mul(&c, a4, b[4]);
|
||||
VERIFY_BITS_128(&c, 112);
|
||||
/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
d += (uint128_t)R * (uint64_t)c; c >>= 64;
|
||||
VERIFY_BITS(d, 115);
|
||||
VERIFY_BITS(c, 48);
|
||||
secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
|
||||
VERIFY_BITS_128(&d, 115);
|
||||
VERIFY_BITS_128(&c, 48);
|
||||
/* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
t3 = d & M; d >>= 52;
|
||||
t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(t3, 52);
|
||||
VERIFY_BITS(d, 63);
|
||||
VERIFY_BITS_128(&d, 63);
|
||||
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
|
||||
d += (uint128_t)a0 * b[4]
|
||||
+ (uint128_t)a1 * b[3]
|
||||
+ (uint128_t)a2 * b[2]
|
||||
+ (uint128_t)a3 * b[1]
|
||||
+ (uint128_t)a4 * b[0];
|
||||
VERIFY_BITS(d, 115);
|
||||
secp256k1_u128_accum_mul(&d, a0, b[4]);
|
||||
secp256k1_u128_accum_mul(&d, a1, b[3]);
|
||||
secp256k1_u128_accum_mul(&d, a2, b[2]);
|
||||
secp256k1_u128_accum_mul(&d, a3, b[1]);
|
||||
secp256k1_u128_accum_mul(&d, a4, b[0]);
|
||||
VERIFY_BITS_128(&d, 115);
|
||||
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
d += (uint128_t)(R << 12) * (uint64_t)c;
|
||||
VERIFY_BITS(d, 116);
|
||||
secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
|
||||
VERIFY_BITS_128(&d, 116);
|
||||
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
t4 = d & M; d >>= 52;
|
||||
t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(t4, 52);
|
||||
VERIFY_BITS(d, 64);
|
||||
VERIFY_BITS_128(&d, 64);
|
||||
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
tx = (t4 >> 48); t4 &= (M >> 4);
|
||||
VERIFY_BITS(tx, 4);
|
||||
VERIFY_BITS(t4, 48);
|
||||
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
|
||||
c = (uint128_t)a0 * b[0];
|
||||
VERIFY_BITS(c, 112);
|
||||
secp256k1_u128_mul(&c, a0, b[0]);
|
||||
VERIFY_BITS_128(&c, 112);
|
||||
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
|
||||
d += (uint128_t)a1 * b[4]
|
||||
+ (uint128_t)a2 * b[3]
|
||||
+ (uint128_t)a3 * b[2]
|
||||
+ (uint128_t)a4 * b[1];
|
||||
VERIFY_BITS(d, 115);
|
||||
secp256k1_u128_accum_mul(&d, a1, b[4]);
|
||||
secp256k1_u128_accum_mul(&d, a2, b[3]);
|
||||
secp256k1_u128_accum_mul(&d, a3, b[2]);
|
||||
secp256k1_u128_accum_mul(&d, a4, b[1]);
|
||||
VERIFY_BITS_128(&d, 115);
|
||||
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
u0 = d & M; d >>= 52;
|
||||
u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(u0, 52);
|
||||
VERIFY_BITS(d, 63);
|
||||
VERIFY_BITS_128(&d, 63);
|
||||
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
u0 = (u0 << 4) | tx;
|
||||
VERIFY_BITS(u0, 56);
|
||||
/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
c += (uint128_t)u0 * (R >> 4);
|
||||
VERIFY_BITS(c, 115);
|
||||
secp256k1_u128_accum_mul(&c, u0, R >> 4);
|
||||
VERIFY_BITS_128(&c, 115);
|
||||
/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
r[0] = c & M; c >>= 52;
|
||||
r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[0], 52);
|
||||
VERIFY_BITS(c, 61);
|
||||
VERIFY_BITS_128(&c, 61);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
|
||||
c += (uint128_t)a0 * b[1]
|
||||
+ (uint128_t)a1 * b[0];
|
||||
VERIFY_BITS(c, 114);
|
||||
secp256k1_u128_accum_mul(&c, a0, b[1]);
|
||||
secp256k1_u128_accum_mul(&c, a1, b[0]);
|
||||
VERIFY_BITS_128(&c, 114);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
|
||||
d += (uint128_t)a2 * b[4]
|
||||
+ (uint128_t)a3 * b[3]
|
||||
+ (uint128_t)a4 * b[2];
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_accum_mul(&d, a2, b[4]);
|
||||
secp256k1_u128_accum_mul(&d, a3, b[3]);
|
||||
secp256k1_u128_accum_mul(&d, a4, b[2]);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
c += (d & M) * R; d >>= 52;
|
||||
VERIFY_BITS(c, 115);
|
||||
VERIFY_BITS(d, 62);
|
||||
secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS_128(&c, 115);
|
||||
VERIFY_BITS_128(&d, 62);
|
||||
/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
r[1] = c & M; c >>= 52;
|
||||
r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[1], 52);
|
||||
VERIFY_BITS(c, 63);
|
||||
VERIFY_BITS_128(&c, 63);
|
||||
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
|
||||
c += (uint128_t)a0 * b[2]
|
||||
+ (uint128_t)a1 * b[1]
|
||||
+ (uint128_t)a2 * b[0];
|
||||
VERIFY_BITS(c, 114);
|
||||
secp256k1_u128_accum_mul(&c, a0, b[2]);
|
||||
secp256k1_u128_accum_mul(&c, a1, b[1]);
|
||||
secp256k1_u128_accum_mul(&c, a2, b[0]);
|
||||
VERIFY_BITS_128(&c, 114);
|
||||
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
|
||||
d += (uint128_t)a3 * b[4]
|
||||
+ (uint128_t)a4 * b[3];
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_accum_mul(&d, a3, b[4]);
|
||||
secp256k1_u128_accum_mul(&d, a4, b[3]);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
c += (uint128_t)R * (uint64_t)d; d >>= 64;
|
||||
VERIFY_BITS(c, 115);
|
||||
VERIFY_BITS(d, 50);
|
||||
secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
|
||||
VERIFY_BITS_128(&c, 115);
|
||||
VERIFY_BITS_128(&d, 50);
|
||||
/* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
|
||||
r[2] = c & M; c >>= 52;
|
||||
r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[2], 52);
|
||||
VERIFY_BITS(c, 63);
|
||||
VERIFY_BITS_128(&c, 63);
|
||||
/* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
c += (uint128_t)(R << 12) * (uint64_t)d + t3;
|
||||
VERIFY_BITS(c, 100);
|
||||
secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
|
||||
secp256k1_u128_accum_u64(&c, t3);
|
||||
VERIFY_BITS_128(&c, 100);
|
||||
/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
r[3] = c & M; c >>= 52;
|
||||
r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[3], 52);
|
||||
VERIFY_BITS(c, 48);
|
||||
VERIFY_BITS_128(&c, 48);
|
||||
/* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
c += t4;
|
||||
VERIFY_BITS(c, 49);
|
||||
/* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
r[4] = c;
|
||||
r[4] = secp256k1_u128_to_u64(&c) + t4;
|
||||
VERIFY_BITS(r[4], 49);
|
||||
/* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
}
|
||||
|
||||
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
|
||||
uint128_t c, d;
|
||||
secp256k1_uint128 c, d;
|
||||
uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
|
||||
int64_t t3, t4, tx, u0;
|
||||
const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL;
|
||||
|
@ -170,107 +170,105 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t
|
|||
* Note that [x 0 0 0 0 0] = [x*R].
|
||||
*/
|
||||
|
||||
d = (uint128_t)(a0*2) * a3
|
||||
+ (uint128_t)(a1*2) * a2;
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_mul(&d, a0*2, a3);
|
||||
secp256k1_u128_accum_mul(&d, a1*2, a2);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 0 0] = [p3 0 0 0] */
|
||||
c = (uint128_t)a4 * a4;
|
||||
VERIFY_BITS(c, 112);
|
||||
secp256k1_u128_mul(&c, a4, a4);
|
||||
VERIFY_BITS_128(&c, 112);
|
||||
/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
d += (uint128_t)R * (uint64_t)c; c >>= 64;
|
||||
VERIFY_BITS(d, 115);
|
||||
VERIFY_BITS(c, 48);
|
||||
secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64);
|
||||
VERIFY_BITS_128(&d, 115);
|
||||
VERIFY_BITS_128(&c, 48);
|
||||
/* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
t3 = d & M; d >>= 52;
|
||||
t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(t3, 52);
|
||||
VERIFY_BITS(d, 63);
|
||||
VERIFY_BITS_128(&d, 63);
|
||||
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
|
||||
|
||||
a4 *= 2;
|
||||
d += (uint128_t)a0 * a4
|
||||
+ (uint128_t)(a1*2) * a3
|
||||
+ (uint128_t)a2 * a2;
|
||||
VERIFY_BITS(d, 115);
|
||||
secp256k1_u128_accum_mul(&d, a0, a4);
|
||||
secp256k1_u128_accum_mul(&d, a1*2, a3);
|
||||
secp256k1_u128_accum_mul(&d, a2, a2);
|
||||
VERIFY_BITS_128(&d, 115);
|
||||
/* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
d += (uint128_t)(R << 12) * (uint64_t)c;
|
||||
VERIFY_BITS(d, 116);
|
||||
secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c));
|
||||
VERIFY_BITS_128(&d, 116);
|
||||
/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
t4 = d & M; d >>= 52;
|
||||
t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(t4, 52);
|
||||
VERIFY_BITS(d, 64);
|
||||
VERIFY_BITS_128(&d, 64);
|
||||
/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
tx = (t4 >> 48); t4 &= (M >> 4);
|
||||
VERIFY_BITS(tx, 4);
|
||||
VERIFY_BITS(t4, 48);
|
||||
/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
|
||||
|
||||
c = (uint128_t)a0 * a0;
|
||||
VERIFY_BITS(c, 112);
|
||||
secp256k1_u128_mul(&c, a0, a0);
|
||||
VERIFY_BITS_128(&c, 112);
|
||||
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
|
||||
d += (uint128_t)a1 * a4
|
||||
+ (uint128_t)(a2*2) * a3;
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_accum_mul(&d, a1, a4);
|
||||
secp256k1_u128_accum_mul(&d, a2*2, a3);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
u0 = d & M; d >>= 52;
|
||||
u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS(u0, 52);
|
||||
VERIFY_BITS(d, 62);
|
||||
VERIFY_BITS_128(&d, 62);
|
||||
/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
u0 = (u0 << 4) | tx;
|
||||
VERIFY_BITS(u0, 56);
|
||||
/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
c += (uint128_t)u0 * (R >> 4);
|
||||
VERIFY_BITS(c, 113);
|
||||
secp256k1_u128_accum_mul(&c, u0, R >> 4);
|
||||
VERIFY_BITS_128(&c, 113);
|
||||
/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
r[0] = c & M; c >>= 52;
|
||||
r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[0], 52);
|
||||
VERIFY_BITS(c, 61);
|
||||
VERIFY_BITS_128(&c, 61);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
|
||||
|
||||
a0 *= 2;
|
||||
c += (uint128_t)a0 * a1;
|
||||
VERIFY_BITS(c, 114);
|
||||
secp256k1_u128_accum_mul(&c, a0, a1);
|
||||
VERIFY_BITS_128(&c, 114);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
|
||||
d += (uint128_t)a2 * a4
|
||||
+ (uint128_t)a3 * a3;
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_accum_mul(&d, a2, a4);
|
||||
secp256k1_u128_accum_mul(&d, a3, a3);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
c += (d & M) * R; d >>= 52;
|
||||
VERIFY_BITS(c, 115);
|
||||
VERIFY_BITS(d, 62);
|
||||
secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52);
|
||||
VERIFY_BITS_128(&c, 115);
|
||||
VERIFY_BITS_128(&d, 62);
|
||||
/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
r[1] = c & M; c >>= 52;
|
||||
r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[1], 52);
|
||||
VERIFY_BITS(c, 63);
|
||||
VERIFY_BITS_128(&c, 63);
|
||||
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
|
||||
|
||||
c += (uint128_t)a0 * a2
|
||||
+ (uint128_t)a1 * a1;
|
||||
VERIFY_BITS(c, 114);
|
||||
secp256k1_u128_accum_mul(&c, a0, a2);
|
||||
secp256k1_u128_accum_mul(&c, a1, a1);
|
||||
VERIFY_BITS_128(&c, 114);
|
||||
/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
|
||||
d += (uint128_t)a3 * a4;
|
||||
VERIFY_BITS(d, 114);
|
||||
secp256k1_u128_accum_mul(&d, a3, a4);
|
||||
VERIFY_BITS_128(&d, 114);
|
||||
/* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
c += (uint128_t)R * (uint64_t)d; d >>= 64;
|
||||
VERIFY_BITS(c, 115);
|
||||
VERIFY_BITS(d, 50);
|
||||
secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64);
|
||||
VERIFY_BITS_128(&c, 115);
|
||||
VERIFY_BITS_128(&d, 50);
|
||||
/* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
r[2] = c & M; c >>= 52;
|
||||
r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[2], 52);
|
||||
VERIFY_BITS(c, 63);
|
||||
VERIFY_BITS_128(&c, 63);
|
||||
/* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
|
||||
c += (uint128_t)(R << 12) * (uint64_t)d + t3;
|
||||
VERIFY_BITS(c, 100);
|
||||
secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d));
|
||||
secp256k1_u128_accum_u64(&c, t3);
|
||||
VERIFY_BITS_128(&c, 100);
|
||||
/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
r[3] = c & M; c >>= 52;
|
||||
r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52);
|
||||
VERIFY_BITS(r[3], 52);
|
||||
VERIFY_BITS(c, 48);
|
||||
VERIFY_BITS_128(&c, 48);
|
||||
/* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
c += t4;
|
||||
VERIFY_BITS(c, 49);
|
||||
/* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
r[4] = c;
|
||||
r[4] = secp256k1_u128_to_u64(&c) + t4;
|
||||
VERIFY_BITS(r[4], 49);
|
||||
/* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
|
||||
}
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
#ifndef SECP256K1_INT128_H
|
||||
#define SECP256K1_INT128_H
|
||||
|
||||
#if defined HAVE_CONFIG_H
|
||||
#include "libsecp256k1-config.h"
|
||||
#endif
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#if defined(SECP256K1_INT128_NATIVE)
|
||||
#include "int128_native.h"
|
||||
#elif defined(SECP256K1_INT128_STRUCT)
|
||||
#include "int128_struct.h"
|
||||
#else
|
||||
#error "Please select int128 implementation"
|
||||
#endif
|
||||
|
||||
/* Multiply two unsigned 64-bit values a and b and write the result to r. */
|
||||
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
|
||||
|
||||
/* Multiply two unsigned 64-bit values a and b and add the result to r.
|
||||
* The final result is taken modulo 2^128.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b);
|
||||
|
||||
/* Add an unsigned 64-bit value a to r.
|
||||
* The final result is taken modulo 2^128.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a);
|
||||
|
||||
/* Unsigned (logical) right shift.
|
||||
* Non-constant time in n.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n);
|
||||
|
||||
/* Return the low 64-bits of a 128-bit value as an unsigned 64-bit value. */
|
||||
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a);
|
||||
|
||||
/* Return the high 64-bits of a 128-bit value as an unsigned 64-bit value. */
|
||||
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a);
|
||||
|
||||
/* Write an unsigned 64-bit value to r. */
|
||||
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a);
|
||||
|
||||
/* Tests if r is strictly less than 2^n.
|
||||
* n must be strictly less than 128.
|
||||
*/
|
||||
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n);
|
||||
|
||||
/* Multiply two signed 64-bit values a and b and write the result to r. */
|
||||
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b);
|
||||
|
||||
/* Multiply two signed 64-bit values a and b and add the result to r.
|
||||
* Overflow or underflow from the addition is undefined behaviour.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b);
|
||||
|
||||
/* Compute a*d - b*c from signed 64-bit values and write the result to r. */
|
||||
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d);
|
||||
|
||||
/* Signed (arithmetic) right shift.
|
||||
* Non-constant time in b.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int b);
|
||||
|
||||
/* Return the low 64-bits of a 128-bit value interpreted as a signed 64-bit value. */
|
||||
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a);
|
||||
|
||||
/* Write a signed 64-bit value to r. */
|
||||
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a);
|
||||
|
||||
/* Compare two 128-bit values for equality. */
|
||||
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b);
|
||||
|
||||
/* Tests if r is equal to 2^n.
|
||||
* n must be strictly less than 127.
|
||||
*/
|
||||
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef SECP256K1_INT128_IMPL_H
|
||||
#define SECP256K1_INT128_IMPL_H
|
||||
|
||||
#include "int128.h"
|
||||
#include "util.h"
|
||||
|
||||
#if defined HAVE_CONFIG_H
|
||||
#include "libsecp256k1-config.h"
|
||||
#endif
|
||||
|
||||
#if defined(SECP256K1_INT128_NATIVE)
|
||||
#include "int128_native_impl.h"
|
||||
#elif defined(SECP256K1_INT128_STRUCT)
|
||||
#include "int128_struct_impl.h"
|
||||
#else
|
||||
#error "Please select int128 implementation"
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef SECP256K1_INT128_NATIVE_H
|
||||
#define SECP256K1_INT128_NATIVE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "util.h"
|
||||
|
||||
#if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__)
|
||||
SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
|
||||
SECP256K1_GNUC_EXT typedef __int128 int128_t;
|
||||
# define UINT128_MAX ((uint128_t)(-1))
|
||||
# define INT128_MAX ((int128_t)(UINT128_MAX >> 1))
|
||||
# define INT128_MIN (-INT128_MAX - 1)
|
||||
/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */
|
||||
#endif
|
||||
|
||||
typedef uint128_t secp256k1_uint128;
|
||||
typedef int128_t secp256k1_int128;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,79 @@
|
|||
#ifndef SECP256K1_INT128_NATIVE_IMPL_H
|
||||
#define SECP256K1_INT128_NATIVE_IMPL_H
|
||||
|
||||
#include "int128.h"
|
||||
|
||||
/* Store the full 128-bit product of the unsigned 64-bit values a and b in *r. */
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
    /* Widen one operand first so the multiplication is carried out in 128 bits. */
    const uint128_t wide_a = a;
    *r = wide_a * b;
}
|
||||
|
||||
/* Add the 128-bit product a*b to *r; unsigned arithmetic wraps modulo 2^128. */
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
    const uint128_t product = (uint128_t)a * b;
    *r = *r + product;
}
|
||||
|
||||
/* Add the unsigned 64-bit value a to *r; unsigned arithmetic wraps modulo 2^128. */
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) {
    *r = *r + a;
}
|
||||
|
||||
/* Logical right shift of *r by n bits, in place. n must be below 128. */
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
    VERIFY_CHECK(n < 128);
    *r = *r >> n;
}
|
||||
|
||||
/* Return the low 64 bits of *a (the conversion to uint64_t truncates). */
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) {
    const uint128_t value = *a;
    return (uint64_t)value;
}
|
||||
|
||||
/* Return the high 64 bits of *a. */
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) {
    const uint128_t upper = *a >> 64;
    return (uint64_t)upper;
}
|
||||
|
||||
/* Set *r to the unsigned 64-bit value a (zero-extended). */
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) {
    *r = (uint128_t)a;
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 128);
|
||||
return (*r >> n == 0);
|
||||
}
|
||||
|
||||
/* Store the full 128-bit product of the signed 64-bit values a and b in *r. */
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
    /* Widen one operand first so the multiplication is carried out in 128 bits. */
    const int128_t wide_a = a;
    *r = wide_a * b;
}
|
||||
|
||||
/* Add the signed 128-bit product a*b to *r.
 * The VERIFY_CHECK asserts that the addition stays within [INT128_MIN, INT128_MAX]. */
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
    const int128_t product = (int128_t)a * b;
    /* Negative addend: guard the lower bound; otherwise guard the upper bound.
     * Both comparisons are arranged so they cannot themselves overflow. */
    VERIFY_CHECK(product < 0 ? INT128_MIN - product <= *r : *r <= INT128_MAX - product);
    *r = *r + product;
}
|
||||
|
||||
/* Store a*d - b*c in *r, with both products computed in 128 bits.
 * The VERIFY_CHECK asserts that the subtraction stays within [INT128_MIN, INT128_MAX]. */
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) {
    const int128_t minuend = (int128_t)a * d;
    const int128_t subtrahend = (int128_t)b * c;
    /* Subtracting a negative value can only exceed the upper bound;
     * subtracting a non-negative value can only exceed the lower bound. */
    VERIFY_CHECK(subtrahend < 0 ? minuend <= INT128_MAX + subtrahend : INT128_MIN + subtrahend <= minuend);
    *r = minuend - subtrahend;
}
|
||||
|
||||
/* Right shift of the signed value *r by n bits, in place, using the compiler's
 * >> on int128_t. n must be below 128. */
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) {
    VERIFY_CHECK(n < 128);
    *r = *r >> n;
}
|
||||
|
||||
/* Return *a converted to int64_t (the same narrowing conversion, spelled explicitly). */
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) {
    return (int64_t)*a;
}
|
||||
|
||||
/* Set *r to the signed 64-bit value a (sign-extended). */
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) {
    *r = (int128_t)a;
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) {
|
||||
return *a == *b;
|
||||
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 127);
|
||||
return (*r == (int128_t)1 << n);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,14 @@
|
|||
#ifndef SECP256K1_INT128_STRUCT_H
|
||||
#define SECP256K1_INT128_STRUCT_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "util.h"
|
||||
|
||||
/* 128-bit integer stored as two 64-bit limbs. */
typedef struct {
    uint64_t lo; /* low 64 bits */
    uint64_t hi; /* high 64 bits */
} secp256k1_uint128;
|
||||
|
||||
typedef secp256k1_uint128 secp256k1_int128;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,177 @@
|
|||
#ifndef SECP256K1_INT128_STRUCT_IMPL_H
|
||||
#define SECP256K1_INT128_STRUCT_IMPL_H
|
||||
|
||||
#include "int128.h"
|
||||
|
||||
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) /* MSVC */
|
||||
# include <intrin.h>
|
||||
# if defined(_M_X64)
|
||||
/* On x86_64 MSVC, use native _(u)mul128 for 64x64->128 multiplications. */
|
||||
# define secp256k1_umul128 _umul128
|
||||
# define secp256k1_mul128 _mul128
|
||||
# else
|
||||
/* On ARM64 MSVC, use __(u)mulh for the upper half of 64x64 multiplications. */
|
||||
/* Unsigned 64x64->128 multiplication for ARM64 MSVC.
 * Writes the upper 64 bits of a*b (from the __umulh intrinsic) to *hi and
 * returns the lower 64 bits. */
static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) {
    /* The wrapping 64-bit product is exactly the low half of the full product. */
    const uint64_t low = a * b;
    *hi = __umulh(a, b);
    return low;
}
|
||||
|
||||
/* Signed 64x64->128 multiplication for ARM64 MSVC.
 * Writes the upper 64 bits of a*b (from the __mulh intrinsic) to *hi and
 * returns the lower 64 bits. */
static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) {
    *hi = __mulh(a, b);
    /* Compute the low half in unsigned arithmetic: a signed a * b overflows
     * int64_t (undefined behaviour) whenever the full product does not fit in
     * 64 bits. The wrapped unsigned product has the same low 64 bits. */
    return (int64_t)((uint64_t)a * (uint64_t)b);
}
|
||||
# endif
|
||||
#else
|
||||
/* On other systems, emulate 64x64->128 multiplications using 32x32->64 multiplications. */
|
||||
/* Portable unsigned 64x64->128 multiplication built from 32x32->64 products.
 * Writes the upper 64 bits of a*b to *hi and returns the lower 64 bits. */
static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) {
    /* 32-bit halves of each operand, held in 64-bit variables. */
    const uint64_t a_lo = (uint32_t)a;
    const uint64_t a_hi = a >> 32;
    const uint64_t b_lo = (uint32_t)b;
    const uint64_t b_hi = b >> 32;
    /* The four partial products. */
    const uint64_t p_ll = a_lo * b_lo;
    const uint64_t p_lh = a_lo * b_hi;
    const uint64_t p_hl = a_hi * b_lo;
    const uint64_t p_hh = a_hi * b_hi;
    /* Everything that lands in bits 32..63 of the result; three 32-bit terms,
     * so the sum needs at most 34 bits. */
    const uint64_t middle = (p_ll >> 32) + (uint32_t)p_lh + (uint32_t)p_hl;
    *hi = p_hh + (p_lh >> 32) + (p_hl >> 32) + (middle >> 32);
    return (middle << 32) + (uint32_t)p_ll;
}
|
||||
|
||||
/* Portable signed 64x64->128 multiplication built from 32-bit halves.
 * Writes the upper 64 bits of a*b to *hi and returns the lower 64 bits.
 * The low halves are treated as unsigned, the high halves (a >> 32, b >> 32)
 * as signed. */
static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) {
    const int64_t a_hi = a >> 32;
    const int64_t b_hi = b >> 32;
    /* unsigned x unsigned */
    const uint64_t p_ll = (uint64_t)(uint32_t)a * (uint32_t)b;
    /* unsigned x signed, and signed x unsigned */
    const int64_t p_lh = (uint32_t)a * b_hi;
    const int64_t p_hl = a_hi * (uint32_t)b;
    /* signed x signed */
    const int64_t p_hh = a_hi * b_hi;
    /* Everything that lands in bits 32..63 of the result; three 32-bit terms,
     * so the sum needs at most 34 bits. */
    const uint64_t middle = (p_ll >> 32) + (uint32_t)p_lh + (uint32_t)p_hl;
    *hi = p_hh + (p_lh >> 32) + (p_hl >> 32) + (middle >> 32);
    return (middle << 32) + (uint32_t)p_ll;
}
|
||||
#endif
|
||||
|
||||
/* Store the full 128-bit product of the unsigned 64-bit values a and b in *r. */
static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
    uint64_t high;
    const uint64_t low = secp256k1_umul128(a, b, &high);
    r->hi = high;
    r->lo = low;
}
|
||||
|
||||
/* Add the 128-bit product a*b to *r; the result wraps modulo 2^128. */
static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) {
    uint64_t prod_hi;
    const uint64_t prod_lo = secp256k1_umul128(a, b, &prod_hi);
    const uint64_t sum_lo = r->lo + prod_lo;
    /* The low-limb addition wrapped exactly when the sum is below an addend. */
    const uint64_t carry = sum_lo < prod_lo;
    r->lo = sum_lo;
    r->hi += prod_hi + carry;
}
|
||||
|
||||
/* Add the unsigned 64-bit value a to *r; the result wraps modulo 2^128. */
static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) {
    const uint64_t sum_lo = r->lo + a;
    /* Propagate the carry out of the low limb (sum wrapped iff sum < a). */
    r->hi += (sum_lo < a);
    r->lo = sum_lo;
}
|
||||
|
||||
/* Unsigned (logical) right shift.
|
||||
* Non-constant time in n.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 128);
|
||||
if (n >= 64) {
|
||||
r->lo = r->hi >> (n-64);
|
||||
r->hi = 0;
|
||||
} else if (n > 0) {
|
||||
r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n;
|
||||
r->hi >>= n;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the low 64-bit limb of *a. */
static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) {
    const uint64_t low = a->lo;
    return low;
}
|
||||
|
||||
/* Return the high 64-bit limb of *a. */
static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) {
    const uint64_t high = a->hi;
    return high;
}
|
||||
|
||||
/* Set *r to the unsigned 64-bit value a: low limb a, high limb zero. */
static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) {
    r->lo = a;
    r->hi = 0;
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 128);
|
||||
return n >= 64 ? r->hi >> (n - 64) == 0
|
||||
: r->hi == 0 && r->lo >> n == 0;
|
||||
}
|
||||
|
||||
/* Store the full 128-bit product of the signed 64-bit values a and b in *r.
 * Both halves returned by secp256k1_mul128 are stored as raw uint64_t limbs. */
static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
    int64_t high;
    const int64_t low = secp256k1_mul128(a, b, &high);
    r->hi = (uint64_t)high;
    r->lo = (uint64_t)low;
}
|
||||
|
||||
/* Add the signed 128-bit product a*b to *r.
 * The two VERIFY_CHECKs assert that the addition neither overflows nor underflows,
 * judged by the sign bit (bit 63) of the high limbs. */
static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
    int64_t prod_hi;
    const uint64_t prod_lo = (uint64_t)secp256k1_mul128(a, b, &prod_hi);
    r->lo += prod_lo;
    /* Fold the carry out of the low limb into the high word being added (branch-free). */
    prod_hi += (r->lo < prod_lo);
    /* No overflow: non-negative + non-negative must stay non-negative.
     * Written as (not premise) or conclusion. */
    VERIFY_CHECK(!((r->hi >> 63) == 0 && ((uint64_t)prod_hi >> 63) == 0) || ((r->hi + (uint64_t)prod_hi) >> 63) == 0);
    /* No underflow: negative + negative must stay negative. */
    VERIFY_CHECK(!((r->hi >> 63) != 0 && ((uint64_t)prod_hi >> 63) != 0) || ((r->hi + (uint64_t)prod_hi) >> 63) != 0);
    r->hi += prod_hi;
}
|
||||
|
||||
/* Subtract the product a*b from *r.
 *
 * The product is obtained from secp256k1_mul128 (defined elsewhere): its return value is
 * used as the low limb, the value written through its out-parameter as the high limb.
 * The borrow out of the low-limb subtraction is folded into the product's high limb before
 * that is subtracted from r->hi. Overflow/underflow of the signed result is only detected
 * through VERIFY_CHECK.
 */
static SECP256K1_INLINE void secp256k1_i128_dissip_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
    int64_t hi;
    const uint64_t low = (uint64_t)secp256k1_mul128(a, b, &hi);
    /* A borrow is needed exactly when r's low limb is smaller than the limb being
     * subtracted; it has to be determined before r->lo is modified. */
    hi += (r->lo < low);
    /* No overflow: when r is non-negative (sign bit clear) and the value being subtracted
     * is negative (sign bit set), the result must be non-negative too.
     */
    VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi <= 0x7fffffffffffffffu));
    /* No underflow: when r is negative (sign bit set) and the value being subtracted is
     * non-negative (sign bit clear), the result must be negative too.
     */
    VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi > 0x7fffffffffffffffu));
    r->lo -= low;
    r->hi -= (uint64_t)hi;
}
|
||||
|
||||
/* Set *r to a*d - b*c, i.e. the determinant of the 2x2 matrix [a b; c d]. */
static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) {
    int64_t ad_hi;
    /* r = a*d, stored limb by limb (same steps as secp256k1_i128_mul). */
    r->lo = (uint64_t)secp256k1_mul128(a, d, &ad_hi);
    r->hi = (uint64_t)ad_hi;
    /* r -= b*c. */
    secp256k1_i128_dissip_mul(r, b, c);
}
|
||||
|
||||
/* Signed (arithmetic) right shift.
|
||||
* Non-constant time in n.
|
||||
*/
|
||||
static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 128);
|
||||
if (n >= 64) {
|
||||
r->lo = (uint64_t)((int64_t)(r->hi) >> (n-64));
|
||||
r->hi = (uint64_t)((int64_t)(r->hi) >> 63);
|
||||
} else if (n > 0) {
|
||||
r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n;
|
||||
r->hi = (uint64_t)((int64_t)(r->hi) >> n);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the low limb of *a converted to int64_t; the high limb is not consulted. */
static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) {
    const uint64_t low_limb = a->lo;
    return (int64_t)low_limb;
}
|
||||
|
||||
/* Set *r to the 64-bit signed value a, sign-extended.
 * The low limb receives the bits of a; the high limb receives a >> 63, which is all
 * zeros or all ones provided >> on a negative int64_t is an arithmetic shift
 * (implementation-defined in C).
 */
static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) {
    r->lo = (uint64_t)a;
    r->hi = (uint64_t)(a >> 63);
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) {
|
||||
return a->hi == b->hi && a->lo == b->lo;
|
||||
}
|
||||
|
||||
static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
|
||||
VERIFY_CHECK(n < 127);
|
||||
return n >= 64 ? r->hi == (uint64_t)1 << (n - 64) && r->lo == 0
|
||||
: r->hi == 0 && r->lo == (uint64_t)1 << n;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "modinv64.h"
|
||||
|
||||
#include "util.h"
|
||||
#include "int128_impl.h"
|
||||
|
||||
/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
|
||||
* modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
|
||||
|
@ -18,6 +18,15 @@
|
|||
* implementation for N=62, using 62-bit signed limbs represented as int64_t.
|
||||
*/
|
||||
|
||||
/* Data type for transition matrices (see section 3 of explanation).
|
||||
*
|
||||
* t = [ u v ]
|
||||
* [ q r ]
|
||||
*/
|
||||
typedef struct {
|
||||
int64_t u, v, q, r;
|
||||
} secp256k1_modinv64_trans2x2;
|
||||
|
||||
#ifdef VERIFY
|
||||
/* Helper function to compute the absolute value of an int64_t.
|
||||
* (we don't use abs/labs/llabs as it depends on the int sizes). */
|
||||
|
@ -32,15 +41,17 @@ static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}};
|
|||
/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^62). */
|
||||
static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) {
|
||||
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
|
||||
int128_t c = 0;
|
||||
secp256k1_int128 c, d;
|
||||
int i;
|
||||
secp256k1_i128_from_i64(&c, 0);
|
||||
for (i = 0; i < 4; ++i) {
|
||||
if (i < alen) c += (int128_t)a->v[i] * factor;
|
||||
r->v[i] = (int64_t)c & M62; c >>= 62;
|
||||
if (i < alen) secp256k1_i128_accum_mul(&c, a->v[i], factor);
|
||||
r->v[i] = secp256k1_i128_to_i64(&c) & M62; secp256k1_i128_rshift(&c, 62);
|
||||
}
|
||||
if (4 < alen) c += (int128_t)a->v[4] * factor;
|
||||
VERIFY_CHECK(c == (int64_t)c);
|
||||
r->v[4] = (int64_t)c;
|
||||
if (4 < alen) secp256k1_i128_accum_mul(&c, a->v[4], factor);
|
||||
secp256k1_i128_from_i64(&d, secp256k1_i128_to_i64(&c));
|
||||
VERIFY_CHECK(secp256k1_i128_eq_var(&c, &d));
|
||||
r->v[4] = secp256k1_i128_to_i64(&c);
|
||||
}
|
||||
|
||||
/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A has alen limbs; b has 5. */
|
||||
|
@ -60,6 +71,13 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Check if the determinant of t is equal to 1 << n. */
|
||||
static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n) {
|
||||
secp256k1_int128 a;
|
||||
secp256k1_i128_det(&a, t->u, t->v, t->q, t->r);
|
||||
return secp256k1_i128_check_pow2(&a, n);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Take as input a signed62 number in range (-2*modulus,modulus), and add a multiple of the modulus
|
||||
|
@ -136,15 +154,6 @@ static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int6
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Data type for transition matrices (see section 3 of explanation).
|
||||
*
|
||||
* t = [ u v ]
|
||||
* [ q r ]
|
||||
*/
|
||||
typedef struct {
|
||||
int64_t u, v, q, r;
|
||||
} secp256k1_modinv64_trans2x2;
|
||||
|
||||
/* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)).
|
||||
* Note that the transformation matrix is scaled by 2^62 and not 2^59.
|
||||
*
|
||||
|
@ -203,13 +212,15 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
|
|||
t->v = (int64_t)v;
|
||||
t->q = (int64_t)q;
|
||||
t->r = (int64_t)r;
|
||||
#ifdef VERIFY
|
||||
/* The determinant of t must be a power of two. This guarantees that multiplication with t
|
||||
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
|
||||
* will be divided out again). As each divstep's individual matrix has determinant 2, the
|
||||
* aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
|
||||
* 8*identity (which has determinant 2^6) means the overall output has determinant
|
||||
* 2^65. */
|
||||
VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65);
|
||||
VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65));
|
||||
#endif
|
||||
return zeta;
|
||||
}
|
||||
|
||||
|
@ -286,11 +297,13 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
|
|||
t->v = (int64_t)v;
|
||||
t->q = (int64_t)q;
|
||||
t->r = (int64_t)r;
|
||||
#ifdef VERIFY
|
||||
/* The determinant of t must be a power of two. This guarantees that multiplication with t
|
||||
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
|
||||
* will be divided out again). As each divstep's individual matrix has determinant 2, the
|
||||
* aggregate of 62 of them will have determinant 2^62. */
|
||||
VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62);
|
||||
VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62));
|
||||
#endif
|
||||
return eta;
|
||||
}
|
||||
|
||||
|
@ -307,7 +320,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
|
|||
const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4];
|
||||
const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
|
||||
int64_t md, me, sd, se;
|
||||
int128_t cd, ce;
|
||||
secp256k1_int128 cd, ce;
|
||||
#ifdef VERIFY
|
||||
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
|
||||
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */
|
||||
|
@ -324,54 +337,64 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
|
|||
md = (u & sd) + (v & se);
|
||||
me = (q & sd) + (r & se);
|
||||
/* Begin computing t*[d,e]. */
|
||||
cd = (int128_t)u * d0 + (int128_t)v * e0;
|
||||
ce = (int128_t)q * d0 + (int128_t)r * e0;
|
||||
secp256k1_i128_mul(&cd, u, d0);
|
||||
secp256k1_i128_accum_mul(&cd, v, e0);
|
||||
secp256k1_i128_mul(&ce, q, d0);
|
||||
secp256k1_i128_accum_mul(&ce, r, e0);
|
||||
/* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */
|
||||
md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62;
|
||||
me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62;
|
||||
md -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&cd) + md) & M62;
|
||||
me -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&ce) + me) & M62;
|
||||
/* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
|
||||
cd += (int128_t)modinfo->modulus.v[0] * md;
|
||||
ce += (int128_t)modinfo->modulus.v[0] * me;
|
||||
secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[0], md);
|
||||
secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[0], me);
|
||||
/* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */
|
||||
VERIFY_CHECK(((int64_t)cd & M62) == 0); cd >>= 62;
|
||||
VERIFY_CHECK(((int64_t)ce & M62) == 0); ce >>= 62;
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&cd) & M62) == 0); secp256k1_i128_rshift(&cd, 62);
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&ce) & M62) == 0); secp256k1_i128_rshift(&ce, 62);
|
||||
/* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
|
||||
cd += (int128_t)u * d1 + (int128_t)v * e1;
|
||||
ce += (int128_t)q * d1 + (int128_t)r * e1;
|
||||
secp256k1_i128_accum_mul(&cd, u, d1);
|
||||
secp256k1_i128_accum_mul(&cd, v, e1);
|
||||
secp256k1_i128_accum_mul(&ce, q, d1);
|
||||
secp256k1_i128_accum_mul(&ce, r, e1);
|
||||
if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
|
||||
cd += (int128_t)modinfo->modulus.v[1] * md;
|
||||
ce += (int128_t)modinfo->modulus.v[1] * me;
|
||||
secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[1], md);
|
||||
secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[1], me);
|
||||
}
|
||||
d->v[0] = (int64_t)cd & M62; cd >>= 62;
|
||||
e->v[0] = (int64_t)ce & M62; ce >>= 62;
|
||||
d->v[0] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
|
||||
e->v[0] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
|
||||
/* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
|
||||
cd += (int128_t)u * d2 + (int128_t)v * e2;
|
||||
ce += (int128_t)q * d2 + (int128_t)r * e2;
|
||||
secp256k1_i128_accum_mul(&cd, u, d2);
|
||||
secp256k1_i128_accum_mul(&cd, v, e2);
|
||||
secp256k1_i128_accum_mul(&ce, q, d2);
|
||||
secp256k1_i128_accum_mul(&ce, r, e2);
|
||||
if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
|
||||
cd += (int128_t)modinfo->modulus.v[2] * md;
|
||||
ce += (int128_t)modinfo->modulus.v[2] * me;
|
||||
secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[2], md);
|
||||
secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[2], me);
|
||||
}
|
||||
d->v[1] = (int64_t)cd & M62; cd >>= 62;
|
||||
e->v[1] = (int64_t)ce & M62; ce >>= 62;
|
||||
d->v[1] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
|
||||
e->v[1] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
|
||||
/* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
|
||||
cd += (int128_t)u * d3 + (int128_t)v * e3;
|
||||
ce += (int128_t)q * d3 + (int128_t)r * e3;
|
||||
secp256k1_i128_accum_mul(&cd, u, d3);
|
||||
secp256k1_i128_accum_mul(&cd, v, e3);
|
||||
secp256k1_i128_accum_mul(&ce, q, d3);
|
||||
secp256k1_i128_accum_mul(&ce, r, e3);
|
||||
if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
|
||||
cd += (int128_t)modinfo->modulus.v[3] * md;
|
||||
ce += (int128_t)modinfo->modulus.v[3] * me;
|
||||
secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[3], md);
|
||||
secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[3], me);
|
||||
}
|
||||
d->v[2] = (int64_t)cd & M62; cd >>= 62;
|
||||
e->v[2] = (int64_t)ce & M62; ce >>= 62;
|
||||
d->v[2] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
|
||||
e->v[2] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
|
||||
/* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */
|
||||
cd += (int128_t)u * d4 + (int128_t)v * e4;
|
||||
ce += (int128_t)q * d4 + (int128_t)r * e4;
|
||||
cd += (int128_t)modinfo->modulus.v[4] * md;
|
||||
ce += (int128_t)modinfo->modulus.v[4] * me;
|
||||
d->v[3] = (int64_t)cd & M62; cd >>= 62;
|
||||
e->v[3] = (int64_t)ce & M62; ce >>= 62;
|
||||
secp256k1_i128_accum_mul(&cd, u, d4);
|
||||
secp256k1_i128_accum_mul(&cd, v, e4);
|
||||
secp256k1_i128_accum_mul(&ce, q, d4);
|
||||
secp256k1_i128_accum_mul(&ce, r, e4);
|
||||
secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[4], md);
|
||||
secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[4], me);
|
||||
d->v[3] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
|
||||
e->v[3] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
|
||||
/* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */
|
||||
d->v[4] = (int64_t)cd;
|
||||
e->v[4] = (int64_t)ce;
|
||||
d->v[4] = secp256k1_i128_to_i64(&cd);
|
||||
e->v[4] = secp256k1_i128_to_i64(&ce);
|
||||
#ifdef VERIFY
|
||||
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
|
||||
VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */
|
||||
|
@ -389,36 +412,46 @@ static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp
|
|||
const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4];
|
||||
const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4];
|
||||
const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
|
||||
int128_t cf, cg;
|
||||
secp256k1_int128 cf, cg;
|
||||
/* Start computing t*[f,g]. */
|
||||
cf = (int128_t)u * f0 + (int128_t)v * g0;
|
||||
cg = (int128_t)q * f0 + (int128_t)r * g0;
|
||||
secp256k1_i128_mul(&cf, u, f0);
|
||||
secp256k1_i128_accum_mul(&cf, v, g0);
|
||||
secp256k1_i128_mul(&cg, q, f0);
|
||||
secp256k1_i128_accum_mul(&cg, r, g0);
|
||||
/* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
|
||||
VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
|
||||
VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
|
||||
/* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */
|
||||
cf += (int128_t)u * f1 + (int128_t)v * g1;
|
||||
cg += (int128_t)q * f1 + (int128_t)r * g1;
|
||||
f->v[0] = (int64_t)cf & M62; cf >>= 62;
|
||||
g->v[0] = (int64_t)cg & M62; cg >>= 62;
|
||||
secp256k1_i128_accum_mul(&cf, u, f1);
|
||||
secp256k1_i128_accum_mul(&cf, v, g1);
|
||||
secp256k1_i128_accum_mul(&cg, q, f1);
|
||||
secp256k1_i128_accum_mul(&cg, r, g1);
|
||||
f->v[0] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
|
||||
g->v[0] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
|
||||
/* Compute limb 2 of t*[f,g], and store it as output limb 1. */
|
||||
cf += (int128_t)u * f2 + (int128_t)v * g2;
|
||||
cg += (int128_t)q * f2 + (int128_t)r * g2;
|
||||
f->v[1] = (int64_t)cf & M62; cf >>= 62;
|
||||
g->v[1] = (int64_t)cg & M62; cg >>= 62;
|
||||
secp256k1_i128_accum_mul(&cf, u, f2);
|
||||
secp256k1_i128_accum_mul(&cf, v, g2);
|
||||
secp256k1_i128_accum_mul(&cg, q, f2);
|
||||
secp256k1_i128_accum_mul(&cg, r, g2);
|
||||
f->v[1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
|
||||
g->v[1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
|
||||
/* Compute limb 3 of t*[f,g], and store it as output limb 2. */
|
||||
cf += (int128_t)u * f3 + (int128_t)v * g3;
|
||||
cg += (int128_t)q * f3 + (int128_t)r * g3;
|
||||
f->v[2] = (int64_t)cf & M62; cf >>= 62;
|
||||
g->v[2] = (int64_t)cg & M62; cg >>= 62;
|
||||
secp256k1_i128_accum_mul(&cf, u, f3);
|
||||
secp256k1_i128_accum_mul(&cf, v, g3);
|
||||
secp256k1_i128_accum_mul(&cg, q, f3);
|
||||
secp256k1_i128_accum_mul(&cg, r, g3);
|
||||
f->v[2] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
|
||||
g->v[2] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
|
||||
/* Compute limb 4 of t*[f,g], and store it as output limb 3. */
|
||||
cf += (int128_t)u * f4 + (int128_t)v * g4;
|
||||
cg += (int128_t)q * f4 + (int128_t)r * g4;
|
||||
f->v[3] = (int64_t)cf & M62; cf >>= 62;
|
||||
g->v[3] = (int64_t)cg & M62; cg >>= 62;
|
||||
secp256k1_i128_accum_mul(&cf, u, f4);
|
||||
secp256k1_i128_accum_mul(&cf, v, g4);
|
||||
secp256k1_i128_accum_mul(&cg, q, f4);
|
||||
secp256k1_i128_accum_mul(&cg, r, g4);
|
||||
f->v[3] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
|
||||
g->v[3] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
|
||||
/* What remains is limb 5 of t*[f,g]; store it as output limb 4. */
|
||||
f->v[4] = (int64_t)cf;
|
||||
g->v[4] = (int64_t)cg;
|
||||
f->v[4] = secp256k1_i128_to_i64(&cf);
|
||||
g->v[4] = secp256k1_i128_to_i64(&cg);
|
||||
}
|
||||
|
||||
/* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
|
||||
|
@ -431,30 +464,34 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign
|
|||
const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
|
||||
const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
|
||||
int64_t fi, gi;
|
||||
int128_t cf, cg;
|
||||
secp256k1_int128 cf, cg;
|
||||
int i;
|
||||
VERIFY_CHECK(len > 0);
|
||||
/* Start computing t*[f,g]. */
|
||||
fi = f->v[0];
|
||||
gi = g->v[0];
|
||||
cf = (int128_t)u * fi + (int128_t)v * gi;
|
||||
cg = (int128_t)q * fi + (int128_t)r * gi;
|
||||
secp256k1_i128_mul(&cf, u, fi);
|
||||
secp256k1_i128_accum_mul(&cf, v, gi);
|
||||
secp256k1_i128_mul(&cg, q, fi);
|
||||
secp256k1_i128_accum_mul(&cg, r, gi);
|
||||
/* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
|
||||
VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
|
||||
VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
|
||||
VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
|
||||
/* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
|
||||
* down by 62 bits). */
|
||||
for (i = 1; i < len; ++i) {
|
||||
fi = f->v[i];
|
||||
gi = g->v[i];
|
||||
cf += (int128_t)u * fi + (int128_t)v * gi;
|
||||
cg += (int128_t)q * fi + (int128_t)r * gi;
|
||||
f->v[i - 1] = (int64_t)cf & M62; cf >>= 62;
|
||||
g->v[i - 1] = (int64_t)cg & M62; cg >>= 62;
|
||||
secp256k1_i128_accum_mul(&cf, u, fi);
|
||||
secp256k1_i128_accum_mul(&cf, v, gi);
|
||||
secp256k1_i128_accum_mul(&cg, q, fi);
|
||||
secp256k1_i128_accum_mul(&cg, r, gi);
|
||||
f->v[i - 1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
|
||||
g->v[i - 1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
|
||||
}
|
||||
/* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
|
||||
f->v[len - 1] = (int64_t)cf;
|
||||
g->v[len - 1] = (int64_t)cg;
|
||||
f->v[len - 1] = secp256k1_i128_to_i64(&cf);
|
||||
g->v[len - 1] = secp256k1_i128_to_i64(&cg);
|
||||
}
|
||||
|
||||
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
|
||||
|
|
|
@ -69,50 +69,61 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scal
|
|||
}
|
||||
|
||||
SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) {
|
||||
uint128_t t;
|
||||
secp256k1_uint128 t;
|
||||
VERIFY_CHECK(overflow <= 1);
|
||||
t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0;
|
||||
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1;
|
||||
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2;
|
||||
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint64_t)r->d[3];
|
||||
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
|
||||
secp256k1_u128_from_u64(&t, r->d[0]);
|
||||
secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_0);
|
||||
r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[1]);
|
||||
secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_1);
|
||||
r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[2]);
|
||||
secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_2);
|
||||
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[3]);
|
||||
r->d[3] = secp256k1_u128_to_u64(&t);
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
|
||||
int overflow;
|
||||
uint128_t t = (uint128_t)a->d[0] + b->d[0];
|
||||
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)a->d[1] + b->d[1];
|
||||
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)a->d[2] + b->d[2];
|
||||
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)a->d[3] + b->d[3];
|
||||
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
overflow = t + secp256k1_scalar_check_overflow(r);
|
||||
secp256k1_uint128 t;
|
||||
secp256k1_u128_from_u64(&t, a->d[0]);
|
||||
secp256k1_u128_accum_u64(&t, b->d[0]);
|
||||
r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, a->d[1]);
|
||||
secp256k1_u128_accum_u64(&t, b->d[1]);
|
||||
r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, a->d[2]);
|
||||
secp256k1_u128_accum_u64(&t, b->d[2]);
|
||||
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, a->d[3]);
|
||||
secp256k1_u128_accum_u64(&t, b->d[3]);
|
||||
r->d[3] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
overflow = secp256k1_u128_to_u64(&t) + secp256k1_scalar_check_overflow(r);
|
||||
VERIFY_CHECK(overflow == 0 || overflow == 1);
|
||||
secp256k1_scalar_reduce(r, overflow);
|
||||
return overflow;
|
||||
}
|
||||
|
||||
static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) {
|
||||
uint128_t t;
|
||||
secp256k1_uint128 t;
|
||||
VERIFY_CHECK(bit < 256);
|
||||
bit += ((uint32_t) flag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */
|
||||
t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
|
||||
r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
|
||||
r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
|
||||
r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
|
||||
t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
|
||||
r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL;
|
||||
secp256k1_u128_from_u64(&t, r->d[0]);
|
||||
secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
|
||||
r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[1]);
|
||||
secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
|
||||
r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[2]);
|
||||
secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
|
||||
r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[3]);
|
||||
secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
|
||||
r->d[3] = secp256k1_u128_to_u64(&t);
|
||||
#ifdef VERIFY
|
||||
VERIFY_CHECK((t >> 64) == 0);
|
||||
VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0);
|
||||
VERIFY_CHECK(secp256k1_u128_hi_u64(&t) == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -141,14 +152,19 @@ SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a)
|
|||
|
||||
static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) {
|
||||
uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0);
|
||||
uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1;
|
||||
r->d[0] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(~a->d[1]) + SECP256K1_N_1;
|
||||
r->d[1] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(~a->d[2]) + SECP256K1_N_2;
|
||||
r->d[2] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(~a->d[3]) + SECP256K1_N_3;
|
||||
r->d[3] = t & nonzero;
|
||||
secp256k1_uint128 t;
|
||||
secp256k1_u128_from_u64(&t, ~a->d[0]);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_0 + 1);
|
||||
r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, ~a->d[1]);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_1);
|
||||
r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, ~a->d[2]);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_2);
|
||||
r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, ~a->d[3]);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_3);
|
||||
r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
|
||||
}
|
||||
|
||||
SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
|
||||
|
@ -172,14 +188,19 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
|
|||
* if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
|
||||
uint64_t mask = !flag - 1;
|
||||
uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
|
||||
uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
|
||||
r->d[0] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
|
||||
r->d[1] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
|
||||
r->d[2] = t & nonzero; t >>= 64;
|
||||
t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
|
||||
r->d[3] = t & nonzero;
|
||||
secp256k1_uint128 t;
|
||||
secp256k1_u128_from_u64(&t, r->d[0] ^ mask);
|
||||
secp256k1_u128_accum_u64(&t, (SECP256K1_N_0 + 1) & mask);
|
||||
r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[1] ^ mask);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_1 & mask);
|
||||
r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[2] ^ mask);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_2 & mask);
|
||||
r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64);
|
||||
secp256k1_u128_accum_u64(&t, r->d[3] ^ mask);
|
||||
secp256k1_u128_accum_u64(&t, SECP256K1_N_3 & mask);
|
||||
r->d[3] = secp256k1_u128_to_u64(&t) & nonzero;
|
||||
return 2 * (mask == 0) - 1;
|
||||
}
|
||||
|
||||
|
@ -189,9 +210,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
|
|||
#define muladd(a,b) { \
|
||||
uint64_t tl, th; \
|
||||
{ \
|
||||
uint128_t t = (uint128_t)a * b; \
|
||||
th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
|
||||
tl = t; \
|
||||
secp256k1_uint128 t; \
|
||||
secp256k1_u128_mul(&t, a, b); \
|
||||
th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \
|
||||
tl = secp256k1_u128_to_u64(&t); \
|
||||
} \
|
||||
c0 += tl; /* overflow is handled on the next line */ \
|
||||
th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
|
||||
|
@ -204,9 +226,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
|
|||
#define muladd_fast(a,b) { \
|
||||
uint64_t tl, th; \
|
||||
{ \
|
||||
uint128_t t = (uint128_t)a * b; \
|
||||
th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \
|
||||
tl = t; \
|
||||
secp256k1_uint128 t; \
|
||||
secp256k1_u128_mul(&t, a, b); \
|
||||
th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \
|
||||
tl = secp256k1_u128_to_u64(&t); \
|
||||
} \
|
||||
c0 += tl; /* overflow is handled on the next line */ \
|
||||
th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
|
||||
|
@ -484,8 +507,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
|
|||
: "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
|
||||
: "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
|
||||
#else
|
||||
uint128_t c;
|
||||
uint64_t c0, c1, c2;
|
||||
secp256k1_uint128 c128;
|
||||
uint64_t c, c0, c1, c2;
|
||||
uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
|
||||
uint64_t m0, m1, m2, m3, m4, m5;
|
||||
uint32_t m6;
|
||||
|
@ -542,14 +565,18 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
|
|||
|
||||
/* Reduce 258 bits into 256. */
|
||||
/* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
|
||||
c = p0 + (uint128_t)SECP256K1_N_C_0 * p4;
|
||||
r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
|
||||
c += p1 + (uint128_t)SECP256K1_N_C_1 * p4;
|
||||
r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
|
||||
c += p2 + (uint128_t)p4;
|
||||
r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
|
||||
c += p3;
|
||||
r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64;
|
||||
secp256k1_u128_from_u64(&c128, p0);
|
||||
secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_0, p4);
|
||||
r->d[0] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
|
||||
secp256k1_u128_accum_u64(&c128, p1);
|
||||
secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_1, p4);
|
||||
r->d[1] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
|
||||
secp256k1_u128_accum_u64(&c128, p2);
|
||||
secp256k1_u128_accum_u64(&c128, p4);
|
||||
r->d[2] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64);
|
||||
secp256k1_u128_accum_u64(&c128, p3);
|
||||
r->d[3] = secp256k1_u128_to_u64(&c128);
|
||||
c = secp256k1_u128_hi_u64(&c128);
|
||||
#endif
|
||||
|
||||
/* Final reduction of r. */
|
||||
|
|
19
src/util.h
19
src/util.h
|
@ -230,28 +230,23 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag)
|
|||
*r = (int)(r_masked | a_masked);
|
||||
}
|
||||
|
||||
/* If USE_FORCE_WIDEMUL_{INT128,INT64} is set, use that wide multiplication implementation.
|
||||
/* If USE_FORCE_WIDEMUL_{INT128, INT128_STRUCT, INT64} is set, use that wide multiplication implementation.
|
||||
* Otherwise use the presence of __SIZEOF_INT128__ to decide.
|
||||
*/
|
||||
#if defined(USE_FORCE_WIDEMUL_INT128)
|
||||
#if defined(USE_FORCE_WIDEMUL_INT128_STRUCT)
|
||||
# define SECP256K1_WIDEMUL_INT128 1
|
||||
# define SECP256K1_INT128_STRUCT 1
|
||||
#elif defined(USE_FORCE_WIDEMUL_INT128)
|
||||
# define SECP256K1_WIDEMUL_INT128 1
|
||||
# define SECP256K1_INT128_NATIVE 1
|
||||
#elif defined(USE_FORCE_WIDEMUL_INT64)
|
||||
# define SECP256K1_WIDEMUL_INT64 1
|
||||
#elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__)
|
||||
# define SECP256K1_WIDEMUL_INT128 1
|
||||
# define SECP256K1_INT128_NATIVE 1
|
||||
#else
|
||||
# define SECP256K1_WIDEMUL_INT64 1
|
||||
#endif
|
||||
#if defined(SECP256K1_WIDEMUL_INT128)
|
||||
# if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__)
|
||||
SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t;
|
||||
SECP256K1_GNUC_EXT typedef __int128 int128_t;
|
||||
#define UINT128_MAX ((uint128_t)(-1))
|
||||
#define INT128_MAX ((int128_t)(UINT128_MAX >> 1))
|
||||
#define INT128_MIN (-INT128_MAX - 1)
|
||||
/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
|
|
Loading…
Reference in New Issue