Split 5x52 inner implementations

This commit is contained in:
Pieter Wuille 2013-03-31 04:37:15 +02:00
parent 59e038dc11
commit 938d3c27aa
7 changed files with 309 additions and 129 deletions

View File

@ -1,4 +1,4 @@
FLAGS_COMMON:=-Wall
FLAGS_COMMON:=-Wall -Wno-unused
FLAGS_PROD:=-DNDEBUG -O2 -march=native
FLAGS_DEBUG:=-DVERIFY -ggdb3 -O1
FLAGS_TEST:=-DVERIFY -ggdb3 -O2 -march=native
@ -15,20 +15,22 @@ default: all
ifeq ($(CONF), openssl)
FLAGS_CONF:=-DUSE_NUM_OPENSSL -DUSE_FIELDINVERSE_BUILTIN
LIBS:=-lcrypto
SECP256K1_FILES := $(SECP256K1_FILES) num_openssl.h num_openssl.cpp
SECP256K1_FILES := $(SECP256K1_FILES) num_openssl.h num_openssl.cpp field_5x52_int128.cpp
else
ifeq ($(CONF), gmp)
FLAGS_CONF:=-DUSE_NUM_GMP
LIBS:=-lgmp
SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp
SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp field_5x52_int128.cpp
else
ifeq ($(CONF), gmpasm)
FLAGS_CONF:=-DUSE_NUM_GMP -DINLINE_ASM
LIBS:=-lgmp obj/lin64.o
SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp obj/lin64.o
LIBS:=-lgmp obj/field_5x52_asm.o
SECP256K1_FILES := $(SECP256K1_FILES) num_gmp.h num_gmp.cpp field_5x52_asm.cpp obj/field_5x52_asm.o
obj/lin64.o: lin64.asm
yasm -f elf64 -o obj/lin64.o lin64.asm
obj/field_5x52_asm.o: field_5x52_asm.asm
yasm -f elf64 -o obj/field_5x52_asm.o field_5x52_asm.asm
else
SECP256K1_FILES := $(SECP256K1_FILES) field_5x52_int128.cpp
endif
endif
endif

View File

@ -4,7 +4,9 @@
#include "field.h"
#ifdef INLINE_ASM
#include "lin64.h"
#include "field_5x52_asm.cpp"
#else
#include "field_5x52_int128.cpp"
#endif
extern "C" {
@ -165,119 +167,19 @@ void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s
#ifdef VERIFY
assert(a->magnitude <= 8);
assert(b->magnitude <= 8);
#endif
#ifdef INLINE_ASM
ExSetMult((uint64_t*)a->n, (uint64_t*)b->n, (uint64_t*)r->n);
#else
unsigned __int128 c = (__int128)a->n[0] * b->n[0];
uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
c = c + (__int128)a->n[0] * b->n[1] +
(__int128)a->n[1] * b->n[0];
uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
c = c + (__int128)a->n[0] * b->n[2] +
(__int128)a->n[1] * b->n[1] +
(__int128)a->n[2] * b->n[0];
uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
c = c + (__int128)a->n[0] * b->n[3] +
(__int128)a->n[1] * b->n[2] +
(__int128)a->n[2] * b->n[1] +
(__int128)a->n[3] * b->n[0];
uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
c = c + (__int128)a->n[0] * b->n[4] +
(__int128)a->n[1] * b->n[3] +
(__int128)a->n[2] * b->n[2] +
(__int128)a->n[3] * b->n[1] +
(__int128)a->n[4] * b->n[0];
uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
c = c + (__int128)a->n[1] * b->n[4] +
(__int128)a->n[2] * b->n[3] +
(__int128)a->n[3] * b->n[2] +
(__int128)a->n[4] * b->n[1];
uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
c = c + (__int128)a->n[2] * b->n[4] +
(__int128)a->n[3] * b->n[3] +
(__int128)a->n[4] * b->n[2];
uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
c = c + (__int128)a->n[3] * b->n[4] +
(__int128)a->n[4] * b->n[3];
uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
c = c + (__int128)a->n[4] * b->n[4];
uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
uint64_t t9 = c;
c = t0 + (__int128)t5 * 0x1000003D10ULL;
t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
r->n[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
r->n[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
r->n[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
c = t0 + (__int128)c * 0x1000003D1ULL;
r->n[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
r->n[1] = t1 + c;
#endif
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 0;
#endif
secp256k1_fe_mul_inner(a->n, b->n, r->n);
}
void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
#ifdef VERIFY
assert(a->magnitude <= 8);
#endif
#ifdef INLINE_ASM
ExSetSquare((uint64_t*)&a->n, (uint64_t*)&r->n);
#else
__int128 c = (__int128)a->n[0] * a->n[0];
uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
c = c + (__int128)(a->n[0]*2) * a->n[1];
uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
c = c + (__int128)(a->n[0]*2) * a->n[2] +
(__int128)a->n[1] * a->n[1];
uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
c = c + (__int128)(a->n[0]*2) * a->n[3] +
(__int128)(a->n[1]*2) * a->n[2];
uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
c = c + (__int128)(a->n[0]*2) * a->n[4] +
(__int128)(a->n[1]*2) * a->n[3] +
(__int128)a->n[2] * a->n[2];
uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
c = c + (__int128)(a->n[1]*2) * a->n[4] +
(__int128)(a->n[2]*2) * a->n[3];
uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
c = c + (__int128)(a->n[2]*2) * a->n[4] +
(__int128)a->n[3] * a->n[3];
uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
c = c + (__int128)(a->n[3]*2) * a->n[4];
uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
c = c + (__int128)a->n[4] * a->n[4];
uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
uint64_t t9 = c;
c = t0 + (__int128)t5 * 0x1000003D10ULL;
t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
r->n[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
r->n[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
r->n[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110
c = t0 + (__int128)c * 0x1000003D1ULL;
r->n[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
r->n[1] = t1 + c;
#endif
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 0;
#endif
secp256k1_fe_sqr_inner(a->n, r->n);
}
}

View File

@ -1,10 +1,10 @@
;; Added by Diederik Huys, March 2013
;;
;; Provided public procedures:
;; ExSetMult
;; ExSetSquare
;; secp256k1_fe_mul_inner
;; secp256k1_fe_sqr_inner
;;
;; Needed tools: YASM (http://www.japheth.de/JWasm.html)
;; Needed tools: YASM (http://yasm.tortall.net)
;;
;;
@ -12,9 +12,9 @@
;; Procedure secp256k1_fe_mul_inner (formerly ExSetMult)
;; Register Layout:
;; INPUT: rdi = a.n
;; rsi = b.n
;; rdx = this.a
;; INPUT: rdi = a->n
;; rsi = b->n
;; rdx = r->n
;;
;; INTERNAL: rdx:rax = multiplication accumulator
;; r9:r8 = c
@ -26,9 +26,9 @@
;; rbp = Constant 0FFFFFFFFFFFFFh / t8
;; rsi = b.n / b.n[4] / t9
GLOBAL ExSetMult
GLOBAL secp256k1_fe_mul_inner
ALIGN 32
ExSetMult:
secp256k1_fe_mul_inner:
push rbp
push rbx
push r12
@ -315,9 +315,9 @@ common_exit_norm:
;; rcx = a.n[3] / t7
;; rbp = 0FFFFFFFFFFFFFh / t8
;; rsi = a.n[4] / a.n[4] /t9
GLOBAL ExSetSquare
GLOBAL secp256k1_fe_sqr_inner
ALIGN 32
ExSetSquare:
secp256k1_fe_sqr_inner:
push rbp
push rbx
push r12

2
field_5x52_asm.cpp Normal file
View File

@ -0,0 +1,2 @@
// Declarations for the inner field routines that are provided by the assembly
// object (field_5x52_asm.asm exports both symbols as GLOBAL).
// The sysv_abi attribute pins the calling convention so the arguments arrive in
// the registers the assembly documents (rdi, rsi, rdx for the multiply).
// NOTE(review): uint64_t is not declared here; this file relies on the
// including translation unit having pulled in a header that defines it.

// r = a * b on limb arrays; a and b are only read, r is written.
extern "C" void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r);
// r = a * a on limb arrays; a is only read, r is written.
extern "C" void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r);

100
field_5x52_int128.cpp Normal file
View File

@ -0,0 +1,100 @@
#include "field.h"
extern "C" {
// Field multiplication core for the 5x52 limb representation (portable
// __int128 path).
//
// a, b: five-limb inputs, least significant limb first; each pointer must
//       reference at least five uint64_t values.
// r:    receives the five-limb result. Every limb of a and b is loaded before
//       the first store to r, so r may alias either input.
//
// Phase 1 accumulates the schoolbook product column by column, peeling off
// 52 bits per column. Phase 2 folds columns 5..9 back onto columns 0..4 by
// multiplying with 0x1000003D10; whatever remains above bit 48 of limb 4 is
// folded onto limb 0 with 0x1000003D1 (presumably 2^260 and 2^256 reduced
// modulo the field prime -- confirm against field.h).
// The "max" remarks are the carry bounds recorded by the original author.
void static inline secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r) {
    const uint64_t kLimbMask = 0xFFFFFFFFFFFFFULL;  // low 52 bits
    const uint64_t kTopMask  = 0x0FFFFFFFFFFFFULL;  // low 48 bits
    const uint64_t kFoldLimb = 0x1000003D10ULL;     // multiplier for columns 5..9
    const uint64_t kFoldTop  = 0x1000003D1ULL;      // multiplier for the final wrap

    const uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4];
    const uint64_t b0 = b[0], b1 = b[1], b2 = b[2], b3 = b[3], b4 = b[4];

    // ---- Phase 1: product columns 0..9 ----
    unsigned __int128 acc = (__int128)a0 * b0;
    uint64_t col0 = acc & kLimbMask; acc >>= 52;   // carry max 0FFFFFFFFFFFFFE0

    acc = acc + (__int128)a0 * b1
              + (__int128)a1 * b0;
    uint64_t col1 = acc & kLimbMask; acc >>= 52;   // carry max 20000000000000BF

    acc = acc + (__int128)a0 * b2
              + (__int128)a1 * b1
              + (__int128)a2 * b0;
    uint64_t col2 = acc & kLimbMask; acc >>= 52;   // carry max 30000000000001A0

    acc = acc + (__int128)a0 * b3
              + (__int128)a1 * b2
              + (__int128)a2 * b1
              + (__int128)a3 * b0;
    uint64_t col3 = acc & kLimbMask; acc >>= 52;   // carry max 4000000000000280

    acc = acc + (__int128)a0 * b4
              + (__int128)a1 * b3
              + (__int128)a2 * b2
              + (__int128)a3 * b1
              + (__int128)a4 * b0;
    uint64_t col4 = acc & kLimbMask; acc >>= 52;   // carry max 320000000000037E

    acc = acc + (__int128)a1 * b4
              + (__int128)a2 * b3
              + (__int128)a3 * b2
              + (__int128)a4 * b1;
    uint64_t col5 = acc & kLimbMask; acc >>= 52;   // carry max 22000000000002BE

    acc = acc + (__int128)a2 * b4
              + (__int128)a3 * b3
              + (__int128)a4 * b2;
    uint64_t col6 = acc & kLimbMask; acc >>= 52;   // carry max 12000000000001DE

    acc = acc + (__int128)a3 * b4
              + (__int128)a4 * b3;
    uint64_t col7 = acc & kLimbMask; acc >>= 52;   // carry max 02000000000000FE

    acc = acc + (__int128)a4 * b4;
    uint64_t col8 = acc & kLimbMask; acc >>= 52;   // carry max 001000000000001E
    uint64_t col9 = acc;

    // ---- Phase 2: fold columns 5..9 onto 0..4 ----
    acc = col0 + (__int128)col5 * kFoldLimb;
    col0 = acc & kLimbMask; acc >>= 52;            // carry max 0000001000003D10

    acc = acc + col1 + (__int128)col6 * kFoldLimb;
    col1 = acc & kLimbMask; acc >>= 52;            // carry max 0000001000003D10

    acc = acc + col2 + (__int128)col7 * kFoldLimb;
    r[2] = acc & kLimbMask; acc >>= 52;            // carry max 0000001000003D10

    acc = acc + col3 + (__int128)col8 * kFoldLimb;
    r[3] = acc & kLimbMask; acc >>= 52;            // carry max 0000001000003D10

    acc = acc + col4 + (__int128)col9 * kFoldLimb;
    r[4] = acc & kTopMask; acc >>= 48;             // carry max 000001000003D110

    // Wrap the bits above limb 4 around to limb 0; the last carry lands in
    // limb 1 without a further mask.
    acc = col0 + (__int128)acc * kFoldTop;
    r[0] = acc & kLimbMask; acc >>= 52;            // carry max 1000008
    r[1] = col1 + acc;
}
// Field squaring core for the 5x52 limb representation (portable __int128
// path).
//
// a: five-limb input, least significant limb first (at least five uint64_t).
// r: receives the five-limb result. All of a is consumed by the column phase
//    before the first store to r, so r may alias a.
//
// Same two-phase layout as secp256k1_fe_mul_inner, except that symmetric
// cross terms a[i]*a[j] (i != j) are computed once with a doubled left operand.
// The doubling (a[i]*2) happens in 64 bits, so limbs must stay below 2^63.
//
// The accumulator is unsigned, matching secp256k1_fe_mul_inner: carries are
// extracted with a logical right shift rather than a shift of a signed value.
// Results are unchanged for in-range limbs.
// The "max" remarks are the carry bounds recorded by the original author.
void static inline secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r) {
    // ---- Phase 1: product columns 0..9 ----
    unsigned __int128 c = (__int128)a[0] * a[0];
    uint64_t t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0FFFFFFFFFFFFFE0
    c = c + (__int128)(a[0]*2) * a[1];
    uint64_t t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 20000000000000BF
    c = c + (__int128)(a[0]*2) * a[2] +
            (__int128)a[1] * a[1];
    uint64_t t2 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 30000000000001A0
    c = c + (__int128)(a[0]*2) * a[3] +
            (__int128)(a[1]*2) * a[2];
    uint64_t t3 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 4000000000000280
    c = c + (__int128)(a[0]*2) * a[4] +
            (__int128)(a[1]*2) * a[3] +
            (__int128)a[2] * a[2];
    uint64_t t4 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 320000000000037E
    c = c + (__int128)(a[1]*2) * a[4] +
            (__int128)(a[2]*2) * a[3];
    uint64_t t5 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 22000000000002BE
    c = c + (__int128)(a[2]*2) * a[4] +
            (__int128)a[3] * a[3];
    uint64_t t6 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 12000000000001DE
    c = c + (__int128)(a[3]*2) * a[4];
    uint64_t t7 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 02000000000000FE
    c = c + (__int128)a[4] * a[4];
    uint64_t t8 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 001000000000001E
    uint64_t t9 = c;

    // ---- Phase 2: fold columns 5..9 onto 0..4 ----
    c = t0 + (__int128)t5 * 0x1000003D10ULL;
    t0 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
    c = c + t1 + (__int128)t6 * 0x1000003D10ULL;
    t1 = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
    c = c + t2 + (__int128)t7 * 0x1000003D10ULL;
    r[2] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
    c = c + t3 + (__int128)t8 * 0x1000003D10ULL;
    r[3] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 0000001000003D10
    c = c + t4 + (__int128)t9 * 0x1000003D10ULL;
    r[4] = c & 0x0FFFFFFFFFFFFULL; c = c >> 48; // c max 000001000003D110

    // Wrap the bits above limb 4 around to limb 0; the final carry is added to
    // limb 1 without a further mask.
    c = t0 + (__int128)c * 0x1000003D1ULL;
    r[0] = c & 0xFFFFFFFFFFFFFULL; c = c >> 52; // c max 1000008
    r[1] = t1 + c;
}
}

View File

@ -1,9 +0,0 @@
#ifndef _SECP256K1_LIN64
#define _SECP256K1_LIN64
#ifdef INLINE_ASM
extern "C" void __attribute__ ((sysv_abi)) ExSetMult(uint64_t *, uint64_t *, uint64_t *);
extern "C" void __attribute__ ((sysv_abi)) ExSetSquare(uint64_t *, uint64_t *);
#endif
#endif

183
num_openssl_.cpp Normal file
View File

@ -0,0 +1,183 @@
#include <assert.h>
#include <string>
#include <string.h>
#include <openssl/bn.h>
#include <openssl/crypto.h>
#include "num_openssl.h"
namespace secp256k1 {
class Context {
private:
BN_CTX *ctx;
operator BN_CTX*() {
return ctx;
}
friend class Number;
public:
Context() {
ctx = BN_CTX_new();
}
~Context() {
BN_CTX_free(ctx);
}
};
// Read-only conversion to the underlying OpenSSL big number: returns the
// address of `b` (presumably the BIGNUM member declared in num_openssl.h), so
// a const Number can be passed wherever a const BIGNUM* is expected.
Number::operator const BIGNUM*() const {
return &b;
}
// Mutable conversion to the underlying OpenSSL big number: returns the address
// of `b`, which is what lets the methods below pass *this as a BN_* output
// argument.
Number::operator BIGNUM*() {
return &b;
}
// Default constructor: runs BN_init on the wrapped BIGNUM (reached through the
// BIGNUM* conversion of *this).
Number::Number() {
BN_init(*this);
}
// Destructor: hands the wrapped BIGNUM to BN_free.
Number::~Number() {
BN_free(*this);
}
// Constructs a Number from a byte buffer: initializes the BIGNUM, then
// delegates to SetBytes(bin, len).
//   bin: source bytes
//   len: number of bytes to read from bin
Number::Number(const unsigned char *bin, int len) {
BN_init(*this);
SetBytes(bin,len);
}
// Overwrites this number with a copy of x via BN_copy.
// The BN_copy return value is not checked.
void Number::SetNumber(const Number &x) {
BN_copy(*this, x);
}
// Copy constructor: initializes a fresh BIGNUM and copies x into it, so the two
// Number objects do not share the same BIGNUM object.
// The BN_copy return value is not checked.
Number::Number(const Number &x) {
BN_init(*this);
BN_copy(*this, x);
}
// Copy assignment: copies x into the already-initialized BIGNUM with BN_copy
// and returns *this. The BN_copy return value is not checked.
Number &Number::operator=(const Number &x) {
BN_copy(*this, x);
return *this;
}
// Loads this number from `len` bytes starting at `bin` using BN_bin2bn, writing
// into the existing BIGNUM. The BN_bin2bn return value is not checked.
void Number::SetBytes(const unsigned char *bin, int len) {
BN_bin2bn(bin, len, *this);
}
// Serializes this number into the fixed-width buffer bin[0..len), right-aligned
// and zero-padded on the left.
//   bin: destination buffer of at least `len` bytes
//   len: width of the output in bytes
//
// Precondition: the number fits in `len` bytes (checked by assert in debug
// builds). When asserts are compiled out (NDEBUG) and the number is too large,
// the original code computed a destination pointer *before* `bin` and wrote
// out of bounds. That case now stays inside the buffer by emitting only the
// `len` least significant bytes.
void Number::GetBytes(unsigned char *bin, int len) {
    const int size = BN_num_bytes(*this);
    assert(size <= len);
    memset(bin, 0, len);
    if (size <= len) {
        // Normal path: left padding is already zeroed above.
        BN_bn2bin(*this, bin + len - size);
        return;
    }
    // Oversized value in a release build: serialize into scratch storage and
    // copy the trailing (least significant) `len` bytes.
    std::string scratch(size, '\0');
    BN_bn2bin(*this, reinterpret_cast<unsigned char *>(&scratch[0]));
    memcpy(bin, scratch.data() + (size - len), len);
}
void Number::SetInt(int x) {
if (x >= 0) {
BN_set_word(*this, x);
} else {
BN_set_word(*this, -x);
BN_set_negative(*this, 1);
}
}
// Sets this number to the result of BN_mod_inverse(x, m).
// A temporary Context supplies the BN_CTX scratch space for the call.
// The BN_mod_inverse return value is not checked, so a failed inversion is not
// reported to the caller.
void Number::SetModInverse(const Number &x, const Number &m) {
Context ctx;
BN_mod_inverse(*this, x, m, ctx);
}
// Sets this number to the result of BN_mod_mul(a, b, m), using a temporary
// Context for scratch space. The return value is not checked.
void Number::SetModMul(const Number &a, const Number &b, const Number &m) {
Context ctx;
BN_mod_mul(*this, a, b, m, ctx);
}
// Sets this number to a1 + a2 via BN_add. The return value is not checked.
void Number::SetAdd(const Number &a1, const Number &a2) {
BN_add(*this, a1, a2);
}
// Sets this number to a1 - a2 via BN_sub. The return value is not checked.
void Number::SetSub(const Number &a1, const Number &a2) {
BN_sub(*this, a1, a2);
}
// Sets this number to a1 * a2 via BN_mul, using a temporary Context for
// scratch space. The return value is not checked.
void Number::SetMult(const Number &a1, const Number &a2) {
Context ctx;
BN_mul(*this, a1, a2, ctx);
}
// Sets this number to the quotient of a1 / a2 via BN_div. NULL is passed for
// the remainder output, so the remainder is not returned.
// A temporary Context supplies scratch space; the return value is not checked.
void Number::SetDiv(const Number &a1, const Number &a2) {
Context ctx;
BN_div(*this, NULL, a1, a2, ctx);
}
// Sets this number to a reduced modulo m via BN_nnmod, using a temporary
// Context for scratch space. The return value is not checked.
void Number::SetMod(const Number &a, const Number &m) {
Context ctx;
BN_nnmod(*this, a, m, ctx);
}
// Three-way comparison of this number against a; returns the BN_cmp result
// unchanged.
int Number::Compare(const Number &a) const {
return BN_cmp(*this, a);
}
// Returns the bit length of this number as reported by BN_num_bits.
int Number::GetBits() const {
return BN_num_bits(*this);
}
int Number::ShiftLowBits(int bits) {
BIGNUM *bn = *this;
int ret = BN_is_zero(bn) ? 0 : bn->d[0] & ((1 << bits) - 1);
BN_rshift(*this, *this, bits);
return ret;
}
// True when BN_is_zero reports the number as zero. The explicit cast selects
// the const BIGNUM* conversion of *this.
bool Number::IsZero() const {
return BN_is_zero((const BIGNUM*)*this);
}
// True when BN_is_odd reports the number as odd.
bool Number::IsOdd() const {
return BN_is_odd((const BIGNUM*)*this);
}
// True when bit number `pos` is set according to BN_is_bit_set.
bool Number::CheckBit(int pos) const {
return BN_is_bit_set((const BIGNUM*)*this, pos);
}
// True when BN_is_negative reports the number as negative.
bool Number::IsNeg() const {
return BN_is_negative((const BIGNUM*)*this);
}
// Flips the sign: passes the logical inverse of IsNeg() to BN_set_negative.
void Number::Negate() {
BN_set_negative(*this, !IsNeg());
}
// Shifts this number right by one bit in place via BN_rshift1.
// The return value is not checked.
void Number::Shift1() {
BN_rshift1(*this,*this);
}
// Adds 1 to this number in place via BN_add_word.
// The return value is not checked.
void Number::Inc() {
BN_add_word(*this,1);
}
// Parses the hexadecimal string `str` into this number with BN_hex2bn.
// BN_hex2bn takes a BIGNUM**, so a local pointer to this object's BIGNUM is
// passed by address.
// NOTE(review): presumably BN_hex2bn fills the existing BIGNUM when the pointer
// is non-NULL rather than allocating a new one -- confirm against OpenSSL docs.
// The return value (parse success) is not checked.
void Number::SetHex(const std::string &str) {
BIGNUM *bn = *this;
BN_hex2bn(&bn, str.c_str());
}
// Sets this number to a value produced by BN_pseudo_rand_range with `max` as
// the range argument. The return value is not checked.
void Number::SetPseudoRand(const Number &max) {
BN_pseudo_rand_range(*this, max);
}
// Splits this number at bit position `bits` without modifying it:
//   low  receives a copy of this number passed through BN_mask_bits(low, bits)
//   high receives this number shifted right by `bits` (BN_rshift)
// None of the BN_* return values are checked.
void Number::SplitInto(int bits, Number &low, Number &high) const {
BN_copy(low, *this);
BN_mask_bits(low, bits);
BN_rshift(high, *this, bits);
}
// Returns the hexadecimal text produced by BN_bn2hex for this number.
// The OpenSSL-allocated buffer is copied into a std::string and released with
// OPENSSL_free before returning.
// If BN_bn2hex fails and returns NULL, an empty string is returned instead of
// constructing std::string from a null pointer (undefined behavior).
std::string Number::ToString() const {
    char *hex = BN_bn2hex(*this);
    if (hex == NULL) {
        return std::string();
    }
    std::string ret(hex);
    OPENSSL_free(hex);
    return ret;
}
}