diff --git a/bench_all b/bench_all index 89c747d..d9f411e 100644 --- a/bench_all +++ b/bench_all @@ -1,10 +1,10 @@ #!/bin/bash echo "Benchmark Results" >output.txt -for j in no-yasm yasm; do +for j in yasm; do echo "5x64 $j:" >>output.txt for i in O0 O1 O2 O3; do make clean - ./configure --$j + ./configure --use-5x64 --$j echo "OPTLEVEL=$i" >>config.mk make bench echo "OPTLEVEL=$i" >>output.txt diff --git a/configure b/configure index d68ae5b..cb69239 100755 --- a/configure +++ b/configure @@ -172,3 +172,4 @@ echo "LDFLAGS_EXTRA=$LDFLAGS_EXTRA" >> config.mk echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk echo "USE_ASM=$USE_ASM" >>config.mk echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk +echo "OPTLEVEL=O2" >>config.mk diff --git a/src/field_5x64_asm.asm b/src/field_5x64_asm.asm index 14b0a52..d449185 100644 --- a/src/field_5x64_asm.asm +++ b/src/field_5x64_asm.asm @@ -82,7 +82,7 @@ secp256k1_fe_mul_inner: add r9,rax adc r10,rdx adc r8,0 - mov rbp,r9 ; retire r[2] + mov rbp,r9 ; retire r[2] xor r9,r9 ;; c+=a.n[0 1 2 3] * b.n[3 2 1 0] @@ -153,7 +153,7 @@ secp256k1_fe_mul_inner: mul r14 add r10,rax adc r8,rdx - adc r9,0 + mov r14,r10 mov r15,r8 @@ -216,7 +216,7 @@ secp256k1_fe_sqr_inner: push r13 push r14 push r15 - push rdx + push rsi mov r11,[rdi+8*0] ; preload a.n[0] @@ -237,7 +237,7 @@ secp256k1_fe_sqr_inner: adc rdx,rdx adc r10,0 add r8,rax ; still the same :-) - adc r9,rdx ; + adc r9,rdx adc r10,0 ; mmm... mov rcx,r8 ; retire r[1] @@ -315,15 +315,14 @@ secp256k1_fe_sqr_inner: adc r8,0 mov r13,r9 - xor r13,r13 + xor r9,r9 ;; c+=a.n[3]² mov rax,r14 mul rax add r10,rax adc r8,rdx - adc r9,0 - + mov r14,r10 mov r15,r8 diff --git a/src/impl/field_5x64.h b/src/impl/field_5x64.h index 3c927fc..1e645cd 100644 --- a/src/impl/field_5x64.h +++ b/src/impl/field_5x64.h @@ -325,6 +325,10 @@ void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *ac, const void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) { secp256k1_fe_t a = *ac; secp256k1_fe_reduce(&a); + +#ifdef USE_FIELD_5X64_ASM + secp256k1_fe_sqr_inner((&a)->n,r->n); +#else uint64_t c1,c2,c3; c3=0; mul_c2(a.n[0], a.n[0], c1, c2); @@ -355,6 +359,7 @@ void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) { c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64); r->n[3] = c; r->n[4] = c >> 64; +#endif #ifdef VERIFY r->normalized = 0;