Merge pull request #12 from PSYCPU/master

5x64 assembly bugfix
This commit is contained in:
Pieter Wuille 2013-06-17 12:10:37 -07:00
commit 634bc1820c
4 changed files with 14 additions and 9 deletions

View File

@@ -1,10 +1,10 @@
#!/bin/bash #!/bin/bash
echo "Benchmark Results" >output.txt echo "Benchmark Results" >output.txt
for j in no-yasm yasm; do for j in yasm; do
echo "5x64 $j:" >>output.txt echo "5x64 $j:" >>output.txt
for i in O0 O1 O2 O3; do for i in O0 O1 O2 O3; do
make clean make clean
./configure --$j ./configure --use-5x64 --$j
echo "OPTLEVEL=$i" >>config.mk echo "OPTLEVEL=$i" >>config.mk
make bench make bench
echo "OPTLEVEL=$i" >>output.txt echo "OPTLEVEL=$i" >>output.txt

1
configure vendored
View File

@@ -172,3 +172,4 @@ echo "LDFLAGS_EXTRA=$LDFLAGS_EXTRA" >> config.mk
echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk
echo "USE_ASM=$USE_ASM" >>config.mk echo "USE_ASM=$USE_ASM" >>config.mk
echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk
echo "OPTLEVEL=O2" >>config.mk

View File

@@ -82,7 +82,7 @@ secp256k1_fe_mul_inner:
add r9,rax add r9,rax
adc r10,rdx adc r10,rdx
adc r8,0 adc r8,0
mov rbp,r9 ; retire r[2] mov rbp,r9 ; retire r[2]
xor r9,r9 xor r9,r9
;; c+=a.n[0 1 2 3] * b.n[3 2 1 0] ;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
@@ -153,7 +153,7 @@ secp256k1_fe_mul_inner:
mul r14 mul r14
add r10,rax add r10,rax
adc r8,rdx adc r8,rdx
adc r9,0
mov r14,r10 mov r14,r10
mov r15,r8 mov r15,r8
@@ -216,7 +216,7 @@ secp256k1_fe_sqr_inner:
push r13 push r13
push r14 push r14
push r15 push r15
push rdx push rsi
mov r11,[rdi+8*0] ; preload a.n[0] mov r11,[rdi+8*0] ; preload a.n[0]
@@ -237,7 +237,7 @@ secp256k1_fe_sqr_inner:
adc rdx,rdx adc rdx,rdx
adc r10,0 adc r10,0
add r8,rax ; still the same :-) add r8,rax ; still the same :-)
adc r9,rdx ; adc r9,rdx
adc r10,0 ; mmm... adc r10,0 ; mmm...
mov rcx,r8 ; retire r[1] mov rcx,r8 ; retire r[1]
@@ -315,14 +315,13 @@ secp256k1_fe_sqr_inner:
adc r8,0 adc r8,0
mov r13,r9 mov r13,r9
xor r13,r13 xor r9,r9
;; c+=a.n[3]² ;; c+=a.n[3]²
mov rax,r14 mov rax,r14
mul rax mul rax
add r10,rax add r10,rax
adc r8,rdx adc r8,rdx
adc r9,0
mov r14,r10 mov r14,r10
mov r15,r8 mov r15,r8

View File

@@ -325,6 +325,10 @@ void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *ac, const
void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) { void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
secp256k1_fe_t a = *ac; secp256k1_fe_t a = *ac;
secp256k1_fe_reduce(&a); secp256k1_fe_reduce(&a);
#ifdef USE_FIELD_5X64_ASM
secp256k1_fe_sqr_inner((&a)->n,r->n);
#else
uint64_t c1,c2,c3; uint64_t c1,c2,c3;
c3=0; c3=0;
mul_c2(a.n[0], a.n[0], c1, c2); mul_c2(a.n[0], a.n[0], c1, c2);
@@ -355,6 +359,7 @@ void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64); c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
r->n[3] = c; r->n[3] = c;
r->n[4] = c >> 64; r->n[4] = c >> 64;
#endif
#ifdef VERIFY #ifdef VERIFY
r->normalized = 0; r->normalized = 0;