1st assembly version of field 5x64 code - bugfix
This commit is contained in:
parent
dd804adeff
commit
355b4192cf
|
@ -1,10 +1,10 @@
|
|||
#!/bin/bash
|
||||
echo "Benchmark Results" >output.txt
|
||||
for j in no-yasm yasm; do
|
||||
for j in yasm; do
|
||||
echo "5x64 $j:" >>output.txt
|
||||
for i in O0 O1 O2 O3; do
|
||||
make clean
|
||||
./configure --$j
|
||||
./configure --use-5x64 --$j
|
||||
echo "OPTLEVEL=$i" >>config.mk
|
||||
make bench
|
||||
echo "OPTLEVEL=$i" >>output.txt
|
||||
|
|
|
@ -172,3 +172,4 @@ echo "LDFLAGS_EXTRA=$LDFLAGS_EXTRA" >> config.mk
|
|||
echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk
|
||||
echo "USE_ASM=$USE_ASM" >>config.mk
|
||||
echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk
|
||||
echo "OPTLEVEL=O2" >>config.mk
|
||||
|
|
|
@ -82,7 +82,7 @@ secp256k1_fe_mul_inner:
|
|||
add r9,rax
|
||||
adc r10,rdx
|
||||
adc r8,0
|
||||
mov rbp,r9 ; retire r[2]
|
||||
mov rbp,r9 ; retire r[2]
|
||||
xor r9,r9
|
||||
|
||||
;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
|
||||
|
@ -153,7 +153,7 @@ secp256k1_fe_mul_inner:
|
|||
mul r14
|
||||
add r10,rax
|
||||
adc r8,rdx
|
||||
adc r9,0
|
||||
|
||||
mov r14,r10
|
||||
mov r15,r8
|
||||
|
||||
|
@ -216,7 +216,7 @@ secp256k1_fe_sqr_inner:
|
|||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdx
|
||||
push rsi
|
||||
|
||||
mov r11,[rdi+8*0] ; preload a.n[0]
|
||||
|
||||
|
@ -237,7 +237,7 @@ secp256k1_fe_sqr_inner:
|
|||
adc rdx,rdx
|
||||
adc r10,0
|
||||
add r8,rax ; still the same :-)
|
||||
adc r9,rdx ;
|
||||
adc r9,rdx
|
||||
adc r10,0 ; mmm...
|
||||
|
||||
mov rcx,r8 ; retire r[1]
|
||||
|
@ -315,15 +315,14 @@ secp256k1_fe_sqr_inner:
|
|||
adc r8,0
|
||||
|
||||
mov r13,r9
|
||||
xor r13,r13
|
||||
xor r9,r9
|
||||
|
||||
;; c+=a.n[3]²
|
||||
mov rax,r14
|
||||
mul rax
|
||||
add r10,rax
|
||||
adc r8,rdx
|
||||
adc r9,0
|
||||
|
||||
|
||||
mov r14,r10
|
||||
mov r15,r8
|
||||
|
||||
|
|
|
@ -325,6 +325,10 @@ void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *ac, const
|
|||
void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
|
||||
secp256k1_fe_t a = *ac;
|
||||
secp256k1_fe_reduce(&a);
|
||||
|
||||
#ifdef USE_FIELD_5X64_ASM
|
||||
secp256k1_fe_sqr_inner((&a)->n,r->n);
|
||||
#else
|
||||
uint64_t c1,c2,c3;
|
||||
c3=0;
|
||||
mul_c2(a.n[0], a.n[0], c1, c2);
|
||||
|
@ -355,6 +359,7 @@ void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
|
|||
c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
|
||||
r->n[3] = c;
|
||||
r->n[4] = c >> 64;
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY
|
||||
r->normalized = 0;
|
||||
|
|
Loading…
Reference in New Issue