Small fix for ExSetSquare
This commit is contained in:
parent
af073e29e4
commit
1d8e4308dc
83
lin64.asm
83
lin64.asm
|
@ -292,25 +292,25 @@ ExSetMult ENDP
|
||||||
;; rsi = a.n[4] / t9
|
;; rsi = a.n[4] / t9
|
||||||
ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
push rsi
|
push rsi
|
||||||
mov rsi,0FFFFFFFFFFFFFh
|
mov rbp,0FFFFFFFFFFFFFh
|
||||||
|
|
||||||
;; c=a.n[0] * a.n[0]
|
;; c=a.n[0] * a.n[0]
|
||||||
mov r15,[rdi+0*8]
|
mov r14,[rdi+0*8] ; r14=a.n[0]
|
||||||
mov r10,rsi ; modulus
|
mov r10,rbp ; modulus
|
||||||
mov rax,r15
|
mov rax,r14
|
||||||
mul rax ; rsi=b.n[0]
|
mul rax
|
||||||
mov rbx,[rdi+1*8] ; a.n[1]
|
mov r15,[rdi+1*8] ; a.n[1]
|
||||||
add r15,r15 ; r15=2*a.n[0]
|
add r14,r14 ; r14=2*a.n[0]
|
||||||
mov r8,rax
|
mov r8,rax
|
||||||
and r10,rax ; only need lower qword
|
and r10,rax ; only need lower qword
|
||||||
shrd r8,rdx,52
|
shrd r8,rdx,52
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[0] * a.n[1]
|
;; c+=2*a.n[0] * a.n[1]
|
||||||
mov rax,r15
|
mov rax,r14 ; r14=2*a.n[0]
|
||||||
mul rbx
|
mul r15
|
||||||
mov rcx,[rdi+2*8] ; rcx=a.n[2]
|
mov rbx,[rdi+2*8] ; rbx=a.n[2]
|
||||||
mov r11,rsi ; modulus
|
mov r11,rbp ; modulus
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and r11,r8
|
and r11,r8
|
||||||
|
@ -318,33 +318,32 @@ ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
|
;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
|
||||||
|
mov rax,r14
|
||||||
|
mul rbx
|
||||||
|
add r8,rax
|
||||||
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,r15
|
mov rax,r15
|
||||||
mul rcx
|
mov r12,rbp ; modulus
|
||||||
add r8,rax
|
|
||||||
adc r9,rdx
|
|
||||||
|
|
||||||
mov rax,rbx
|
|
||||||
mov r12,rsi ; modulus
|
|
||||||
mul rax
|
mul rax
|
||||||
mov rbp,[rdi+3*8] ; rbp=a.n[3]
|
mov rcx,[rdi+3*8] ; rcx=a.n[3]
|
||||||
add rbx,rbx ; rbx=a.n[1]*2
|
add r15,r15 ; r15=a.n[1]*2
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
and r12,r8 ; only need lower dword
|
and r12,r8 ; only need lower dword
|
||||||
shrd r8,r9,52
|
shrd r8,r9,52
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
|
;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
|
||||||
mov rax,r15
|
mov rax,r14
|
||||||
mul rbp
|
mul rcx
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,rbx ; rax=2*a.n[1]
|
mov rax,r15 ; rax=2*a.n[1]
|
||||||
mov r13,rsi ; modulus
|
mov r13,rbp ; modulus
|
||||||
mul rcx
|
mul rbx
|
||||||
mov rsi,[rdi+4*8] ; rsi=a.n[4] / destroy constant
|
mov rsi,[rdi+4*8] ; rsi=a.n[4]
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and r13,r8
|
and r13,r8
|
||||||
|
@ -352,20 +351,20 @@ ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[0]*a.n[4]+2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
|
;; c+=2*a.n[0]*a.n[4]+2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
|
||||||
mov rax,r15 ; last time we need 2*a.n[0]
|
mov rax,r14 ; last time we need 2*a.n[0]
|
||||||
mul rsi
|
mul rsi
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,rbx
|
mov rax,r15
|
||||||
mul rbp
|
mul rcx
|
||||||
mov r14,0FFFFFFFFFFFFFh ; modulus
|
mov r14,rbp ; modulus
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,rcx
|
mov rax,rbx
|
||||||
mul rax
|
mul rax
|
||||||
add rcx,rcx ; rcx=2*a.n[2]
|
add rbx,rbx ; rbx=2*a.n[2]
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and r14,r8
|
and r14,r8
|
||||||
|
@ -373,14 +372,14 @@ ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[1]*a.n[4]+2*a.n[2]*a.n[3]
|
;; c+=2*a.n[1]*a.n[4]+2*a.n[2]*a.n[3]
|
||||||
mov rax,rbx
|
mov rax,r15 ; last time we need 2*a.n[1]
|
||||||
mul rsi
|
mul rsi
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,rcx
|
mov rax,rbx
|
||||||
mul rbp
|
mul rcx
|
||||||
mov r15,0FFFFFFFFFFFFFh ; modulus
|
mov r15,rbp ; modulus
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and r15,r8
|
and r15,r8
|
||||||
|
@ -388,24 +387,24 @@ ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[2]*a.n[4]+a.n[3]*a.n[3]
|
;; c+=2*a.n[2]*a.n[4]+a.n[3]*a.n[3]
|
||||||
mov rax,rcx ; 2*a.n[2]
|
mov rax,rbx ; last time we need 2*a.n[2]
|
||||||
mul rsi
|
mul rsi
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
|
|
||||||
mov rax,rbp ; a.n[3]
|
mov rax,rcx ; a.n[3]
|
||||||
mul rax
|
mul rax
|
||||||
mov rbx,0FFFFFFFFFFFFFh ; modulus
|
mov rbx,rbp ; modulus
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and rbx,r8 ; only need lower dword
|
and rbx,r8 ; only need lower dword
|
||||||
lea rax,[2*rbp]
|
lea rax,[2*rcx]
|
||||||
shrd r8,r9,52
|
shrd r8,r9,52
|
||||||
xor r9,r9
|
xor r9,r9
|
||||||
|
|
||||||
;; c+=2*a.n[3]*a.n[4]
|
;; c+=2*a.n[3]*a.n[4]
|
||||||
mul rsi
|
mul rsi
|
||||||
mov rcx,0FFFFFFFFFFFFFh ; modulus
|
mov rcx,rbp ; modulus
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and rcx,r8 ; only need lower dword
|
and rcx,r8 ; only need lower dword
|
||||||
|
@ -415,7 +414,7 @@ ExSetSquare PROC C PUBLIC USES rbx rbp r12 r13 r14 r15
|
||||||
;; c+=a.n[4]*a.n[4]
|
;; c+=a.n[4]*a.n[4]
|
||||||
mov rax,rsi
|
mov rax,rsi
|
||||||
mul rax
|
mul rax
|
||||||
mov rbp,0FFFFFFFFFFFFFh ; modulus
|
;; mov rbp,rbp ; modulus is already there!
|
||||||
add r8,rax
|
add r8,rax
|
||||||
adc r9,rdx
|
adc r9,rdx
|
||||||
and rbp,r8
|
and rbp,r8
|
||||||
|
|
Loading…
Reference in New Issue