Update scalar_4x64_impl.h
XOR reg,reg instead of MOV 0 to reg. It should be at least as fast on every architecture, and faster on some.
commit 9d67afad96
parent 7d15cd7859
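Background on the change: xor of a register with itself always yields zero, has a shorter encoding than a move of a zero immediate (3 bytes vs. 7 for the extended registers used here), and is recognized by modern x86-64 cores as a dependency-breaking zeroing idiom, often eliminated at register rename. Below is a minimal standalone sketch of both forms in GNU C inline assembly; the zero_xor/zero_mov helpers are illustrative only, not part of libsecp256k1:

    #include <stdio.h>
    #include <stdint.h>

    /* Zero a register with the xor idiom. Unlike mov, xor also
     * writes EFLAGS, hence the "cc" clobber. */
    static uint64_t zero_xor(void) {
        uint64_t r;
        __asm__ ("xorq %q0, %q0" : "=r"(r) : : "cc");
        return r;
    }

    /* Zero a register with an immediate move: a longer encoding,
     * but EFLAGS is left untouched. */
    static uint64_t zero_mov(void) {
        uint64_t r;
        __asm__ ("movq $0, %q0" : "=r"(r));
        return r;
    }

    int main(void) {
        printf("%llu %llu\n", (unsigned long long)zero_xor(),
               (unsigned long long)zero_mov());
        return 0;
    }

Both compile to a single instruction; the observable difference is code size and, on cores that perform zero-idiom elimination, the removal of a false dependency on the register's previous value.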
--- scalar_4x64_impl.h
+++ scalar_4x64_impl.h
@@ -282,8 +282,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "movq 56(%%rsi), %%r14\n"
     /* Initialize r8,r9,r10 */
     "movq 0(%%rsi), %%r8\n"
-    "movq $0, %%r9\n"
-    "movq $0, %%r10\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
     /* (r8,r9) += n0 * c0 */
     "movq %8, %%rax\n"
     "mulq %%r11\n"
@@ -291,7 +291,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq %%rdx, %%r9\n"
     /* extract m0 */
     "movq %%r8, %q0\n"
-    "movq $0, %%r8\n"
+    "xorq %%r8, %%r8\n"
     /* (r9,r10) += l1 */
     "addq 8(%%rsi), %%r9\n"
     "adcq $0, %%r10\n"
@@ -309,7 +309,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq $0, %%r8\n"
     /* extract m1 */
     "movq %%r9, %q1\n"
-    "movq $0, %%r9\n"
+    "xorq %%r9, %%r9\n"
     /* (r10,r8,r9) += l2 */
     "addq 16(%%rsi), %%r10\n"
     "adcq $0, %%r8\n"
@@ -332,7 +332,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq $0, %%r9\n"
     /* extract m2 */
     "movq %%r10, %q2\n"
-    "movq $0, %%r10\n"
+    "xorq %%r10, %%r10\n"
     /* (r8,r9,r10) += l3 */
     "addq 24(%%rsi), %%r8\n"
     "adcq $0, %%r9\n"
@@ -355,7 +355,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq $0, %%r10\n"
     /* extract m3 */
     "movq %%r8, %q3\n"
-    "movq $0, %%r8\n"
+    "xorq %%r8, %%r8\n"
     /* (r9,r10,r8) += n3 * c1 */
     "movq %9, %%rax\n"
     "mulq %%r14\n"
@@ -387,8 +387,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "movq %q11, %%r13\n"
     /* Initialize (r8,r9,r10) */
     "movq %q5, %%r8\n"
-    "movq $0, %%r9\n"
-    "movq $0, %%r10\n"
+    "xorq %%r9, %%r9\n"
+    "xorq %%r10, %%r10\n"
     /* (r8,r9) += m4 * c0 */
     "movq %12, %%rax\n"
     "mulq %%r11\n"
@@ -396,7 +396,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq %%rdx, %%r9\n"
     /* extract p0 */
     "movq %%r8, %q0\n"
-    "movq $0, %%r8\n"
+    "xorq %%r8, %%r8\n"
     /* (r9,r10) += m1 */
     "addq %q6, %%r9\n"
     "adcq $0, %%r10\n"
@@ -414,7 +414,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq $0, %%r8\n"
     /* extract p1 */
     "movq %%r9, %q1\n"
-    "movq $0, %%r9\n"
+    "xorq %%r9, %%r9\n"
     /* (r10,r8,r9) += m2 */
     "addq %q7, %%r10\n"
     "adcq $0, %%r8\n"
@@ -472,7 +472,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "movq %%rax, 0(%q6)\n"
     /* Move to (r8,r9) */
     "movq %%rdx, %%r8\n"
-    "movq $0, %%r9\n"
+    "xorq %%r9, %%r9\n"
     /* (r8,r9) += p1 */
     "addq %q2, %%r8\n"
     "adcq $0, %%r9\n"
@@ -483,7 +483,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq %%rdx, %%r9\n"
     /* Extract r1 */
     "movq %%r8, 8(%q6)\n"
-    "movq $0, %%r8\n"
+    "xorq %%r8, %%r8\n"
     /* (r9,r8) += p4 */
     "addq %%r10, %%r9\n"
     "adcq $0, %%r8\n"
@@ -492,7 +492,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     "adcq $0, %%r8\n"
     /* Extract r2 */
     "movq %%r9, 16(%q6)\n"
-    "movq $0, %%r9\n"
+    "xorq %%r9, %%r9\n"
     /* (r8,r9) += p3 */
     "addq %q4, %%r8\n"
     "adcq $0, %%r9\n"
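One caveat the diff has to respect: unlike a move of zero, xor writes the carry and overflow flags, so the substitution is only safe where no carry is live. Every replacement above sits between a register extract and the addq/mulq that starts the next carry chain, and those instructions rewrite the flags anyway. A schematic illustration of the constraint (fragments written for this note, not taken from the file):

    /* BROKEN: the xor clears CF between addq and adcq, so the
     * carry out of the low limb is silently dropped. */
    "addq %%rbx, %%r9\n"
    "xorq %%r8, %%r8\n"
    "adcq $0, %%r10\n"

    /* SAFE (the pattern used throughout this commit): zero the
     * register only after the carry chain has been consumed. */
    "addq %%rbx, %%r9\n"
    "adcq $0, %%r10\n"
    "xorq %%r8, %%r8\n"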