Update scalar_4x64_impl.h

XOR reg,reg instead of MOV 0 to reg. It should be at least equal in all architectures and faster in some else.
This commit is contained in:
Alex-GR 2016-10-09 00:16:50 +03:00 committed by GitHub
parent 7d15cd7859
commit 9d67afad96
1 changed files with 13 additions and 13 deletions

View File

@ -282,8 +282,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"movq 56(%%rsi), %%r14\n"
/* Initialize r8,r9,r10 */
"movq 0(%%rsi), %%r8\n"
"movq $0, %%r9\n"
"movq $0, %%r10\n"
"xorq %%r9, %%r9\n"
"xorq %%r10, %%r10\n"
/* (r8,r9) += n0 * c0 */
"movq %8, %%rax\n"
"mulq %%r11\n"
@ -291,7 +291,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq %%rdx, %%r9\n"
/* extract m0 */
"movq %%r8, %q0\n"
"movq $0, %%r8\n"
"xorq %%r8, %%r8\n"
/* (r9,r10) += l1 */
"addq 8(%%rsi), %%r9\n"
"adcq $0, %%r10\n"
@ -309,7 +309,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq $0, %%r8\n"
/* extract m1 */
"movq %%r9, %q1\n"
"movq $0, %%r9\n"
"xorq %%r9, %%r9\n"
/* (r10,r8,r9) += l2 */
"addq 16(%%rsi), %%r10\n"
"adcq $0, %%r8\n"
@ -332,7 +332,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq $0, %%r9\n"
/* extract m2 */
"movq %%r10, %q2\n"
"movq $0, %%r10\n"
"xorq %%r10, %%r10\n"
/* (r8,r9,r10) += l3 */
"addq 24(%%rsi), %%r8\n"
"adcq $0, %%r9\n"
@ -355,7 +355,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq $0, %%r10\n"
/* extract m3 */
"movq %%r8, %q3\n"
"movq $0, %%r8\n"
"xorq %%r8, %%r8\n"
/* (r9,r10,r8) += n3 * c1 */
"movq %9, %%rax\n"
"mulq %%r14\n"
@ -387,8 +387,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"movq %q11, %%r13\n"
/* Initialize (r8,r9,r10) */
"movq %q5, %%r8\n"
"movq $0, %%r9\n"
"movq $0, %%r10\n"
"xorq %%r9, %%r9\n"
"xorq %%r10, %%r10\n"
/* (r8,r9) += m4 * c0 */
"movq %12, %%rax\n"
"mulq %%r11\n"
@ -396,7 +396,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq %%rdx, %%r9\n"
/* extract p0 */
"movq %%r8, %q0\n"
"movq $0, %%r8\n"
"xorq %%r8, %%r8\n"
/* (r9,r10) += m1 */
"addq %q6, %%r9\n"
"adcq $0, %%r10\n"
@ -414,7 +414,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq $0, %%r8\n"
/* extract p1 */
"movq %%r9, %q1\n"
"movq $0, %%r9\n"
"xorq %%r9, %%r9\n"
/* (r10,r8,r9) += m2 */
"addq %q7, %%r10\n"
"adcq $0, %%r8\n"
@ -472,7 +472,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"movq %%rax, 0(%q6)\n"
/* Move to (r8,r9) */
"movq %%rdx, %%r8\n"
"movq $0, %%r9\n"
"xorq %%r9, %%r9\n"
/* (r8,r9) += p1 */
"addq %q2, %%r8\n"
"adcq $0, %%r9\n"
@ -483,7 +483,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq %%rdx, %%r9\n"
/* Extract r1 */
"movq %%r8, 8(%q6)\n"
"movq $0, %%r8\n"
"xorq %%r8, %%r8\n"
/* (r9,r8) += p4 */
"addq %%r10, %%r9\n"
"adcq $0, %%r8\n"
@ -492,7 +492,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
"adcq $0, %%r8\n"
/* Extract r2 */
"movq %%r9, 16(%q6)\n"
"movq $0, %%r9\n"
"xorq %%r9, %%r9\n"
/* (r8,r9) += p3 */
"addq %q4, %%r8\n"
"adcq $0, %%r9\n"