Optimization: special-case zero modulus limbs in modinv64

Both the field and scalar modulus can be written in signed{30,62} notation
with one or more zero limbs. Make use of this in the update_de function to
avoid a few wide multiplications when that is the case.

This doesn't appear to be a win in the 32-bit implementation, so only
do it for the 64-bit one.
This commit is contained in:
Pieter Wuille 2020-11-28 15:58:22 -08:00
parent 1f233b3fa0
commit 9164a1b658
1 changed files with 12 additions and 6 deletions

View File

@ -338,22 +338,28 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
/* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
cd += (int128_t)u * d1 + (int128_t)v * e1;
ce += (int128_t)q * d1 + (int128_t)r * e1;
cd += (int128_t)modinfo->modulus.v[1] * md;
ce += (int128_t)modinfo->modulus.v[1] * me;
if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[1] * md;
ce += (int128_t)modinfo->modulus.v[1] * me;
}
d->v[0] = (int64_t)cd & M62; cd >>= 62;
e->v[0] = (int64_t)ce & M62; ce >>= 62;
/* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
cd += (int128_t)u * d2 + (int128_t)v * e2;
ce += (int128_t)q * d2 + (int128_t)r * e2;
cd += (int128_t)modinfo->modulus.v[2] * md;
ce += (int128_t)modinfo->modulus.v[2] * me;
if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[2] * md;
ce += (int128_t)modinfo->modulus.v[2] * me;
}
d->v[1] = (int64_t)cd & M62; cd >>= 62;
e->v[1] = (int64_t)ce & M62; ce >>= 62;
/* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
cd += (int128_t)u * d3 + (int128_t)v * e3;
ce += (int128_t)q * d3 + (int128_t)r * e3;
cd += (int128_t)modinfo->modulus.v[3] * md;
ce += (int128_t)modinfo->modulus.v[3] * me;
if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
cd += (int128_t)modinfo->modulus.v[3] * md;
ce += (int128_t)modinfo->modulus.v[3] * me;
}
d->v[2] = (int64_t)cd & M62; cd >>= 62;
e->v[2] = (int64_t)ce & M62; ce >>= 62;
/* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */