From 53c4db7ead977dc1ee482ef22eb3d547b460d87d Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Thu, 10 Feb 2022 14:05:07 +0100 Subject: [PATCH] Fast modular inversion (#172) * split modular inversion in its own file * Stash fast GCD inversion https://eprint.iacr.org/2020/972.pdf * Stash Pornin's bingcd -> issue with inner modular reduction * Implement Bernstein-Yang inversion * Avoid Nim checks on signed integers (32-bit runtime issue) * cleanup: remove old inversion impls * cleanup: static moduli, move div2 * small comments (skip ci) * comment cleanup (skip ci) * fix total iterations on 32-bit * Add batch conversion to affine coordinates using simultaneous inversion trick * fix conditional setZero and batchAffine conversion * cleanup unneeded branches following affine conversion unification * Fix batchAffine with zero inputs and add fuzz failure to test suite --- benchmarks/bench_elliptic_template.nim | 9 +- benchmarks/bench_fields_template.nim | 23 +- benchmarks/bench_fp.nim | 5 +- benchmarks/bench_hash_to_curve.nim | 2 +- benchmarks/bench_pairing_template.nim | 4 +- benchmarks/bench_summary_template.nim | 9 +- constantine.nimble | 6 + constantine/arithmetic.nim | 2 - .../limbs_asm_montmul_x86_adx_bmi2.nim | 2 +- constantine/arithmetic/bigints.nim | 49 +- constantine/arithmetic/bigints_montgomery.nim | 1 - constantine/arithmetic/finite_fields.nim | 69 ++- .../finite_fields_double_precision.nim | 2 +- .../arithmetic/finite_fields_inversion.nim | 64 --- constantine/arithmetic/limbs.nim | 27 +- .../{limbs_modular.nim => limbs_division.nim} | 134 +----- constantine/arithmetic/limbs_invmod.nim | 453 ++++++++++++++++++ constantine/arithmetic/limbs_unsaturated.nim | 385 +++++++++++++++ ...ted_params.nim => bls12_377_g2_params.nim} | 0 constantine/curves/bls12_377_inversion.nim | 204 -------- ...ted_params.nim => bls12_381_g2_params.nim} | 0 constantine/curves/bls12_381_inversion.nim | 220 --------- ..._params.nim => bn254_nogami_g2_params.nim} | 0 constantine/curves/bn254_nogami_inversion.nim | 98 ---- ..._params.nim => bn254_snarks_g2_params.nim} | 0 constantine/curves/bn254_snarks_inversion.nim | 158 ------ ...puted_params.nim => bw6_761_g2_params.nim} | 0 constantine/curves/bw6_761_inversion.nim | 376 --------------- constantine/curves/curve25519_inversion.nim | 60 --- constantine/curves/secp256k1_inversion.nim | 96 ---- ...ecomputed_params.nim => zoo_g2_params.nim} | 10 +- constantine/curves/zoo_inversions.nim | 35 -- .../elliptic/ec_endomorphism_accel.nim | 74 +-- .../elliptic/ec_shortweierstrass_affine.nim | 10 +- .../elliptic/ec_shortweierstrass_jacobian.nim | 80 +++- .../ec_shortweierstrass_projective.nim | 63 ++- .../elliptic/ec_twistededwards_projective.nim | 4 +- constantine/io/io_ec.nim | 54 ++- constantine/pairing/miller_loops.nim | 4 +- constantine/pairing/pairing_bls12.nim | 2 +- constantine/pairing/pairing_bn.nim | 2 +- constantine/pairing/pairing_bw6_761.nim | 10 +- constantine/primitives/bithacks.nim | 2 +- constantine/primitives/constant_time.nim | 12 +- .../primitives/constant_time_types.nim | 1 + constantine/primitives/extended_precision.nim | 21 +- .../extended_precision_64bit_uint128.nim | 22 + .../extended_precision_x86_64_msvc.nim | 20 + .../protocols/ethereum_evm_precompiles.nim | 6 +- .../extension_fields.nim | 11 + docs/optimizations.md | 15 +- research/kzg_poly_commit/fft_g1.nim | 4 +- .../kzg_poly_commit/kzg_single_proofs.nim | 4 +- research/kzg_poly_commit/polynomials.nim | 8 +- ...{precompute_params.sage => g2_params.sage} | 0 tests/t_bigints.nim | 42 +- tests/t_ec_conversion.nim | 63 +++ tests/t_ec_sage_template.nim | 8 +- tests/t_ec_template.nim | 172 ++++++- tests/t_hash_to_curve.nim | 2 +- tests/t_io_unsaturated.nim | 89 ++++ tests/t_pairing_bls12_377_line_functions.nim | 2 +- tests/t_pairing_bls12_381_line_functions.nim | 2 +- tests/t_pairing_template.nim | 12 +- tests/t_sig_bls_lowlevel.nim | 10 +- 65 files changed, 1655 insertions(+), 1679 deletions(-) delete mode 100644 constantine/arithmetic/finite_fields_inversion.nim rename constantine/arithmetic/{limbs_modular.nim => limbs_division.nim} (70%) create mode 100644 constantine/arithmetic/limbs_invmod.nim create mode 100644 constantine/arithmetic/limbs_unsaturated.nim rename constantine/curves/{bls12_377_precomputed_params.nim => bls12_377_g2_params.nim} (100%) delete mode 100644 constantine/curves/bls12_377_inversion.nim rename constantine/curves/{bls12_381_precomputed_params.nim => bls12_381_g2_params.nim} (100%) delete mode 100644 constantine/curves/bls12_381_inversion.nim rename constantine/curves/{bn254_nogami_precomputed_params.nim => bn254_nogami_g2_params.nim} (100%) delete mode 100644 constantine/curves/bn254_nogami_inversion.nim rename constantine/curves/{bn254_snarks_precomputed_params.nim => bn254_snarks_g2_params.nim} (100%) delete mode 100644 constantine/curves/bn254_snarks_inversion.nim rename constantine/curves/{bw6_761_precomputed_params.nim => bw6_761_g2_params.nim} (100%) delete mode 100644 constantine/curves/bw6_761_inversion.nim delete mode 100644 constantine/curves/curve25519_inversion.nim delete mode 100644 constantine/curves/secp256k1_inversion.nim rename constantine/curves/{zoo_precomputed_params.nim => zoo_g2_params.nim} (83%) delete mode 100644 constantine/curves/zoo_inversions.nim rename sage/{precompute_params.sage => g2_params.sage} (100%) create mode 100644 tests/t_ec_conversion.nim create mode 100644 tests/t_io_unsaturated.nim diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim index 835b028..02f7190 100644 --- a/benchmarks/bench_elliptic_template.nim +++ b/benchmarks/bench_elliptic_template.nim @@ -68,10 +68,7 @@ proc mixedAddBench*(T: typedesc, iters: int) = let P = rng.random_unsafe(T) let Q = rng.random_unsafe(T) var Qaff: ECP_ShortW_Aff[T.F, T.G] - when Q is ECP_ShortW_Prj: - Qaff.affineFromProjective(Q) - else: - Qaff.affineFromJacobian(Q) + Qaff.affine(Q) bench("EC Mixed Addition " & G1_or_G2, T, iters): r.madd(P, Qaff) @@ -87,14 +84,14 @@ proc affFromProjBench*(T: typedesc, iters: int) = var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G] let P = rng.random_unsafe(T) bench("EC Projective to Affine " & G1_or_G2, T, iters): - r.affineFromProjective(P) + r.affine(P) proc affFromJacBench*(T: typedesc, iters: int) = const G1_or_G2 = when T.F is Fp: "G1" else: "G2" var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G] let P = rng.random_unsafe(T) bench("EC Jacobian to Affine " & G1_or_G2, T, iters): - r.affineFromJacobian(P) + r.affine(P) proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) = const bits = T.F.C.getCurveOrderBitwidth() diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim index 3ff77e8..c821622 100644 --- a/benchmarks/bench_fields_template.nim +++ b/benchmarks/bench_fields_template.nim @@ -95,30 +95,9 @@ proc invBench*(T: typedesc, iters: int) = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) - bench("Inversion (constant-time default impl)", T, iters): + bench("Inversion (constant-time)", T, iters): r.inv(x) -proc invEuclidBench*(T: typedesc, iters: int) = - var r: T - let x = rng.random_unsafe(T) - preventOptimAway(r) - bench("Inversion (constant-time Euclid)", T, iters): - r.inv_euclid(x) - -proc invPowFermatBench*(T: typedesc, iters: int) = - let x = rng.random_unsafe(T) - const exponent = T.getInvModExponent() - bench("Inversion (exponentiation p-2, Little Fermat)", T, iters): - var r = x - r.powUnsafeExponent(exponent) - -proc invAddChainBench*(T: typedesc, iters: int) = - var r: T - let x = rng.random_unsafe(T) - preventOptimAway(r) - bench("Inversion (addition chain)", T, iters): - r.inv_addchain(x) - proc sqrtBench*(T: typedesc, iters: int) = let x = rng.random_unsafe(T) diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim index 9cae10a..6d251b8 100644 --- a/benchmarks/bench_fp.nim +++ b/benchmarks/bench_fp.nim @@ -11,7 +11,7 @@ import ../constantine/config/[curves, common], ../constantine/arithmetic, ../constantine/io/io_bigints, - ../constantine/curves/[zoo_inversions, zoo_square_roots], + ../constantine/curves/zoo_square_roots, # Helpers ../helpers/static_for, ./bench_fields_template @@ -50,8 +50,7 @@ proc main() = mulBench(Fp[curve], Iters) sqrBench(Fp[curve], Iters) smallSeparator() - invEuclidBench(Fp[curve], ExponentIters) - invPowFermatBench(Fp[curve], ExponentIters) + invBench(Fp[curve], ExponentIters) sqrtBench(Fp[curve], ExponentIters) sqrtRatioBench(Fp[curve], ExponentIters) # Exponentiation by a "secret" of size ~the curve order diff --git a/benchmarks/bench_hash_to_curve.nim b/benchmarks/bench_hash_to_curve.nim index ffa429b..5757c63 100644 --- a/benchmarks/bench_hash_to_curve.nim +++ b/benchmarks/bench_hash_to_curve.nim @@ -64,7 +64,7 @@ proc bench_BLS12_381_proj_aff_conversion(iters: int) = ) bench("Proj->Affine conversion (for pairing)", BLS12_381, iters): - Paff.affineFromProjective(P) + Paff.affine(P) const Iters = 1000 diff --git a/benchmarks/bench_pairing_template.nim b/benchmarks/bench_pairing_template.nim index a4c8a10..71352aa 100644 --- a/benchmarks/bench_pairing_template.nim +++ b/benchmarks/bench_pairing_template.nim @@ -51,9 +51,9 @@ func clearCofactor[F; G: static Subgroup]( ec: var ECP_ShortW_Aff[F, G]) = # For now we don't have any affine operation defined var t {.noInit.}: ECP_ShortW_Prj[F, G] - t.projectiveFromAffine(ec) + t.fromAffine(ec) t.clearCofactor() - ec.affineFromProjective(t) + ec.affine(t) func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} = result = rng.random_unsafe(EC) diff --git a/benchmarks/bench_summary_template.nim b/benchmarks/bench_summary_template.nim index 06f3611..d40ca52 100644 --- a/benchmarks/bench_summary_template.nim +++ b/benchmarks/bench_summary_template.nim @@ -86,9 +86,9 @@ func clearCofactorReference[F; G: static Subgroup]( ec: var ECP_ShortW_Aff[F, G]) = # For now we don't have any affine operation defined var t {.noInit.}: ECP_ShortW_Prj[F, G] - t.projectiveFromAffine(ec) + t.fromAffine(ec) t.clearCofactorReference() - ec.affineFromProjective(t) + ec.affine(t) func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} = result = rng.random_unsafe(EC) @@ -136,10 +136,7 @@ proc mixedAddBench*(T: typedesc, iters: int) = let P = rng.random_unsafe(T) let Q = rng.random_unsafe(T) var Qaff: ECP_ShortW_Aff[T.F, T.G] - when Q is ECP_ShortW_Prj: - Qaff.affineFromProjective(Q) - else: - Qaff.affineFromJacobian(Q) + Qaff.affine(Q) bench("EC Mixed Addition " & G1_or_G2, T, iters): r.madd(P, Qaff) diff --git a/constantine.nimble b/constantine.nimble index 66c137d..7b149a3 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -28,6 +28,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ # Big ints # ---------------------------------------------------------- ("tests/t_io_bigints.nim", false), + ("tests/t_io_unsaturated.nim", false), ("tests/t_bigints.nim", false), ("tests/t_bigints_multimod.nim", false), ("tests/t_bigints_mod_vs_gmp.nim", true), @@ -64,6 +65,11 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ ("tests/t_fp4_frobenius.nim", false), ("tests/t_fp6_frobenius.nim", false), ("tests/t_fp12_frobenius.nim", false), + + # Elliptic curve arithmetic + # ---------------------------------------------------------- + ("tests/t_ec_conversion.nim", false), + # Elliptic curve arithmetic G1 # ---------------------------------------------------------- # ("tests/t_ec_shortw_prj_g1_add_double.nim", false), diff --git a/constantine/arithmetic.nim b/constantine/arithmetic.nim index 540cc6e..5c51657 100644 --- a/constantine/arithmetic.nim +++ b/constantine/arithmetic.nim @@ -10,7 +10,6 @@ import arithmetic/[bigints, bigints_montgomery], arithmetic/[ finite_fields, - finite_fields_inversion, finite_fields_square_root, finite_fields_double_precision ] @@ -19,6 +18,5 @@ export bigints, bigints_montgomery, finite_fields, - finite_fields_inversion, finite_fields_square_root, finite_fields_double_precision diff --git a/constantine/arithmetic/assembly/limbs_asm_montmul_x86_adx_bmi2.nim b/constantine/arithmetic/assembly/limbs_asm_montmul_x86_adx_bmi2.nim index 7b38e69..6229cd7 100644 --- a/constantine/arithmetic/assembly/limbs_asm_montmul_x86_adx_bmi2.nim +++ b/constantine/arithmetic/assembly/limbs_asm_montmul_x86_adx_bmi2.nim @@ -147,7 +147,7 @@ proc partialRedx( ctx.comment " Reduction" ctx.comment " m = t[0] * m0ninv mod 2^w" ctx.mov rdx, t[0] - ctx.mulx S, rdx, m0ninv, rdx # (S, RDX) <- m0ninv * RDX + ctx.imul rdx, m0ninv # Clear carry flags - TODO: necessary? ctx.`xor` S, S diff --git a/constantine/arithmetic/bigints.nim b/constantine/arithmetic/bigints.nim index 2aafa9e..755eaae 100644 --- a/constantine/arithmetic/bigints.nim +++ b/constantine/arithmetic/bigints.nim @@ -11,8 +11,8 @@ import ../primitives, ./limbs, ./limbs_extmul, - ./limbs_modular, - ./limbs_montgomery + ./limbs_invmod, + ./limbs_division export BigInt @@ -79,9 +79,9 @@ func setUint*(a: var BigInt, n: SomeUnsignedInt) = ## Set a BigInt to a machine-sized integer ``n`` a.limbs.setUint(n) -func czero*(a: var BigInt, ctl: SecretBool) = +func csetZero*(a: var BigInt, ctl: SecretBool) = ## Set ``a`` to 0 if ``ctl`` is true - a.limbs.czero(ctl) + a.limbs.csetZero(ctl) # Copy # ------------------------------------------------------------ @@ -450,35 +450,36 @@ func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBi # pass a pointer+length to a fixed session of the BSS. reduce(r.limbs, a.limbs, aBits, M.limbs, mBits) -func div2_modular*[bits](a: var BigInt[bits], mp1div2: BigInt[bits]) = - ## Compute a <- a/2 (mod M) - ## `mp1div2` is the modulus (M+1)/2 - ## - ## Normally if `a` is odd we add the modulus before dividing by 2 - ## but this may overflow and we might lose a bit before shifting. - ## Instead we shift first and then add half the modulus rounded up - ## - ## Assuming M is odd, `mp1div2` can be precomputed without - ## overflowing the "Limbs" by dividing by 2 first - ## and add 1 - ## Otherwise `mp1div2` should be M/2 - a.limbs.div2_modular(mp1div2.limbs) - -func steinsGCD*[bits](r: var BigInt[bits], a, F, M, mp1div2: BigInt[bits]) = - ## Compute F multiplied the modular inverse of ``a`` modulo M - ## r ≡ F . a^-1 (mod M) +func invmod*[bits]( + r: var BigInt[bits], + a, F, M: BigInt[bits]) = + ## Compute the modular inverse of ``a`` modulo M + ## r ≡ F.a⁻¹ (mod M) ## ## M MUST be odd, M does not need to be prime. ## ``a`` MUST be less than M. - r.limbs.steinsGCD(a.limbs, F.limbs, M.limbs, bits, mp1div2.limbs) + r.limbs.invmod(a.limbs, F.limbs, M.limbs, bits) -func invmod*[bits](r: var BigInt[bits], a, M, mp1div2: BigInt[bits]) = +func invmod*[bits]( + r: var BigInt[bits], + a: BigInt[bits], + F, M: static BigInt[bits]) = + ## Compute the modular inverse of ``a`` modulo M + ## r ≡ F.a⁻¹ (mod M) + ## + ## with F and M known at compile-time + ## + ## M MUST be odd, M does not need to be prime. + ## ``a`` MUST be less than M. + r.limbs.invmod(a.limbs, F.limbs, M.limbs, bits) + +func invmod*[bits](r: var BigInt[bits], a, M: BigInt[bits]) = ## Compute the modular inverse of ``a`` modulo M ## ## The modulus ``M`` MUST be odd var one {.noInit.}: BigInt[bits] one.setOne() - r.steinsGCD(a, one, M, mp1div2) + r.invmod(a, one, M) {.pop.} # inline {.pop.} # raises no exceptions diff --git a/constantine/arithmetic/bigints_montgomery.nim b/constantine/arithmetic/bigints_montgomery.nim index b28e9a6..992f93e 100644 --- a/constantine/arithmetic/bigints_montgomery.nim +++ b/constantine/arithmetic/bigints_montgomery.nim @@ -11,7 +11,6 @@ import ../primitives, ../io/io_bigints, ./limbs, - ./limbs_modular, ./limbs_montgomery, ./bigints diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim index d85ce4d..f15a8d4 100644 --- a/constantine/arithmetic/finite_fields.nim +++ b/constantine/arithmetic/finite_fields.nim @@ -231,25 +231,29 @@ func neg*(r: var FF, a: FF) {.meter.} = var t {.noInit.}: FF let isZero = a.isZero() discard t.mres.diff(FF.fieldMod(), a.mres) - t.mres.czero(isZero) + t.mres.csetZero(isZero) r = t func neg*(a: var FF) {.meter.} = ## Negate modulo p a.neg(a) -func div2*(a: var FF) {.meter.} = - ## Modular division by 2 - a.mres.div2_modular(FF.getPrimePlus1div2()) - # ############################################################ # # Field arithmetic conditional # # ############################################################ +func csetZero*(a: var FF, ctl: SecretBool) = + ## Set ``a`` to 0 if ``ctl`` is true + a.mres.csetZero(ctl) + +func csetOne*(a: var FF, ctl: SecretBool) = + ## Set ``a`` to 1 if ``ctl`` is true + a.mres.ccopy(FF.getMontyOne(), ctl) + func cneg*(r: var FF, a: FF, ctl: SecretBool) {.meter.} = - ## Constant-time in-place conditional negation + ## Constant-time out-of-place conditional negation ## The negation is only performed if ctl is "true" r.neg(a) r.ccopy(a, not ctl) @@ -261,19 +265,68 @@ func cneg*(a: var FF, ctl: SecretBool) {.meter.} = a.cneg(t, ctl) func cadd*(a: var FF, b: FF, ctl: SecretBool) {.meter.} = - ## Constant-time in-place conditional addition + ## Constant-time out-place conditional addition ## The addition is only performed if ctl is "true" var t = a t += b a.ccopy(t, ctl) func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} = - ## Constant-time in-place conditional substraction + ## Constant-time out-place conditional substraction ## The substraction is only performed if ctl is "true" var t = a t -= b a.ccopy(t, ctl) +# ############################################################ +# +# Field arithmetic division and inversion +# +# ############################################################ + +func div2*(a: var FF) {.meter.} = + ## Modular division by 2 + ## `a` will be divided in-place + # + # Normally if `a` is odd we add the modulus before dividing by 2 + # but this may overflow and we might lose a bit before shifting. + # Instead we shift first and then add half the modulus rounded up + # + # Assuming M is odd, `mp1div2` can be precomputed without + # overflowing the "Limbs" by dividing by 2 first + # and add 1 + # Otherwise `mp1div2` should be M/2 + + # if a.isOdd: + # a += M + # a = a shr 1 + let wasOdd = a.mres.isOdd() + a.mres.shiftRight(1) + let carry {.used.} = a.mres.cadd(FF.getPrimePlus1div2(), wasOdd) + + # a < M -> a/2 <= M/2: + # a/2 + M/2 < M if a is odd + # a/2 < M if a is even + debug: doAssert not carry.bool + +func inv*(r: var FF, a: FF) = + ## Inversion modulo p + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + r.mres.invmod(a.mres, FF.getR2modP(), FF.fieldMod()) + +func inv*(a: var FF) = + ## Inversion modulo p + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + a.inv(a) + # ############################################################ # # Field arithmetic exponentiation diff --git a/constantine/arithmetic/finite_fields_double_precision.nim b/constantine/arithmetic/finite_fields_double_precision.nim index f3eba51..ab161c0 100644 --- a/constantine/arithmetic/finite_fields_double_precision.nim +++ b/constantine/arithmetic/finite_fields_double_precision.nim @@ -162,7 +162,7 @@ func neg2xMod*(r: var FpDbl, a: FpDbl) = subB(borrow, t.limbs2x[i], M.limbs[i-N], a.limbs2x[i], borrow) # Zero the result if input was zero - t.limbs2x.czero(isZero) + t.limbs2x.csetZero(isZero) r = t func prod2xImpl( diff --git a/constantine/arithmetic/finite_fields_inversion.nim b/constantine/arithmetic/finite_fields_inversion.nim deleted file mode 100644 index 93c7186..0000000 --- a/constantine/arithmetic/finite_fields_inversion.nim +++ /dev/null @@ -1,64 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/[curves, type_ff], - ./bigints, - ../curves/zoo_inversions - -export zoo_inversions - -# ############################################################ -# -# Finite field inversion -# -# ############################################################ - -# No exceptions allowed -{.push raises: [].} -{.push inline.} - -func inv_euclid*(r: var FF, a: FF) = - ## Inversion modulo p via - ## Niels Moller constant-time version of - ## Stein's GCD derived from extended binary Euclid algorithm - r.mres.steinsGCD(a.mres, FF.getR2modP(), FF.fieldMod(), FF.getPrimePlus1div2()) - -func inv*(r: var FF, a: FF) = - ## Inversion modulo p - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - # For now we don't activate the addition chains - # Performance is slower than Euclid-based inversion on newer CPUs - # - # - Montgomery multiplication/squaring can skip the final substraction - # - For generalized Mersenne Prime curves, modular reduction can be made extremely cheap. - # - For BW6-761 the addition chain is over 2x slower than Euclid-based inversion - # due to multiplication being so costly with 12 limbs (grows quadratically) - # while Euclid costs grows linearly. - when false and - FF is Fp and FF.C.hasInversionAddchain() and - FF.C notin {BW6_761}: - r.inv_addchain(a) - else: - r.inv_euclid(a) - -func inv*(a: var FF) = - ## Inversion modulo p - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - a.inv(a) - -{.pop.} # inline -{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/limbs.nim b/constantine/arithmetic/limbs.nim index 7914ab2..a10cdf0 100644 --- a/constantine/arithmetic/limbs.nim +++ b/constantine/arithmetic/limbs.nim @@ -85,12 +85,11 @@ func setUint*(a: var Limbs, n: SomeUnsignedInt) = when a.len > 2: zeroMem(a[2].addr, (a.len - 2) * sizeof(SecretWord)) -func czero*(a: var Limbs, ctl: SecretBool) = +func csetZero*(a: var Limbs, ctl: SecretBool) = ## Set ``a`` to 0 if ``ctl`` is true - # Only used for FF neg in pure Nim fallback - # so no need for assembly + let mask = -(SecretWord(ctl) xor One) for i in 0 ..< a.len: - ctl.ccopy(a[i], Zero) + a[i] = a[i] and mask # Copy # ------------------------------------------------------------ @@ -330,6 +329,26 @@ func cneg*(a: var Limbs, ctl: CTBool) = carry = SecretWord(t < carry) # Carry on a[i] = t +func cneg*(r: var Limbs, a: Limbs, ctl: CTBool) = + ## Conditional negation. + ## Negate if ``ctl`` is true + + # Algorithm: + # In two-complement representation + # -x <=> not(x) + 1 <=> x xor 0xFF... + 1 + # and + # x <=> x xor 0x00...<=> x xor 0x00... + 0 + # + # So we need to xor all words and then add 1 + # The "+1" might carry + # So we fuse the 2 steps + let mask = -SecretWord(ctl) # Obtain a 0xFF... or 0x00... mask + var carry = SecretWord(ctl) + for i in 0 ..< a.len: + let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively + carry = SecretWord(t < carry) # Carry on + r[i] = t + {.pop.} # inline # Division diff --git a/constantine/arithmetic/limbs_modular.nim b/constantine/arithmetic/limbs_division.nim similarity index 70% rename from constantine/arithmetic/limbs_modular.nim rename to constantine/arithmetic/limbs_division.nim index 0ebf131..c1f6b2f 100644 --- a/constantine/arithmetic/limbs_modular.nim +++ b/constantine/arithmetic/limbs_division.nim @@ -8,144 +8,14 @@ import ../config/common, - ../primitives, - ./limbs + ../primitives # No exceptions allowed {.push raises: [].} # ############################################################ # -# Modular division by 2 -# -# ############################################################ - -func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.} = - ## Modular Division by 2 - ## `a` will be divided in-place - ## `mp1div2` is the modulus (M+1)/2 - ## - ## Normally if `a` is odd we add the modulus before dividing by 2 - ## but this may overflow and we might lose a bit before shifting. - ## Instead we shift first and then add half the modulus rounded up - ## - ## Assuming M is odd, `mp1div2` can be precomputed without - ## overflowing the "Limbs" by dividing by 2 first - ## and add 1 - ## Otherwise `mp1div2` should be M/2 - - # if a.isOdd: - # a += M - # a = a shr 1 - let wasOdd = a.isOdd() - a.shiftRight(1) - let carry {.used.} = a.cadd(mp1div2, wasOdd) - debug: doAssert not carry.bool - -# ############################################################ -# -# Modular inversion -# -# ############################################################ - -# Generic (odd-only modulus) -# ------------------------------------------------------------ -# Algorithm by Niels Möller, -# a modified version of Stein's Algorithm (binary Extended Euclid GCD) -# -# Algorithm 5 in -# Fast Software Polynomial Multiplication on ARM Processors Using the NEON Engine -# Danilo Camara, Conrado P. L. Gouvea, Julio Lopez, and Ricardo Dahab -# https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf -# -# Input: integer x, odd integer n, x < n -# Output: x−1 (mod n) -# 1: function ModInv(x, n) -# 2: (a, b, u, v) ← (x, n, 1, 1) -# 3: ℓ ← ⌊log2 n⌋ + 1 ⮚ number of bits in n -# 4: for i ← 0 to 2ℓ − 1 do -# 5: odd ← a & 1 -# 6: if odd and a ≥ b then -# 7: a ← a − b -# 8: else if odd and a < b then -# 9: (a, b, u, v) ← (b − a, a, v, u) -# 10: a ← a >> 1 -# 11: if odd then u ← u − v -# 12: if u < 0 then u ← u + n -# 13: if u & 1 then u ← u + n -# 14: u ← u >> 1 -# 15: return v -# -# Warning ⚠️: v should be 0 at initialization -# -# We modify it to return F . a^-1 -# So that we can pass an adjustment factor F -# And directly compute modular division or Montgomery inversion - -func steinsGCD*(v: var Limbs, a: Limbs, F, M: Limbs, bits: int, mp1div2: Limbs) = - ## Compute F multiplied the modular inverse of ``a`` modulo M - ## r ≡ F . a^-1 (mod M) - ## - ## M MUST be odd, M does not need to be prime. - ## ``a`` MUST be less than M. - ## - ## No information about ``a`` in particular its actual length in bits is leaked. - ## - ## This takes (M+1)/2 (mp1div2) as a precomputed parameter as a slight optimization - ## in stack size and speed. - ## - ## The inverse of 0 is 0. - - # Ideally we need registers for a, b, u, v - # but: - # - Even with ~256-bit primes, that's 4 limbs = 4*4 => 16 registers - # - x86_64 only has 16 general purposes registers - # - Registers are needed for the loop counter and comparison results - # - CMOV is reg <- RM so can move registers/memory into registers - # but cannot move into memory. - # so we choose to keep "v" from the algorithm in memory as `r` - - # TODO: the inlining of primitives like `csub` is bad for codesize - # but there is a 80% slowdown without it. - - var a = a - var b = M - var u = F - v.setZero() - - for i in 0 ..< 2 * bits: - debug: doAssert bool(b.isOdd) - let isOddA = a.isOdd() - - # if isOddA: a -= b - let aLessThanB = isOddA and (SecretBool) a.csub(b, isOddA) - # if a < b and the sub was processed - # in that case, b <- a = a - b + b - discard b.cadd(a, aLessThanB) - # and a <- -new_a = (b-a) - a.cneg(aLessThanB) - debug: doAssert not bool(a.isOdd) - a.shiftRight(1) - - # Swap u and v is a < b - u.cswap(v, aLessThanB) - # if isOddA: u -= v (mod M) - let neg = isOddA and (SecretBool) u.csub(v, isOddA) - discard u.cadd(M, neg) - - # u = u/2 (mod M) - u.div2_modular(mp1div2) - - debug: - doAssert bool a.isZero() - # GCD exist (always true if a and M are relatively prime) - doAssert bool b.isOne() or - # or not (on prime fields iff input was zero) and no GCD fallback output is zero - v.isZero() - -# ############################################################ -# -# Modular Reduction +# Division and Modular Reduction # # ############################################################ # diff --git a/constantine/arithmetic/limbs_invmod.nim b/constantine/arithmetic/limbs_invmod.nim new file mode 100644 index 0000000..c128fb2 --- /dev/null +++ b/constantine/arithmetic/limbs_invmod.nim @@ -0,0 +1,453 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/common, + ../primitives, + ./limbs, ./limbs_unsaturated + +# No exceptions allowed +{.push raises: [].} + +# ############################################################ +# +# Modular inversion (mod 2ᵏ) +# +# ############################################################ + +func invModBitwidth(a: BaseType): BaseType = + # Modular inverse algorithm: + # Explanation p11 "Dumas iterations" based on Newton-Raphson: + # - Cetin Kaya Koc (2017), https://eprint.iacr.org/2017/411 + # - Jean-Guillaume Dumas (2012), https://arxiv.org/pdf/1209.6626v2.pdf + # - Colin Plumb (1994), http://groups.google.com/groups?selm=1994Apr6.093116.27805%40mnemosyne.cs.du.edu + # Other sources: + # - https://crypto.stackexchange.com/questions/47493/how-to-determine-the-multiplicative-inverse-modulo-64-or-other-power-of-two + # - https://mumble.net/~campbell/2015/01/21/inverse-mod-power-of-two + # - http://marc-b-reynolds.github.io/math/2017/09/18/ModInverse.html + + # We are in a special case + # where m = 2^WordBitWidth. + # For a and m to be coprimes, a must be odd. + # + # We have the following relation + # ax ≡ 1 (mod 2ᵏ) <=> ax(2 - ax) ≡ 1 (mod 2²ᵏ) + # which grows in O(log(log(a))) + debug: doAssert (a and 1) == 1, "a must be odd" + + const k = log2_vartime(a.sizeof() * 8) + result = a # Start from an inverse of M0 modulo 2, M0 is odd and it's own inverse + for _ in 0 ..< k: # at each iteration we get the inverse mod(2^2k) + result *= 2 - a * result # x' = x(2 - ax) + +func invMod2powK(M0: BaseType, k: static BaseType): BaseType = + ## Compute 1/M mod 2ᵏ + ## from M[0] + # Algorithm: Cetin Kaya Koc (2017) p11, https://eprint.iacr.org/2017/411 + # Once you have a modular inverse (mod 2ˢ) you can reduce + # (mod 2ᵏ) to have the modular inverse (mod 2ᵏ) + static: doAssert k <= WordBitWidth + const maskMod = (1 shl k)-1 + M0.invModBitwidth() and maskMod + +# ############################################################### +# +# Modular inversion +# +# ############################################################### + +# Algorithm by Bernstein-Yang +# ------------------------------------------------------------ +# +# - Original Bernstein-Yang paper, https://eprint.iacr.org/2019/266 +# - Executable spec and description by Dettman-Wuille, https://github.com/bitcoin-core/secp256k1/blob/85b00a1/doc/safegcd_implementation.md +# - Formal bound verification by Wuille, https://github.com/sipa/safegcd-bounds +# - Formal verification by Hvass-Aranha-Spitters, https://eprint.iacr.org/2021/549 +# +# We implement the half-delta divstep variant + +type TransitionMatrix = object + ## Bernstein-Yang Jumpdivstep transition matrix + ## [u v] + ## t = [q r] + ## It it is scaled by 2ᵏ + u, v, q, r: SignedSecretWord + +debug: + # Debugging helpers + + func checkDeterminant(t: TransitionMatrix, u, v, q, r: SignedSecretWord, k, numIters: int): bool = + # The determinant of t must be a power of two. This guarantees that multiplication with t + # does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which + # will be divided out again). + # Each divstep's individual matrix has determinant 2⁻¹, + # the aggregate of numIters of them will have determinant 2ⁿᵘᵐᴵᵗᵉʳˢ. Multiplying with the initial + # 2ᵏ*identity (which has determinant 2²ᵏ) means the result has determinant 2²ᵏ⁻ⁿᵘᵐᴵᵗᵉʳˢ. + let + u = SecretWord u + v = SecretWord v + q = SecretWord q + r = SecretWord r + + var a, b: array[2, SecretWord] + var e: array[2, SecretWord] + smul(a[1], a[0], u, r) + smul(b[1], b[0], v, q) + + var borrow: Borrow + subB(borrow, a[0], a[0], b[0], Borrow(0)) + subB(borrow, a[1], a[1], b[1], borrow) + + let d = 2*k - numIters + b[0] = Zero; b[1] = Zero + b[d div WordBitwidth] = One shl (d mod WordBitwidth) + + return bool(a == b) + +func canonicalize( + a: var LimbsUnsaturated, + signMask: SignedSecretWord, + M: LimbsUnsaturated + ) = + ## Compute a = sign*a (mod M) + ## + ## with a in range (-2*M, M) + ## result in range [0, M) + + const + UnsatBitWidth = WordBitWidth - a.Excess + Max = SignedSecretWord(MaxWord shr a.Excess) + + # Operate in registers + var z = a + + # Add M if `z` is negative + # -> range (-M, M) + z.cadd(M, z.isNegMask()) + # Negate if sign is negative + # -> range (-M, M) + z.cneg(signMask) + # Normalize words to range (-2^UnsatBitwidth, 2^UnsatBitwidth) + for i in 0 ..< z.words.len-1: + z[i+1] = z[i+1] + z[i].ashr(UnsatBitWidth) + z[i] = z[i] and Max + + # Add M if `z` is negative + # -> range (0, M) + z.cadd(M, z.isNegMask()) + # Normalize words to range (-2^UnsatBitwidth, 2^UnsatBitwidth) + for i in 0 ..< z.words.len-1: + z[i+1] = z[i+1] + z[i].ashr(UnsatBitWidth) + z[i] = z[i] and Max + + a = z + +proc partitionDivsteps(bits, wordBitWidth: int): tuple[totalIters, numChunks, chunkSize, cutoff: int] = + # Given the field modulus number of bits + # and the effective word size + # Returns: + # - the total number of iterations that guarantees GCD convergence + # - the number of chunks of divsteps to compute + # - the base number of divsteps per chunk + # - a cutoff chunk, + # before this chunk ID, the number of divsteps is "base number + 1" + # afterward it's "base number" + let totalIters = + if bits == 256: + # https://github.com/sipa/safegcd-bounds/tree/master/coq + # For 256-bit inputs, 590 divsteps are sufficient with hddivstep variant (half-delta divstep) + # for gcd(f, g) with 0 <= g <= f <= Modulus (inversion g == 1) + # The generic formula reports 591 + 590 + else: + # https://github.com/sipa/safegcd-bounds/blob/master/genproofhd.md + # For any input, for gcd(f, g) with 0 <= g <= f <= Modulus with hddivstep variant (half-delta divstep) + # (inversion g == 1) + (45907*bits + 26313) div 19929 + let numChunks = (totalIters + wordBitWidth-1) div wordBitWidth + let chunkSize = totalIters div numChunks + let cutoff = totalIters mod numChunks + return (totalIters, numChunks, chunkSize, cutoff) + +func batchedDivsteps( + t: var TransitionMatrix, + theta: SignedSecretWord, + f0, g0: SignedSecretWord, + numIters: int, + k: static int + ): SignedSecretWord = + ## Bernstein-Yang half-delta (theta) batch of divsteps + ## + ## Output: + ## - return theta for the next batch of divsteps + ## - mutate t, the transition matrix to apply `numIters` divsteps at once + ## t is scaled by 2ᵏ + ## + ## Input: + ## - f0, bottom limb of f + ## - g0, bottom limb of g + ## - numIters, number of iterations requested in this batch of divsteps + ## - k, the maximum batch size, transition matrix is scaled by 2ᵏ + var + u = SignedSecretWord(1 shl k) + v = SignedSecretWord(0) + q = SignedSecretWord(0) + r = SignedSecretWord(1 shl k) + f = f0 + g = g0 + + theta = theta + + for i in k-numIters ..< k: + debug: + func reportLoop() = + debugEcho " iterations: [", k-numIters, ", ", k, ")", " (", numIters, " iterations in total)" + debugEcho " i: ", i, ", theta: ", int(theta) + # debugEcho " f: 0b", BiggestInt(f).toBin(64), ", g: 0b", BiggestInt(g).toBin(64), " | f: ", int(f), ", g: ", int(g) + # debugEcho " u: 0b", BiggestInt(u).toBin(64), ", v: 0b", BiggestInt(v).toBin(64), " | u: ", int(u), ", v: ", int(v) + # debugEcho " q: 0b", BiggestInt(q).toBin(64), ", r: 0b", BiggestInt(r).toBin(64), " | q: ", int(q), ", r: ", int(r) + + doAssert (BaseType(f) and 1) == 1, (reportLoop(); "f must be odd)") + doAssert bool(not(uint(u or v or q or r) and (if i == 0: high(uint) else: high(uint) shr (i - 1)))), (reportLoop(); "Min trailing zeros count decreases at each iteration") + doAssert bool(u.ashr(k-i)*f0 + v.ashr(k-i)*g0 == f.lshl(i)), (reportLoop(); "Applying the transition matrix to (f₀, g₀) returns current (f, g)") + doAssert bool(q.ashr(k-i)*f0 + r.ashr(k-i)*g0 == g.lshl(i)), (reportLoop(); "Applying the transition matrix to (f₀, g₀) returns current (f, g)") + + # Conditional masks for (theta < 0) and g odd + let c1 = theta.isNegMask() + let c2 = g.isOddMask() + # x, y, z, conditional complement of f, u, v + let x = f xor c1 + let y = u xor c1 + let z = v xor c1 + # conditional substraction from g, q, r + g.csub(x, c2) + q.csub(y, c2) + r.csub(z, c2) + # c3 = (theta >= 0) and g odd + let c3 = c2 and not c1 + # theta = -theta or theta+1 + theta = (theta xor c3) + SignedSecretWord(1) + # Conditional rollback substraction + f.cadd(g, c3) + u.cadd(q, c3) + v.cadd(r, c3) + # Shifts + g = g.lshr(1) + q = q.ashr(1) + r = r.ashr(1) + + t.u = u + t.v = v + t.q = q + t.r = r + debug: + doAssert bool(u*f0 + v*g0 == f.lshl(k)), "Applying the final matrix to (f₀, g₀) gives the final (f, g)" + doAssert bool(q*f0 + r*g0 == g.lshl(k)), "Applying the final matrix to (f₀, g₀) gives the final (f, g)" + doAssert checkDeterminant(t, u, v, q, r, k, numIters) + + return theta + +func matVecMul_shr_k_mod_M[N, E: static int]( + t: TransitionMatrix, + d, e: var LimbsUnsaturated[N, E], + k: static int, + M: LimbsUnsaturated[N, E], + invMod2powK: SecretWord + ) = + ## Compute + ## + ## [u v] [d] + ## [q r]/2ᵏ.[e] mod M + ## + ## d, e will be in range (-2*modulus,modulus) + ## and output limbs in (-2ᵏ, 2ᵏ) + + static: doAssert k == WordBitWidth - E + const Max = SignedSecretWord(MaxWord shr E) + + let + u = t.u + v = t.v + q = t.q + r = t.r + + let sign_d = d.isNegMask() + let sign_e = e.isNegMask() + + # Double-signed-word carries + var cd, ce: DSWord + + # First iteration of [u v] [d] + # [q r].[e] + cd.slincombAccNoCarry(u, d[0], v, e[0]) + ce.slincombAccNoCarry(q, d[0], r, e[0]) + + # Compute me and md, multiples of M + # such as the bottom k bits if d and e are 0 + # This allows fusing division by 2ᵏ + # i.e. (mx * M) mod 2ᵏ = x mod 2ᵏ + var md, me = SignedSecretWord(0) + md.cadd(u, sign_d) + md.cadd(v, sign_e) + me.cadd(q, sign_d) + me.cadd(r, sign_e) + + md = md - (SignedSecretWord(invMod2powK * SecretWord(cd.lo) + SecretWord(md)) and Max) + me = me - (SignedSecretWord(invMod2powK * SecretWord(ce.lo) + SecretWord(me)) and Max) + + # First iteration of [u v] [d] [md] + # [q r].[e] + [me].M[0] + # k bottom bits are 0 + cd.smulAccNoCarry(md, M[0]) + ce.smulAccNoCarry(me, M[0]) + cd.ashr(k) + ce.ashr(k) + + for i in 1 ..< N: + cd.slincombAccNoCarry(u, d[i], v, e[i]) + ce.slincombAccNoCarry(q, d[i], r, e[i]) + cd.smulAccNoCarry(md, M[i]) + ce.smulAccNoCarry(me, M[i]) + d[i-1] = cd.lo and Max + e[i-1] = ce.lo and Max + cd.ashr(k) + ce.ashr(k) + + d[N-1] = cd.lo + e[N-1] = ce.lo + +func matVecMul_shr_k[N, E: static int]( + t: TransitionMatrix, + f, g: var LimbsUnsaturated[N, E], + k: static int + ) = + ## Compute + ## + ## [u v] [f] + ## [q r].[g] / 2ᵏ + + static: doAssert k == WordBitWidth - E + const Max = SignedSecretWord(MaxWord shr E) + + let + u = t.u + v = t.v + q = t.q + r = t.r + + # Double-signed-word carries + var cf, cg: DSWord + + # First iteration of [u v] [f] + # [q r].[g] + cf.slincombAccNoCarry(u, f[0], v, g[0]) + cg.slincombAccNoCarry(q, f[0], r, g[0]) + # bottom k bits are zero by construction + debug: + doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo) + doAssert BaseType(cg.lo and Max) == 0, "bottom k bits should be 0, cg.lo: " & $BaseType(cg.lo) + + cf.ashr(k) + cg.ashr(k) + + for i in 1 ..< N: + cf.slincombAccNoCarry(u, f[i], v, g[i]) + cg.slincombAccNoCarry(q, f[i], r, g[i]) + f[i-1] = cf.lo and Max + g[i-1] = cg.lo and Max + cf.ashr(k) + cg.ashr(k) + + f[N-1] = cf.lo + g[N-1] = cg.lo + +func invmodImpl[N, E]( + a: var LimbsUnsaturated[N, E], + F, M: LimbsUnsaturated[N, E], + invMod2powK: SecretWord, + k, bits: static int) = + ## Modular inversion using Bernstein-Yang algorithm + ## r ≡ F.a⁻¹ (mod M) + + # theta = delta-1/2, delta starts at 1/2 for the half-delta variant + var theta = SignedSecretWord(0) + var d{.noInit.}, e{.noInit.}: LimbsUnsaturated[N, E] + var f{.noInit.}, g{.noInit.}: LimbsUnsaturated[N, E] + + d.setZero() + e = F + + # g < f for partitioning / iteration count formula + f = M + g = a + const partition = partitionDivsteps(bits, k) + + for i in 0 ..< partition.numChunks: + var t{.noInit.}: TransitionMatrix + let numIters = partition.chunkSize + int(i < partition.cutoff) + # Compute transition matrix and next theta + theta = t.batchedDivsteps(theta, f[0], g[0], numIters, k) + # Apply the transition matrix + # [u v] [d] + # [q r]/2ᵏ.[e] mod M + t.matVecMul_shr_k_mod_M(d, e, k, M, invMod2powK) + # [u v] [f] + # [q r]/2ᵏ.[g] + t.matVecMul_shr_k(f, g, k) + + d.canonicalize(signMask = f.isNegMask(), M) + a = d + +func invmod*( + r: var Limbs, a: Limbs, + F, M: Limbs, bits: static int) = + ## Compute the scaled modular inverse of ``a`` modulo M + ## r ≡ F.a⁻¹ (mod M) + ## + ## M MUST be odd, M does not need to be prime. + ## ``a`` MUST be less than M. + const Excess = 2 + const k = WordBitwidth - Excess + const NumUnsatWords = (bits + k - 1) div k + + # Convert values to unsaturated repr + var m2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess] + var factor {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess] + m2.fromPackedRepr(M) + factor.fromPackedRepr(F) + let m0invK = SecretWord invMod2powK(BaseType M[0], k) + + var a2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess] + a2.fromPackedRepr(a) + a2.invmodImpl(factor, m2, m0invK, k, bits) + r.fromUnsatRepr(a2) + +func invmod*( + r: var Limbs, a: Limbs, + F, M: static Limbs, bits: static int) = + ## Compute the scaled modular inverse of ``a`` modulo M + ## r ≡ F.a⁻¹ (mod M) + ## + ## with F and M known at compile-time + ## + ## M MUST be odd, M does not need to be prime. + ## ``a`` MUST be less than M. + + const Excess = 2 + const k = WordBitwidth - Excess + const NumUnsatWords = (bits + k - 1) div k + + # Convert values to unsaturated repr + const m2 = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(M) + const factor = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(F) + const m0invK = SecretWord invMod2powK(BaseType M[0], k) + + var a2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess] + a2.fromPackedRepr(a) + a2.invmodImpl(factor, m2, m0invK, k, bits) + r.fromUnsatRepr(a2) \ No newline at end of file diff --git a/constantine/arithmetic/limbs_unsaturated.nim b/constantine/arithmetic/limbs_unsaturated.nim new file mode 100644 index 0000000..3920545 --- /dev/null +++ b/constantine/arithmetic/limbs_unsaturated.nim @@ -0,0 +1,385 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/common, + ../primitives + +type + SignedSecretWord* = distinct SecretWord + + LimbsUnsaturated*[N, Excess: static int] = object + ## An array of signed secret words + ## with each word having their top Excess bits unused between function calls + ## This allows efficient handling of carries and signs without intrinsics or assembly. + # + # Comparison with packed representation: + # + # Packed representation + # - pro: uses less words (important for multiplication which is O(n²) with n the number of words) + # - pro: less "mental overhead" to keep track (clear/shift) excess bits + # - con: addition-with-carry require compiler intrinsics or inline assembly. + # Compiler codegen may be incredibly bad (GCC). + # - con: addition-with-carry cannot use instruction-level-parallelism or SIMD vectorization + # due to the dependency chain. + # - con: on x86 addition-with-carry dependency chain has high latency + # + # Unsaturated representation: + # - pro: portable, addition-with-carry can be implemented without compiler support or inline assembly + # - con: "mental overhead" to keep track of used free bits in algorithms, shifts, multiplication, division, ... + # - con: substraction-with-borrow internally requires signed integers + # or immediate canonicalization + # or adding a multiple of the modulus to avoid underflows + # - con: multiplication requires immediate canonicalization + # - con: require arithmetic right-shift if using signed integers + # - con: more memory usage + # - con: may be slower due to using more words + # - con: multiple representation of integers + # - pro: can do many additions before having to canonicalize + # + # Constantine used to have an unsaturated representation by default + # before refactor PR: https://github.com/mratsim/constantine/pull/17 + # Unsaturated representation has been advocated (in particular for generalized mersenne primes) + # by: + # - https://bearssl.org/bigint.html + # - https://cryptojedi.org/peter/data/pairing-20131122.pdf + # - https://milagro.apache.org/docs/amcl-overview/#representation + # - https://eprint.iacr.org/2017/437 + # + # In practice: + # - Addition is not a bottleneck in cryptography, multiplication is. + # Naive asymptotic analysis, makes it 4x~6x more costly + # on 256-bit (4-limbs) inputs to 384-bit (6-limbs) inputs + # - CPUs have significantly reduce carry latencies. + # https://www.agner.org/optimize/instruction_tables.pdf + # - Nehalem (Intel 2008) used to have 0.33 ADD (reg, reg), 1 ADD (reg, mem) and 2 ADC reciprcal throughput (ADD 6x faster than ADC) + # - Ice Lake (Intel 2019) is 0.25 ADD (reg, reg), 0.5 ADD (reg, mem) and 1 ADC reciprocal throughput + # - Zen 3 (AMD 2020) is 0.25 ADD (reg, reg/mem) and 1 ADC reciprocal throughput + # - Introducing inline assembly is easier than dealing with signed integers + # - Inline assembly is required to ensure constant-time properties + # as compilers get smarters + # - Canonicalization has an overhead, it just delays (and potentially batches) paying for the dependencing chain. + words*: array[N, SignedSecretWord] + +# ############################################################ +# +# Accessors +# +# ############################################################ + +template `[]`*(a: LimbsUnsaturated, idx: int): SignedSecretWord = + a.words[idx] + +template `[]=`*(a: LimbsUnsaturated, idx: int, val: SignedSecretWord) = + a.words[idx] = val + +# ############################################################ +# +# Conversion +# +# ############################################################ + +# {.push checks:off.} # Avoid IndexDefect and Int overflows +func fromPackedRepr*[LU, E, LP: static int]( + dst: var LimbsUnsaturated[LU, E], + src: Limbs[LP]) = + ## Converts from an packed representation to an unsaturated representation + const UnsatBitWidth = WordBitWidth-E + const Max = MaxWord shr E + + static: + # Destination and Source size are consistent + doAssert (LU-1) * UnsatBitWidth <= WordBitwidth * LP, block: + "\n (LU-1) * UnsatBitWidth: " & $(LU-1) & " * " & $UnsatBitWidth & " = " & $((LU-1) * UnsatBitWidth) & + "\n WordBitwidth * LP: " & $WordBitwidth & " * " & $LP & " = " & $(WordBitwidth * LP) + + var + srcIdx, dstIdx = 0 + hi, lo = Zero + accLen = 0 + + while srcIdx < src.len: + # Form a 2-word buffer (hi, lo) + let w = if src_idx < src.len: src[srcIdx] + else: Zero + inc srcIdx + + if accLen == 0: + lo = w and Max + hi = w shr UnsatBitWidth + else: + lo = (lo or (w shl accLen)) and Max + hi = w shr (UnsatBitWidth - accLen) + + accLen += WordBitWidth + + while accLen >= UnsatBitWidth: + let s = min(accLen, UnsatBitWidth) + + dst[dstIdx] = SignedSecretWord lo + dstIdx += 1 + accLen -= s + lo = ((lo shr s) or (hi shl (UnsatBitWidth - s))) and Max + hi = hi shr s + + if dstIdx < dst.words.len: + dst[dstIdx] = SignedSecretWord lo + + for i in dst_idx + 1 ..< dst.words.len: + dst[i] = SignedSecretWord Zero + +func fromPackedRepr*(T: type LimbsUnsaturated, src: Limbs): T = + ## Converts from an packed representation to an unsaturated representation + result.fromPackedRepr(src) + +func fromUnsatRepr*[LU, E, LP: static int]( + dst: var Limbs[LP], + src: LimbsUnsaturated[LU, E]) = + ## Converts from an packed representation to an unsaturated representation + const UnsatBitWidth = WordBitWidth-E + + static: + # Destination and Source size are consistent + doAssert (LU-1) * UnsatBitWidth <= WordBitwidth * LP, block: + "\n (LU-1) * UnsatBitWidth: " & $(LU-1) & " * " & $UnsatBitWidth & " = " & $((LU-1) * UnsatBitWidth) & + "\n WordBitwidth * LP: " & $WordBitwidth & " * " & $LP & " = " & $(WordBitwidth * LP) + + var + srcIdx, dstIdx = 0 + acc = Zero + accLen = 0 + + for src_idx in 0 ..< src.words.len: + let nextWord = SecretWord src[srcIdx] + + # buffer reads + acc = acc or (nextWord shl accLen) + accLen += UnsatBitWidth + + # if full, dump + if accLen >= WordBitWidth: + dst[dstIdx] = acc + inc dstIdx + accLen -= WordBitWidth + acc = nextWord shr (UnsatBitWidth - accLen) + + if dst_idx < dst.len: + dst[dst_idx] = acc + + for i in dst_idx + 1 ..< dst.len: + dst[i] = Zero + +# {.pop.} + +# ############################################################ +# +# Initialization +# +# ############################################################ + +func setZero*(a: var LimbsUnsaturated) = + ## Set ``a`` to 0 + for i in 0 ..< a.words.len: + a[i] = SignedSecretWord(0) + +func setOne*(a: var LimbsUnsaturated) = + ## Set ``a`` to 1 + a[0] = SignedSecretWord(1) + for i in 1 ..< a.words.len: + a[i] = SignedSecretWord(0) + +# ############################################################ +# +# Arithmetic +# +# ############################################################ + +# Workaround bug +func `xor`*(x,y: SecretWord): SecretWord {.inline.} = + # For some reason the template defined in constant_time.nim isn't found + SecretWord(x.BaseType xor y.BaseType) + +when sizeof(int) == 8 and not defined(Constantine32): + type + SignedBaseType* = int64 +else: + type + SignedBaseType* = int32 + +template fmap(x: SignedSecretWord, op: untyped, y: SignedSecretWord): SignedSecretWord = + ## Unwrap x and y from their distinct type + ## Apply op, and rewrap them + SignedSecretWord(op(SecretWord(x), SecretWord(y))) + +template fmapAsgn(x: SignedSecretWord, op: untyped, y: SignedSecretWord) = + ## Unwrap x and y from their distinct type + ## Apply assignment op, and rewrap them + op(SecretWord(x), SecretWord(y)) + +template `and`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `and`, y) +template `or`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `or`, y) +template `xor`*(x, y: SignedSecretWord): SignedSecretWord = SignedSecretWord(BaseType(x) xor BaseType(y)) +template `not`*(x: SignedSecretWord): SignedSecretWord = SignedSecretWord(not SecretWord(x)) +template `+`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `+`, y) +template `+=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `+=`, y) +template `-`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `-`, y) +template `-=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `-=`, y) + +template `-`*(x: SignedSecretWord): SignedSecretWord = + # We don't use Nim signed integers to avoid range checks + SignedSecretWord(-SecretWord(x)) + +template `*`*(x, y: SignedSecretWord): SignedSecretWord = + # Warning ⚠️ : We assume that mul hardware multiplication is constant time + # but this is not always true, especially on ARMv7 and ARMv9 + fmap(x, `*`, y) + +# shifts +template ashr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord = + ## Arithmetic right shift + # We need to cast to Nim ints without Nim checks + SignedSecretWord(cast[SignedBaseType](x).ashr(y)) + +template lshr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord = + ## Logical right shift + SignedSecretWord(SecretWord(x) shr y) + +template lshl*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord = + ## Logical left shift + SignedSecretWord(SecretWord(x) shl y) + +# ############################################################ +# +# Hardened Boolean primitives +# +# ############################################################ + +template `==`*(x, y: SignedSecretWord): SecretBool = + SecretWord(x) == SecretWord(y) + +# ############################################################ +# +# Conditional arithmetic +# +# ############################################################ + +# SignedSecretWord +# ---------------- + +func isNegMask*(a: SignedSecretWord): SignedSecretWord {.inline.} = + ## Produce the -1 mask if a is negative + ## and 0 otherwise + a.ashr(WordBitWidth-1) + +func isOddMask*(a: SignedSecretWord): SignedSecretWord {.inline.} = + ## Produce the -1 mask if a is odd + ## and 0 otherwise + -(a and SignedSecretWord(1)) + +func cneg*( + a: SignedSecretWord, + mask: SignedSecretWord): SignedSecretWord {.inline.} = + ## Conditionally negate `a` + ## mask must be 0 (0x00000...0000) (no negation) + ## or -1 (0xFFFF...FFFF) (negation) + (a xor mask) - mask + +func cadd*( + a: var SignedSecretWord, + b: SignedSecretWord, + mask: SignedSecretWord) {.inline.} = + ## Conditionally add `b` to `a` + ## mask must be 0 (0x00000...0000) (no addition) + ## or -1 (0xFFFF...FFFF) (addition) + a = a + (b and mask) + +func csub*( + a: var SignedSecretWord, + b: SignedSecretWord, + mask: SignedSecretWord) {.inline.} = + ## Conditionally substract `b` from `a` + ## mask must be 0 (0x00000...0000) (no substraction) + ## or -1 (0xFFFF...FFFF) (substraction) + a = a - (b and mask) + +# UnsaturatedLimbs +# ---------------- + +func isNegMask*(a: LimbsUnsaturated): SignedSecretWord {.inline.} = + ## Produce the -1 mask if a is negative + ## and 0 otherwise + a[a.words.len-1].ashr(WordBitWidth - a.Excess + 1) + +func cneg*( + a: var LimbsUnsaturated, + mask: SignedSecretWord) {.inline.} = + ## Conditionally negate `a` + ## mask must be 0 (0x00000...0000) (no negation) + ## or -1 (0xFFFF...FFFF) (negation) + ## + ## Carry propagation is deferred + for i in 0 ..< a.words.len: + a[i] = a[i].cneg(mask) + +func cadd*( + a: var LimbsUnsaturated, + b: LimbsUnsaturated, + mask: SignedSecretWord) {.inline.} = + ## Conditionally add `b` to `a` + ## mask must be 0 (0x00000...0000) (no addition) + ## or -1 (0xFFFF...FFFF) (addition) + ## + ## Carry propagation is deferred + for i in 0 ..< a.words.len: + a[i].cadd(b[i], mask) + +# ############################################################ +# +# Double-Width signed arithmetic +# +# ############################################################ + +type DSWord* = object + lo*, hi*: SignedSecretWord + +func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}= + ## Signed accumulated multiplication + ## (_, hi, lo) += a*b + ## This assumes no overflowing + var UV: array[2, SecretWord] + var carry: Carry + smul(UV[1], UV[0], SecretWord a, SecretWord b) + addC(carry, UV[0], UV[0], SecretWord r.lo, Carry(0)) + addC(carry, UV[1], UV[1], SecretWord r.hi, carry) + + r.lo = SignedSecretWord UV[0] + r.hi = SignedSecretWord UV[1] + +func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}= + ## Accumulated linear combination + ## (_, hi, lo) += a*u + b*v + ## This assumes no overflowing + var carry: Carry + var x1, x0, y1, y0: SecretWord + smul(x1, x0, SecretWord a, SecretWord u) + addC(carry, x0, x0, SecretWord r.lo, Carry(0)) + addC(carry, x1, x1, SecretWord r.hi, carry) + smul(y1, y0, SecretWord b, SecretWord v) + addC(carry, x0, x0, y0, Carry(0)) + addC(carry, x1, x1, y1, carry) + + r.lo = SignedSecretWord x0 + r.hi = SignedSecretWord x1 + +func ashr*( + r: var DSWord, + k: SomeInteger) {.inline.} = + ## Arithmetic right-shift of a double-word + ## This does not normalize the excess bits + r.lo = r.lo.lshr(k) or r.hi.lshl(WordBitWidth - k) + r.hi = r.hi.ashr(k) \ No newline at end of file diff --git a/constantine/curves/bls12_377_precomputed_params.nim b/constantine/curves/bls12_377_g2_params.nim similarity index 100% rename from constantine/curves/bls12_377_precomputed_params.nim rename to constantine/curves/bls12_377_g2_params.nim diff --git a/constantine/curves/bls12_377_inversion.nim b/constantine/curves/bls12_377_inversion.nim deleted file mode 100644 index c4c682f..0000000 --- a/constantine/curves/bls12_377_inversion.nim +++ /dev/null @@ -1,204 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BLS12-377 -# -# ############################################################ - -func inv_addchain*(r: var Fp[BLS12_377], a: Fp[BLS12_377]) = - let a = a # ensure a.inv_addchain(a) is OK - - var - x10 {.noInit.}: Fp[BLS12_377] - x11 {.noInit.}: Fp[BLS12_377] - x100 {.noInit.}: Fp[BLS12_377] - x101 {.noInit.}: Fp[BLS12_377] - x111 {.noInit.}: Fp[BLS12_377] - x1001 {.noInit.}: Fp[BLS12_377] - x1011 {.noInit.}: Fp[BLS12_377] - x1111 {.noInit.}: Fp[BLS12_377] - x10001 {.noInit.}: Fp[BLS12_377] - x10011 {.noInit.}: Fp[BLS12_377] - x10111 {.noInit.}: Fp[BLS12_377] - x11011 {.noInit.}: Fp[BLS12_377] - x11101 {.noInit.}: Fp[BLS12_377] - x11111 {.noInit.}: Fp[BLS12_377] - x110100 {.noInit.}: Fp[BLS12_377] - x11010000 {.noInit.}: Fp[BLS12_377] - x11010111 {.noInit.}: Fp[BLS12_377] - - x10 .square(a) - x11 .prod(a, x10) - x100 .prod(a, x11) - x101 .prod(a, x100) - x111 .prod(x10, x101) - x1001 .prod(x10, x111) - x1011 .prod(x10, x1001) - x1111 .prod(x100, x1011) - x10001 .prod(x10, x1111) - x10011 .prod(x10, x10001) - x10111 .prod(x100, x10011) - x11011 .prod(x100, x10111) - x11101 .prod(x10, x11011) - x11111 .prod(x10, x11101) - x110100 .prod(x10111, x11101) - x11010000 .square_repeated(x110100, 2) - x11010111 .prod(x111, x11010000) - # 18 operations - - # TODO: we can accumulate in a partially reduced - # doubled-size `r` to avoid the final substractions. - # and only reduce at the end. - # This requires the number of op to be less than log2(p) == 381 - - # 18 + 18 = 36 operations - r.square_repeated(x11010111, 8) - r *= x11101 - r.square_repeated(7) - r *= x10001 - r.square() - - # 36 + 14 = 50 operations - r *= a - r.square_repeated(9) - r *= x10111 - r.square_repeated(2) - r *= x11 - - # 50 + 21 = 71 operations - r.square_repeated(6) - r *= x101 - r.square_repeated(4) - r *= a - r.square_repeated(9) - - # 71 + 13 = 84 operations - r *= x11101 - r.square_repeated(5) - r *= x1011 - r.square_repeated(5) - r *= x11 - - # 84 + 21 = 105 operations - r.square_repeated(8) - r *= x11101 - r.square() - r *= a - r.square_repeated(10) - - # 105 + 20 = 125 operations - r *= x10111 - r.square_repeated(12) - r *= x11011 - r.square_repeated(5) - r *= x101 - - # 125 + 22 = 147 operations - r.square_repeated(7) - r *= x101 - r.square_repeated(6) - r *= x1001 - r.square_repeated(7) - - # 147 + 11 = 158 operations - r *= x11101 - r.square_repeated(5) - r *= x10001 - r.square_repeated(3) - r *= x101 - - # 158 + 23 = 181 operations - r.square_repeated(8) - r *= x10001 - r.square_repeated(6) - r *= x11011 - r.square_repeated(7) - - # 181 + 19 = 200 operations - r *= x11111 - r.square_repeated(4) - r *= x11 - r.square_repeated(12) - r *= x1111 - - # 200 + 19 = 219 operations - r.square_repeated(4) - r *= x101 - r.square_repeated(8) - r *= x10011 - r.square_repeated(5) - - # 219 + 13 = 232 operations - r *= x10001 - r.square_repeated(3) - r *= x111 - r.square_repeated(7) - r *= x1111 - - # 232 + 22 = 254 operations - r.square_repeated(5) - r *= x1111 - r.square_repeated(7) - r *= x11011 - r.square_repeated(8) - - # 254 + 13 = 269 operations - r *= x10001 - r.square_repeated(6) - r *= x11111 - r.square_repeated(6) - r *= x11101 - - # 269 + 35 = 304 operations - r.square_repeated(9) - r *= x1001 - r.square_repeated(5) - r *= x1001 - r.square_repeated(19) - - # 304 + 17 = 321 operations - r *= x10111 - r.square_repeated(8) - r *= x1011 - r.square_repeated(6) - r *= x10111 - - # 321 + 16 = 337 operations - r.square_repeated(4) - r *= x101 - r.square_repeated(4) - r *= a - r.square_repeated(6) - - # 337 + 29 = 376 operations - r *= x11 - r.square_repeated(29) - r *= a - r.square_repeated(7) - r *= x101 - - # 376 + 16 = 392 operations - r.square_repeated(9) - r *= x10001 - r.square_repeated(6) - - # 392 + 8*6 = 440 operations - for _ in 0 ..< 8: - r *= x11111 - r.square_repeated(5) - - r *= x11111 - r.square() - r *= a - # Total 443 operations diff --git a/constantine/curves/bls12_381_precomputed_params.nim b/constantine/curves/bls12_381_g2_params.nim similarity index 100% rename from constantine/curves/bls12_381_precomputed_params.nim rename to constantine/curves/bls12_381_g2_params.nim diff --git a/constantine/curves/bls12_381_inversion.nim b/constantine/curves/bls12_381_inversion.nim deleted file mode 100644 index 590e627..0000000 --- a/constantine/curves/bls12_381_inversion.nim +++ /dev/null @@ -1,220 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BLS12-381 -# -# ############################################################ - -func inv_addchain*(r: var Fp[BLS12_381], a: Fp[BLS12_381]) = - var - x10 {.noinit.}: Fp[BLS12_381] - x100 {.noinit.}: Fp[BLS12_381] - x1000 {.noinit.}: Fp[BLS12_381] - x1001 {.noinit.}: Fp[BLS12_381] - x1011 {.noinit.}: Fp[BLS12_381] - x1101 {.noinit.}: Fp[BLS12_381] - x10001 {.noinit.}: Fp[BLS12_381] - x10100 {.noinit.}: Fp[BLS12_381] - x11001 {.noinit.}: Fp[BLS12_381] - x11010 {.noinit.}: Fp[BLS12_381] - x110100 {.noinit.}: Fp[BLS12_381] - x110110 {.noinit.}: Fp[BLS12_381] - x110111 {.noinit.}: Fp[BLS12_381] - x1001101 {.noinit.}: Fp[BLS12_381] - x1001111 {.noinit.}: Fp[BLS12_381] - x1010101 {.noinit.}: Fp[BLS12_381] - x1011101 {.noinit.}: Fp[BLS12_381] - x1100111 {.noinit.}: Fp[BLS12_381] - x1101001 {.noinit.}: Fp[BLS12_381] - x1110111 {.noinit.}: Fp[BLS12_381] - x1111011 {.noinit.}: Fp[BLS12_381] - x10001001 {.noinit.}: Fp[BLS12_381] - x10010101 {.noinit.}: Fp[BLS12_381] - x10010111 {.noinit.}: Fp[BLS12_381] - x10101001 {.noinit.}: Fp[BLS12_381] - x10110001 {.noinit.}: Fp[BLS12_381] - x10111111 {.noinit.}: Fp[BLS12_381] - x11000011 {.noinit.}: Fp[BLS12_381] - x11010000 {.noinit.}: Fp[BLS12_381] - x11010111 {.noinit.}: Fp[BLS12_381] - x11100001 {.noinit.}: Fp[BLS12_381] - x11100101 {.noinit.}: Fp[BLS12_381] - x11101011 {.noinit.}: Fp[BLS12_381] - x11110101 {.noinit.}: Fp[BLS12_381] - x11111111 {.noinit.}: Fp[BLS12_381] - - x10 .square(a) - x100 .square(x10) - x1000 .square(x100) - x1001 .prod(a, x1000) - x1011 .prod(x10, x1001) - x1101 .prod(x10, x1011) - x10001 .prod(x100, x1101) - x10100 .prod(x1001, x1011) - x11001 .prod(x1000, x10001) - x11010 .prod(a, x11001) - x110100 .square(x11010) - x110110 .prod(x10, x110100) - x110111 .prod(a, x110110) - x1001101 .prod(x11001, x110100) - x1001111 .prod(x10, x1001101) - x1010101 .prod(x1000, x1001101) - x1011101 .prod(x1000, x1010101) - x1100111 .prod(x11010, x1001101) - x1101001 .prod(x10, x1100111) - x1110111 .prod(x11010, x1011101) - x1111011 .prod(x100, x1110111) - x10001001 .prod(x110100, x1010101) - x10010101 .prod(x11010, x1111011) - x10010111 .prod(x10, x10010101) - x10101001 .prod(x10100, x10010101) - x10110001 .prod(x1000, x10101001) - x10111111 .prod(x110110, x10001001) - x11000011 .prod(x100, x10111111) - x11010000 .prod(x1101, x11000011) - x11010111 .prod(x10100, x11000011) - x11100001 .prod(x10001, x11010000) - x11100101 .prod(x100, x11100001) - x11101011 .prod(x10100, x11010111) - x11110101 .prod(x10100, x11100001) - x11111111 .prod(x10100, x11101011) - # 35 operations - - # TODO: we can accumulate in a partially reduced - # doubled-size `r` to avoid the final substractions. - # and only reduce at the end. - # This requires the number of op to be less than log2(p) == 381 - - # 35 + 22 = 57 operations - r.prod(x10111111, x11100001) - r.square_repeated(8) - r *= x10001 - r.square_repeated(11) - r *= x11110101 - - # 57 + 28 = 85 operations - r.square_repeated(11) - r *= x11100101 - r.square_repeated(8) - r *= x11111111 - r.square_repeated(7) - - # 85 + 22 = 107 operations - r *= x1001101 - r.square_repeated(9) - r *= x1101001 - r.square_repeated(10) - r *= x10110001 - - # 107+24 = 131 operations - r.square_repeated(7) - r *= x1011101 - r.square_repeated(9) - r *= x1111011 - r.square_repeated(6) - - # 131+23 = 154 operations - r *= x11001 - r.square_repeated(11) - r *= x1101001 - r.square_repeated(9) - r *= x11101011 - - # 154+28 = 182 operations - r.square_repeated(10) - r *= x11010111 - r.square_repeated(6) - r *= x11001 - r.square_repeated(10) - - # 182+23 = 205 operations - r *= x1110111 - r.square_repeated(9) - r *= x10010111 - r.square_repeated(11) - r *= x1001111 - - # 205+30 = 235 operations - r.square_repeated(10) - r *= x11100001 - r.square_repeated(9) - r *= x10001001 - r.square_repeated(9) - - # 235+21 = 256 operations - r *= x10111111 - r.square_repeated(8) - r *= x1100111 - r.square_repeated(10) - r *= x11000011 - - # 256+28 = 284 operations - r.square_repeated(9) - r *= x10010101 - r.square_repeated(12) - r *= x1111011 - r.square_repeated(5) - - # 284 + 21 = 305 operations - r *= x1011 - r.square_repeated(11) - r *= x1111011 - r.square_repeated(7) - r *= x1001 - - # 305+32 = 337 operations - r.square_repeated(13) - r *= x11110101 - r.square_repeated(9) - r *= x10111111 - r.square_repeated(8) - - # 337+22 = 359 operations - r *= x11111111 - r.square_repeated(8) - r *= x11101011 - r.square_repeated(11) - r *= x10101001 - - # 359+24 = 383 operations - r.square_repeated(8) - r *= x11111111 - r.square_repeated(8) - r *= x11111111 - r.square_repeated(6) - - # 383+22 = 405 operations - r *= x110111 - r.square_repeated(10) - r *= x11111111 - r.square_repeated(9) - r *= x11111111 - - # 405+26 = 431 operations - r.square_repeated(8) - r *= x11111111 - r.square_repeated(8) - r *= x11111111 - r.square_repeated(8) - - # 431+19 = 450 operations - r *= x11111111 - r.square_repeated(7) - r *= x1010101 - r.square_repeated(9) - r *= x10101001 - - # Total 450 operations: - # - 74 multiplications - # - 376 squarings diff --git a/constantine/curves/bn254_nogami_precomputed_params.nim b/constantine/curves/bn254_nogami_g2_params.nim similarity index 100% rename from constantine/curves/bn254_nogami_precomputed_params.nim rename to constantine/curves/bn254_nogami_g2_params.nim diff --git a/constantine/curves/bn254_nogami_inversion.nim b/constantine/curves/bn254_nogami_inversion.nim deleted file mode 100644 index b43925d..0000000 --- a/constantine/curves/bn254_nogami_inversion.nim +++ /dev/null @@ -1,98 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BN254-Nogami -# -# ############################################################ - -func inv_addchain*(r: var Fp[BN254_Nogami], a: Fp[BN254_Nogami]) = - var - x100 {.noInit.}: Fp[BN254_Nogami] - x1000 {.noInit.}: Fp[BN254_Nogami] - x1100 {.noInit.}: Fp[BN254_Nogami] - x1101 {.noInit.}: Fp[BN254_Nogami] - x10001 {.noInit.}: Fp[BN254_Nogami] - x100010 {.noInit.}: Fp[BN254_Nogami] - x1000100 {.noInit.}: Fp[BN254_Nogami] - x1010101 {.noInit.}: Fp[BN254_Nogami] - - x100 .square_repeated(a, 2) - x1000 .square(x100) - x1100 .prod(x100, x1000) - x1101 .prod(a, x1100) - x10001 .prod(x100, x1101) - x100010 .square(x10001) - x1000100 .square(x100010) - x1010101 .prod(x10001, x1000100) - # 9 operations - - var - r13 {.noInit.}: Fp[BN254_Nogami] - r17 {.noInit.}: Fp[BN254_Nogami] - r18 {.noInit.}: Fp[BN254_Nogami] - r23 {.noInit.}: Fp[BN254_Nogami] - r26 {.noInit.}: Fp[BN254_Nogami] - r27 {.noInit.}: Fp[BN254_Nogami] - r28 {.noInit.}: Fp[BN254_Nogami] - r36 {.noInit.}: Fp[BN254_Nogami] - r38 {.noInit.}: Fp[BN254_Nogami] - r39 {.noInit.}: Fp[BN254_Nogami] - r40 {.noInit.}: Fp[BN254_Nogami] - - r13.square_repeated(x1010101, 2) - r13 *= x100010 - r13 *= x1101 - - r17.square(r13) - r17 *= r13 - r17.square_repeated(2) - - r18.prod(r13, r17) - - r23.square_repeated(r18, 3) - r23 *= r18 - r23 *= r17 - - r26.square_repeated(r23, 2) - r26 *= r23 - - r27.prod(r23, r26) - r28.prod(r26, r27) - - r36.square(r28) - r36 *= r28 - r36.square_repeated(2) - r36 *= r28 - r36.square_repeated(3) - - r38.prod(r28, r36) - r38 *= r27 - r39.square(r38) - r40.prod(r38, r39) - - r.prod(r39, r40) - r.square_repeated(3) - r *= r40 - r.square_repeated(55) - r *= r38 - - r.square_repeated(55) - r *= r28 - r.square_repeated(56) - r *= r18 - r.square_repeated(56) - - r *= x10001 - - # Total 271 operations diff --git a/constantine/curves/bn254_snarks_precomputed_params.nim b/constantine/curves/bn254_snarks_g2_params.nim similarity index 100% rename from constantine/curves/bn254_snarks_precomputed_params.nim rename to constantine/curves/bn254_snarks_g2_params.nim diff --git a/constantine/curves/bn254_snarks_inversion.nim b/constantine/curves/bn254_snarks_inversion.nim deleted file mode 100644 index 8409259..0000000 --- a/constantine/curves/bn254_snarks_inversion.nim +++ /dev/null @@ -1,158 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BN254-Snarks -# -# ############################################################ - -func inv_addchain*(r: var Fp[BN254_Snarks], a: Fp[BN254_Snarks]) = - var - x10 {.noInit.}: Fp[BN254_Snarks] - x11 {.noInit.}: Fp[BN254_Snarks] - x101 {.noInit.}: Fp[BN254_Snarks] - x110 {.noInit.}: Fp[BN254_Snarks] - x1000 {.noInit.}: Fp[BN254_Snarks] - x1101 {.noInit.}: Fp[BN254_Snarks] - x10010 {.noInit.}: Fp[BN254_Snarks] - x10011 {.noInit.}: Fp[BN254_Snarks] - x10100 {.noInit.}: Fp[BN254_Snarks] - x10111 {.noInit.}: Fp[BN254_Snarks] - x11100 {.noInit.}: Fp[BN254_Snarks] - x100000 {.noInit.}: Fp[BN254_Snarks] - x100011 {.noInit.}: Fp[BN254_Snarks] - x101011 {.noInit.}: Fp[BN254_Snarks] - x101111 {.noInit.}: Fp[BN254_Snarks] - x1000001 {.noInit.}: Fp[BN254_Snarks] - x1010011 {.noInit.}: Fp[BN254_Snarks] - x1011011 {.noInit.}: Fp[BN254_Snarks] - x1100001 {.noInit.}: Fp[BN254_Snarks] - x1110101 {.noInit.}: Fp[BN254_Snarks] - x10010001 {.noInit.}: Fp[BN254_Snarks] - x10010101 {.noInit.}: Fp[BN254_Snarks] - x10110101 {.noInit.}: Fp[BN254_Snarks] - x10111011 {.noInit.}: Fp[BN254_Snarks] - x11000001 {.noInit.}: Fp[BN254_Snarks] - x11000011 {.noInit.}: Fp[BN254_Snarks] - x11010011 {.noInit.}: Fp[BN254_Snarks] - x11100001 {.noInit.}: Fp[BN254_Snarks] - x11100011 {.noInit.}: Fp[BN254_Snarks] - x11100111 {.noInit.}: Fp[BN254_Snarks] - - x10 .square(a) - x11 .prod(x10, a) - x101 .prod(x10, x11) - x110 .prod(x101, a) - x1000 .prod(x10, x110) - x1101 .prod(x101, x1000) - x10010 .prod(x101, x1101) - x10011 .prod(x10010, a) - x10100 .prod(x10011, a) - x10111 .prod(x11, x10100) - x11100 .prod(x101, x10111) - x100000 .prod(x1101, x10011) - x100011 .prod(x11, x100000) - x101011 .prod(x1000, x100011) - x101111 .prod(x10011, x11100) - x1000001 .prod(x10010, x101111) - x1010011 .prod(x10010, x1000001) - x1011011 .prod(x1000, x1010011) - x1100001 .prod(x110, x1011011) - x1110101 .prod(x10100, x1100001) - x10010001 .prod(x11100, x1110101) - x10010101 .prod(x100000, x1110101) - x10110101 .prod(x100000, x10010101) - x10111011 .prod(x110, x10110101) - x11000001 .prod(x110, x10111011) - x11000011 .prod(x10, x11000001) - x11010011 .prod(x10010, x11000001) - x11100001 .prod(x100000, x11000001) - x11100011 .prod(x10, x11100001) - x11100111 .prod(x110, x11100001) # 30 operations - - # 30 + 27 = 57 operations - r.square(x11000001) - r.square_repeated(7) - r *= x10010001 - r.square_repeated(10) - r *= x11100111 - r.square_repeated(7) - - # 57 + 19 = 76 operations - r *= x10111 - r.square_repeated(9) - r *= x10011 - r.square_repeated(7) - r *= x1101 - - # 76 + 33 = 109 operations - r.square_repeated(14) - r *= x1010011 - r.square_repeated(9) - r *= x11100001 - r.square_repeated(8) - - # 109 + 18 = 127 operations - r *= x1000001 - r.square_repeated(10) - r *= x1011011 - r.square_repeated(5) - r *= x1101 - - # 127 + 34 = 161 operations - r.square_repeated(8) - r *= x11 - r.square_repeated(12) - r *= x101011 - r.square_repeated(12) - - # 161 + 25 = 186 operations - r *= x10111011 - r.square_repeated(8) - r *= x101111 - r.square_repeated(14) - r *= x10110101 - - # 186 + 28 = 214 - r.square_repeated(9) - r *= x10010001 - r.square_repeated(5) - r *= x1101 - r.square_repeated(12) - - # 214 + 22 = 236 - r *= x11100011 - r.square_repeated(8) - r *= x10010101 - r.square_repeated(11) - r *= x11010011 - - # 236 + 32 = 268 - r.square_repeated(7) - r *= x1100001 - r.square_repeated(11) - r *= x100011 - r.square_repeated(12) - - # 268 + 20 = 288 - r *= x1011011 - r.square_repeated(9) - r *= x11000011 - r.square_repeated(8) - r *= x11100111 - - # 288 + 15 = 303 - r.square_repeated(7) - r *= x1110101 - r.square_repeated(6) - r *= x101 diff --git a/constantine/curves/bw6_761_precomputed_params.nim b/constantine/curves/bw6_761_g2_params.nim similarity index 100% rename from constantine/curves/bw6_761_precomputed_params.nim rename to constantine/curves/bw6_761_g2_params.nim diff --git a/constantine/curves/bw6_761_inversion.nim b/constantine/curves/bw6_761_inversion.nim deleted file mode 100644 index b62c05b..0000000 --- a/constantine/curves/bw6_761_inversion.nim +++ /dev/null @@ -1,376 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BW6-761 -# -# ############################################################ - -func inv_addchain*(r: var Fp[BW6_761], a: Fp[BW6_761]) = - let a = a # ensure a.inv_addchain(a) is OK - - var - x10 {.noInit.}: Fp[BW6_761] - x11 {.noInit.}: Fp[BW6_761] - x101 {.noInit.}: Fp[BW6_761] - x111 {.noInit.}: Fp[BW6_761] - x1001 {.noInit.}: Fp[BW6_761] - x1011 {.noInit.}: Fp[BW6_761] - x1101 {.noInit.}: Fp[BW6_761] - x1111 {.noInit.}: Fp[BW6_761] - x10001 {.noInit.}: Fp[BW6_761] - x10010 {.noInit.}: Fp[BW6_761] - x10011 {.noInit.}: Fp[BW6_761] - x10111 {.noInit.}: Fp[BW6_761] - x11001 {.noInit.}: Fp[BW6_761] - x11011 {.noInit.}: Fp[BW6_761] - x11101 {.noInit.}: Fp[BW6_761] - x11111 {.noInit.}: Fp[BW6_761] - x100001 {.noInit.}: Fp[BW6_761] - x100011 {.noInit.}: Fp[BW6_761] - x100101 {.noInit.}: Fp[BW6_761] - x100111 {.noInit.}: Fp[BW6_761] - x101001 {.noInit.}: Fp[BW6_761] - x101011 {.noInit.}: Fp[BW6_761] - x101101 {.noInit.}: Fp[BW6_761] - x101111 {.noInit.}: Fp[BW6_761] - x110001 {.noInit.}: Fp[BW6_761] - x110011 {.noInit.}: Fp[BW6_761] - x110101 {.noInit.}: Fp[BW6_761] - x110111 {.noInit.}: Fp[BW6_761] - x111001 {.noInit.}: Fp[BW6_761] - x111011 {.noInit.}: Fp[BW6_761] - x111101 {.noInit.}: Fp[BW6_761] - x1111010 {.noInit.}: Fp[BW6_761] - x1111111 {.noInit.}: Fp[BW6_761] - x11111110 {.noInit.}: Fp[BW6_761] - x11111111 {.noInit.}: Fp[BW6_761] - - x10 .square(a) - x11 .prod(a, x10) - x101 .prod(x10, x11) - x111 .prod(x10, x101) - x1001 .prod(x10, x111) - x1011 .prod(x10, x1001) - x1101 .prod(x10, x1011) - x1111 .prod(x10, x1101) - x10001 .prod(x10, x1111) - x10010 .prod(a, x10001) - x10011 .prod(a, x10010) - x10111 .prod(x101, x10010) - x11001 .prod(x10, x10111) - x11011 .prod(x10, x11001) - x11101 .prod(x10, x11011) - x11111 .prod(x10, x11101) - x100001 .prod(x10, x11111) - x100011 .prod(x10, x100001) - x100101 .prod(x10, x100011) - x100111 .prod(x10, x100101) - x101001 .prod(x10, x100111) - x101011 .prod(x10, x101001) - x101101 .prod(x10, x101011) - x101111 .prod(x10, x101101) - x110001 .prod(x10, x101111) - x110011 .prod(x10, x110001) - x110101 .prod(x10, x110011) - x110111 .prod(x10, x110101) - x111001 .prod(x10, x110111) - x111011 .prod(x10, x111001) - x111101 .prod(x10, x111011) - x1111010 .square(x111101) - x1111111 .prod(x101, x1111010) - x11111110 .square(x1111111) - x11111111 .prod(a, x11111110) - # 35 operations - - # TODO: we can accumulate in a partially reduced - # doubled-size `r` to avoid the final substractions. - # and only reduce at the end. - # This requires the number of op to be less than log2(p) == 381 - - # 35 + 8 = 43 operations - r.prod(x100001, x11111111) - r.square_repeated(3) - r *= x10111 - r.square_repeated(2) - r *= a - - # 43 + 22 = 65 operations - r.square_repeated(9) - r *= x1001 - r.square_repeated(7) - r *= x11111 - r.square_repeated(4) - - # 65 + 17 = 82 operations - r *= x111 - r.square_repeated(9) - r *= x1111 - r.square_repeated(5) - r *= x111 - - # 82 + 29 = 111 operations - r.square_repeated(11) - r *= x101011 - r.square_repeated(7) - r *= x100011 - r.square_repeated(9) - - # 111 + 28 = 139 operations - r *= x11111 - r.square_repeated(8) - r *= x100101 - r.square_repeated(17) - r *= x100111 - - # 139 + 22 = 161 operations - r.square_repeated(4) - r *= x1101 - r.square_repeated(9) - r *= x11111111 - r.square_repeated(7) - - # 161 + 15 = 176 operations - r *= x11111 - r.square_repeated(6) - r *= x10111 - r.square_repeated(6) - r *= x1001 - - # 176 + 22 = 198 operations - r.square_repeated(4) - r *= x11 - r.square_repeated(6) - r *= x11 - r.square_repeated(10) - - # 198 + 16 = 214 operations - r *= x110101 - r.square_repeated(2) - r *= a - r.square_repeated(11) - r *= x11101 - - # 214 + 28 = 238 operations - r.square_repeated(6) - r *= x101 - r.square_repeated(7) - r *= x1101 - r.square_repeated(9) - - # 238 + 21 = 259 operations - r *= x100001 - r.square_repeated(7) - r *= x100101 - r.square_repeated(11) - r *= x100111 - - # 259 + 28 = 287 operations - r.square_repeated(7) - r *= x101111 - r.square_repeated(6) - r *= x11111 - r.square_repeated(13) - - # 287 + 25 = 302 operations - r *= x100001 - r.square_repeated(6) - r *= x111011 - r.square_repeated(6) - r *= x111001 - - # 302 + 27 = 329 operations - r.square_repeated(10) - r *= x10111 - r.square_repeated(11) - r *= x111101 - r.square_repeated(4) - - # 329 + 17 = 346 operations - r *= x1101 - r.square_repeated(8) - r *= x110001 - r.square_repeated(6) - r *= x110001 - - # 346 + 20 = 366 operations - r.square_repeated(5) - r *= x11001 - r.square_repeated(3) - r *= x11 - r.square_repeated(10) - - # 366 + 16 = 382 operations - r *= x100111 - r.square_repeated(5) - r *= x1001 - r.square_repeated(8) - r *= x11001 - - # 382 + 25 = 407 operations - r.square_repeated(10) - r *= x1111 - r.square_repeated(7) - r *= x11101 - r.square_repeated(6) - - # 407 + 20 = 427 operations - r *= x11101 - r.square_repeated(9) - r *= x11111111 - r.square_repeated(8) - r *= x100101 - - # 427 + 27 = 454 operations - r.square_repeated(6) - r *= x101101 - r.square_repeated(10) - r *= x100011 - r.square_repeated(9) - - # 454 + 20 = 474 operations - r *= x1001 - r.square_repeated(8) - r *= x1101 - r.square_repeated(9) - r *= x100111 - - # 474 + 25 = 499 operations - r.square_repeated(8) - r *= x100011 - r.square_repeated(6) - r *= x101101 - r.square_repeated(9) - - # 499 + 16 = 515 operations - r *= x100101 - r.square_repeated(4) - r *= x1111 - r.square_repeated(9) - r *= x1111111 - - # 515 + 25 = 540 operations - r.square_repeated(6) - r *= x11001 - r.square_repeated(8) - r *= x111 - r.square_repeated(9) - - # 540 + 15 = 555 operations - r *= x111011 - r.square_repeated(5) - r *= x10011 - r.square_repeated(7) - r *= x100111 - - # 555 + 22 = 577 operations - r.square_repeated(5) - r *= x10111 - r.square_repeated(9) - r *= x111001 - r.square_repeated(6) - - # 577 + 14 = 591 operations - r *= x111101 - r.square_repeated(9) - r *= x11111111 - r.square_repeated(2) - r *= x11 - - # 591 + 21 = 612 operations - r.square_repeated(7) - r *= x10111 - r.square_repeated(6) - r *= x10011 - r.square_repeated(6) - - # 612 + 18 = 630 operations - r *= x101 - r.square_repeated(9) - r *= x10001 - r.square_repeated(6) - r *= x11011 - - # 630 + 27 = 657 operations - r.square_repeated(10) - r *= x100101 - r.square_repeated(7) - r *= x110011 - r.square_repeated(8) - - # 657 + 13 = 670 operations - r *= x111101 - r.square_repeated(7) - r *= x100011 - r.square_repeated(3) - r *= x111 - - # 670 + 26 = 696 operations - r.square_repeated(10) - r *= x1011 - r.square_repeated(11) - r *= x110011 - r.square_repeated(3) - - # 696 + 17 = 713 operations - r *= x111 - r.square_repeated(9) - r *= x101011 - r.square_repeated(5) - r *= x10111 - - # 713 + 21 = 734 operations - r.square_repeated(7) - r *= x101011 - r.square_repeated(2) - r *= x11 - r.square_repeated(10) - - # 734 + 19 = 753 operations - r *= x101001 - r.square_repeated(10) - r *= x110111 - r.square_repeated(6) - r *= x111001 - - # 753 + 23 = 776 operations - r.square_repeated(6) - r *= x101001 - r.square_repeated(9) - r *= x100111 - r.square_repeated(6) - - # 776 + 12 = 788 operations - r *= x110011 - r.square_repeated(7) - r *= x100001 - r.square_repeated(2) - r *= x11 - - # 788 + 39 = 827 operations - r.square_repeated(21) - r *= a - r.square_repeated(11) - r *= x101111 - r.square_repeated(5) - - # 827 + 55 = 882 operations - r *= x1001 - r.square_repeated(7) - r *= x11101 - r.square_repeated(45) - r *= x10001 - - # 882 + 4 = 886 operations - r.square_repeated(3) - r *= a diff --git a/constantine/curves/curve25519_inversion.nim b/constantine/curves/curve25519_inversion.nim deleted file mode 100644 index d05a53e..0000000 --- a/constantine/curves/curve25519_inversion.nim +++ /dev/null @@ -1,60 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for BLS12-381 -# -# ############################################################ - -func inv_addchain*(r: var Fp[Curve25519], a: Fp[Curve25519]) = - var - x10 {.noinit.}: Fp[Curve25519] - x1001 {.noinit.}: Fp[Curve25519] - x1011 {.noinit.}: Fp[Curve25519] - - x10 .square(a) # 2 - x1001 .square_repeated(x10, 2) # 8 - x1001 *= a # 9 - x1011 .prod(x10, x1001) # 11 - # 5 operations - - # TODO: we can accumulate in a partially reduced - # doubled-size `r` to avoid the final substractions. - # and only reduce at the end. - # This requires the number of op to be less than log2(p) == 255 - - template t: untyped = x10 - - t.square(x1011) # 22 - r.prod(t, x1001) # 31 = 2⁵-1 - - template u: untyped = x1001 - - t.square_repeated(r, 5) - r *= t # 2¹⁰-1 - t.square_repeated(r, 10) - t *= r # 2²⁰-1 - - u.square_repeated(t, 20) - t *= u # 2⁴⁰-1 - t.square_repeated(10) - t *= r # 2⁵⁰-1 - r.square_repeated(t, 50) - r *= t # 2¹⁰⁰-1 - - u.square_repeated(r, 100) - r *= u # 2²⁰⁰-1 - r.square_repeated(50) - r *= t # 2²⁵⁰-1 - r.square_repeated(5) - r *= x1011 # 2²⁵⁵-21 (note: 11 = 2⁵-21) diff --git a/constantine/curves/secp256k1_inversion.nim b/constantine/curves/secp256k1_inversion.nim deleted file mode 100644 index b5fd0d5..0000000 --- a/constantine/curves/secp256k1_inversion.nim +++ /dev/null @@ -1,96 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ../arithmetic/finite_fields - -# ############################################################ -# -# Specialized inversion for Secp256k1 -# -# ############################################################ - -func inv_addchain*(r: var Fp[Secp256k1], a: Fp[Secp256k1]) {.used.}= - ## We invert via Little Fermat's theorem - ## a^(-1) ≡ a^(p-2) (mod p) - ## with p = "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F" - ## We take advantage of the prime special form to hardcode - ## the sequence of squarings and multiplications for the modular exponentiation - ## - ## See libsecp256k1 - ## - ## The binary representation of (p - 2) has 5 blocks of 1s, with lengths in - ## { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: - ## [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] - - var - x2{.noInit.}: Fp[Secp256k1] - x3{.noInit.}: Fp[Secp256k1] - x6{.noInit.}: Fp[Secp256k1] - x9{.noInit.}: Fp[Secp256k1] - x11{.noInit.}: Fp[Secp256k1] - x22{.noInit.}: Fp[Secp256k1] - x44{.noInit.}: Fp[Secp256k1] - x88{.noInit.}: Fp[Secp256k1] - x176{.noInit.}: Fp[Secp256k1] - x220{.noInit.}: Fp[Secp256k1] - x223{.noInit.}: Fp[Secp256k1] - - x2.square(a) - x2 *= a - - x3.square(x2) - x3 *= a - - x6 = x3 - x6.square_repeated(3) - x6 *= x3 - - x9 = x6 - x9.square_repeated(3) - x9 *= x3 - - x11 = x9 - x11.square_repeated(2) - x11 *= x2 - - x22 = x11 - x22.square_repeated(11) - x22 *= x11 - - x44 = x22 - x44.square_repeated(22) - x44 *= x22 - - x88 = x44 - x88.square_repeated(44) - x88 *= x44 - - x176 = x88 - x88.square_repeated(88) - x176 *= x88 - - x220 = x176 - x220.square_repeated(44) - x220 *= x44 - - x223 = x220 - x223.square_repeated(3) - x223 *= x3 - - # The final result is then assembled using a sliding window over the blocks - r = x223 - r.square_repeated(23) - r *= x22 - r.square_repeated(5) - r *= a - r.square_repeated(3) - r *= x2 - r.square_repeated(2) - r *= a diff --git a/constantine/curves/zoo_precomputed_params.nim b/constantine/curves/zoo_g2_params.nim similarity index 83% rename from constantine/curves/zoo_precomputed_params.nim rename to constantine/curves/zoo_g2_params.nim index a001aad..12f639e 100644 --- a/constantine/curves/zoo_precomputed_params.nim +++ b/constantine/curves/zoo_g2_params.nim @@ -9,11 +9,11 @@ import std/macros, ../config/curves, - ./bls12_377_precomputed_params, - ./bls12_381_precomputed_params, - ./bn254_nogami_precomputed_params, - ./bn254_snarks_precomputed_params, - ./bw6_761_precomputed_params + ./bls12_377_g2_params, + ./bls12_381_g2_params, + ./bn254_nogami_g2_params, + ./bn254_snarks_g2_params, + ./bw6_761_g2_params {.experimental: "dynamicBindSym".} diff --git a/constantine/curves/zoo_inversions.nim b/constantine/curves/zoo_inversions.nim deleted file mode 100644 index def85a2..0000000 --- a/constantine/curves/zoo_inversions.nim +++ /dev/null @@ -1,35 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/curves, - ./bls12_377_inversion, - ./bls12_381_inversion, - ./bn254_nogami_inversion, - ./bn254_snarks_inversion, - ./bw6_761_inversion, - ./secp256k1_inversion, - ./curve25519_inversion - - -export - bls12_377_inversion, - bls12_381_inversion, - bn254_nogami_inversion, - bn254_snarks_inversion, - bw6_761_inversion, - secp256k1_inversion, - curve25519_inversion - -func hasInversionAddchain*(C: static Curve): static bool = - ## Is an inversion addition chain implemented for the curve. - ## Note: the addition chain might be slower than Euclid-based inversion. - when C in {BN254_Nogami, BN254_Snarks, BLS12_377, BLS12_381, BW6_761, Curve25519, Secp256k1}: - true - else: - false diff --git a/constantine/elliptic/ec_endomorphism_accel.nim b/constantine/elliptic/ec_endomorphism_accel.nim index 33a0459..57f5649 100644 --- a/constantine/elliptic/ec_endomorphism_accel.nim +++ b/constantine/elliptic/ec_endomorphism_accel.nim @@ -218,10 +218,10 @@ func nDimMultiScalarRecoding[M, L: static int]( k[j].div2() k[j] += SecretWord (bji and b[0][i]) -func buildLookupTable[M: static int, EC]( +func buildLookupTable[M: static int, EC, ECaff]( P: EC, endomorphisms: array[M-1, EC], - lut: var array[1 shl (M-1), EC], + lut: var array[1 shl (M-1), ECaff], ) = ## Build the lookup table from the base point P ## and the curve endomorphism @@ -251,11 +251,17 @@ func buildLookupTable[M: static int, EC]( # # This scheme ensures 1 addition per table entry instead of a number # of addition dependent on `u` Hamming Weight - lut[0] = P + + # Step 1. Create the lookup-table in alternative coordinates + var tab {.noInit.}: array[1 shl (M-1), EC] + tab[0] = P for u in 1'u32 ..< 1 shl (M-1): # The recoding allows usage of 2^(n-1) table instead of the usual 2^n with NAF let msb = u.log2_vartime() # No undefined, u != 0 - lut[u].sum(lut[u.clearBit(msb)], endomorphisms[msb]) + tab[u].sum(tab[u.clearBit(msb)], endomorphisms[msb]) + + # Step 2. Convert to affine coordinates to benefit from mixed-addition + lut.batchAffine(tab) func tableIndex(glv: GLV_SAC, bit: int): SecretWord = ## Compose the secret table index from @@ -286,12 +292,14 @@ func scalarMulEndo*[scalBits; EC]( ## Requires: ## - Cofactor to be cleared ## - 0 <= scalar < curve order + mixin affine + type ECaff = affine(EC) const C = P.F.C # curve static: doAssert scalBits <= C.getCurveOrderBitwidth(), "Do not use endomorphism to multiply beyond the curve order" when P.F is Fp: const M = 2 # 1. Compute endomorphisms - var endomorphisms {.noInit.}: array[M-1, typeof(P)] + var endomorphisms {.noInit.}: array[M-1, EC] when P.G == G1: endomorphisms[0] = P endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p() @@ -301,7 +309,7 @@ func scalarMulEndo*[scalBits; EC]( elif P.F is Fp2: const M = 4 # 1. Compute endomorphisms - var endomorphisms {.noInit.}: array[M-1, typeof(P)] + var endomorphisms {.noInit.}: array[M-1, EC] endomorphisms[0].frobenius_psi(P) endomorphisms[1].frobenius_psi(P, 2) endomorphisms[2].frobenius_psi(P, 3) @@ -331,10 +339,8 @@ func scalarMulEndo*[scalBits; EC]( endomorphisms[i-1].cneg(negatePoints[i]) # 4. Precompute lookup table - var lut {.noInit.}: array[1 shl (M-1), typeof(P)] + var lut {.noInit.}: array[1 shl (M-1), ECaff] buildLookupTable(P, endomorphisms, lut) - # TODO: Montgomery simultaneous inversion (or other simultaneous inversion techniques) - # so that we use mixed addition formulas in the main loop # 5. Recode the miniscalars # we need the base miniscalar (that encodes the sign) @@ -346,12 +352,13 @@ func scalarMulEndo*[scalBits; EC]( recoded.nDimMultiScalarRecoding(miniScalars) # 6. Proceed to GLV accelerated scalar multiplication - var Q {.noInit.}: typeof(P) - Q.secretLookup(lut, recoded.tableIndex(L-1)) + var Q {.noInit.}: EC + var tmp {.noInit.}: ECaff + tmp.secretLookup(lut, recoded.tableIndex(L-1)) + Q.fromAffine(tmp) for i in countdown(L-2, 0): Q.double() - var tmp {.noInit.}: typeof(Q) tmp.secretLookup(lut, recoded.tableIndex(i)) tmp.cneg(SecretBool recoded[0][i]) Q += tmp @@ -392,34 +399,40 @@ func scalarMulEndo*[scalBits; EC]( # - 0t10 -> 0b10 is 2 # - 0t11 -> 0b11 is 3 -func buildLookupTable_m2w2[EC]( +func buildLookupTable_m2w2[EC, Ecaff]( P0: EC, P1: EC, - lut: var array[8, EC], + lut: var array[8, Ecaff], ) = ## Build a lookup table for GLV with 2-dimensional decomposition ## and window of size 2 + # Step 1. Create the lookup-table in alternative coordinates + var tab {.noInit.}: array[8, EC] + # with [k0, k1] the mini-scalars with digits of size 2-bit # # 4 = 0b100 - encodes [0b01, 0b00] ≡ P0 - lut[4] = P0 + tab[4] = P0 # 5 = 0b101 - encodes [0b01, 0b01] ≡ P0 - P1 - lut[5].diff(lut[4], P1) + tab[5].diff(tab[4], P1) # 7 = 0b111 - encodes [0b01, 0b11] ≡ P0 + P1 - lut[7].sum(lut[4], P1) + tab[7].sum(tab[4], P1) # 6 = 0b110 - encodes [0b01, 0b10] ≡ P0 + 2P1 - lut[6].sum(lut[7], P1) + tab[6].sum(tab[7], P1) # 0 = 0b000 - encodes [0b00, 0b00] ≡ 3P0 - lut[0].double(lut[4]) - lut[0] += lut[4] + tab[0].double(tab[4]) + tab[0] += tab[4] # 1 = 0b001 - encodes [0b00, 0b01] ≡ 3P0 + P1 - lut[1].sum(lut[0], P1) + tab[1].sum(tab[0], P1) # 2 = 0b010 - encodes [0b00, 0b10] ≡ 3P0 + 2P1 - lut[2].sum(lut[1], P1) + tab[2].sum(tab[1], P1) # 3 = 0b011 - encodes [0b00, 0b11] ≡ 3P0 + 3P1 - lut[3].sum(lut[2], P1) + tab[3].sum(tab[2], P1) + + # Step 2. Convert to affine coordinates to benefit from mixed-addition + lut.batchAffine(tab) func w2Get(recoding: Recoded, digitIdx: int): uint8 {.inline.}= @@ -477,6 +490,8 @@ func scalarMulGLV_m2w2*[scalBits; EC]( ## Requires: ## - Cofactor to be cleared ## - 0 <= scalar < curve order + mixin affine + type ECaff = affine(EC) const C = P0.F.C # curve static: doAssert: scalBits == C.getCurveOrderBitwidth() @@ -485,7 +500,7 @@ func scalarMulGLV_m2w2*[scalBits; EC]( var P1 = P0 P1.x *= C.getCubicRootOfUnity_mod_p() else: - var P1 {.noInit.}: typeof(P0) + var P1 {.noInit.}: EC P1.frobenius_psi(P0, 2) # 2. Decompose scalar into mini-scalars @@ -503,10 +518,8 @@ func scalarMulGLV_m2w2*[scalBits; EC]( P1.cneg(negatePoints[1]) # 4. Precompute lookup table - var lut {.noInit.}: array[8, EC] + var lut {.noInit.}: array[8, ECaff] buildLookupTable_m2w2(P0, P1, lut) - # TODO: Montgomery simultaneous inversion (or other simultaneous inversion techniques) - # so that we use mixed addition formulas in the main loop # 5. Recode the miniscalars # we need the base miniscalar (that encodes the sign) @@ -518,15 +531,16 @@ func scalarMulGLV_m2w2*[scalBits; EC]( recoded.nDimMultiScalarRecoding(miniScalars) # 6. Proceed to GLV accelerated scalar multiplication - var Q {.noInit.}: typeof(P0) + var Q {.noInit.}: EC + var tmp {.noInit.}: ECaff var isNeg: SecretBool - Q.secretLookup(lut, recoded.w2TableIndex((L div 2) - 1, isNeg)) + tmp.secretLookup(lut, recoded.w2TableIndex((L div 2) - 1, isNeg)) + Q.fromAffine(tmp) for i in countdown((L div 2) - 2, 0): Q.double() Q.double() - var tmp {.noInit.}: typeof(Q) tmp.secretLookup(lut, recoded.w2TableIndex(i, isNeg)) tmp.cneg(isNeg) Q += tmp diff --git a/constantine/elliptic/ec_shortweierstrass_affine.nim b/constantine/elliptic/ec_shortweierstrass_affine.nim index bbc8357..3916c5e 100644 --- a/constantine/elliptic/ec_shortweierstrass_affine.nim +++ b/constantine/elliptic/ec_shortweierstrass_affine.nim @@ -12,7 +12,7 @@ import ../arithmetic, ../towers, ../io/[io_fields, io_towers], - ../curves/zoo_precomputed_params + ../curves/zoo_g2_params # ############################################################ # @@ -45,6 +45,14 @@ func isInf*(P: ECP_ShortW_Aff): SecretBool = ## and false otherwise result = P.x.isZero() and P.y.isZero() +func ccopy*(P: var ECP_ShortW_Aff, Q: ECP_ShortW_Aff, ctl: SecretBool) {.inline.} = + ## Constant-time conditional copy + ## If ctl is true: Q is copied into P + ## if ctl is false: Q is not copied and P is unmodified + ## Time and memory accesses are the same whether a copy occurs or not + for fP, fQ in fields(P, Q): + ccopy(fP, fQ, ctl) + func curve_eq_rhs*[F](y2: var F, x: F, G: static Subgroup) = ## Compute the curve equation right-hand-side from field element `x` ## i.e. `y²` in `y² = x³ + a x + b` diff --git a/constantine/elliptic/ec_shortweierstrass_jacobian.nim b/constantine/elliptic/ec_shortweierstrass_jacobian.nim index ee93521..682163f 100644 --- a/constantine/elliptic/ec_shortweierstrass_jacobian.nim +++ b/constantine/elliptic/ec_shortweierstrass_jacobian.nim @@ -34,6 +34,10 @@ type ECP_ShortW_Jac*[F; G: static Subgroup] = object ## Note that jacobian coordinates are not unique x*, y*, z*: F +template affine*[F, G](_: type ECP_ShortW_Jac[F, G]): typedesc = + ## Returns the affine type that corresponds to the Jacobian type input + ECP_ShortW_Aff[F, G] + func `==`*(P, Q: ECP_ShortW_Jac): SecretBool = ## Constant-time equality check ## This is a costly operation @@ -379,6 +383,8 @@ func madd*[F; G: static Subgroup]( ## with p in Jacobian coordinates and Q in affine coordinates ## ## R = P + Q + ## + ## TODO: ⚠️ cannot handle P == Q # "madd-2007-bl" mixed addition formula - https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl # with conditional copies to handle infinity points # Assumptions: Z2=1. @@ -516,6 +522,14 @@ func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} = ## In-place point addition P.sum(P, Q) +func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} = + ## In-place mixed point addition + ## + ## TODO: ⚠️ cannot handle P == Q + var t{.noInit.}: typeof(P) + t.madd(P, Q) + P = t + func double*(P: var ECP_ShortW_Jac) {.inline.} = ## In-place point doubling P.double(P) @@ -528,7 +542,7 @@ func diff*(r: var ECP_ShortW_Jac, nQ.neg(Q) r.sum(P, nQ) -func affineFromJacobian*[F; G]( +func affine*[F; G]( aff: var ECP_ShortW_Aff[F, G], jac: ECP_ShortW_Jac[F, G]) = var invZ {.noInit.}, invZ2{.noInit.}: F @@ -536,16 +550,76 @@ func affineFromJacobian*[F; G]( invZ2.square(invZ) aff.x.prod(jac.x, invZ2) + invZ *= invZ2 aff.y.prod(jac.y, invZ) - aff.y *= invZ2 -func jacobianFromAffine*[F; G]( +func fromAffine*[F; G]( jac: var ECP_ShortW_Jac[F, G], aff: ECP_ShortW_Aff[F, G]) {.inline.} = jac.x = aff.x jac.y = aff.y jac.z.setOne() +func batchAffine*[N: static int, F, G]( + affs: var array[N, ECP_ShortW_Aff[F, G]], + jacs: array[N, ECP_ShortW_Jac[F, G]]) = + # Algorithm: Montgomery's batch inversion + # - Speeding the Pollard and Elliptic Curve Methods of Factorization + # Section 10.3.1 + # Peter L. Montgomery + # https://www.ams.org/journals/mcom/1987-48-177/S0025-5718-1987-0866113-7/S0025-5718-1987-0866113-7.pdf + # - Modern Computer Arithmetic + # Section 2.5.1 Several inversions at once + # Richard P. Brent and Paul Zimmermann + # https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf + + # To avoid temporaries, we store partial accumulations + # in affs[i].x and whether z == 0 in affs[i].y + var zeroes: array[N, SecretBool] + affs[0].x = jacs[0].z + zeroes[0] = affs[0].x.isZero() + affs[0].x.csetOne(zeroes[0]) + + for i in 1 ..< N: + # Skip zero z-coordinates (infinity points) + var z = jacs[i].z + zeroes[i] = z.isZero() + z.csetOne(zeroes[i]) + + affs[i].x.prod(affs[i-1].x, z) + + var accInv {.noInit.}: F + accInv.inv(affs[N-1].x) + + for i in countdown(N-1, 1): + # Skip zero z-coordinates (infinity points) + var z = affs[i].x + + # Extract 1/Pᵢ + var invi {.noInit.}: F + invi.prod(accInv, affs[i-1].x) + invi.csetZero(zeroes[i]) + + # Now convert Pᵢ to affine + var invi2 {.noinit.}: F + invi2.square(invi) + affs[i].x.prod(jacs[i].x, invi2) + invi *= invi2 + affs[i].y.prod(jacs[i].y, invi) + + # next iteration + invi = jacs[i].z + invi.csetOne(zeroes[i]) + accInv *= invi + + block: # tail + var invi2 {.noinit.}: F + accInv.csetZero(zeroes[0]) + invi2.square(accInv) + affs[0].x.prod(jacs[0].x, invi2) + accInv *= invi2 + affs[0].y.prod(jacs[0].y, accInv) + # ############################################################ # # # Deriving efficient and complete Jacobian formulae # diff --git a/constantine/elliptic/ec_shortweierstrass_projective.nim b/constantine/elliptic/ec_shortweierstrass_projective.nim index 1730347..80c2bfc 100644 --- a/constantine/elliptic/ec_shortweierstrass_projective.nim +++ b/constantine/elliptic/ec_shortweierstrass_projective.nim @@ -34,6 +34,10 @@ type ECP_ShortW_Prj*[F; G: static Subgroup] = object ## Note that projective coordinates are not unique x*, y*, z*: F +template affine*[F, G](_: type ECP_ShortW_Prj[F, G]): typedesc = + ## Returns the affine type that corresponds to the Jacobian type input + ECP_ShortW_Aff[F, G] + func `==`*(P, Q: ECP_ShortW_Prj): SecretBool = ## Constant-time equality check ## This is a costly operation @@ -393,7 +397,6 @@ func `+=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Prj) {.inline.} = func `+=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Aff) {.inline.} = ## In-place mixed point addition - # used in line_addition P.madd(P, Q) func double*(P: var ECP_ShortW_Prj) {.inline.} = @@ -409,7 +412,7 @@ func diff*(r: var ECP_ShortW_Prj, nQ.neg(Q) r.sum(P, nQ) -func affineFromProjective*[F, G]( +func affine*[F, G]( aff: var ECP_ShortW_Aff[F, G], proj: ECP_ShortW_Prj[F, G]) = var invZ {.noInit.}: F @@ -418,9 +421,63 @@ func affineFromProjective*[F, G]( aff.x.prod(proj.x, invZ) aff.y.prod(proj.y, invZ) -func projectiveFromAffine*[F, G]( +func fromAffine*[F, G]( proj: var ECP_ShortW_Prj[F, G], aff: ECP_ShortW_Aff[F, G]) {.inline.} = proj.x = aff.x proj.y = aff.y proj.z.setOne() + +func batchAffine*[N: static int, F, G]( + affs: var array[N, ECP_ShortW_Aff[F, G]], + projs: array[N, ECP_ShortW_Prj[F, G]]) = + # Algorithm: Montgomery's batch inversion + # - Speeding the Pollard and Elliptic Curve Methods of Factorization + # Section 10.3.1 + # Peter L. Montgomery + # https://www.ams.org/journals/mcom/1987-48-177/S0025-5718-1987-0866113-7/S0025-5718-1987-0866113-7.pdf + # - Modern Computer Arithmetic + # Section 2.5.1 Several inversions at once + # Richard P. Brent and Paul Zimmermann + # https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf + + # To avoid temporaries, we store partial accumulations + # in affs[i].x + var zeroes: array[N, SecretBool] + affs[0].x = projs[0].z + zeroes[0] = affs[0].x.isZero() + affs[0].x.csetOne(zeroes[0]) + + for i in 1 ..< N: + # Skip zero z-coordinates (infinity points) + var z = projs[i].z + zeroes[i] = z.isZero() + z.csetOne(zeroes[i]) + + affs[i].x.prod(affs[i-1].x, z) + + var accInv {.noInit.}: F + accInv.inv(affs[N-1].x) + + for i in countdown(N-1, 1): + # Skip zero z-coordinates (infinity points) + var z = affs[i].x + + # Extract 1/Pᵢ + var invi {.noInit.}: F + invi.prod(accInv, affs[i-1].x) + invi.csetZero(zeroes[i]) + + # Now convert Pᵢ to affine + affs[i].x.prod(projs[i].x, invi) + affs[i].y.prod(projs[i].y, invi) + + # next iteration + invi = projs[i].z + invi.csetOne(zeroes[i]) + accInv *= invi + + block: # tail + accInv.csetZero(zeroes[0]) + affs[0].x.prod(projs[0].x, accInv) + affs[0].y.prod(projs[0].y, accInv) \ No newline at end of file diff --git a/constantine/elliptic/ec_twistededwards_projective.nim b/constantine/elliptic/ec_twistededwards_projective.nim index d152806..45f6b4f 100644 --- a/constantine/elliptic/ec_twistededwards_projective.nim +++ b/constantine/elliptic/ec_twistededwards_projective.nim @@ -289,7 +289,7 @@ func diff*(r: var ECP_TwEdwards_Prj, nQ.neg(Q) r.sum(P, nQ) -func affineFromProjective*[F]( +func affine*[F]( aff: var ECP_TwEdwards_Prj[F], proj: ECP_TwEdwards_Prj[F]) = var invZ {.noInit.}: F @@ -298,7 +298,7 @@ func affineFromProjective*[F]( aff.x.prod(proj.x, invZ) aff.y.prod(proj.y, invZ) -func projectiveFromAffine*[F]( +func projective*[F]( proj: var ECP_TwEdwards_Prj[F], aff: ECP_TwEdwards_Prj[F]) {.inline.} = proj.x = aff.x diff --git a/constantine/io/io_ec.nim b/constantine/io/io_ec.nim index 44e0cd3..3352b43 100644 --- a/constantine/io/io_ec.nim +++ b/constantine/io/io_ec.nim @@ -10,6 +10,7 @@ import ./io_bigints, ./io_fields, ./io_towers, ../arithmetic, ../towers, + ../primitives, ../elliptic/[ ec_shortweierstrass_affine, ec_shortweierstrass_projective, @@ -26,7 +27,12 @@ import # # ############################################################ -func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC): string = +proc spaces(num: int): string = + result = newString(num) + for i in 0 ..< num: + result[i] = ' ' + +func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC, indent: static int = 0): string = ## Stringify an elliptic curve point to Hex ## Note. Leading zeros are not removed. ## Result is prefixed with 0x @@ -39,29 +45,31 @@ func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC): stri ## This proc output may change format in the future var aff {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G] - when EC is ECP_ShortW_Prj: - aff.affineFromProjective(P) - elif EC is ECP_ShortW_Jac: - aff.affineFromJacobian(P) + when EC isnot ECP_ShortW_Aff: + aff.affine(P) else: aff = P - result = "ECP[" & $aff.F & "](\n x: " + const sp = spaces(indent) + + result = sp & $EC & "(\n" & sp & " x: " result.appendHex(aff.x, bigEndian) - result &= ",\n y: " + result &= ",\n" & sp & " y: " result.appendHex(aff.y, bigEndian) - result &= "\n)" + result &= "\n" & sp & ")" func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x, y: string): bool {.raises: [ValueError].}= ## Convert hex strings to a G1 curve point - ## Returns `false` - ## if there is no point with coordinates (`x`, `y`) on the curve + ## Returns true if point exist or if input is the point at infinity (all 0) + ## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve ## In that case, `dst` content is undefined. static: doAssert dst.F is Fp, "dst must be on G1, an elliptic curve over 𝔽p" dst.x.fromHex(x) dst.y.fromHex(y) dst.z.setOne() - return bool(isOnCurve(dst.x, dst.y, dst.G)) + let isInf = dst.x.isZero() and dst.y.isZero() + dst.z.csetZero(isInf) + return bool(isOnCurve(dst.x, dst.y, dst.G) or isInf) func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x0, x1, y0, y1: string): bool {.raises: [ValueError].}= ## Convert hex strings to a G2 curve point @@ -72,24 +80,34 @@ func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x0, x1, y0, y1: strin dst.x.fromHex(x0, x1) dst.y.fromHex(y0, y1) dst.z.setOne() - return bool(isOnCurve(dst.x, dst.y, dst.G)) + let isInf = dst.x.isZero() and dst.y.isZero() + dst.z.csetZero(isInf) + return bool(isOnCurve(dst.x, dst.y, dst.G) or isInf) func fromHex*(dst: var ECP_ShortW_Aff, x, y: string): bool {.raises: [ValueError].}= ## Convert hex strings to a G1 curve point - ## Returns `false` - ## if there is no point with coordinates (`x`, `y`) on the curve + ## Returns true if point exist or if input is the point at infinity (all 0) + ## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve ## In that case, `dst` content is undefined. static: doAssert dst.F is Fp, "dst must be on G1, an elliptic curve over 𝔽p" dst.x.fromHex(x) dst.y.fromHex(y) - return bool(isOnCurve(dst.x, dst.y, dst.G)) + return bool(isOnCurve(dst.x, dst.y, dst.G) or dst.isInf()) func fromHex*(dst: var ECP_ShortW_Aff, x0, x1, y0, y1: string): bool {.raises: [ValueError].}= ## Convert hex strings to a G2 curve point - ## Returns `false` - ## if there is no point with coordinates (`x`, `y`) on the curve + ## Returns true if point exist or if input is the point at infinity (all 0) + ## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve ## In that case, `dst` content is undefined. static: doAssert dst.F is Fp2, "dst must be on G2, an elliptic curve over 𝔽p2" dst.x.fromHex(x0, x1) dst.y.fromHex(y0, y1) - return bool(isOnCurve(dst.x, dst.y, dst.G)) + return bool(isOnCurve(dst.x, dst.y, dst.G) or dst.isInf()) + +func fromHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff]( + _: type EC, x, y: string): EC {.raises: [ValueError].} = + doAssert result.fromHex(x, y) + +func fromHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff]( + _: type EC, x0, x1, y0, y1: string): EC {.raises: [ValueError].} = + doAssert result.fromHex(x0, x1, y0, y1) \ No newline at end of file diff --git a/constantine/pairing/miller_loops.nim b/constantine/pairing/miller_loops.nim index 1c7f385..46c24cc 100644 --- a/constantine/pairing/miller_loops.nim +++ b/constantine/pairing/miller_loops.nim @@ -141,7 +141,7 @@ func miller_init_double_then_add*[FT, F1, F2]( # First step: 0b10, T <- Q, f = 1 (mod p¹²), f *= line # ---------------------------------------------------- - T.projectiveFromAffine(Q) + T.fromAffine(Q) # f.square() -> square(1) line.line_double(T, P) @@ -292,7 +292,7 @@ func miller_init_double_then_add*[N: static int, FT, F1, F2]( # First step: T <- Q, f = 1 (mod p¹²), f *= line # ---------------------------------------------- for i in 0 ..< N: - Ts[i].projectiveFromAffine(Qs[i]) + Ts[i].fromAffine(Qs[i]) line0.line_double(Ts[0], Ps[0]) when N >= 2: diff --git a/constantine/pairing/pairing_bls12.nim b/constantine/pairing/pairing_bls12.nim index 515d935..07cc051 100644 --- a/constantine/pairing/pairing_bls12.nim +++ b/constantine/pairing/pairing_bls12.nim @@ -63,7 +63,7 @@ func millerLoopGenericBLS12*[C]( line {.noInit.}: Line[Fp2[C]] nQ{.noInit.}: typeof(Q) - T.projectiveFromAffine(Q) + T.fromAffine(Q) nQ.neg(Q) basicMillerLoop( diff --git a/constantine/pairing/pairing_bn.nim b/constantine/pairing/pairing_bn.nim index 4230219..623fe52 100644 --- a/constantine/pairing/pairing_bn.nim +++ b/constantine/pairing/pairing_bn.nim @@ -61,7 +61,7 @@ func millerLoopGenericBN*[C]( line {.noInit.}: Line[Fp2[C]] nQ{.noInit.}: typeof(Q) - T.projectiveFromAffine(Q) + T.fromAffine(Q) nQ.neg(Q) basicMillerLoop( diff --git a/constantine/pairing/pairing_bw6_761.nim b/constantine/pairing/pairing_bw6_761.nim index 08401f5..3d128d8 100644 --- a/constantine/pairing/pairing_bw6_761.nim +++ b/constantine/pairing/pairing_bw6_761.nim @@ -45,7 +45,7 @@ func millerLoopBW6_761_naive[C]( line {.noInit.}: Line[Fp[C]] nQ{.noInit.}: typeof(Q) - T.projectiveFromAffine(Q) + T.fromAffine(Q) nQ.neg(Q) basicMillerLoop( @@ -55,7 +55,7 @@ func millerLoopBW6_761_naive[C]( ) var f2 {.noInit.}: typeof(f) - T.projectiveFromAffine(Q) + T.fromAffine(Q) basicMillerLoop( f2, T, line, @@ -93,7 +93,7 @@ func millerLoopBW6_761_opt_to_debug[C]( T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2] line {.noInit.}: Line[Fp[C]] - T.projectiveFromAffine(Q) + T.fromAffine(Q) f.setOne() template u: untyped = pairing(C, ate_param_1_opt) @@ -114,7 +114,7 @@ func millerLoopBW6_761_opt_to_debug[C]( mu = f minvu.inv(f) - Qu.affineFromProjective(T) + Qu.affine(T) nQu.neg(Qu) # Drop the vertical line @@ -125,7 +125,7 @@ func millerLoopBW6_761_opt_to_debug[C]( # 2nd part: f_{u²-u-1,Q}(P) # ------------------------------ # We restart from `f` and `T` - T.projectiveFromAffine(Qu) + T.fromAffine(Qu) template u: untyped = pairing(C, ate_param_2_opt) var u3 = pairing(C, ate_param_2_opt) diff --git a/constantine/primitives/bithacks.nim b/constantine/primitives/bithacks.nim index 472861e..48edc5a 100644 --- a/constantine/primitives/bithacks.nim +++ b/constantine/primitives/bithacks.nim @@ -115,4 +115,4 @@ func isPowerOf2_vartime*(n: SomeUnsignedInt): bool {.inline.} = func nextPowerOf2_vartime*(n: uint64): uint64 {.inline.} = ## Returns x if x is a power of 2 ## or the next biggest power of 2 - 1'u64 shl (log2_vartime(n-1) + 1) \ No newline at end of file + 1'u64 shl (log2_vartime(n-1) + 1) diff --git a/constantine/primitives/constant_time.nim b/constantine/primitives/constant_time.nim index 3264680..b542cff 100644 --- a/constantine/primitives/constant_time.nim +++ b/constantine/primitives/constant_time.nim @@ -124,7 +124,7 @@ template `-`*[T: Ct](x: T): T = # # ############################################################ -template isMsbSet[T: Ct](x: T): CTBool[T] = +template isMsbSet*[T: Ct](x: T): CTBool[T] = ## Returns the most significant bit of an integer const msb_pos = T.sizeof * 8 - 1 (CTBool[T])(x shr msb_pos) @@ -163,6 +163,16 @@ template `<=`*[T: Ct](x, y: T): CTBool[T] = template `xor`*[T: Ct](x, y: CTBool[T]): CTBool[T] = CTBool[T](noteq(T(x), T(y))) +# ############################################################ +# +# Conditionals +# +# ############################################################ + +template cneg*[T: Ct](x: T, ctl: CTBool[T]): T = + # Conditional negate if ctl is true + (x xor -T(ctl)) + T(ctl) + # ############################################################ # # Workaround system.nim `!=` template diff --git a/constantine/primitives/constant_time_types.nim b/constantine/primitives/constant_time_types.nim index 2cab28f..c4bb049 100644 --- a/constantine/primitives/constant_time_types.nim +++ b/constantine/primitives/constant_time_types.nim @@ -39,3 +39,4 @@ const X86* = defined(amd64) or defined(i386) when sizeof(int) == 8 and GCC_Compatible: type uint128*{.importc: "unsigned __int128".} = object + int128*{.importc: "__int128".} = object \ No newline at end of file diff --git a/constantine/primitives/extended_precision.nim b/constantine/primitives/extended_precision.nim index ac98b75..b81e6cd 100644 --- a/constantine/primitives/extended_precision.nim +++ b/constantine/primitives/extended_precision.nim @@ -75,6 +75,19 @@ func muladd2*(hi, lo: var Ct[uint32], a, b, c1, c2: Ct[uint32]) {.inline.}= lo = (Ct[uint32])(dblPrec) hi = (Ct[uint32])(dblPrec shr 32) +func smul*(hi, lo: var Ct[uint32], a, b: Ct[uint32]) {.inline.} = + ## Signed extended precision multiplication + ## (hi, lo) <- a*b + ## + ## Inputs are intentionally unsigned + ## as we use their unchecked raw representation for cryptography + ## + ## This is constant-time on most hardware + ## See: https://www.bearssl.org/ctmul.html + let dblPrec = int64(cast[int32](a)) * int64(cast[int32](b)) # Sign-extension via cast+conversion to wider int + lo = cast[Ct[uint32]](dblPrec) + hi = cast[Ct[uint32]](dblPrec shr 32) + # ############################################################ # # 64-bit words @@ -83,15 +96,15 @@ func muladd2*(hi, lo: var Ct[uint32], a, b, c1, c2: Ct[uint32]) {.inline.}= when sizeof(int) == 8: when defined(vcc): - from ./extended_precision_x86_64_msvc import unsafeDiv2n1n, mul, muladd1, muladd2 + from ./extended_precision_x86_64_msvc import unsafeDiv2n1n, mul, muladd1, muladd2, smul elif GCCCompatible: # TODO: constant-time div2n1n when X86: from ./extended_precision_x86_64_gcc import unsafeDiv2n1n - from ./extended_precision_64bit_uint128 import mul, muladd1, muladd2 + from ./extended_precision_64bit_uint128 import mul, muladd1, muladd2, smul else: - from ./extended_precision_64bit_uint128 import unsafeDiv2n1n, mul, muladd1, muladd2 - export unsafeDiv2n1n, mul, muladd1, muladd2 + from ./extended_precision_64bit_uint128 import unsafeDiv2n1n, mul, muladd1, muladd2, smul + export unsafeDiv2n1n, mul, muladd1, muladd2, smul # ############################################################ # diff --git a/constantine/primitives/extended_precision_64bit_uint128.nim b/constantine/primitives/extended_precision_64bit_uint128.nim index 599ea95..045df87 100644 --- a/constantine/primitives/extended_precision_64bit_uint128.nim +++ b/constantine/primitives/extended_precision_64bit_uint128.nim @@ -97,3 +97,25 @@ func muladd2*(hi, lo: var Ct[uint64], a, b, c1, c2: Ct[uint64]) {.inline.}= else: {.emit:["*",hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].} {.emit:["*",lo, " = (NU64)", dblPrec,";"].} + +func smul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} = + ## Extended precision multiplication + ## (hi, lo) <- a*b + ## + ## Inputs are intentionally unsigned + ## as we use their unchecked raw representation for cryptography + ## + ## This is constant-time on most hardware + ## See: https://www.bearssl.org/ctmul.html + block: + var dblPrec {.noInit.}: int128 + # We need to cast to int64 then sign-extended to int128 + {.emit:[dblPrec, " = (__int128)", cast[int64](a)," * (__int128)", cast[int64](b),";"].} + + # Don't forget to dereference the var param in C mode + when defined(cpp): + {.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].} + {.emit:[lo, " = (NU64)", dblPrec,";"].} + else: + {.emit:["*",hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].} + {.emit:["*",lo, " = (NU64)", dblPrec,";"].} \ No newline at end of file diff --git a/constantine/primitives/extended_precision_x86_64_msvc.nim b/constantine/primitives/extended_precision_x86_64_msvc.nim index 65fa75b..774cd05 100644 --- a/constantine/primitives/extended_precision_x86_64_msvc.nim +++ b/constantine/primitives/extended_precision_x86_64_msvc.nim @@ -29,6 +29,7 @@ func udiv128(highDividend, lowDividend, divisor: Ct[uint64], remainder: var Ct[u ## - if n_hi > d result is undefined func umul128(a, b: Ct[uint64], hi: var Ct[uint64]): Ct[uint64] {.importc:"_umul128", header:"", nodecl.} + ## Unsigned extended precision multiplication ## (hi, lo) <-- a * b ## Return value is the low word @@ -84,3 +85,22 @@ func muladd2*(hi, lo: var Ct[uint64], a, b, c1, c2: Ct[uint64]) {.inline.}= # Carry chain 2 addC(carry2, lo, lo, c2, Carry(0)) addC(carry2, hi, hi, 0, carry2) + +func smul128(a, b: Ct[uint64], hi: var Ct[uint64]): Ct[uint64] {.importc:"_mul128", header:"", nodecl.} + ## Signed extended precision multiplication + ## (hi, lo) <-- a * b + ## Return value is the low word + ## + ## Inputs are intentionally unsigned + ## as we use their unchecked raw representation for cryptography + +func smul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} = + ## Extended precision multiplication + ## (hi, lo) <- a*b + ## + ## Inputs are intentionally unsigned + ## as we use their unchecked raw representation for cryptography + ## + ## This is constant-time on most hardware + ## See: https://www.bearssl.org/ctmul.html + lo = smul128(a, b, hi) \ No newline at end of file diff --git a/constantine/protocols/ethereum_evm_precompiles.nim b/constantine/protocols/ethereum_evm_precompiles.nim index 727469a..ace843f 100644 --- a/constantine/protocols/ethereum_evm_precompiles.nim +++ b/constantine/protocols/ethereum_evm_precompiles.nim @@ -133,7 +133,7 @@ func eth_evm_ecadd*( R.sum(P, Q) var aff{.noInit.}: ECP_ShortW_Aff[Fp[BN254_Snarks], G1] - aff.affineFromProjective(R) + aff.affine(R) r.toOpenArray(0, 31).exportRawUint( aff.x, bigEndian @@ -207,7 +207,7 @@ func eth_evm_ecmul*( P.scalarMul(s) var aff{.noInit.}: ECP_ShortW_Aff[Fp[BN254_Snarks], G1] - aff.affineFromProjective(P) + aff.affine(P) r.toOpenArray(0, 31).exportRawUint( aff.x, bigEndian @@ -221,7 +221,7 @@ func subgroupCheck(P: ECP_ShortW_Aff[Fp2[BN254_Snarks], G2]): bool = ## that point may not be in the correct cyclic subgroup. ## If we are on the subgroup of order r then [r]P = 0 var Q{.noInit.}: ECP_ShortW_Prj[Fp2[BN254_Snarks], G2] - Q.projectiveFromAffine(P) + Q.fromAffine(P) return bool(Q.isInSubgroup()) func fromRawCoords( diff --git a/constantine/tower_field_extensions/extension_fields.nim b/constantine/tower_field_extensions/extension_fields.nim index 4f6a247..8b1c7eb 100644 --- a/constantine/tower_field_extensions/extension_fields.nim +++ b/constantine/tower_field_extensions/extension_fields.nim @@ -219,6 +219,17 @@ func conj*(r: var CubicExt, a: CubicExt) = # Conditional arithmetic # ------------------------------------------------------------------- +func csetZero*(a: var ExtensionField, ctl: SecretBool) = + ## Set ``a`` to 0 if ``ctl`` is true + staticFor i, 0, a.coords.len: + a.coords[i].csetZero(ctl) + +func csetOne*(a: var ExtensionField, ctl: SecretBool) = + ## Set ``a`` to 1 if ``ctl`` is true + a.coords[0].csetOne(ctl) + staticFor i, 1, a.coords.len: + a.coords[i].csetZero(ctl) + func cneg*(a: var ExtensionField, ctl: SecretBool) = ## Constant-time in-place conditional negation ## Only negate if ctl is true diff --git a/docs/optimizations.md b/docs/optimizations.md index a90e886..a7906dd 100644 --- a/docs/optimizations.md +++ b/docs/optimizations.md @@ -28,16 +28,16 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - Squaring - [x] Dedicated squaring functions - [x] int128 - - [ ] loop unrolling + - [x] loop unrolling - [x] x86: Full Assembly implementation - - [ ] x86: MULX, ADCX, ADOX instructions + - [x] x86: MULX, ADCX, ADOX instructions ## Finite Fields & Modular Arithmetic - Representation - [x] Montgomery Representation - [ ] Barret Reduction - - [ ] Unsaturated Representation + - [x] Unsaturated Representation - [ ] Mersenne Prime (2ᵏ - 1), - [ ] Generalized Mersenne Prime (NIST Prime P256: 2^256 - 2^224 + 2^192 + 2^96 - 1) - [ ] Pseudo-Mersenne Prime (2^m - k for example Curve25519: 2^255 - 19) @@ -91,9 +91,10 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - Inversion (constant-time baseline, Little-Fermat inversion via a^(p-2)) - [x] Constant-time binary GCD algorithm by Möller, algorithm 5 in https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf - [x] Addition-chain for a^(p-2) - - [ ] Constant-time binary GCD algorithm by Bernstein-Yang, https://eprint.iacr.org/2019/266 + - [x] Constant-time binary GCD algorithm by Bernstein-Yang, https://eprint.iacr.org/2019/266 - [ ] Constant-time binary GCD algorithm by Pornin, https://eprint.iacr.org/2020/972 - - [ ] Simultaneous inversion + - [x] Constant-time binary GCD algorithm by BY with half-delta optimization by libsecp256k1, formally verified, https://eprint.iacr.org/2021/549 + - [x] Simultaneous inversion - Square Root (constant-time) - [x] baseline sqrt via Little-Fermat for `p ≡ 3 (mod 4)` @@ -193,7 +194,7 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - [ ] BLS curves: Ghamman, Fouotsa - [x] BLS curves: Hayashida, Hayasaka, Teruya -- [ ] Multi-pairing +- [x] Multi-pairing - [ ] Line accumulation - [ ] Parallel Multi-Pairing @@ -204,7 +205,7 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - [ ] BLS G2: Scott et al - [ ] BLS G2: Fuentes-Castañeda - [x] BLS G2: Budroni et al, endomorphism accelerated - - [ ] BN G2 + - [x] BN G2: Fuentes-Castañeda - [ ] BW6-761 G1 - [ ] BW6-761 G2 diff --git a/research/kzg_poly_commit/fft_g1.nim b/research/kzg_poly_commit/fft_g1.nim index b8e4b1d..6f61467 100644 --- a/research/kzg_poly_commit/fft_g1.nim +++ b/research/kzg_poly_commit/fft_g1.nim @@ -222,7 +222,7 @@ when isMainModule: proc roundtrip() = let fftDesc = FFTDescriptor[G1].init(maxScale = 4) var data = newSeq[G1](fftDesc.maxWidth) - data[0].projectiveFromAffine(Generator1) + data[0].fromAffine(Generator1) for i in 1 ..< fftDesc.maxWidth: data[i].madd(data[i-1], Generator1) @@ -273,7 +273,7 @@ when isMainModule: let desc = FFTDescriptor[G1].init(uint8 scale) var data = newSeq[G1](desc.maxWidth) - data[0].projectiveFromAffine(Generator1) + data[0].fromAffine(Generator1) for i in 1 ..< desc.maxWidth: data[i].madd(data[i-1], Generator1) diff --git a/research/kzg_poly_commit/kzg_single_proofs.nim b/research/kzg_poly_commit/kzg_single_proofs.nim index db64750..716b336 100644 --- a/research/kzg_poly_commit/kzg_single_proofs.nim +++ b/research/kzg_poly_commit/kzg_single_proofs.nim @@ -64,7 +64,7 @@ proc checkProofSingle( ): bool = ## Check a proof for a Kate commitment for an evaluation f(x) = y var xG2, g2: G2 - g2.projectiveFromAffine(Generator2) + g2.fromAffine(Generator2) xG2 = g2 xG2.scalarMul(x.toBig()) @@ -72,7 +72,7 @@ proc checkProofSingle( s_minus_x.diff(kzg.secretG2[1], xG2) var yG1: G1 - yG1.projectiveFromAffine(Generator1) + yG1.fromAffine(Generator1) yG1.scalarMul(y.toBig()) var commitment_minus_y: G1 diff --git a/research/kzg_poly_commit/polynomials.nim b/research/kzg_poly_commit/polynomials.nim index 9f39ad4..dd12ea2 100644 --- a/research/kzg_poly_commit/polynomials.nim +++ b/research/kzg_poly_commit/polynomials.nim @@ -47,10 +47,10 @@ func pair_verify*( var P1a, P2a: G1aff var Q1a, Q2a: G2aff - P1a.affineFromProjective(P1) - Q1a.affineFromProjective(Q1) - P2a.affineFromProjective(P2) - Q2a.affineFromProjective(Q2) + P1a.affine(P1) + Q1a.affine(Q1) + P2a.affine(P2) + Q2a.affine(Q2) # To verify if e(P1, Q1) == e(P2, Q2) # we can do e(P1, Q1) / e(P2, Q2) == 1 diff --git a/sage/precompute_params.sage b/sage/g2_params.sage similarity index 100% rename from sage/precompute_params.sage rename to sage/g2_params.sage diff --git a/tests/t_bigints.nim b/tests/t_bigints.nim index 0d6db16..9c256d6 100644 --- a/tests/t_bigints.nim +++ b/tests/t_bigints.nim @@ -555,14 +555,10 @@ proc mainModularInverse() = let a = BigInt[16].fromUint(42'u16) let M = BigInt[16].fromUint(2017'u16) - var mp1div2 = M - discard mp1div2.add(One) - mp1div2.shiftRight(1) - let expected = BigInt[16].fromUint(1969'u16) var r = canary(BigInt[16]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -570,14 +566,10 @@ proc mainModularInverse() = let a = BigInt[381].fromUint(42'u16) let M = BigInt[381].fromUint(2017'u16) - var mp1div2 = M - discard mp1div2.add(One) - mp1div2.shiftRight(1) - let expected = BigInt[381].fromUint(1969'u16) var r = canary(BigInt[381]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -586,14 +578,10 @@ proc mainModularInverse() = let a = BigInt[16].fromUint(271'u16) let M = BigInt[16].fromUint(383'u16) - var mp1div2 = M - discard mp1div2.add(One) - mp1div2.shiftRight(1) - let expected = BigInt[16].fromUint(106'u16) var r = canary(BigInt[16]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -601,14 +589,10 @@ proc mainModularInverse() = let a = BigInt[381].fromUint(271'u16) let M = BigInt[381].fromUint(383'u16) - var mp1div2 = M - discard mp1div2.add(One) - mp1div2.shiftRight(1) - let expected = BigInt[381].fromUint(106'u16) var r = canary(BigInt[381]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -616,14 +600,10 @@ proc mainModularInverse() = let a = BigInt[381].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab") - var mp1div2 = M - discard mp1div2.add(One) - mp1div2.shiftRight(1) - let expected = BigInt[381].fromHex("0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8") var r = canary(BigInt[381]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -632,14 +612,10 @@ proc mainModularInverse() = let a = BigInt[16].fromUint(0'u16) let M = BigInt[16].fromUint(2017'u16) - var mp1div2 = M - mp1div2.shiftRight(1) - discard mp1div2.add(One) - let expected = BigInt[16].fromUint(0'u16) var r = canary(BigInt[16]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) @@ -647,14 +623,10 @@ proc mainModularInverse() = let a = BigInt[381].fromUint(0'u16) let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab") - var mp1div2 = M - mp1div2.shiftRight(1) - discard mp1div2.add(One) - let expected = BigInt[381].fromUint(0'u16) var r = canary(BigInt[381]) - r.invmod(a, M, mp1div2) + r.invmod(a, M) check: bool(r == expected) diff --git a/tests/t_ec_conversion.nim b/tests/t_ec_conversion.nim new file mode 100644 index 0000000..97757a9 --- /dev/null +++ b/tests/t_ec_conversion.nim @@ -0,0 +1,63 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internals + ../constantine/config/[type_ff, curves], + ../constantine/elliptic/[ec_shortweierstrass_jacobian, ec_shortweierstrass_projective], + ../constantine/towers, + # Test utilities + ./t_ec_template + +const + Iters = 8 + +run_EC_conversion_failures( + moduleName = "test_ec_conversion_fuzzing_failures" +) + +run_EC_affine_conversion( + ec = ECP_ShortW_Jac[Fp[BN254_Snarks], G1], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_jacobian_g1_" & $BN254_Snarks + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Prj[Fp[BN254_Snarks], G1], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_projective_g1_" & $BN254_Snarks + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Jac[Fp2[BN254_Snarks], G2], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_jacobian_g2_" & $BN254_Snarks + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Prj[Fp2[BN254_Snarks], G2], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_projective_g2_" & $BN254_Snarks + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Jac[Fp[BLS12_381], G1], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_jacobian_g1_" & $BLS12_381 + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Prj[Fp[BLS12_381], G1], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_projective_g1_" & $BLS12_381 + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Jac[Fp2[BLS12_381], G2], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_jacobian_g2_" & $BLS12_381 + ) +run_EC_affine_conversion( + ec = ECP_ShortW_Prj[Fp2[BLS12_381], G2], + Iters = Iters, + moduleName = "test_ec_conversion_shortw_affine_projective_g2_" & $BLS12_381 + ) \ No newline at end of file diff --git a/tests/t_ec_sage_template.nim b/tests/t_ec_sage_template.nim index 49b21e3..3e9511a 100644 --- a/tests/t_ec_sage_template.nim +++ b/tests/t_ec_sage_template.nim @@ -203,12 +203,8 @@ proc run_scalar_mul_test_vs_sage*( reference {.noInit.}: EC endo {.noInit.}: EC - when EC is ECP_ShortW_Prj: - P.projectiveFromAffine(vec.vectors[i].P) - Q.projectiveFromAffine(vec.vectors[i].Q) - else: - P.jacobianFromAffine(vec.vectors[i].P) - Q.jacobianFromAffine(vec.vectors[i].Q) + P.fromAffine(vec.vectors[i].P) + Q.fromAffine(vec.vectors[i].Q) impl = P reference = P endo = P diff --git a/tests/t_ec_template.nim b/tests/t_ec_template.nim index bbfb4cc..80e9cf6 100644 --- a/tests/t_ec_template.nim +++ b/tests/t_ec_template.nim @@ -441,10 +441,7 @@ proc run_EC_mixed_add_impl*( let a = rng.random_point(EC, randZ, gen) let b = rng.random_point(EC, randZ, gen) var bAff: ECP_ShortW_Aff[EC.F, EC.G] - when b is ECP_ShortW_Prj: - bAff.affineFromProjective(b) - else: - bAff.affineFromJacobian(b) + bAff.affine(b) var r_generic, r_mixed: EC @@ -535,4 +532,169 @@ proc run_EC_subgroups_cofactors_impl*( test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence) echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", G1_or_G2, " subgroup and ", - offSubgroup, " points on curve but not in subgroup (before cofactor clearing)" \ No newline at end of file + offSubgroup, " points on curve but not in subgroup (before cofactor clearing)" + +proc run_EC_affine_conversion*( + ec: typedesc, + Iters: static int, + moduleName: string + ) = + + # Random seed for reproducibility + var rng: RngState + let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + rng.seed(seed) + echo "\n------------------------------------------------------\n" + echo moduleName, " xoshiro512** seed: ", seed + + const G1_or_G2 = pairingGroup(ec) + + const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form" + + suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]": + test "EC " & G1_or_G2 & " batchAffine is consistent with single affine conversion": + proc test(EC: typedesc, gen: RandomGen) = + const batchSize = 10 + for _ in 0 ..< Iters: + var Ps: array[batchSize, EC] + for i in 0 ..< batchSize: + Ps[i] = rng.random_point(EC, randZ = true, gen) + + var Qs, Rs: array[batchSize, affine(EC)] + for i in 0 ..< batchSize: + Qs[i].affine(Ps[i]) + Rs.batchAffine(Ps) + + for i in countdown(batchSize-1, 0): + doAssert bool(Qs[i] == Rs[i]), block: + var s: string + s &= "Mismatch on iteration " & $i + s &= "\nFailing batch for " & $EC & " (" & $WordBitwidth & "-bit)" + s &= "\n [" + for i in 0 ..< batchSize: + s &= "\n" & Ps[i].toHex(indent = 4) + if i != batchSize-1: s &= "," + s &= "\n ]" + s &= "\nFailing inversions for " & $EC & " (" & $WordBitwidth & "-bit)" + s &= "\n [" + for i in 0 ..< batchSize: + s &= "\n" & Rs[i].toHex(indent = 4) + if i != batchSize-1: s &= "," + s &= "\n ]" + s &= "\nExpected inversions for " & $EC & " (" & $WordBitwidth & "-bit)" + s &= "\n [" + for i in 0 ..< batchSize: + s &= "\n" & Qs[i].toHex(indent = 4) + if i != batchSize-1: s &= "," + s &= "\n ]" + s + + test(ec, gen = Uniform) + test(ec, gen = HighHammingWeight) + test(ec, gen = Long01Sequence) + +proc run_EC_conversion_failures*( + moduleName: string + ) = + + echo "\n------------------------------------------------------\n" + echo moduleName + + suite moduleName & " - [" & $WordBitwidth & "-bit mode]": + test "EC batchAffine fuzzing failures ": + proc test_bn254_snarks_g1(ECP: type) = + type ECP_Aff = ECP_ShortW_Aff[Fp[BN254_Snarks], G1] + + let Ps = [ + ECP.fromHex( + x = "0x0e0a76c19a07e01fe56f246f7878652c0b39eb28f5c60b3dd43e438dc50e0d9d", + y = "0x04e6da44bc7f802fab3df34ce45d86857327663bc24ff574da48ee2b01a4932e" + ), + ECP.fromHex( + x = "0x2036a21a3d9cc09d8f5f7491fe7e4f44cffd2addf01c6ae587bee7d24f060571", + y = "0x2b5f1cc6f1cdb4a6dbaf3c88b9c02ccf984aecbba4830d5aeb33f940cb632d8a" + ), + ECP.fromHex( + x = "0x2fd314a75c6b1f82d70f2edc7b7bf6e7397bc04bc6aaa0584b9e5bbb7689082a", + y = "0x111b3b4a697e7a990400eb39f09a9bb559748cea6699535bd114ffb3dcc0b4d1" + ), + ECP.fromHex( + x = "0x0000000000000000000000000000000000000000000000000000000000000000", + y = "0x0000000000000000000000000000000000000000000000000000000000000000" + ), + ECP.fromHex( + x = "0x0e0a77c199ffdf2f686ea36f7879462c0a74eb28f5e70b3dd31d438dc58f0d9d", + y = "0x0b3938a732020d98793510be6aa312651a5f5369ebbbe41d7fda8fd914b7f264" + ), + ECP.fromHex( + x = "0x0000000007ffffffffffffff80000000000007ffe000000000ffffffffffffff", + y = "0x1d9db0f30e3395ee33a70674a31e2854de0665292dd545c10fb3da579d7df916" + ), + ECP.fromHex( + x = "0x000000000000000fffffffffe0000000000007ffffffffffffffffffffffffff", + y = "0x2a5c6df4d24efa9ffcf4003e35801dc202d820b59d67ecc65d57cfdf53b4bbc6" + ), + ECP.fromHex( + x = "0x00000000000000000003ffffffc00000000000000c000000000000003ffffffe", + y = "0x09f811f84207472ccd6ca00bb1ec3e6132a1c9206adc9ed768871f0005f0d358" + ), + ECP.fromHex( + x = "0x0e0979b99d07df30656ea36f7879462c097beb28f5c8083dd25d448dc58f0ca4", + y = "0x1799b22d8780c917ab1c4e15da718c243babc1c51225b5f8298aa570b5029796" + ), + ECP.fromHex( + x = "0x0e0a76c29a07e02f666ea36e806a462c0a78eb25f5c70b3dd35c4b8dc58f0d9c", + y = "0x0529cb1ad2552c7979a900ff59551d5dc1f8680c3a4f20d3b9cdcf68b69ec61c" + ) + ] + + let Qs = [ + ECP_Aff.fromHex( + x = "0x0e0a76c19a07e01fe56f246f7878652c0b39eb28f5c60b3dd43e438dc50e0d9d", + y = "0x04e6da44bc7f802fab3df34ce45d86857327663bc24ff574da48ee2b01a4932e" + ), + ECP_Aff.fromHex( + x = "0x2036a21a3d9cc09d8f5f7491fe7e4f44cffd2addf01c6ae587bee7d24f060571", + y = "0x2b5f1cc6f1cdb4a6dbaf3c88b9c02ccf984aecbba4830d5aeb33f940cb632d8a" + ), + ECP_Aff.fromHex( + x = "0x2fd314a75c6b1f82d70f2edc7b7bf6e7397bc04bc6aaa0584b9e5bbb7689082a", + y = "0x111b3b4a697e7a990400eb39f09a9bb559748cea6699535bd114ffb3dcc0b4d1" + ), + ECP_Aff.fromHex( + x = "0x0000000000000000000000000000000000000000000000000000000000000000", + y = "0x0000000000000000000000000000000000000000000000000000000000000000" + ), + ECP_Aff.fromHex( + x = "0x0e0a77c199ffdf2f686ea36f7879462c0a74eb28f5e70b3dd31d438dc58f0d9d", + y = "0x0b3938a732020d98793510be6aa312651a5f5369ebbbe41d7fda8fd914b7f264" + ), + ECP_Aff.fromHex( + x = "0x0000000007ffffffffffffff80000000000007ffe000000000ffffffffffffff", + y = "0x1d9db0f30e3395ee33a70674a31e2854de0665292dd545c10fb3da579d7df916" + ), + ECP_Aff.fromHex( + x = "0x000000000000000fffffffffe0000000000007ffffffffffffffffffffffffff", + y = "0x2a5c6df4d24efa9ffcf4003e35801dc202d820b59d67ecc65d57cfdf53b4bbc6" + ), + ECP_Aff.fromHex( + x = "0x00000000000000000003ffffffc00000000000000c000000000000003ffffffe", + y = "0x09f811f84207472ccd6ca00bb1ec3e6132a1c9206adc9ed768871f0005f0d358" + ), + ECP_Aff.fromHex( + x = "0x0e0979b99d07df30656ea36f7879462c097beb28f5c8083dd25d448dc58f0ca4", + y = "0x1799b22d8780c917ab1c4e15da718c243babc1c51225b5f8298aa570b5029796" + ), + ECP_Aff.fromHex( + x = "0x0e0a76c29a07e02f666ea36e806a462c0a78eb25f5c70b3dd35c4b8dc58f0d9c", + y = "0x0529cb1ad2552c7979a900ff59551d5dc1f8680c3a4f20d3b9cdcf68b69ec61c" + ) + ] + + var Rs: array[10, ECP_Aff] + Rs.batchAffine(Ps) + for i in 0 ..< 10: + doAssert bool(Qs[i] == Rs[i]) + + test_bn254_snarks_g1(ECP_ShortW_Prj[Fp[BN254_Snarks], G1]) + test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1]) \ No newline at end of file diff --git a/tests/t_hash_to_curve.nim b/tests/t_hash_to_curve.nim index 7cebf73..9d07f71 100644 --- a/tests/t_hash_to_curve.nim +++ b/tests/t_hash_to_curve.nim @@ -132,7 +132,7 @@ proc run_hash_to_curve_test( ) var P_ref: EC - P_ref.projectiveFromAffine(vec.vectors[i].P) + P_ref.fromAffine(vec.vectors[i].P) doAssert: bool(P == P_ref) diff --git a/tests/t_io_unsaturated.nim b/tests/t_io_unsaturated.nim new file mode 100644 index 0000000..c426d4a --- /dev/null +++ b/tests/t_io_unsaturated.nim @@ -0,0 +1,89 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import std/[unittest,times], + ../constantine/config/[common, curves], + ../constantine/arithmetic, + ../constantine/arithmetic/limbs_unsaturated, + ../constantine/io/io_bigints, + ../helpers/prng_unsafe + +# Random seed for reproducibility +var rng: RngState +let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 +rng.seed(seed) +echo "\n------------------------------------------------------\n" +echo "test_io_unsaturated xoshiro512** seed: ", seed + +type + RandomGen = enum + Uniform + HighHammingWeight + Long01Sequence + +func random_bigint*(rng: var RngState, curve: static Curve, gen: static RandomGen): auto = + when gen == Uniform: + rng.random_unsafe(matchingBigInt(curve)) + elif gen == HighHammingWeight: + rng.random_highHammingWeight(matchingBigInt(curve)) + else: + rng.random_long01Seq(matchingBigInt(curve)) + +# debug +import std/strutils + +proc testRoundtrip(curve: static Curve, gen: static RandomGen) = + const bits = curve.getCurveBitwidth() + const Excess = 2 + const UnsatBitwidth = WordBitWidth - Excess + const N = (bits + UnsatBitwidth-1) div UnsatBitwidth + + let a = rng.random_bigint(curve, gen) + var u: LimbsUnsaturated[N, Excess] + var b: typeof(a) + + u.fromPackedRepr(a.limbs) + b.limbs.fromUnsatRepr(u) + + doAssert bool(a == b), block: + "\n a: " & a.toHex() & + "\n b: " & b.toHex() + +proc main() = + suite "Packed <-> Unsaturated limbs roundtrips" & " [" & $WordBitwidth & "-bit mode]": + const Iters = 10000 + test "BN254_Snarks": + for _ in 0 ..< Iters: + testRoundtrip(BN254_Snarks, Uniform) + for _ in 0 ..< Iters: + testRoundtrip(BN254_Snarks, HighHammingWeight) + for _ in 0 ..< Iters: + testRoundtrip(BN254_Snarks, Long01Sequence) + test "Curve25519": + for _ in 0 ..< Iters: + testRoundtrip(Curve25519, Uniform) + for _ in 0 ..< Iters: + testRoundtrip(Curve25519, HighHammingWeight) + for _ in 0 ..< Iters: + testRoundtrip(Curve25519, Long01Sequence) + test "secp256k1": + for _ in 0 ..< Iters: + testRoundtrip(Secp256k1, Uniform) + for _ in 0 ..< Iters: + testRoundtrip(Secp256k1, HighHammingWeight) + for _ in 0 ..< Iters: + testRoundtrip(Secp256k1, Long01Sequence) + test "BLS12-381": + for _ in 0 ..< Iters: + testRoundtrip(BLS12_381, Uniform) + for _ in 0 ..< Iters: + testRoundtrip(BLS12_381, HighHammingWeight) + for _ in 0 ..< Iters: + testRoundtrip(BLS12_381, Long01Sequence) + +main() \ No newline at end of file diff --git a/tests/t_pairing_bls12_377_line_functions.nim b/tests/t_pairing_bls12_377_line_functions.nim index c492be6..45771b6 100644 --- a/tests/t_pairing_bls12_377_line_functions.nim +++ b/tests/t_pairing_bls12_377_line_functions.nim @@ -100,7 +100,7 @@ suite "Pairing - Line Functions on BLS12-377" & " [" & $WordBitwidth & "-bit mod TQ.sum(T, Q) var Qaff{.noInit.}: ECP_ShortW_Aff[Fp2[C], G2] - Qaff.affineFromProjective(Q) + Qaff.affine(Q) l.line_add(T, Qaff, P) doAssert: bool(T == TQ) diff --git a/tests/t_pairing_bls12_381_line_functions.nim b/tests/t_pairing_bls12_381_line_functions.nim index e503708..1351404 100644 --- a/tests/t_pairing_bls12_381_line_functions.nim +++ b/tests/t_pairing_bls12_381_line_functions.nim @@ -100,7 +100,7 @@ suite "Pairing - Line Functions on BLS12-381" & " [" & $WordBitwidth & "-bit mod TQ.sum(T, Q) var Qaff{.noInit.}: ECP_ShortW_Aff[Fp2[C], G2] - Qaff.affineFromProjective(Q) + Qaff.affine(Q) l.line_add(T, Qaff, P) doAssert: bool(T == TQ) diff --git a/tests/t_pairing_template.nim b/tests/t_pairing_template.nim index e8aa62b..48823c8 100644 --- a/tests/t_pairing_template.nim +++ b/tests/t_pairing_template.nim @@ -43,9 +43,9 @@ func clearCofactor[F; G: static Subgroup]( ec: var ECP_ShortW_Aff[F, G]) = # For now we don't have any affine operation defined var t {.noInit.}: ECP_ShortW_Prj[F, G] - t.projectiveFromAffine(ec) + t.fromAffine(ec) t.clearCofactor() - ec.affineFromProjective(t) + ec.affine(t) func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen): EC {.noInit.} = if gen == Uniform: @@ -82,10 +82,10 @@ template runPairingTests*(Iters: static int, C: static Curve, G1, G2, GT: typede var Pa {.noInit.}, Pa2 {.noInit.}: affineType(P) var Qa {.noInit.}, Qa2 {.noInit.}: affineType(Q) - Pa.affineFromProjective(P) - Pa2.affineFromProjective(P2) - Qa.affineFromProjective(Q) - Qa2.affineFromProjective(Q2) + Pa.affine(P) + Pa2.affine(P2) + Qa.affine(Q) + Qa2.affine(Q2) r.pairing_fn(Pa, Qa) r.square() diff --git a/tests/t_sig_bls_lowlevel.nim b/tests/t_sig_bls_lowlevel.nim index 51a80f2..0c03bb3 100644 --- a/tests/t_sig_bls_lowlevel.nim +++ b/tests/t_sig_bls_lowlevel.nim @@ -85,9 +85,9 @@ func publicKeyG1( seckey: Fr[BLS12_381] ) = var t: ECP_ShortW_Prj[Fp[BLS12_381], G1] - t.projectiveFromAffine(BLS12_381_G1_generator) + t.fromAffine(BLS12_381_G1_generator) t.scalarMul(seckey.toBig()) - pubkey.affineFromprojective(t) + pubkey.affine(t) doAssert not bool pubkey.isInf() func signG2[T: byte|char]( @@ -105,7 +105,7 @@ func signG2[T: byte|char]( domainSepTag = DomainSepTag ) t.scalarMul(secretKey.toBig()) - signature.affineFromprojective(t) + signature.affine(t) doAssert not bool signature.isInf() func verifyG2[T: byte|char]( @@ -125,7 +125,7 @@ func verifyG2[T: byte|char]( message = message, domainSepTag = DomainSepTag ) - Q.affineFromprojective(Qprj) + Q.affine(Qprj) var e0{.noInit.}, e1{.noInit.}: Fp12[BLS12_381] e0.pairing_bls12(pubkey, Q) @@ -154,7 +154,7 @@ func verifyG2_multi[T: byte|char]( var G1s: array[2, ECP_ShortW_Aff[Fp[BLS12_381], G1]] G1s[0] = pubkey - G2s[0].affineFromprojective(Qprj) + G2s[0].affine(Qprj) G1s[1].neg(BLS12_381_G1_generator) G2s[1] = signature