From 9770b3108c21810d10124d0d958dfa61cbf25efe Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sun, 14 Aug 2022 09:48:10 +0200 Subject: [PATCH] Fp12 over fp6 (#201) * introduce sumprod for direct fp6_mul * change curves -> constants * forgotten constants * Full pairing using Fp2->Fp6->Fp12 towering --- benchmarks/bench_fields_template.nim | 42 +- benchmarks/bench_fp.nim | 9 +- benchmarks/bench_fp2.nim | 5 +- benchmarks/bench_fp4.nim | 5 +- benchmarks/bench_fp6.nim | 5 +- benchmarks/bench_pairing_template.nim | 4 +- benchmarks/bench_summary_template.nim | 4 +- constantine.nimble | 2 + constantine/blssig_pop_on_bls12381_g2.nim | 2 +- constantine/curves_primitives.nim | 2 +- constantine/ethereum_evm_precompiles.nim | 2 +- .../hash_to_curve/h2c_isogeny_maps.nim | 2 +- .../hash_to_curve/h2c_map_to_isocurve_swu.nim | 2 +- constantine/hash_to_curve/hash_to_curve.nim | 2 +- constantine/math/arithmetic/README.md | 2 +- .../assembly/limbs_asm_mul_mont_x86.nim | 201 ++++- .../limbs_asm_mul_mont_x86_adx_bmi2.nim | 191 +++- .../math/arithmetic/bigints_montgomery.nim | 14 + constantine/math/arithmetic/finite_fields.nim | 46 +- .../arithmetic/finite_fields_square_root.nim | 2 +- constantine/math/arithmetic/limbs_exgcd.nim | 23 +- .../math/arithmetic/limbs_montgomery.nim | 88 +- .../math/arithmetic/limbs_unsaturated.nim | 4 +- .../math/config/curves_prop_field_core.nim | 4 + .../math/{curves => constants}/README.md | 0 .../bandersnatch_sqrt.nim | 0 .../bls12_377_constants.nim | 0 .../bls12_377_endomorphisms.nim | 0 .../bls12_377_frobenius.nim | 0 .../bls12_377_pairing.nim | 0 .../{curves => constants}/bls12_377_sqrt.nim | 0 .../bls12_377_subgroups.nim | 2 +- .../bls12_381_constants.nim | 0 .../bls12_381_endomorphisms.nim | 0 .../bls12_381_frobenius.nim | 0 .../bls12_381_generators.nim | 0 .../bls12_381_hash_to_curve_g1.nim | 0 .../bls12_381_hash_to_curve_g2.nim | 0 .../bls12_381_pairing.nim | 0 .../{curves => constants}/bls12_381_sqrt.nim | 0 .../bls12_381_sqrt_fp2.nim | 0 .../bls12_381_subgroups.nim | 2 +- .../bn254_nogami_constants.nim | 0 .../bn254_nogami_endomorphisms.nim | 0 .../bn254_nogami_frobenius.nim | 0 .../bn254_nogami_pairing.nim | 0 .../bn254_nogami_sqrt.nim | 0 .../bn254_nogami_sqrt_fp2.nim | 0 .../bn254_nogami_subgroups.nim | 0 .../bn254_snarks_constants.nim | 0 .../bn254_snarks_endomorphisms.nim | 0 .../bn254_snarks_frobenius.nim | 0 .../bn254_snarks_generators.nim | 0 .../bn254_snarks_hash_to_curve_g1.nim | 0 .../bn254_snarks_hash_to_curve_g2.nim | 0 .../bn254_snarks_pairing.nim | 0 .../bn254_snarks_sqrt.nim | 0 .../bn254_snarks_sqrt_fp2.nim | 0 .../bn254_snarks_subgroups.nim | 0 .../bw6_761_constants.nim | 0 .../bw6_761_endomorphisms.nim | 0 .../bw6_761_frobenius.nim | 0 .../{curves => constants}/bw6_761_pairing.nim | 0 .../{curves => constants}/bw6_761_sqrt.nim | 0 .../bw6_761_subgroups.nim | 0 .../{curves => constants}/curve25519_sqrt.nim | 0 .../{curves => constants}/jubjub_sqrt.nim | 0 .../pallas_endomorphisms.nim | 0 .../{curves => constants}/pallas_sqrt.nim | 0 .../pallas_subgroups.nim | 0 .../vesta_endomorphisms.nim | 0 .../math/{curves => constants}/vesta_sqrt.nim | 0 .../{curves => constants}/vesta_subgroups.nim | 0 .../{curves => constants}/zoo_constants.nim | 0 .../zoo_endomorphisms.nim | 0 .../{curves => constants}/zoo_frobenius.nim | 0 .../{curves => constants}/zoo_generators.nim | 0 .../zoo_hash_to_curve.nim | 0 .../{curves => constants}/zoo_pairings.nim | 0 .../zoo_square_roots.nim | 0 .../zoo_square_roots_fp2.nim | 0 .../{curves => constants}/zoo_subgroups.nim | 0 .../math/elliptic/ec_endomorphism_accel.nim | 2 +- constantine/math/elliptic/ec_scalar_mul.nim | 2 +- .../elliptic/ec_shortweierstrass_affine.nim | 2 +- .../math/extension_fields/square_root_fp2.nim | 2 +- constantine/math/extension_fields/towers.nim | 382 ++++++-- constantine/math/io/io_extfields.nim | 4 +- constantine/math/isogenies/frobenius.nim | 23 +- .../math/pairing/cyclotomic_subgroup.nim | 193 +++- constantine/math/pairing/lines_eval.nim | 831 ++++++++++++++---- constantine/math/pairing/pairing_bls12.nim | 2 +- constantine/math/pairing/pairing_bn.nim | 2 +- constantine/math/pairing/pairing_bw6_761.nim | 2 +- .../platforms/isa/macro_assembler_x86.nim | 36 +- constantine/signatures/bls_signatures.nim | 2 +- metering/m_pairings.nim | 2 +- tests/math/t_ec_template.nim | 2 +- tests/math/t_finite_fields_mulsquare.nim | 73 +- tests/math/t_fp12_bn254_nogami.nim | 26 + tests/math/t_fp6_bn254_nogami.nim | 26 + tests/math/t_fp_cubic_root.nim | 2 +- tests/math/t_fp_tower_template.nim | 8 +- tests/math/t_pairing_mul_fp12_by_lines.nim | 277 +++++- tests/math/t_pairing_template.nim | 2 +- tests/t_hash_to_curve_random.nim | 2 +- 106 files changed, 2141 insertions(+), 431 deletions(-) rename constantine/math/{curves => constants}/README.md (100%) rename constantine/math/{curves => constants}/bandersnatch_sqrt.nim (100%) rename constantine/math/{curves => constants}/bls12_377_constants.nim (100%) rename constantine/math/{curves => constants}/bls12_377_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/bls12_377_frobenius.nim (100%) rename constantine/math/{curves => constants}/bls12_377_pairing.nim (100%) rename constantine/math/{curves => constants}/bls12_377_sqrt.nim (100%) rename constantine/math/{curves => constants}/bls12_377_subgroups.nim (99%) rename constantine/math/{curves => constants}/bls12_381_constants.nim (100%) rename constantine/math/{curves => constants}/bls12_381_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/bls12_381_frobenius.nim (100%) rename constantine/math/{curves => constants}/bls12_381_generators.nim (100%) rename constantine/math/{curves => constants}/bls12_381_hash_to_curve_g1.nim (100%) rename constantine/math/{curves => constants}/bls12_381_hash_to_curve_g2.nim (100%) rename constantine/math/{curves => constants}/bls12_381_pairing.nim (100%) rename constantine/math/{curves => constants}/bls12_381_sqrt.nim (100%) rename constantine/math/{curves => constants}/bls12_381_sqrt_fp2.nim (100%) rename constantine/math/{curves => constants}/bls12_381_subgroups.nim (99%) rename constantine/math/{curves => constants}/bn254_nogami_constants.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_frobenius.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_pairing.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_sqrt.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_sqrt_fp2.nim (100%) rename constantine/math/{curves => constants}/bn254_nogami_subgroups.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_constants.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_frobenius.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_generators.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_hash_to_curve_g1.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_hash_to_curve_g2.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_pairing.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_sqrt.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_sqrt_fp2.nim (100%) rename constantine/math/{curves => constants}/bn254_snarks_subgroups.nim (100%) rename constantine/math/{curves => constants}/bw6_761_constants.nim (100%) rename constantine/math/{curves => constants}/bw6_761_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/bw6_761_frobenius.nim (100%) rename constantine/math/{curves => constants}/bw6_761_pairing.nim (100%) rename constantine/math/{curves => constants}/bw6_761_sqrt.nim (100%) rename constantine/math/{curves => constants}/bw6_761_subgroups.nim (100%) rename constantine/math/{curves => constants}/curve25519_sqrt.nim (100%) rename constantine/math/{curves => constants}/jubjub_sqrt.nim (100%) rename constantine/math/{curves => constants}/pallas_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/pallas_sqrt.nim (100%) rename constantine/math/{curves => constants}/pallas_subgroups.nim (100%) rename constantine/math/{curves => constants}/vesta_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/vesta_sqrt.nim (100%) rename constantine/math/{curves => constants}/vesta_subgroups.nim (100%) rename constantine/math/{curves => constants}/zoo_constants.nim (100%) rename constantine/math/{curves => constants}/zoo_endomorphisms.nim (100%) rename constantine/math/{curves => constants}/zoo_frobenius.nim (100%) rename constantine/math/{curves => constants}/zoo_generators.nim (100%) rename constantine/math/{curves => constants}/zoo_hash_to_curve.nim (100%) rename constantine/math/{curves => constants}/zoo_pairings.nim (100%) rename constantine/math/{curves => constants}/zoo_square_roots.nim (100%) rename constantine/math/{curves => constants}/zoo_square_roots_fp2.nim (100%) rename constantine/math/{curves => constants}/zoo_subgroups.nim (100%) create mode 100644 tests/math/t_fp12_bn254_nogami.nim create mode 100644 tests/math/t_fp6_bn254_nogami.nim diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim index b06c083..e65c773 100644 --- a/benchmarks/bench_fields_template.nim +++ b/benchmarks/bench_fields_template.nim @@ -18,7 +18,7 @@ import ../constantine/math/config/curves, ../constantine/math/arithmetic, ../constantine/math/extension_fields, - ../constantine/math/curves/zoo_square_roots, + ../constantine/math/constants/zoo_square_roots, # Helpers ../helpers/prng_unsafe, ./bench_blueprint @@ -47,6 +47,20 @@ template bench(op: string, T: typedesc, iters: int, body: untyped): untyped = measure(iters, startTime, stopTime, startClk, stopClk, body) report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters) +func random_unsafe(rng: var RngState, a: var FpDbl) = + ## Initialize a standalone Double-Width field element + ## we don't reduce it modulo p², this is only used for benchmark + let aHi = rng.random_unsafe(Fp[FpDbl.C]) + let aLo = rng.random_unsafe(Fp[FpDbl.C]) + for i in 0 ..< aLo.mres.limbs.len: + a.limbs2x[i] = aLo.mres.limbs[i] + for i in 0 ..< aHi.mres.limbs.len: + a.limbs2x[aLo.mres.limbs.len+i] = aHi.mres.limbs[i] + +func random_unsafe(rng: var RngState, a: var ExtensionField2x) = + for i in 0 ..< a.coords.len: + rng.random_unsafe(a.coords[i]) + proc addBench*(T: typedesc, iters: int) = var x = rng.random_unsafe(T) let y = rng.random_unsafe(T) @@ -92,21 +106,39 @@ proc sqrBench*(T: typedesc, iters: int) = bench("Squaring", T, iters): r.square(x) -proc mulUnrBench*(T: typedesc, iters: int) = +proc mul2xUnrBench*(T: typedesc, iters: int) = var r: doublePrec(T) let x = rng.random_unsafe(T) let y = rng.random_unsafe(T) preventOptimAway(r) - bench("Multiplication unreduced", T, iters): + bench("Multiplication 2x unreduced", T, iters): r.prod2x(x, y) -proc sqrUnrBench*(T: typedesc, iters: int) = +proc sqr2xUnrBench*(T: typedesc, iters: int) = var r: doublePrec(T) let x = rng.random_unsafe(T) preventOptimAway(r) - bench("Squaring unreduced", T, iters): + bench("Squaring 2x unreduced", T, iters): r.square2x(x) +proc rdc2xBench*(T: typedesc, iters: int) = + var r: T + var t: doublePrec(T) + rng.random_unsafe(t) + preventOptimAway(r) + bench("Redc 2x", T, iters): + r.redc2x(t) + +proc sumprodBench*(T: typedesc, iters: int) = + var r: T + let a = rng.random_unsafe(T) + let b = rng.random_unsafe(T) + let u = rng.random_unsafe(T) + let v = rng.random_unsafe(T) + preventOptimAway(r) + bench("Linear combination", T, iters): + r.sumprod([a, b], [u, v]) + proc toBigBench*(T: typedesc, iters: int) = var r: matchingBigInt(T.C) let x = rng.random_unsafe(T) diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim index d2eefe3..8ef45d1 100644 --- a/benchmarks/bench_fp.nim +++ b/benchmarks/bench_fp.nim @@ -11,7 +11,7 @@ import ../constantine/math/config/curves, ../constantine/math/arithmetic, ../constantine/math/io/io_bigints, - ../constantine/math/curves/zoo_square_roots, + ../constantine/math/constants/zoo_square_roots, # Helpers ../helpers/static_for, ./bench_fields_template @@ -52,8 +52,11 @@ proc main() = mulBench(Fp[curve], Iters) sqrBench(Fp[curve], Iters) smallSeparator() - mulUnrBench(Fp[curve], Iters) - sqrUnrBench(Fp[curve], Iters) + mul2xUnrBench(Fp[curve], Iters) + sqr2xUnrBench(Fp[curve], Iters) + rdc2xBench(Fp[curve], Iters) + smallSeparator() + sumprodBench(Fp[curve], Iters) smallSeparator() toBigBench(Fp[curve], Iters) toFieldBench(Fp[curve], Iters) diff --git a/benchmarks/bench_fp2.nim b/benchmarks/bench_fp2.nim index 0f0eb5f..e4503df 100644 --- a/benchmarks/bench_fp2.nim +++ b/benchmarks/bench_fp2.nim @@ -46,8 +46,9 @@ proc main() = mulBench(Fp2[curve], Iters) sqrBench(Fp2[curve], Iters) smallSeparator() - mulUnrBench(Fp2[curve], Iters) - sqrUnrBench(Fp2[curve], Iters) + mul2xUnrBench(Fp2[curve], Iters) + sqr2xUnrBench(Fp2[curve], Iters) + rdc2xBench(Fp2[curve], Iters) smallSeparator() invBench(Fp2[curve], InvIters) isSquareBench(Fp2[curve], InvIters) diff --git a/benchmarks/bench_fp4.nim b/benchmarks/bench_fp4.nim index 2250bb5..1f14c8b 100644 --- a/benchmarks/bench_fp4.nim +++ b/benchmarks/bench_fp4.nim @@ -46,8 +46,9 @@ proc main() = mulBench(Fp4[curve], Iters) sqrBench(Fp4[curve], Iters) smallSeparator() - mulUnrBench(Fp2[curve], Iters) - sqrUnrBench(Fp2[curve], Iters) + mul2xUnrBench(Fp4[curve], Iters) + sqr2xUnrBench(Fp4[curve], Iters) + rdc2xBench(Fp4[curve], Iters) smallSeparator() invBench(Fp4[curve], InvIters) separator() diff --git a/benchmarks/bench_fp6.nim b/benchmarks/bench_fp6.nim index 28327b5..b167121 100644 --- a/benchmarks/bench_fp6.nim +++ b/benchmarks/bench_fp6.nim @@ -44,8 +44,9 @@ proc main() = mulBench(Fp6[curve], Iters) sqrBench(Fp6[curve], Iters) smallSeparator() - mulUnrBench(Fp6[curve], Iters) - sqrUnrBench(Fp6[curve], Iters) + mul2xUnrBench(Fp6[curve], Iters) + sqr2xUnrBench(Fp6[curve], Iters) + rdc2xBench(Fp6[curve], Iters) smallSeparator() invBench(Fp6[curve], InvIters) separator() diff --git a/benchmarks/bench_pairing_template.nim b/benchmarks/bench_pairing_template.nim index dcfd01a..c1e55e5 100644 --- a/benchmarks/bench_pairing_template.nim +++ b/benchmarks/bench_pairing_template.nim @@ -19,14 +19,14 @@ import ../constantine/math/arithmetic, ../constantine/math/extension_fields, ../constantine/math/ec_shortweierstrass, - ../constantine/math/curves/zoo_subgroups, + ../constantine/math/constants/zoo_subgroups, ../constantine/math/pairing/[ cyclotomic_subgroup, lines_eval, pairing_bls12, pairing_bn ], - ../constantine/math/curves/zoo_pairings, + ../constantine/math/constants/zoo_pairings, # Helpers ../helpers/prng_unsafe, ./bench_blueprint diff --git a/benchmarks/bench_summary_template.nim b/benchmarks/bench_summary_template.nim index b876ca5..32b6ec3 100644 --- a/benchmarks/bench_summary_template.nim +++ b/benchmarks/bench_summary_template.nim @@ -22,13 +22,13 @@ import ec_shortweierstrass_projective, ec_shortweierstrass_jacobian, ec_scalar_mul, ec_endomorphism_accel], - ../constantine/math/curves/zoo_subgroups, + ../constantine/math/constants/zoo_subgroups, ../constantine/math/pairing/[ cyclotomic_subgroup, pairing_bls12, pairing_bn ], - ../constantine/math/curves/zoo_pairings, + ../constantine/math/constants/zoo_pairings, ../constantine/hashes, ../constantine/hash_to_curve/hash_to_curve, # Helpers diff --git a/constantine.nimble b/constantine.nimble index 56bd076..7b363c3 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -52,10 +52,12 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ ("tests/math/t_fp2.nim", false), ("tests/math/t_fp2_sqrt.nim", false), ("tests/math/t_fp4.nim", false), + ("tests/math/t_fp6_bn254_nogami.nim", false), ("tests/math/t_fp6_bn254_snarks.nim", false), ("tests/math/t_fp6_bls12_377.nim", false), ("tests/math/t_fp6_bls12_381.nim", false), ("tests/math/t_fp6_bw6_761.nim", false), + ("tests/math/t_fp12_bn254_nogami.nim", false), ("tests/math/t_fp12_bn254_snarks.nim", false), ("tests/math/t_fp12_bls12_377.nim", false), ("tests/math/t_fp12_bls12_381.nim", false), diff --git a/constantine/blssig_pop_on_bls12381_g2.nim b/constantine/blssig_pop_on_bls12381_g2.nim index a42fa5e..d9de9ad 100644 --- a/constantine/blssig_pop_on_bls12381_g2.nim +++ b/constantine/blssig_pop_on_bls12381_g2.nim @@ -13,7 +13,7 @@ import ec_shortweierstrass, extension_fields, arithmetic, - curves/zoo_subgroups + constants/zoo_subgroups ], ./math/io/[io_bigints, io_fields], hashes, diff --git a/constantine/curves_primitives.nim b/constantine/curves_primitives.nim index 35dbca5..9603980 100644 --- a/constantine/curves_primitives.nim +++ b/constantine/curves_primitives.nim @@ -23,7 +23,7 @@ import cyclotomic_subgroup, lines_eval ], - ./math/curves/zoo_pairings, + ./math/constants/zoo_pairings, ./hash_to_curve/hash_to_curve # ############################################################ diff --git a/constantine/ethereum_evm_precompiles.nim b/constantine/ethereum_evm_precompiles.nim index 945ef5d..5c374fc 100644 --- a/constantine/ethereum_evm_precompiles.nim +++ b/constantine/ethereum_evm_precompiles.nim @@ -13,7 +13,7 @@ import ./math/arithmetic/limbs_montgomery, ./math/ec_shortweierstrass, ./math/pairing/[pairing_bn, miller_loops, cyclotomic_subgroup], - ./math/curves/zoo_subgroups, + ./math/constants/zoo_subgroups, ./math/io/[io_bigints, io_fields] # ############################################################ diff --git a/constantine/hash_to_curve/h2c_isogeny_maps.nim b/constantine/hash_to_curve/h2c_isogeny_maps.nim index 3b083a0..edf7251 100644 --- a/constantine/hash_to_curve/h2c_isogeny_maps.nim +++ b/constantine/hash_to_curve/h2c_isogeny_maps.nim @@ -10,7 +10,7 @@ import # Internals ../platforms/abstractions, ../math/[arithmetic, extension_fields], - ../math/curves/zoo_hash_to_curve, + ../math/constants/zoo_hash_to_curve, ../math/elliptic/[ ec_shortweierstrass_projective, ec_shortweierstrass_jacobian, diff --git a/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim b/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim index 0d3e987..00fbc96 100644 --- a/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim +++ b/constantine/hash_to_curve/h2c_map_to_isocurve_swu.nim @@ -11,7 +11,7 @@ import ../platforms/abstractions, ../math/config/curves, ../math/[arithmetic, extension_fields], - ../math/curves/zoo_hash_to_curve, + ../math/constants/zoo_hash_to_curve, ./h2c_utilities # ############################################################ diff --git a/constantine/hash_to_curve/hash_to_curve.nim b/constantine/hash_to_curve/hash_to_curve.nim index 984c52e..ef45b49 100644 --- a/constantine/hash_to_curve/hash_to_curve.nim +++ b/constantine/hash_to_curve/hash_to_curve.nim @@ -11,7 +11,7 @@ import ../platforms/abstractions, ../math/config/curves, ../math/[arithmetic, extension_fields], - ../math/curves/[zoo_hash_to_curve, zoo_subgroups], + ../math/constants/[zoo_hash_to_curve, zoo_subgroups], ../math/ec_shortweierstrass, ./h2c_hash_to_field, ./h2c_map_to_isocurve_swu, diff --git a/constantine/math/arithmetic/README.md b/constantine/math/arithmetic/README.md index 57553d6..9392247 100644 --- a/constantine/math/arithmetic/README.md +++ b/constantine/math/arithmetic/README.md @@ -34,7 +34,7 @@ where code size is not an issue for example for multi-precision addition. - Faster big-integer modular multiplication for most moduli\ Gautam Botrel, Gus Gutoski, and Thomas Piellard, 2020\ - https://hackmd.io/@zkteam/modular_multiplication + https://hackmd.io/@gnark/modular_multiplication ### Square roots diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim index daa7d19..cb81e4d 100644 --- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim +++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86.nim @@ -45,8 +45,8 @@ macro mulMont_CIOS_sparebit_gen[N: static int]( ## The multiplication and reduction are further merged in the same loop ## ## This requires the most significant word of the Modulus - ## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) - ## https://hackmd.io/@zkteam/modular_multiplication + ## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111) + ## https://hackmd.io/@gnark/modular_multiplication # No register spilling handling doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." @@ -213,3 +213,200 @@ func squareMont_CIOS_asm*[N]( var r2x {.noInit.}: Limbs[2*N] r2x.square_asm_inline(a) r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub) + +# Montgomery Sum of Products +# ------------------------------------------------------------ + +macro sumprodMont_CIOS_spare2bits_gen[N, K: static int]( + r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]], + M_PIR: Limbs[N], m0ninv_REG: BaseType, + skipFinalSub: static bool + ): untyped = + ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel + ## using the CIOS method + ## + ## This requires 2 spare bits in the most significant word + ## so that we can skip the intermediate reductions + + # No register spilling handling + doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." + + doAssert K <= 8, "we cannot sum more than 8 products" + # Bounds: + # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR + # for all intent and purposes this is true since aᵢ.bᵢ is: + # if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums + # if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1, + # with 4p < R due to the 2 unused bits constraint so more than p sums are allowed + # 2. We have a high-word tN to accumulate overflows. + # with 2 unused bits in the last word, + # the multiplication of two last words will leave 4 unused bits + # enough for accumulating 8 additions and overflow. + + result = newStmtList() + + var ctx = init(Assembler_x86, BaseType) + let + scratchSlots = 6 + + # We could force M as immediate by specializing per moduli + M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input) + # If N is too big, we need to spill registers. TODO. + t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber) + # MultiPurpose Register slots + scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber) + + # MUL requires RAX and RDX + + m0ninv = Operand( + desc: OperandDesc( + asmId: "[m0ninv]", + nimSymbol: m0ninv_REG, + rm: MemOffsettable, + constraint: Input, + cEmit: "&" & $m0ninv_REG + ) + ) + + # We're really constrained by register and somehow setting as memory doesn't help + # So we store the result `r` in the scratch space and then reload it in RDX + # before the scratchspace is used in final substraction + a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand + b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand + tN = scratch[2] # High part of extended precision multiplication + C = scratch[3] # Carry during reduction step + r = scratch[4] # Stores the `r` operand + S = scratch[5] # Mul step: Stores the carry A + # Red step: Stores (t[0] * m0ninv) mod 2ʷ + + # Registers used: + # - 1 for `M` + # - 6 for `t` (at most) + # - 6 for `scratch` + # - 2 for RAX and RDX + # Total 15 out of 16 + # We can save 1 by hardcoding M as immediate (and m0ninv) + # but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381 + # We might be able to save registers by having `r` and `M` be memory operand as well + + let tsym = t.nimSymbol + let scratchSym = scratch.nimSymbol + result.add quote do: + static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress) + + var `tsym`{.noInit, used.}: typeof(`r_PIR`) + # Assumes 64-bit limbs on 64-bit arch (or you can't store an address) + var `scratchSym` {.noInit.}: Limbs[`scratchSlots`] + `scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr) + `scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr) + `scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr) + + # Algorithm + # ----------------------------------------- + # for i=0 to N-1 + # tN := 0 + # for k=0 to K-1 + # A := 0 + # for j=0 to N-1 + # (A,t[j]) := t[j] + a[k][j]*b[k][i] + A + # tN += A + # m := t[0]*m0ninv mod W + # C,_ := t[0] + m*M[0] + # for j=1 to N-1 + # (C,t[j-1]) := t[j] + m*M[j] + C + # t[N-1] = tN + C + + # No register spilling handling + doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." + + for i in 0 ..< N: + # Multiplication step + ctx.comment " Multiplication step" + ctx.comment " tN = 0" + ctx.`xor` tN, tN + for k in 0 ..< K: + template A: untyped = S + + ctx.comment " A = 0" + ctx.`xor` A, A + ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A" + ctx.mov rax, a[k, 0] + ctx.mul rdx, rax, b[k, i], rax + if i == 0 and k == 0: # First accumulation, overwrite t[0] + ctx.mov t[0], rax + else: # Accumulate in t[0] + ctx.add t[0], rax + ctx.adc rdx, 0 + ctx.mov A, rdx + + for j in 1 ..< N: + ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A" + ctx.mov rax, a[k, j] + ctx.mul rdx, rax, b[k, i], rax + if i == 0 and k == 0: # First accumulation, overwrite t[0] + ctx.mov t[j], A + else: # Accumulate in t[0] + ctx.add t[j], A + ctx.adc rdx, 0 + ctx.`xor` A, A + ctx.add t[j], rax + ctx.adc A, rdx + + ctx.comment " tN += A" + ctx.add tN, A + + # Reduction step + ctx.comment " Reduction step" + template m: untyped = S + ctx.comment " m := t[0]*m0ninv mod 2ʷ" + ctx.mov rax, m0ninv + ctx.imul rax, t[0] + ctx.mov m, rax + ctx.comment " C,_ := t[0] + m*M[0]" + ctx.`xor` C, C + ctx.mul rdx, rax, M[0], rax + ctx.add rax, t[0] + ctx.adc C, rdx + + for j in 1 ..< N: + ctx.comment " (C,t[j-1]) := t[j] + m*M[j] + C" + ctx.mov rax, M[j] + ctx.mul rdx, rax, m, rax + ctx.add C, t[j] + ctx.adc rdx, 0 + ctx.add C, rax + ctx.adc rdx, 0 + ctx.mov t[j-1], C + ctx.mov C, rdx + + ctx.comment "t[N-1] = tN + C" + ctx.add tN, C + ctx.mov t[N-1], tN + + + ctx.mov rax, r # move r away from scratchspace that will be used for final substraction + let r2 = rax.asArrayAddr(len = N) + + if skipFinalSub: + ctx.comment " Copy result" + for i in 0 ..< N: + ctx.mov r2[i], t[i] + else: + ctx.comment " Final substraction" + ctx.finalSubNoCarryImpl( + r2, t, M, + scratch + ) + result.add ctx.generate() + +func sumprodMont_CIOS_spare2bits_asm*[N, K: static int]( + r: var Limbs[N], a, b: array[K, Limbs[N]], + M: Limbs[N], m0ninv: BaseType, + skipFinalSub: static bool) = + ## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain + ## If "skipFinalSub" is set + ## the result is in the range [0, 2M) + ## otherwise the result is in the range [0, M) + ## + ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation + r.sumprodMont_CIOS_spare2bits_gen(a, b, M, m0ninv, skipFinalSub) \ No newline at end of file diff --git a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim index d6f94b5..98a1da4 100644 --- a/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim +++ b/constantine/math/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim @@ -130,7 +130,8 @@ proc partialRedx( t: OperandArray, M: OperandArray, m0ninv: Operand, - lo, S: Operand + lo: Operand or Register, + S: Operand ) = ## Partial Montgomery reduction ## For CIOS method @@ -182,8 +183,8 @@ macro mulMont_CIOS_sparebit_adx_gen[N: static int]( ## Generate an optimized Montgomery Multiplication kernel ## using the CIOS method ## This requires the most significant word of the Modulus - ## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) - ## https://hackmd.io/@zkteam/modular_multiplication + ## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111) + ## https://hackmd.io/@gnark/modular_multiplication # No register spilling handling doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." @@ -308,3 +309,187 @@ func squareMont_CIOS_asm_adx*[N]( var r2x {.noInit.}: Limbs[2*N] r2x.square_asm_adx_inline(a) r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub) + +# Montgomery Sum of Products +# ------------------------------------------------------------ + +macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int]( + r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]], + M_PIR: Limbs[N], m0ninv_REG: BaseType, + skipFinalSub: static bool + ): untyped = + ## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel + ## using the CIOS method + ## + ## This requires 2 spare bits in the most significant word + ## so that we can skip the intermediate reductions + + # No register spilling handling + doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." + + doAssert K <= 8, "we cannot sum more than 8 products" + # Bounds: + # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR + # for all intent and purposes this is true since aᵢ.bᵢ is: + # if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums + # if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1, + # with 4p < R due to the 2 unused bits constraint so more than p sums are allowed + # 2. We have a high-word tN to accumulate overflows. + # with 2 unused bits in the last word, + # the multiplication of two last words will leave 4 unused bits + # enough for accumulating 8 additions and overflow. + + result = newStmtList() + + var ctx = init(Assembler_x86, BaseType) + let + scratchSlots = 6 + + # We could force M as immediate by specializing per moduli + M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input) + # If N is too big, we need to spill registers. TODO. + t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber) + # MultiPurpose Register slots + scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber) + + # MULX requires RDX as well + + m0ninv = Operand( + desc: OperandDesc( + asmId: "[m0ninv]", + nimSymbol: m0ninv_REG, + rm: MemOffsettable, + constraint: Input, + cEmit: "&" & $m0ninv_REG + ) + ) + + # We're really constrained by register and somehow setting as memory doesn't help + # So we store the result `r` in the scratch space and then reload it in RDX + # before the scratchspace is used in final substraction + a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand + b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand + tN = scratch[2] # High part of extended precision multiplication + C = scratch[3] # Carry during reduction step + r = scratch[4] # Stores the `r` operand + S = scratch[5] # Mul step: Stores the carry A + # Red step: Stores (t[0] * m0ninv) mod 2ʷ + + # Registers used: + # - 1 for `M` + # - 6 for `t` (at most) + # - 6 for `scratch` + # - 2 for RAX and RDX + # Total 15 out of 16 + # We can save 1 by hardcoding M as immediate (and m0ninv) + # but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381 + # We might be able to save registers by having `r` and `M` be memory operand as well + + let tsym = t.nimSymbol + let scratchSym = scratch.nimSymbol + result.add quote do: + static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress) + + var `tsym`{.noInit, used.}: typeof(`r_PIR`) + # Assumes 64-bit limbs on 64-bit arch (or you can't store an address) + var `scratchSym` {.noInit.}: Limbs[`scratchSlots`] + `scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr) + `scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr) + `scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr) + + # Algorithm + # ----------------------------------------- + # for i=0 to N-1 + # tN := 0 + # for k=0 to K-1 + # A := 0 + # for j=0 to N-1 + # (A,t[j]) := t[j] + a[k][j]*b[k][i] + A + # tN += A + # m := t[0]*m0ninv mod W + # C,_ := t[0] + m*M[0] + # for j=1 to N-1 + # (C,t[j-1]) := t[j] + m*M[j] + C + # t[N-1] = tN + C + + # No register spilling handling + doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." + + for i in 0 ..< N: + # Multiplication step + ctx.comment " Multiplication step" + ctx.comment " tN = 0" + ctx.`xor` tN, tN + for k in 0 ..< K: + template A: untyped = S + + ctx.comment " A = 0" + ctx.`xor` A, A + ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A" + ctx.mov rdx, b[k, i] + if i == 0 and k == 0: # First accumulation, overwrite t[0] + ctx.mulx t[1], t[0], a[k, 0], rdx + else: # Accumulate in t[0] + ctx.mulx A, rax, a[k, 0], rdx + ctx.adcx t[0], rax + ctx.adox t[1], A + + for j in 1 ..< N-1: + ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A" + if i == 0 and k == 0: + ctx.mulx t[j+1], rax, a[k, j], rdx + if j == 1: + ctx.add t[j], rax + else: + ctx.adc t[j], rax + else: + ctx.mulx A, rax, a[k, j], rdx + ctx.adcx t[j], rax + ctx.adox t[j+1], A + + # Last limb + ctx.mulx A, rax, a[k, N-1], rdx + if i == 0 and k == 0: + ctx.adc t[N-1], rax + ctx.comment " tN += A" + ctx.adc tN, A + else: + ctx.adcx t[N-1], rax + ctx.comment " tN += A" + ctx.mov rdx, 0 # Set to 0 without clearing flags + ctx.adox tN, A + ctx.adcx tN, rdx + + # Reduction step + ctx.partialRedx( + tN, t, + M, m0ninv, + rax, C + ) + + ctx.mov rax, r # move r away from scratchspace that will be used for final substraction + let r2 = rax.asArrayAddr(len = N) + + if skipFinalSub: + ctx.comment " Copy result" + for i in 0 ..< N: + ctx.mov r2[i], t[i] + else: + ctx.comment " Final substraction" + ctx.finalSubNoCarryImpl( + r2, t, M, + scratch + ) + result.add ctx.generate() + +func sumprodMont_CIOS_spare2bits_asm_adx*[N, K: static int]( + r: var Limbs[N], a, b: array[K, Limbs[N]], + M: Limbs[N], m0ninv: BaseType, + skipFinalSub: static bool) = + ## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain + ## If "skipFinalSub" is set + ## the result is in the range [0, 2M) + ## otherwise the result is in the range [0, M) + ## + ## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation + r.sumprodMont_CIOS_spare2bits_adx_gen(a, b, M, m0ninv, skipFinalSub) \ No newline at end of file diff --git a/constantine/math/arithmetic/bigints_montgomery.nim b/constantine/math/arithmetic/bigints_montgomery.nim index 52d7893..96faa83 100644 --- a/constantine/math/arithmetic/bigints_montgomery.nim +++ b/constantine/math/arithmetic/bigints_montgomery.nim @@ -66,6 +66,20 @@ func squareMont*(r: var BigInt, a, M: BigInt, negInvModWord: static BaseType, ## to avoid duplicating with Nim zero-init policy squareMont(r.limbs, a.limbs, M.limbs, negInvModWord, spareBits, skipFinalSub) +func sumprodMont*[N: static int]( + r: var BigInt, + a, b: array[N, BigInt], + M: BigInt, negInvModWord: static BaseType, + spareBits: static int, skipFinalSub: static bool = false) = + ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products) in the Montgomery domain + # We rely on BigInt and Limbs having the same repr to avoid array copies + sumprodMont( + r.limbs, + cast[ptr array[N, typeof(a[0].limbs)]](a.unsafeAddr)[], + cast[ptr array[N, typeof(b[0].limbs)]](b.unsafeAddr)[], + M.limbs, negInvModWord, spareBits, skipFinalSub + ) + func powMont*[mBits: static int]( a: var BigInt[mBits], exponent: openarray[byte], M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int, diff --git a/constantine/math/arithmetic/finite_fields.nim b/constantine/math/arithmetic/finite_fields.nim index 1aae4a3..8b2f328 100644 --- a/constantine/math/arithmetic/finite_fields.nim +++ b/constantine/math/arithmetic/finite_fields.nim @@ -149,6 +149,26 @@ func setMinusOne*(a: var FF) = # Check if the compiler optimizes it away a.mres = FF.getMontyPrimeMinus1() + +func neg*(r: var FF, a: FF) {.meter.} = + ## Negate modulo p + when UseASM_X86_64: + negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs) + else: + # If a = 0 we need r = 0 and not r = M + # as comparison operator assume unicity + # of the modular representation. + # Also make sure to handle aliasing where r.addr = a.addr + var t {.noInit.}: FF + let isZero = a.isZero() + discard t.mres.diff(FF.fieldMod(), a.mres) + t.mres.csetZero(isZero) + r = t + +func neg*(a: var FF) {.meter.} = + ## Negate modulo p + a.neg(a) + func `+=`*(a: var FF, b: FF) {.meter.} = ## In-place addition modulo p when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling @@ -223,24 +243,14 @@ func square*(r: var FF, a: FF, skipFinalSub: static bool = false) {.meter.} = ## Squaring modulo p r.mres.squareMont(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub) -func neg*(r: var FF, a: FF) {.meter.} = - ## Negate modulo p - when UseASM_X86_64: - negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs) - else: - # If a = 0 we need r = 0 and not r = M - # as comparison operator assume unicity - # of the modular representation. - # Also make sure to handle aliasing where r.addr = a.addr - var t {.noInit.}: FF - let isZero = a.isZero() - discard t.mres.diff(FF.fieldMod(), a.mres) - t.mres.csetZero(isZero) - r = t - -func neg*(a: var FF) {.meter.} = - ## Negate modulo p - a.neg(a) +func sumprod*[N: static int](r: var FF, a, b: array[N, FF], skipFinalSub: static bool = false) {.meter.} = + ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products) + # We rely on FF and Bigints having the same repr to avoid array copies + r.mres.sumprodMont( + cast[ptr array[N, typeof(a[0].mres)]](a.unsafeAddr)[], + cast[ptr array[N, typeof(b[0].mres)]](b.unsafeAddr)[], + FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub + ) # ############################################################ # diff --git a/constantine/math/arithmetic/finite_fields_square_root.nim b/constantine/math/arithmetic/finite_fields_square_root.nim index 615236c..d89885c 100644 --- a/constantine/math/arithmetic/finite_fields_square_root.nim +++ b/constantine/math/arithmetic/finite_fields_square_root.nim @@ -9,7 +9,7 @@ import ../../platforms/abstractions, ../config/curves, - ../curves/zoo_square_roots, + ../constants/zoo_square_roots, ./bigints, ./finite_fields, ./limbs_exgcd # ############################################################ diff --git a/constantine/math/arithmetic/limbs_exgcd.nim b/constantine/math/arithmetic/limbs_exgcd.nim index b65b378..f8f64d9 100644 --- a/constantine/math/arithmetic/limbs_exgcd.nim +++ b/constantine/math/arithmetic/limbs_exgcd.nim @@ -305,8 +305,8 @@ func matVecMul_shr_k_mod_M[N, E: static int]( # First iteration of [u v] [d] # [q r].[e] - cd.slincombAccNoCarry(u, d[0], v, e[0]) - ce.slincombAccNoCarry(q, d[0], r, e[0]) + cd.ssumprodAccNoCarry(u, d[0], v, e[0]) + ce.ssumprodAccNoCarry(q, d[0], r, e[0]) # Compute me and md, multiples of M # such as the bottom k bits if d and e are 0 @@ -330,8 +330,8 @@ func matVecMul_shr_k_mod_M[N, E: static int]( ce.ashr(k) for i in 1 ..< N: - cd.slincombAccNoCarry(u, d[i], v, e[i]) - ce.slincombAccNoCarry(q, d[i], r, e[i]) + cd.ssumprodAccNoCarry(u, d[i], v, e[i]) + ce.ssumprodAccNoCarry(q, d[i], r, e[i]) cd.smulAccNoCarry(md, M[i]) ce.smulAccNoCarry(me, M[i]) d[i-1] = cd.lo and Max @@ -366,8 +366,8 @@ func matVecMul_shr_k[N, E: static int]( # First iteration of [u v] [f] # [q r].[g] - cf.slincombAccNoCarry(u, f[0], v, g[0]) - cg.slincombAccNoCarry(q, f[0], r, g[0]) + cf.ssumprodAccNoCarry(u, f[0], v, g[0]) + cg.ssumprodAccNoCarry(q, f[0], r, g[0]) # bottom k bits are zero by construction debug: doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo) @@ -377,8 +377,8 @@ func matVecMul_shr_k[N, E: static int]( cg.ashr(k) for i in 1 ..< N: - cf.slincombAccNoCarry(u, f[i], v, g[i]) - cg.slincombAccNoCarry(q, f[i], r, g[i]) + cf.ssumprodAccNoCarry(u, f[i], v, g[i]) + cg.ssumprodAccNoCarry(q, f[i], r, g[i]) f[i-1] = cf.lo and Max g[i-1] = cg.lo and Max cf.ashr(k) @@ -496,11 +496,6 @@ func invmod*( # # Those symbols can be computed either via exponentiation (Fermat's Little Theorem) # or using slight modifications to the Extended Euclidean Algorithm for GCD. -# -# See -# - Algorithm II.7 in Blake, Seroussi, Smart: "Elliptic Curves in Cryptography" -# - Algorithm 5.9.2 in Bach and Shallit: "Algorithmic Number Theory" -# - Pornin: https://github.com/pornin/x25519-cm0/blob/75a53f2/src/x25519-cm0.S#L89-L155 func batchedDivstepsSymbol( t: var TransitionMatrix, @@ -631,7 +626,7 @@ func legendreImpl[N, E]( accL = (accL + accL.isOdd()) and SignedSecretWord(3) accL = SignedSecretWord(1)-accL - accL.csetZero(f.isZeroMask()) # f = gcd = 1 as M is prime or f = 0 if a = 0 + accL.csetZero(f.isZeroMask()) return SecretWord(accL) func legendre*(a, M: Limbs, bits: static int): SecretWord = diff --git a/constantine/math/arithmetic/limbs_montgomery.nim b/constantine/math/arithmetic/limbs_montgomery.nim index 1f2c2dc..2c01f2e 100644 --- a/constantine/math/arithmetic/limbs_montgomery.nim +++ b/constantine/math/arithmetic/limbs_montgomery.nim @@ -306,11 +306,69 @@ func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: discard z.csub(M, v.isNonZero() or not(z < M)) r = z +func sumprodMont_CIOS_spare2bits[K: static int]( + r: var Limbs, a, b: array[K, Limbs], + M: Limbs, m0ninv: BaseType, + skipFinalSub: static bool = false) = + ## Compute r = ⅀aᵢ.bᵢ (mod M) (suim of products) + ## This requires 2 unused bits in the field element representation + ## + ## This maps + ## - [0, 2p) -> [0, 2p) with skipFinalSub + ## - [0, 2p) -> [0, p) without + ## + ## skipFinalSub skips the final substraction step. + + # We want all the computation to be kept in registers + # hence we use a temporary `t`, hoping that the compiler does the right thing™. + var t: typeof(M) # zero-init + const N = t.len + # Extra word to handle carries t[N] (not there are 2 unused spare bits) + var tN {.noInit.}: SecretWord + + static: doAssert K <= 8, "we cannot sum more than 8 products" + # Bounds: + # 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR + # for all intent and purposes this is true since aᵢ.bᵢ is: + # if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums + # if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1, + # with 4p < R due to the 2 unused bits constraint so more than p sums are allowed + # 2. We have a high-word tN to accumulate overflows. + # with 2 unused bits in the last word, + # the multiplication of two last words will leave 4 unused bits + # enough for accumulating 8 additions and overflow. + + staticFor i, 0, N: + tN = Zero + staticFor k, 0, K: + var A = Zero + staticFor j, 0, N: + # (A, t[j]) <- a[k][j] * b[k][i] + t[j] + A + muladd2(A, t[j], a[k][j], b[k][i], t[j], A) + tN += A + + # Reduction + # m <- (t[0] * m0ninv) mod 2ʷ + # (C, _) <- m * M[0] + t[0] + var C, lo = Zero + let m = t[0] * SecretWord(m0ninv) + muladd1(C, lo, m, M[0], t[0]) + staticFor j, 1, N: + # (C, t[j-1]) <- m*M[j] + t[j] + C + muladd2(C, t[j-1], m, M[j], t[j], C) + # (_,t[N-1]) <- t[N] + C + t[N-1] = tN + C + + when not skipFinalSub: + discard t.csub(M, not(t < M)) + r = t + + # Montgomery Squaring # -------------------------------------------------------------------------------------------------------------------- # # There are Montgomery squaring multiplications mentioned in the litterature -# - https://hackmd.io/@zkteam/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) +# - https://hackmd.io/@gnark/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) # - Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards # Johann Großschädl, 2003 # https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf @@ -482,9 +540,31 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N], r.redc2xMont(r2x, M, m0ninv, spareBits, skipFinalSub) else: mulMont(r, a, a, M, m0ninv, spareBits, skipFinalSub) - + +func sumprodMont*[N: static int]( + r: var Limbs, a, b: array[N, Limbs], + M: Limbs, m0ninv: BaseType, + spareBits: static int, + skipFinalSub: static bool = false) {.inline.} = + when spareBits >= 2: + when UseASM_X86_64 and r.len in {2 .. 6}: + if ({.noSideEffect.}: hasAdx()): + r.sumprodMont_CIOS_spare2bits_asm_adx(a, b, M, m0ninv, skipFinalSub) + else: + r.sumprodMont_CIOS_spare2bits_asm(a, b, M, m0ninv, skipFinalSub) + else: + r.sumprodMont_CIOS_spare2bits(a, b, M, m0ninv, skipFinalSub) + else: + r.mulMont(a[0], b[0], M, m0ninv, spareBits, skipFinalSub = false) + var ri {.noInit.}: Limbs + for i in 1 ..< N: + ri.mulMont(a[i], b[i], M, m0ninv, spareBits, skipFinalSub = false) + var overflowed = SecretBool r.add(ri) + overflowed = overflowed or not(r < M) + discard r.csub(M, overflowed) + func fromMont*(r: var Limbs, a, M: Limbs, - m0ninv: BaseType, spareBits: static int) = + m0ninv: BaseType, spareBits: static int) {.inline.} = ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N) ## to the regular natural representation (mod N) ## @@ -512,7 +592,7 @@ func fromMont*(r: var Limbs, a, M: Limbs, fromMont_CIOS(r, a, M, m0ninv) func getMont*(r: var Limbs, a, M, r2modM: Limbs, - m0ninv: BaseType, spareBits: static int) = + m0ninv: BaseType, spareBits: static int) {.inline.} = ## Transform a bigint ``a`` from it's natural representation (mod N) ## to a the Montgomery n-residue representation ## diff --git a/constantine/math/arithmetic/limbs_unsaturated.nim b/constantine/math/arithmetic/limbs_unsaturated.nim index aff37e6..54e0d86 100644 --- a/constantine/math/arithmetic/limbs_unsaturated.nim +++ b/constantine/math/arithmetic/limbs_unsaturated.nim @@ -393,8 +393,8 @@ func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}= r.lo = SignedSecretWord UV[0] r.hi = SignedSecretWord UV[1] -func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}= - ## Accumulated linear combination +func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}= + ## Accumulated sum of products ## (_, hi, lo) += a*u + b*v ## This assumes no overflowing var carry: Carry diff --git a/constantine/math/config/curves_prop_field_core.nim b/constantine/math/config/curves_prop_field_core.nim index 668c278..d4ad635 100644 --- a/constantine/math/config/curves_prop_field_core.nim +++ b/constantine/math/config/curves_prop_field_core.nim @@ -45,6 +45,10 @@ func has_P_3mod4_primeModulus*(C: static Curve): static bool = ## Returns true iff p ≡ 3 (mod 4) (BaseType(C.Mod.limbs[0]) and 3) == 3 +func has_P_3mod8_primeModulus*(C: static Curve): static bool = + ## Returns true iff p ≡ 3 (mod 8) + (BaseType(C.Mod.limbs[0]) and 7) == 3 + func has_P_5mod8_primeModulus*(C: static Curve): static bool = ## Returns true iff p ≡ 5 (mod 8) (BaseType(C.Mod.limbs[0]) and 7) == 5 diff --git a/constantine/math/curves/README.md b/constantine/math/constants/README.md similarity index 100% rename from constantine/math/curves/README.md rename to constantine/math/constants/README.md diff --git a/constantine/math/curves/bandersnatch_sqrt.nim b/constantine/math/constants/bandersnatch_sqrt.nim similarity index 100% rename from constantine/math/curves/bandersnatch_sqrt.nim rename to constantine/math/constants/bandersnatch_sqrt.nim diff --git a/constantine/math/curves/bls12_377_constants.nim b/constantine/math/constants/bls12_377_constants.nim similarity index 100% rename from constantine/math/curves/bls12_377_constants.nim rename to constantine/math/constants/bls12_377_constants.nim diff --git a/constantine/math/curves/bls12_377_endomorphisms.nim b/constantine/math/constants/bls12_377_endomorphisms.nim similarity index 100% rename from constantine/math/curves/bls12_377_endomorphisms.nim rename to constantine/math/constants/bls12_377_endomorphisms.nim diff --git a/constantine/math/curves/bls12_377_frobenius.nim b/constantine/math/constants/bls12_377_frobenius.nim similarity index 100% rename from constantine/math/curves/bls12_377_frobenius.nim rename to constantine/math/constants/bls12_377_frobenius.nim diff --git a/constantine/math/curves/bls12_377_pairing.nim b/constantine/math/constants/bls12_377_pairing.nim similarity index 100% rename from constantine/math/curves/bls12_377_pairing.nim rename to constantine/math/constants/bls12_377_pairing.nim diff --git a/constantine/math/curves/bls12_377_sqrt.nim b/constantine/math/constants/bls12_377_sqrt.nim similarity index 100% rename from constantine/math/curves/bls12_377_sqrt.nim rename to constantine/math/constants/bls12_377_sqrt.nim diff --git a/constantine/math/curves/bls12_377_subgroups.nim b/constantine/math/constants/bls12_377_subgroups.nim similarity index 99% rename from constantine/math/curves/bls12_377_subgroups.nim rename to constantine/math/constants/bls12_377_subgroups.nim index cf34c6c..3d6dd1e 100644 --- a/constantine/math/curves/bls12_377_subgroups.nim +++ b/constantine/math/constants/bls12_377_subgroups.nim @@ -15,7 +15,7 @@ import ../ec_shortweierstrass, ../io/io_bigints, ../isogenies/frobenius, - ../curves/zoo_endomorphisms + ../constants/zoo_endomorphisms func pow_bls12_377_abs_x[ECP: ECP_ShortW[Fp[BLS12_377], G1] or ECP_ShortW[Fp2[BLS12_377], G2]]( diff --git a/constantine/math/curves/bls12_381_constants.nim b/constantine/math/constants/bls12_381_constants.nim similarity index 100% rename from constantine/math/curves/bls12_381_constants.nim rename to constantine/math/constants/bls12_381_constants.nim diff --git a/constantine/math/curves/bls12_381_endomorphisms.nim b/constantine/math/constants/bls12_381_endomorphisms.nim similarity index 100% rename from constantine/math/curves/bls12_381_endomorphisms.nim rename to constantine/math/constants/bls12_381_endomorphisms.nim diff --git a/constantine/math/curves/bls12_381_frobenius.nim b/constantine/math/constants/bls12_381_frobenius.nim similarity index 100% rename from constantine/math/curves/bls12_381_frobenius.nim rename to constantine/math/constants/bls12_381_frobenius.nim diff --git a/constantine/math/curves/bls12_381_generators.nim b/constantine/math/constants/bls12_381_generators.nim similarity index 100% rename from constantine/math/curves/bls12_381_generators.nim rename to constantine/math/constants/bls12_381_generators.nim diff --git a/constantine/math/curves/bls12_381_hash_to_curve_g1.nim b/constantine/math/constants/bls12_381_hash_to_curve_g1.nim similarity index 100% rename from constantine/math/curves/bls12_381_hash_to_curve_g1.nim rename to constantine/math/constants/bls12_381_hash_to_curve_g1.nim diff --git a/constantine/math/curves/bls12_381_hash_to_curve_g2.nim b/constantine/math/constants/bls12_381_hash_to_curve_g2.nim similarity index 100% rename from constantine/math/curves/bls12_381_hash_to_curve_g2.nim rename to constantine/math/constants/bls12_381_hash_to_curve_g2.nim diff --git a/constantine/math/curves/bls12_381_pairing.nim b/constantine/math/constants/bls12_381_pairing.nim similarity index 100% rename from constantine/math/curves/bls12_381_pairing.nim rename to constantine/math/constants/bls12_381_pairing.nim diff --git a/constantine/math/curves/bls12_381_sqrt.nim b/constantine/math/constants/bls12_381_sqrt.nim similarity index 100% rename from constantine/math/curves/bls12_381_sqrt.nim rename to constantine/math/constants/bls12_381_sqrt.nim diff --git a/constantine/math/curves/bls12_381_sqrt_fp2.nim b/constantine/math/constants/bls12_381_sqrt_fp2.nim similarity index 100% rename from constantine/math/curves/bls12_381_sqrt_fp2.nim rename to constantine/math/constants/bls12_381_sqrt_fp2.nim diff --git a/constantine/math/curves/bls12_381_subgroups.nim b/constantine/math/constants/bls12_381_subgroups.nim similarity index 99% rename from constantine/math/curves/bls12_381_subgroups.nim rename to constantine/math/constants/bls12_381_subgroups.nim index 8216f53..0e34848 100644 --- a/constantine/math/curves/bls12_381_subgroups.nim +++ b/constantine/math/constants/bls12_381_subgroups.nim @@ -15,7 +15,7 @@ import ../ec_shortweierstrass, ../io/io_bigints, ../isogenies/frobenius, - ../curves/zoo_endomorphisms + ../constants/zoo_endomorphisms func pow_bls12_381_abs_x[ECP: ECP_ShortW[Fp[BLS12_381], G1] or ECP_ShortW[Fp2[BLS12_381], G2]]( diff --git a/constantine/math/curves/bn254_nogami_constants.nim b/constantine/math/constants/bn254_nogami_constants.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_constants.nim rename to constantine/math/constants/bn254_nogami_constants.nim diff --git a/constantine/math/curves/bn254_nogami_endomorphisms.nim b/constantine/math/constants/bn254_nogami_endomorphisms.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_endomorphisms.nim rename to constantine/math/constants/bn254_nogami_endomorphisms.nim diff --git a/constantine/math/curves/bn254_nogami_frobenius.nim b/constantine/math/constants/bn254_nogami_frobenius.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_frobenius.nim rename to constantine/math/constants/bn254_nogami_frobenius.nim diff --git a/constantine/math/curves/bn254_nogami_pairing.nim b/constantine/math/constants/bn254_nogami_pairing.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_pairing.nim rename to constantine/math/constants/bn254_nogami_pairing.nim diff --git a/constantine/math/curves/bn254_nogami_sqrt.nim b/constantine/math/constants/bn254_nogami_sqrt.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_sqrt.nim rename to constantine/math/constants/bn254_nogami_sqrt.nim diff --git a/constantine/math/curves/bn254_nogami_sqrt_fp2.nim b/constantine/math/constants/bn254_nogami_sqrt_fp2.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_sqrt_fp2.nim rename to constantine/math/constants/bn254_nogami_sqrt_fp2.nim diff --git a/constantine/math/curves/bn254_nogami_subgroups.nim b/constantine/math/constants/bn254_nogami_subgroups.nim similarity index 100% rename from constantine/math/curves/bn254_nogami_subgroups.nim rename to constantine/math/constants/bn254_nogami_subgroups.nim diff --git a/constantine/math/curves/bn254_snarks_constants.nim b/constantine/math/constants/bn254_snarks_constants.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_constants.nim rename to constantine/math/constants/bn254_snarks_constants.nim diff --git a/constantine/math/curves/bn254_snarks_endomorphisms.nim b/constantine/math/constants/bn254_snarks_endomorphisms.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_endomorphisms.nim rename to constantine/math/constants/bn254_snarks_endomorphisms.nim diff --git a/constantine/math/curves/bn254_snarks_frobenius.nim b/constantine/math/constants/bn254_snarks_frobenius.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_frobenius.nim rename to constantine/math/constants/bn254_snarks_frobenius.nim diff --git a/constantine/math/curves/bn254_snarks_generators.nim b/constantine/math/constants/bn254_snarks_generators.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_generators.nim rename to constantine/math/constants/bn254_snarks_generators.nim diff --git a/constantine/math/curves/bn254_snarks_hash_to_curve_g1.nim b/constantine/math/constants/bn254_snarks_hash_to_curve_g1.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_hash_to_curve_g1.nim rename to constantine/math/constants/bn254_snarks_hash_to_curve_g1.nim diff --git a/constantine/math/curves/bn254_snarks_hash_to_curve_g2.nim b/constantine/math/constants/bn254_snarks_hash_to_curve_g2.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_hash_to_curve_g2.nim rename to constantine/math/constants/bn254_snarks_hash_to_curve_g2.nim diff --git a/constantine/math/curves/bn254_snarks_pairing.nim b/constantine/math/constants/bn254_snarks_pairing.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_pairing.nim rename to constantine/math/constants/bn254_snarks_pairing.nim diff --git a/constantine/math/curves/bn254_snarks_sqrt.nim b/constantine/math/constants/bn254_snarks_sqrt.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_sqrt.nim rename to constantine/math/constants/bn254_snarks_sqrt.nim diff --git a/constantine/math/curves/bn254_snarks_sqrt_fp2.nim b/constantine/math/constants/bn254_snarks_sqrt_fp2.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_sqrt_fp2.nim rename to constantine/math/constants/bn254_snarks_sqrt_fp2.nim diff --git a/constantine/math/curves/bn254_snarks_subgroups.nim b/constantine/math/constants/bn254_snarks_subgroups.nim similarity index 100% rename from constantine/math/curves/bn254_snarks_subgroups.nim rename to constantine/math/constants/bn254_snarks_subgroups.nim diff --git a/constantine/math/curves/bw6_761_constants.nim b/constantine/math/constants/bw6_761_constants.nim similarity index 100% rename from constantine/math/curves/bw6_761_constants.nim rename to constantine/math/constants/bw6_761_constants.nim diff --git a/constantine/math/curves/bw6_761_endomorphisms.nim b/constantine/math/constants/bw6_761_endomorphisms.nim similarity index 100% rename from constantine/math/curves/bw6_761_endomorphisms.nim rename to constantine/math/constants/bw6_761_endomorphisms.nim diff --git a/constantine/math/curves/bw6_761_frobenius.nim b/constantine/math/constants/bw6_761_frobenius.nim similarity index 100% rename from constantine/math/curves/bw6_761_frobenius.nim rename to constantine/math/constants/bw6_761_frobenius.nim diff --git a/constantine/math/curves/bw6_761_pairing.nim b/constantine/math/constants/bw6_761_pairing.nim similarity index 100% rename from constantine/math/curves/bw6_761_pairing.nim rename to constantine/math/constants/bw6_761_pairing.nim diff --git a/constantine/math/curves/bw6_761_sqrt.nim b/constantine/math/constants/bw6_761_sqrt.nim similarity index 100% rename from constantine/math/curves/bw6_761_sqrt.nim rename to constantine/math/constants/bw6_761_sqrt.nim diff --git a/constantine/math/curves/bw6_761_subgroups.nim b/constantine/math/constants/bw6_761_subgroups.nim similarity index 100% rename from constantine/math/curves/bw6_761_subgroups.nim rename to constantine/math/constants/bw6_761_subgroups.nim diff --git a/constantine/math/curves/curve25519_sqrt.nim b/constantine/math/constants/curve25519_sqrt.nim similarity index 100% rename from constantine/math/curves/curve25519_sqrt.nim rename to constantine/math/constants/curve25519_sqrt.nim diff --git a/constantine/math/curves/jubjub_sqrt.nim b/constantine/math/constants/jubjub_sqrt.nim similarity index 100% rename from constantine/math/curves/jubjub_sqrt.nim rename to constantine/math/constants/jubjub_sqrt.nim diff --git a/constantine/math/curves/pallas_endomorphisms.nim b/constantine/math/constants/pallas_endomorphisms.nim similarity index 100% rename from constantine/math/curves/pallas_endomorphisms.nim rename to constantine/math/constants/pallas_endomorphisms.nim diff --git a/constantine/math/curves/pallas_sqrt.nim b/constantine/math/constants/pallas_sqrt.nim similarity index 100% rename from constantine/math/curves/pallas_sqrt.nim rename to constantine/math/constants/pallas_sqrt.nim diff --git a/constantine/math/curves/pallas_subgroups.nim b/constantine/math/constants/pallas_subgroups.nim similarity index 100% rename from constantine/math/curves/pallas_subgroups.nim rename to constantine/math/constants/pallas_subgroups.nim diff --git a/constantine/math/curves/vesta_endomorphisms.nim b/constantine/math/constants/vesta_endomorphisms.nim similarity index 100% rename from constantine/math/curves/vesta_endomorphisms.nim rename to constantine/math/constants/vesta_endomorphisms.nim diff --git a/constantine/math/curves/vesta_sqrt.nim b/constantine/math/constants/vesta_sqrt.nim similarity index 100% rename from constantine/math/curves/vesta_sqrt.nim rename to constantine/math/constants/vesta_sqrt.nim diff --git a/constantine/math/curves/vesta_subgroups.nim b/constantine/math/constants/vesta_subgroups.nim similarity index 100% rename from constantine/math/curves/vesta_subgroups.nim rename to constantine/math/constants/vesta_subgroups.nim diff --git a/constantine/math/curves/zoo_constants.nim b/constantine/math/constants/zoo_constants.nim similarity index 100% rename from constantine/math/curves/zoo_constants.nim rename to constantine/math/constants/zoo_constants.nim diff --git a/constantine/math/curves/zoo_endomorphisms.nim b/constantine/math/constants/zoo_endomorphisms.nim similarity index 100% rename from constantine/math/curves/zoo_endomorphisms.nim rename to constantine/math/constants/zoo_endomorphisms.nim diff --git a/constantine/math/curves/zoo_frobenius.nim b/constantine/math/constants/zoo_frobenius.nim similarity index 100% rename from constantine/math/curves/zoo_frobenius.nim rename to constantine/math/constants/zoo_frobenius.nim diff --git a/constantine/math/curves/zoo_generators.nim b/constantine/math/constants/zoo_generators.nim similarity index 100% rename from constantine/math/curves/zoo_generators.nim rename to constantine/math/constants/zoo_generators.nim diff --git a/constantine/math/curves/zoo_hash_to_curve.nim b/constantine/math/constants/zoo_hash_to_curve.nim similarity index 100% rename from constantine/math/curves/zoo_hash_to_curve.nim rename to constantine/math/constants/zoo_hash_to_curve.nim diff --git a/constantine/math/curves/zoo_pairings.nim b/constantine/math/constants/zoo_pairings.nim similarity index 100% rename from constantine/math/curves/zoo_pairings.nim rename to constantine/math/constants/zoo_pairings.nim diff --git a/constantine/math/curves/zoo_square_roots.nim b/constantine/math/constants/zoo_square_roots.nim similarity index 100% rename from constantine/math/curves/zoo_square_roots.nim rename to constantine/math/constants/zoo_square_roots.nim diff --git a/constantine/math/curves/zoo_square_roots_fp2.nim b/constantine/math/constants/zoo_square_roots_fp2.nim similarity index 100% rename from constantine/math/curves/zoo_square_roots_fp2.nim rename to constantine/math/constants/zoo_square_roots_fp2.nim diff --git a/constantine/math/curves/zoo_subgroups.nim b/constantine/math/constants/zoo_subgroups.nim similarity index 100% rename from constantine/math/curves/zoo_subgroups.nim rename to constantine/math/constants/zoo_subgroups.nim diff --git a/constantine/math/elliptic/ec_endomorphism_accel.nim b/constantine/math/elliptic/ec_endomorphism_accel.nim index 170c191..3115258 100644 --- a/constantine/math/elliptic/ec_endomorphism_accel.nim +++ b/constantine/math/elliptic/ec_endomorphism_accel.nim @@ -12,7 +12,7 @@ import # Internal ../../platforms/abstractions, ../config/[curves, type_bigint], - ../curves/zoo_endomorphisms, + ../constants/zoo_endomorphisms, ../arithmetic, ../extension_fields, ../isogenies/frobenius, diff --git a/constantine/math/elliptic/ec_scalar_mul.nim b/constantine/math/elliptic/ec_scalar_mul.nim index 6126149..dba6433 100644 --- a/constantine/math/elliptic/ec_scalar_mul.nim +++ b/constantine/math/elliptic/ec_scalar_mul.nim @@ -12,7 +12,7 @@ import ../arithmetic, ../extension_fields, ../io/io_bigints, - ../curves/zoo_endomorphisms, + ../constants/zoo_endomorphisms, ./ec_endomorphism_accel # ############################################################ diff --git a/constantine/math/elliptic/ec_shortweierstrass_affine.nim b/constantine/math/elliptic/ec_shortweierstrass_affine.nim index 8518970..e06afa7 100644 --- a/constantine/math/elliptic/ec_shortweierstrass_affine.nim +++ b/constantine/math/elliptic/ec_shortweierstrass_affine.nim @@ -12,7 +12,7 @@ import ../arithmetic, ../extension_fields, ../io/[io_fields, io_extfields], - ../curves/zoo_constants + ../constants/zoo_constants # ############################################################ # diff --git a/constantine/math/extension_fields/square_root_fp2.nim b/constantine/math/extension_fields/square_root_fp2.nim index b721376..b6c2cf5 100644 --- a/constantine/math/extension_fields/square_root_fp2.nim +++ b/constantine/math/extension_fields/square_root_fp2.nim @@ -11,7 +11,7 @@ import ./towers, ../arithmetic, ../config/curves, - ../curves/zoo_square_roots_fp2 + ../constants/zoo_square_roots_fp2 # Square root # ----------------------------------------------------------- diff --git a/constantine/math/extension_fields/towers.nim b/constantine/math/extension_fields/towers.nim index 3129984..5e13c20 100644 --- a/constantine/math/extension_fields/towers.nim +++ b/constantine/math/extension_fields/towers.nim @@ -57,8 +57,8 @@ type CubicExt[Fp2[C]] Fp12*[C: static Curve] = - CubicExt[Fp4[C]] - # QuadraticExt[Fp6[C]] + # CubicExt[Fp4[C]] + QuadraticExt[Fp6[C]] template c0*(a: ExtensionField): auto = a.coords[0] @@ -662,8 +662,8 @@ func prod*(r: var CubicExt, a: CubicExt, _: type NonResidue) = ## and v³ = ξ ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² let t {.noInit.} = a.c2 - r.c1 = a.c0 r.c2 = a.c1 + r.c1 = a.c0 r.c0.prod(t, NonResidue) func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} = @@ -689,8 +689,8 @@ func prod2x*( ## and v³ = ξ ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² let t {.noInit.} = a.c2 - r.c1 = a.c0 r.c2 = a.c1 + r.c1 = a.c0 r.c0.prod2x(t, NonResidue) {.pop.} # inline @@ -811,7 +811,7 @@ func square2x_complex(r: var QuadraticExt2x, a: Fp2) = # Complex multiplications # ---------------------------------------------------------------------- -func prod_complex(r: var Fp2, a, b: Fp2) = +func prod_complex(r: var Fp2, a, b: Fp2) {.used.} = ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r`` ## ``r`` is initialized/overwritten ## @@ -823,10 +823,7 @@ func prod_complex(r: var Fp2, a, b: Fp2) = # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction) # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition) # - # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words) - # where addition/substraction are significantly cheaper than multiplication - # - # So we always reframe the imaginary part using Karatsuba approach to save a multiplication + # Hence we can consider using q Karatsuba approach to save a multiplication # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 ) # # Costs (naive implementation) @@ -841,19 +838,12 @@ func prod_complex(r: var Fp2, a, b: Fp2) = # - 2 Addition 𝔽p # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary) static: doAssert r.fromComplexExtension() - - var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0) - a0b0.prod(a.c0, b.c0) # [1 Mul] - a1b1.prod(a.c1, b.c1) # [2 Mul] - - r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add] - r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add] - # aliasing: a and b unneeded now - r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary - - r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub] - r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub] - r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub] + var t {.noInit.}: typeof(r) + var na1 {.noInit.}: typeof(r.c0) + na1.neg(a.c1) + t.c0.sumprod([a.c0, na1], [b.c0, b.c1]) + t.c1.sumprod([a.c0, a.c1], [b.c1, b.c0]) + r = t func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) = ## Double-precision unreduced complex multiplication @@ -983,7 +973,37 @@ func square2x_disjoint*[Fdbl, F]( r.c1.diff2xMod(r.c1, V0) r.c1.diff2xMod(r.c1, V1) -# Generic multiplications +# Multiplications (specializations) +# ------------------------------------------------------------------- + +func prodImpl_fp4o2_p3mod8[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) = + ## Returns r = a * b + ## For 𝔽p4/𝔽p2 with p ≡ 3 (mod 8), + ## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4) + ## and 𝔽p SNR is (1 + i) + static: doAssert C.has_P_3mod8_primeModulus() + var + b10_m_b11{.noInit.}, b10_p_b11{.noInit.}: Fp[C] + n_a01{.noInit.}, n_a11{.noInit.}: Fp[C] + + t{.noInit.}: Fp4[C] + + b10_m_b11.diff(b.c1.c0, b.c1.c1) + b10_p_b11.sum(b.c1.c0, b.c1.c1) + n_a01.neg(a.c0.c1) + n_a11.neg(a.c1.c1) + + t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11], + [b.c0.c0, b.c0.c1, b10_m_b11, b10_p_b11]) + t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1], + [b.c0.c1, b.c0.c0, b10_p_b11, b10_m_b11]) + t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11], + [b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1]) + t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1], + [b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0]) + r = t + +# Multiplications (generic) # ---------------------------------------------------------------------- func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) = @@ -1264,7 +1284,7 @@ func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) = # [1 Inv, 2 Sqr, 1 Add] t.redc2x(V0) - t.inv() # v1 = 1 / (a0² - β a1²) + t.inv() # v1 = 1 / (a0² - β a1²) # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg] r.c0.prod(a.c0, t) # r0 = a0 / (a0² - β a1²) @@ -1288,35 +1308,37 @@ func square2x*(r: var QuadraticExt2x, a: QuadraticExt) = else: r.square2x_disjoint(a.c0, a.c1) +func square_disjoint*[F](r: var QuadraticExt[F], a0, a1: F) = + # TODO understand why Fp4[BLS12_377] + # is so slow in the branch + # TODO: + # - On Fp4, we can have a.c0.c0 off by p + # a reduction is missing + static: doAssert not r.fromComplexExtension(), "Faster specialization not implemented" + + var d {.noInit.}: doublePrec(typeof(r)) + d.square2x_disjoint(a0, a1) + r.c0.redc2x(d.c0) + r.c1.redc2x(d.c1) + func square*(r: var QuadraticExt, a: QuadraticExt) = when r.fromComplexExtension(): - when true: - when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit(): - if ({.noSideEffect.}: hasAdx()): - r.coords.sqrx_complex_sparebit_asm_adx(a.coords) - else: - r.square_complex(a) + when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit(): + if ({.noSideEffect.}: hasAdx()): + r.coords.sqrx_complex_sparebit_asm_adx(a.coords) else: r.square_complex(a) - else: # slower - var d {.noInit.}: doublePrec(typeof(r)) - d.square2x(a) - r.c0.redc2x(d.c0) - r.c1.redc2x(d.c1) + else: + r.square_complex(a) else: - when true: # r.typeof.F.C in {BLS12_377, BW6_761}: + when QuadraticExt.C.has_large_field_elem(): # BW6-761 requires too many registers for Dbl width path r.square_generic(a) - else: - # TODO understand why Fp4[BLS12_377] - # is so slow in the branch - # TODO: - # - On Fp4, we can have a.c0.c0 off by p - # a reduction is missing - var d {.noInit.}: doublePrec(typeof(r)) - d.square2x_disjoint(a.c0, a.c1) - r.c0.redc2x(d.c0) - r.c1.redc2x(d.c1) + elif QuadraticExt is Fp4[BLS12_377]: + # TODO BLS12-377 slowness to fix + r.square_generic(a) + else: + r.square_disjoint(a.c0, a.c1) func square*(a: var QuadraticExt) = ## In-place squaring @@ -1326,26 +1348,25 @@ func prod*(r: var QuadraticExt, a, b: QuadraticExt) = ## Multiplication r <- a*b when r.fromComplexExtension(): - when false: - r.prod_complex(a, b) - else: # faster - when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem(): - if ({.noSideEffect.}: hasAdx()): - r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords) - else: - var d {.noInit.}: doublePrec(typeof(r)) - d.prod2x_complex(a, b) - r.c0.redc2x(d.c0) - r.c1.redc2x(d.c1) + when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem(): + if ({.noSideEffect.}: hasAdx()): + r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords) else: var d {.noInit.}: doublePrec(typeof(r)) d.prod2x_complex(a, b) r.c0.redc2x(d.c0) r.c1.redc2x(d.c1) + else: + var d {.noInit.}: doublePrec(typeof(r)) + d.prod2x_complex(a, b) + r.c0.redc2x(d.c0) + r.c1.redc2x(d.c1) else: - when r.typeof.F.C.has_large_field_elem(): + when QuadraticExt is Fp12 or r.typeof.F.C.has_large_field_elem(): # BW6-761 requires too many registers for Dbl width path r.prod_generic(a, b) + elif QuadraticExt is Fp4 and QuadraticExt.C.has_P_3mod8_primeModulus(): + r.prodImpl_fp4o2_p3mod8(a, b) else: var d {.noInit.}: doublePrec(typeof(r)) d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1) @@ -1595,7 +1616,49 @@ func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) = r.c0.prod(m12, NonResidue) r.c0 += s0 -# Multiplications +# Multiplications (specializations) +# ------------------------------------------------------------------- + +func prodImpl_fp6o2_p3mod8[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) = + ## Returns r = a * b + ## For 𝔽p6/𝔽p2 with p ≡ 3 (mod 8), + ## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4) + ## and 𝔽p SNR is (1 + i) + # https://eprint.iacr.org/2022/367 - Equation 8 + static: doAssert C.has_P_3mod8_primeModulus() + var + b10_p_b11{.noInit.}, b10_m_b11{.noInit.}: Fp[C] + b20_p_b21{.noInit.}, b20_m_b21{.noInit.}: Fp[C] + + n_a01{.noInit.}, n_a11{.noInit.}, n_a21{.noInit.}: Fp[C] + + t{.noInit.}: Fp6[C] + + b10_p_b11.sum(b.c1.c0, b.c1.c1) + b10_m_b11.diff(b.c1.c0, b.c1.c1) + b20_p_b21.sum(b.c2.c0, b.c2.c1) + b20_m_b21.diff(b.c2.c0, b.c2.c1) + + n_a01.neg(a.c0.c1) + n_a11.neg(a.c1.c1) + n_a21.neg(a.c2.c1) + + t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21], + [b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11]) + t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11]) + t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21], + [b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21]) + t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21]) + t.c2.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21], + [b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1]) + t.c2.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1], + [b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0]) + + r = t + +# Multiplications (generic) # ------------------------------------------------------------------- func prodImpl(r: var CubicExt, a, b: CubicExt) = @@ -1686,6 +1749,40 @@ func prod2xImpl(r: var CubicExt2x, a, b: CubicExt) = # Sparse multiplication # ---------------------------------------------------------------------- +func mul_sparse_by_x00*(r: var CubicExt, a: CubicExt, sparseB: auto) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0) + + when typeof(sparseB) is typeof(a): + template b(): untyped = sparseB.c0 + elif typeof(sparseB) is typeof(a.c0): + template b(): untyped = sparseB + else: + {.error: "sparseB type is " & $typeof(sparseB) & + " which does not match with either a (" & $typeof(a) & + ") or a.c0 (" & $typeof(a.c0) & ")".} + + r.c0.prod(a.c0, b) + r.c1.prod(a.c1, b) + r.c2.prod(a.c2, b) + +func mul2x_sparse_by_x00*(r: var CubicExt2x, a: CubicExt, sparseB: auto) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0) + + when typeof(sparseB) is typeof(a): + template b(): untyped = sparseB.c0 + elif typeof(sparseB) is typeof(a.c0): + template b(): untyped = sparseB + else: + {.error: "sparseB type is " & $typeof(sparseB) & + " which does not match with either a (" & $typeof(a) & + ") or a.c0 (" & $typeof(a.c0) & ")".} + + r.c0.prod2x(a.c0, b) + r.c1.prod2x(a.c1, b) + r.c2.prod2x(a.c2, b) + func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) = ## Sparse multiplication of a cubic extenion element ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0) @@ -1718,6 +1815,171 @@ func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) = r.c1.prod(a.c0, b) r.c2.prod(a.c1, b) +func mul2x_sparse_by_0y0*(r: var CubicExt2x, a: CubicExt, sparseB: auto) = + ## Sparse multiplication of a cubic extenion element + ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0) + + when typeof(sparseB) is typeof(a): + template b(): untyped = sparseB.c1 + elif typeof(sparseB) is typeof(a.c0): + template b(): untyped = sparseB + else: + {.error: "sparseB type is " & $typeof(sparseB) & + " which does not match with either a (" & $typeof(a) & + ") or a.c0 (" & $typeof(a.c0) & ")".} + + r.c0.prod2x(a.c2, b) + r.c0.prod2x(r.c0, NonResidue) + r.c1.prod2x(a.c0, b) + r.c2.prod2x(a.c1, b) + + +func mul_sparse_by_xy0*[Fpkdiv3](r: var CubicExt, a: CubicExt, + x, y: Fpkdiv3) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0) + ## + ## r and a must not alias + + # v0 = a0 b0 + # v1 = a1 b1 + # v2 = a2 b2 = 0 + # + # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 + # = ξ (a1 b1 + a2 b1 - v1) + v0 + # = ξa2 b1 + v0 + # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2 + # = (a0 + a1) * (b0 + b1) - v0 - v1 + # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 + # = a0 b0 + a2 b0 - v0 + v1 + # = a2 b0 + v1 + + static: doAssert a.c0 is Fpkdiv3 + + var + v0 {.noInit.}: Fpkdiv3 + v1 {.noInit.}: Fpkdiv3 + + v0.prod(a.c0, x) + v1.prod(a.c1, y) + + r.c0.prod(a.c2, y) + r.c0 *= NonResidue + r.c0 += v0 + + r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated + r.c2.sum(x, y) + r.c1 *= r.c2 + r.c1 -= v0 + r.c1 -= v1 + + r.c2.prod(a.c2, x) + r.c2 += v1 + + +func mul2x_sparse_by_xy0*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, + x, y: Fpkdiv3) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0) + ## + ## r and a must not alias + + static: doAssert a.c0 is Fpkdiv3 + + var + V0 {.noInit.}: doubleprec(Fpkdiv3) + V1 {.noInit.}: doubleprec(Fpkdiv3) + t0{.noInit.}: Fpkdiv3 + t1{.noInit.}: Fpkdiv3 + + V0.prod2x(a.c0, x) + V1.prod2x(a.c1, y) + + r.c0.prod2x(a.c2, y) + r.c0.prod2x(r.c0, NonResidue) + r.c0.sum2xMod(r.c0, V0) + + t0.sum(a.c0, a.c1) + t1.sum(x, y) + r.c1.prod2x(t0, t1) + r.c1.diff2xMod(r.c1, V0) + r.c1.diff2xMod(r.c1, V1) + + r.c2.prod2x(a.c2, x) + r.c2.sum2xMod(r.c2, V1) + +func mul_sparse_by_0yz*[Fpkdiv3](r: var CubicExt, a: CubicExt, y, z: Fpkdiv3) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂) + ## + ## r and a must not alias + + # v0 = a0 b0 = 0 + # v1 = a1 b1 + # v2 = a2 b2 + # + # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 + # = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2 + # = a0 b1 + a1 b1 - v1 + ξ v2 + # = a0 b1 + ξ v2 + # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 + # = a0 b2 + a2 b2 - v0 - v2 + v1 + # = a0 b2 + v1 + + static: doAssert a.c0 is Fpkdiv3 + + var + v1 {.noInit.}: Fpkdiv3 + v2 {.noInit.}: Fpkdiv3 + + v1.prod(a.c1, y) + v2.prod(a.c2, z) + + r.c1.sum(a.c1, a.c2) + r.c2.sum( y, z) + r.c0.prod(r.c1, r.c2) + r.c0 -= v1 + r.c0 -= v2 + r.c0 *= NonResidue + + r.c1.prod(a.c0, y) + v2 *= NonResidue + r.c1 += v2 + + r.c2.prod(a.c0, z) + r.c2 += v1 + +func mul2x_sparse_by_0yz*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, y, z: Fpkdiv3) = + ## Sparse multiplication of a cubic extension element + ## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂) + ## + ## r and a must not alias + static: doAssert a.c0 is Fpkdiv3 + + var + V1 {.noInit.}: doubleprec(Fpkdiv3) + V2 {.noInit.}: doubleprec(Fpkdiv3) + t1 {.noInit.}: Fpkdiv3 + t2 {.noInit.}: Fpkdiv3 + + V1.prod2x(a.c1, y) + V2.prod2x(a.c2, z) + + t1.sum(a.c1, a.c2) + t2.sum( y, z) + r.c0.prod2x(t1, t2) + r.c0.diff2xMod(r.c0, V1) + r.c0.diff2xMod(r.c0, V2) + r.c0.prod2x(r.c0, NonResidue) + + r.c1.prod2x(a.c0, y) + V2.prod2x(V2, NonResidue) + r.c1.sum2xMod(r.c1, V2) + + r.c2.prod2x(a.c0, z) + r.c2.sum2xMod(r.c2, V1) + # Inversion # ---------------------------------------------------------------------- @@ -1856,6 +2118,8 @@ func prod*(r: var CubicExt, a, b: CubicExt) = ## Out-of-place multiplication when CubicExt.C.has_large_field_elem(): r.prodImpl(a, b) + elif r is Fp6 and CubicExt.C.has_P_3mod8_primeModulus(): + r.prodImpl_fp6o2_p3mod8(a, b) else: var d {.noInit.}: doublePrec(typeof(r)) d.prod2x(a, b) diff --git a/constantine/math/io/io_extfields.nim b/constantine/math/io/io_extfields.nim index f7c3f62..16ea485 100644 --- a/constantine/math/io/io_extfields.nim +++ b/constantine/math/io/io_extfields.nim @@ -33,7 +33,7 @@ proc spaces*(num: int): string = for i in 0 ..< num: result[i] = ' ' -func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian) = +func appendHex*(accum: var string, f: ExtensionField, indent = 0, order: static Endianness = bigEndian) = ## Hex accumulator accum.add static($f.typeof.genericHead() & '(') staticFor i, 0, f.coords.len: @@ -46,7 +46,7 @@ func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, ord accum.appendHex(f.coords[i], indent+2, order) accum.add ")" -func toHex*(f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian): string = +func toHex*(f: ExtensionField, indent = 0, order: static Endianness = bigEndian): string = ## Stringify a tower field element to hex. ## Note. Leading zeros are not removed. ## Result is prefixed with 0x diff --git a/constantine/math/isogenies/frobenius.nim b/constantine/math/isogenies/frobenius.nim index 6b932f6..5440519 100644 --- a/constantine/math/isogenies/frobenius.nim +++ b/constantine/math/isogenies/frobenius.nim @@ -11,7 +11,7 @@ import ../../platforms/primitives, ../arithmetic, ../extension_fields, - ../curves/zoo_frobenius + ../constants/zoo_frobenius # Frobenius Map # ------------------------------------------------------------ @@ -79,17 +79,24 @@ func frobenius_map*[C](r: var Fp6[C], a: Fp6[C], k: static int = 1) {.inline.} = func frobenius_map*[C](r: var Fp12[C], a: Fp12[C], k: static int = 1) {.inline.} = ## Computes a^(pᵏ) ## The p-power frobenius automorphism on 𝔽p12 - static: doAssert r.c0 is Fp4 staticFor i, 0, r.coords.len: staticFor j, 0, r.coords[0].coords.len: r.coords[i].coords[j].frobenius_map(a.coords[i].coords[j], k) - r.c0.c0.mulCheckSparse frobMapConst(C, 0, k) - r.c0.c1.mulCheckSparse frobMapConst(C, 3, k) - r.c1.c0.mulCheckSparse frobMapConst(C, 1, k) - r.c1.c1.mulCheckSparse frobMapConst(C, 4, k) - r.c2.c0.mulCheckSparse frobMapConst(C, 2, k) - r.c2.c1.mulCheckSparse frobMapConst(C, 5, k) + when r.c0 is Fp4: + r.c0.c0.mulCheckSparse frobMapConst(C, 0, k) + r.c0.c1.mulCheckSparse frobMapConst(C, 3, k) + r.c1.c0.mulCheckSparse frobMapConst(C, 1, k) + r.c1.c1.mulCheckSparse frobMapConst(C, 4, k) + r.c2.c0.mulCheckSparse frobMapConst(C, 2, k) + r.c2.c1.mulCheckSparse frobMapConst(C, 5, k) + else: + r.c0.c0.mulCheckSparse frobMapConst(C, 0, k) + r.c0.c1.mulCheckSparse frobMapConst(C, 2, k) + r.c0.c2.mulCheckSparse frobMapConst(C, 4, k) + r.c1.c0.mulCheckSparse frobMapConst(C, 1, k) + r.c1.c1.mulCheckSparse frobMapConst(C, 3, k) + r.c1.c2.mulCheckSparse frobMapConst(C, 5, k) # ψ (Psi) - Untwist-Frobenius-Twist Endomorphisms on twisted curves # ----------------------------------------------------------------- diff --git a/constantine/math/pairing/cyclotomic_subgroup.nim b/constantine/math/pairing/cyclotomic_subgroup.nim index 41a49f5..179d641 100644 --- a/constantine/math/pairing/cyclotomic_subgroup.nim +++ b/constantine/math/pairing/cyclotomic_subgroup.nim @@ -215,7 +215,7 @@ func cyclotomic_inv*[FT](r: var FT, a: FT) {.meter.} = ## consequently `a` MUST be unitary r.conj(a) -func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} = +func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = ## Square `a` into `r` ## `a` MUST be in the cyclotomic subgroup ## consequently `a` MUST be unitary @@ -224,47 +224,147 @@ func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} = # Granger, Scott, 2009 # https://eprint.iacr.org/2009/565.pdf + # Cubic extension field + # A = 3a² − 2 ̄a + # B = 3 √i c² + 2 ̄b + # C = 3b² − 2 ̄c + var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0) + + template a0: untyped = a.c0.c0 + template a1: untyped = a.c0.c1 + template a2: untyped = a.c1.c0 + template a3: untyped = a.c1.c1 + template a4: untyped = a.c2.c0 + template a5: untyped = a.c2.c1 + + v0.square(a.c0) + v1.square(a.c1) + v2.square(a.c2) + + # From here on, r aliasing with a is only for the first operation + # and only read/write the exact same coordinates + + # 3v₀₀ - 2a₀ + r.c0.c0.diff(v0.c0, a0) + r.c0.c0.double() + r.c0.c0 += v0.c0 + # 3v₀₁ + 2a₁ + r.c0.c1.sum(v0.c1, a1) + r.c0.c1.double() + r.c0.c1 += v0.c1 + # 3v₁₀ - 2a₄ + r.c2.c0.diff(v1.c0, a4) + r.c2.c0.double() + r.c2.c0 += v1.c0 + # 3v₁₁ + 2b₅ + r.c2.c1.sum(v1.c1, a5) + r.c2.c1.double() + r.c2.c1 += v1.c1 + + # Now B = 3 √i c² + 2 ̄b + # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i + + # 3 (√i c²)₀ + 2a₂ + v2.c1 *= NonResidue + r.c1.c0.sum(v2.c1, a2) + r.c1.c0.double() + r.c1.c0 += v2.c1 + + # 3 (√i c²)₁ - 2a₃ + r.c1.c1.diff(v2.c0, a3) + r.c1.c1.double() + r.c1.c1 += v2.c0 + +func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt[F]) = + ## Square `a` into `r` + ## `a` MUST be in the cyclotomic subgroup + ## consequently `a` MUST be unitary + # Mapping between towering schemes + # -------------------------------- + # + # canonical <=> cubic over quadratic <=> quadratic over cubic + # c₀ <=> a₀ <=> b₀ + # c₁ <=> a₂ <=> b₃ + # c₂ <=> a₄ <=> b₁ + # c₃ <=> a₁ <=> b₄ + # c₄ <=> a₃ <=> b₂ + # c₅ <=> a₅ <=> b₅ + # + # Hence, this formula for a cubic extension field + # A = 3a² − 2 ̄a + # B = 3 √i c² + 2 ̄b + # C = 3b² − 2 ̄c + # + # becomes + # A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄) + # B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂) + # C = (b₁, b₅) = 3(b₃, b₂)² - 2(b₁, -b₅) + # + # with + # v₀ = (b₀, b₄) = (a.c0.c0, a.c1.c1) + # v₁ = (b₃, b₂) = (a.c1.c0, a.c0.c2) + # v₂ = (b₁, b₅) = (a.c0.c1, a.c1.c2) + var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: QuadraticExt[typeof(r.c0.c0)] + + template b0: untyped = a.c0.c0 + template b1: untyped = a.c0.c1 + template b2: untyped = a.c0.c2 + template b3: untyped = a.c1.c0 + template b4: untyped = a.c1.c1 + template b5: untyped = a.c1.c2 + + v0.square_disjoint(b0, b4) + v1.square_disjoint(b3, b2) + v2.square_disjoint(b1, b5) + + # From here on, r aliasing with a is only for the first operation + # and only read/write the exact same coordinates + + # 3v₀₀ - 2b₀ + r.c0.c0.diff(v0.c0, b0) + r.c0.c0.double() + r.c0.c0 += v0.c0 + # 3v₁₀ - 2b₁ + r.c0.c1.diff(v1.c0, b1) + r.c0.c1.double() + r.c0.c1 += v1.c0 + # 3v₀₁ + 2b₄ + r.c1.c1.sum(v0.c1, b4) + r.c1.c1.double() + r.c1.c1 += v0.c1 + # 3v₁₁ + 2b₅ + r.c1.c2.sum(v1.c1, b5) + r.c1.c2.double() + r.c1.c2 += v1.c1 + + # Now B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂) + # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i + + # 3 (√i (b₁, b₅)²)₀ + 2b₃ + v2.c1 *= NonResidue + r.c1.c0.sum(v2.c1, b3) + r.c1.c0.double() + r.c1.c0 += v2.c1 + + # 3 (√i (b₁, b₅)²)₁ - 2b₃ + r.c0.c2.diff(v2.c0, b2) + r.c0.c2.double() + r.c0.c2 += v2.c0 + +func cyclotomic_square*[FT](r: var FT, a: FT) {.inline, meter.} = + ## Square `a` into `r` + ## `a` MUST be in the cyclotomic subgroup + ## consequently `a` MUST be unitary + # + # Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions + # Granger, Scott, 2009 + # https://eprint.iacr.org/2009/565.pdf when a is CubicExt: - # Cubic extension field - # A = 3a² − 2 ̄a - # B = 3 √i c² + 2 ̄b - # C = 3b² − 2 ̄c - var t0{.noinit.}, t1{.noinit.}: typeof(a.c0) - - t0.square(a.c0) # t0 = a² - t1.double(t0) # t1 = 2a² - t1 += t0 # t1 = 3a² - - t0.conj(a.c0) # t0 = ̄a - t0.double() # t0 = 2 ̄a - r.c0.diff(t1, t0) # r0 = 3a² − 2 ̄a - - # Aliasing: a.c0 unused - - t0.square(a.c2) # t0 = c² - t0 *= NonResidue # t0 = √i c² - t1.double(t0) # t1 = 2 √i c² - t0 += t1 # t0 = 3 √i c² - - t1.square(a.c1) # t1 = b² - - r.c1.conj(a.c1) # r1 = ̄b - r.c1.double() # r1 = 2 ̄b - r.c1 += t0 # r1 = 3 √i c² + 2 ̄b - - # Aliasing: a.c1 unused - - t0.double(t1) # t0 = 2b² - t0 += t1 # t0 = 3b² - - t1.conj(a.c2) # r2 = ̄c - t1.double() # r2 = 2 ̄c - r.c2.diff(t0, t1) # r2 = 3b² - 2 ̄c - + r.cyclotomic_square_cube_over_quad(a) else: - {.error: "Not implemented".} + r.cyclotomic_square_quad_over_cube(a) -func cyclotomic_square*[FT](a: var FT) {.meter.} = +func cyclotomic_square*[FT](a: var FT) {.inline.} = ## Square `a` into `r` ## `a` MUST be in the cyclotomic subgroup ## consequently `a` MUST be unitary @@ -560,7 +660,10 @@ func fromFpk*[Fpkdiv6, Fpk]( {.error: "a must be a sextic extension field".} elif a is QuadraticExt: when a.c0 is CubicExt: - {.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".} + g.g2 = a.c1.c0 + g.g3 = a.c0.c2 + g.g4 = a.c0.c1 + g.g5 = a.c1.c2 else: {.error: "a must be a sextic extension field".} else: @@ -584,7 +687,12 @@ func asFpk*[Fpkdiv6, Fpk]( {.error: "a must be a sextic extension field".} elif a is QuadraticExt: when a.c0 is CubicExt: - {.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".} + a.c0.c0 = g0 + a.c0.c1 = g.g4 + a.c0.c2 = g.g3 + a.c1.c0 = g.g2 + a.c1.c1 = g1 + a.c1.c2 = g.g5 else: {.error: "a must be a sextic extension field".} else: @@ -598,9 +706,6 @@ func cyclotomic_exp_compressed*[N: static int, Fpk]( ## Exponentiation is done least-signigicant bits first ## `squarings` represents the number of squarings ## to do before the next multiplication. - ## - ## `accumSquarings` stores the accumulated squarings so far - ## iff N != 1 type Fpkdiv6 = typeof(a.c0.c0) diff --git a/constantine/math/pairing/lines_eval.nim b/constantine/math/pairing/lines_eval.nim index 8c11535..367582f 100644 --- a/constantine/math/pairing/lines_eval.nim +++ b/constantine/math/pairing/lines_eval.nim @@ -17,7 +17,7 @@ import ec_shortweierstrass_projective ], ../io/io_extfields, - ../curves/zoo_constants + ../constants/zoo_constants # No exceptions allowed {.push raises: [].} @@ -390,44 +390,19 @@ func line_add*[F1, F2]( # D-Twist # ------------------------------------------------------------ -func mul_by_line_xy0*[Fpkdiv2, Fpkdiv6]( - r: var Fpkdiv2, - a: Fpkdiv2, - x, y: Fpkdiv6) = - ## Sparse multiplication of an 𝔽pᵏᐟ² element - ## with coordinates (a₀, a₁, a₂) by a line (x, y, 0) - ## The z coordinates in the line will be ignored. - ## `r` and `a` must not alias - - static: - doAssert a is CubicExt - doAssert a.c0 is Fpkdiv6 - - var - v0 {.noInit.}: Fpkdiv6 - v1 {.noInit.}: Fpkdiv6 - - v0.prod(a.c0, x) - v1.prod(a.c1, y) - r.c0.prod(a.c2, y) - r.c0 *= SexticNonResidue - r.c0 += v0 - - r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated - r.c2.sum(x, y) - r.c1 *= r.c2 - r.c1 -= v0 - r.c1 -= v1 - - r.c2.prod(a.c2, x) - r.c2 += v1 - -func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = +func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = ## Sparse multiplication of an 𝔽pᵏ element ## by a sparse 𝔽pᵏ element coming from an D-Twist line function - ## With a quadratic over cubic towering (Fp2 -> Fp6 -> Fp12) + ## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12) ## The sparse element is represented by a packed Line type - ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates ab00c0 + ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates a00bc0 + + # a0b0 = (a00, a01, a02).( x, 0, 0) + # a1b1 = (a10, a11, a12).( y, z, 0) + # + # r0 = a0 b0 + ξ a1 b1 + # r1 = a0 b1 + a1 b0 (Schoolbook) + # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) static: doAssert Fpk.C.getSexticTwist() == D_Twist @@ -436,27 +411,290 @@ func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = type Fpkdiv2 = typeof(f.c0) - var - v0 {.noInit.}: Fpkdiv2 - v1 {.noInit.}: Fpkdiv2 - v2 {.noInit.}: Line[Fpkdiv6] - v3 {.noInit.}: Fpkdiv2 + when Fpk.C.has_large_field_elem(): + var + v0 {.noInit.}: Fpkdiv2 + v1 {.noInit.}: Fpkdiv2 + t {.noInit.}: Fpkdiv6 - v0.mul_by_line_xy0(f.c0, l.a, l.b) - v1.mul_sparse_by_0y0(f.c1, l.c) + v1.sum(f.c0, f.c1) + t.sum(l.a, l.b) + v0.mul_sparse_by_xy0(v1, t, l.c) - v2.x = l.a - v2.y.sum(l.b, l.c) - f.c1 += f.c0 - v3.mul_by_line_xy0(f.c1, v2.x, v2.y) - v3 -= v0 - f.c1.diff(v3, v1) + v1.mul_sparse_by_xy0(f.c1, l.b, l.c) - v3.c0.prod(v1.c2, SexticNonResidue) - v3.c0 += v0.c0 - v3.c1.sum(v0.c1, v1.c0) - v3.c2.sum(v0.c2, v1.c1) - f.c0 = v3 + f.c1.diff(v0, v1) + + v0.mul_sparse_by_x00(f.c0, l.a) + f.c1 -= v0 + + v1 *= NonResidue + f.c0.sum(v0, v1) + + else: # Lazy reduction + var + V0 {.noInit.}: doubleprec(Fpkdiv2) + V1 {.noInit.}: doubleprec(Fpkdiv2) + V {.noInit.}: doubleprec(Fpkdiv2) + t {.noInit.}: Fpkdiv6 + u {.noInit.}: Fpkdiv2 + + u.sum(f.c0, f.c1) + t.sum(l.a, l.b) + V.mul2x_sparse_by_xy0(u, t, l.c) + V1.mul2x_sparse_by_xy0(f.c1, l.b, l.c) + V0.mul2x_sparse_by_x00(f.c0, l.a) + + V.diff2xMod(V, V1) + V.diff2xMod(V, V0) + f.c1.redc2x(V) + + V1.prod2x(V1, NonResidue) + V0.sum2xMod(V0, V1) + f.c0.redc2x(V0) + +func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) = + ## Multiply 2 lines together + ## The result is sparse in f. + # Preambule + # + # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be + # (a0', a1', 0).(b0', b1', 0) + # + # r0' = a0'b0' + # r1' = a0'b1 + a1'b0' + # or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1' + # r2' = a1'b1 + # + # Note¹ that the Karatsuba does not reuse terms! + # + # In the following equations (taken from quad extension implementation) + # a0 = ( x , 0 , 0) + # a1 = ( y , z , 0) + # b0 = ( x', 0, 0) + # b1 = ( y', z', 0) + # + # a0b0 = ( x, 0, 0).( x', 0 , 0) = (xx', 0, 0 ) + # a1b1 = ( y, z, 0).( y', z', 0) = (yy', yz'+zy', zz') + + # r0 = a0 b0 + ξ a1 b1 + # r1 = a0 b1 + a1 b0 (Schoolbook) + # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # Note² that Karatsuba doesn't help as a0 and b0 are actually very cheap (x, 0, 0) + # + # Replacing by the coordinates in the lower tower we get + # + # r0 = (xx'+ξzz', yy', yz'+zy') + # r1 = ( x, 0, 0)( y', z', 0) + ( y , z , 0)( x', 0, 0) + # = (xy'+yx', xz'+zx', 0) + # + # Now we can apply Karasuba¹² for a total of 6 multiplications + + static: + doAssert Fpk.C.getSexticTwist() == D_Twist + doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²" + doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶" + + var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6) + + XX.prod2x(l0.a, l1.a) + YY.prod2x(l0.b, l1.b) + ZZ.prod2x(l0.c, l1.c) + + var V{.noInit.}: doublePrec(Fpkdiv6) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 + + # r0 = (xx'+ξzz', yy', yz'+zy') + # r00 = xx'+ξzz'' + # r01 = yy' + # r02 = yz'+zy' = (y+z)(y'+z') - yy' - zz' + V.prod2x(ZZ, NonResidue) + V.sum2xMod(V,XX) + f.c0.c0.redc2x(V) + + f.c0.c1.redc2x(YY) + + t0.sum(l0.b, l0.c) + t1.sum(l1.b, l1.c) + V.prod2x(t0, t1) + V.diff2xMod(V, YY) + V.diff2xMod(V, ZZ) + f.c0.c2.redc2x(V) + + # r1 = (xy'+yx', xz'+zx', 0) + # r10 = xy'+yx' = (x+y)(x'+y') - xx' - yy' + # r11 = xz'+zx' = (x+z)(x'+z') - xx' - zz' + # r12 = 0 + t0.sum(l0.a, l0.b) + t1.sum(l1.a, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, XX) + V.diff2xMod(V, YY) + f.c1.c0.redc2x(V) + + t0.sum(l0.a, l0.c) + t1.sum(l1.a, l1.c) + V.prod2x(t0, t1) + V.diff2xMod(V, XX) + V.diff2xMod(V, ZZ) + f.c1.c1.redc2x(V) + + f.c1.c2.setZero() + +# M-Twist +# ------------------------------------------------------------ + +func mul_sparse_by_line_cb00a0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) = + ## Sparse multiplication of an 𝔽pᵏ element + ## by a sparse 𝔽pᵏ element coming from an M-Twist line function + ## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12) + ## The sparse element is represented by a packed Line type + ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates cb00a0 + + # a0b0 = (a00, a01, a02).( z, y, 0) + # a1b1 = (a10, a11, a12).( 0, x, 0) + # + # r0 = a0 b0 + ξ a1 b1 + # r1 = a0 b1 + a1 b0 (Schoolbook) + # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + + static: + doAssert Fpk.C.getSexticTwist() == M_Twist + doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²" + doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶" + + type Fpkdiv2 = typeof(f.c0) + + when Fpk.C.has_large_field_elem(): + var + v0 {.noInit.}: Fpkdiv2 + v1 {.noInit.}: Fpkdiv2 + t {.noInit.}: Fpkdiv6 + + v1.sum(f.c0, f.c1) + t.sum(l.b, l.a) + v0.mul_sparse_by_xy0(v1, l.c, t) + + v1.mul_sparse_by_0y0(f.c1, l.a) + + f.c1.diff(v0, v1) + + v0.mul_sparse_by_xy0(f.c0, l.c, l.b) + f.c1 -= v0 + + v1 *= NonResidue + f.c0.sum(v0, v1) + else: # Lazy reduction + var + V0 {.noInit.}: doubleprec(Fpkdiv2) + V1 {.noInit.}: doubleprec(Fpkdiv2) + V {.noInit.}: doubleprec(Fpkdiv2) + t {.noInit.}: Fpkdiv6 + u {.noInit.}: Fpkdiv2 + + u.sum(f.c0, f.c1) + t.sum(l.b, l.a) + V.mul2x_sparse_by_xy0(u, l.c, t) + V1.mul2x_sparse_by_0y0(f.c1, l.a) + V0.mul2x_sparse_by_xy0(f.c0, l.c, l.b) + + V.diff2xMod(V, V1) + V.diff2xMod(V, V0) + f.c1.redc2x(V) + + V1.prod2x(V1, NonResidue) + V0.sum2xMod(V0, V1) + f.c0.redc2x(V0) + +func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) = + ## Multiply 2 lines together + ## The result is sparse in f.c1.c0 + # Preambule + # + # A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be + # (a0', a1', 0).(b0', b1', 0) + # + # r0' = a0'b0' + # r1' = a0'b1 + a1'b0' + # or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1' + # r2' = a1'b1 + # + # Note¹ that the Karatsuba does not reuse terms! + # + # In the following equations (taken from quad extension implementation) + # a0 = ( z , y , 0) + # a1 = ( 0 , x , 0) + # b0 = ( z', y', 0) + # b1 = ( 0 , x', 0) + # + # a0b0 = ( z, y, 0).(z', y', 0) = (zz', zy'+yz', yy') + # a1b1 = ( 0, x, 0).( 0, x', 0) = ( 0, 0, xx') + + # r0 = a0 b0 + ξ a1 b1 + # r1 = a0 b1 + a1 b0 (Schoolbook) + # = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # Note² that Karatsuba doesn't help as a1 and b1 are actually very cheap (0, x, 0) + # + # Replacing by the coordinates in the lower tower we get + # + # r0 = (zz'+ξxx', zy'+z'y, yy') + # r1 = (z, y, 0)(0, x', 0) + (z', y', 0)(0, x, 0) + # = (0, zx'+xz', yx'+xy') + # + # Now we can apply Karasuba¹² for a total of 6 multiplications + + static: + doAssert Fpk.C.getSexticTwist() == M_Twist + doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²" + doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶" + + var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6) + + XX.prod2x(l0.a, l1.a) + YY.prod2x(l0.b, l1.b) + ZZ.prod2x(l0.c, l1.c) + + var V{.noInit.}: doublePrec(Fpkdiv6) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 + + # r0 = (zz'+ξxx', zy'+z'y, yy') + # r00 = zz'+ξxx' + # r01 = zy'+z'y = (z+y)(z'+y') - zz' - yy' + # r02 = yy' + V.prod2x(XX, NonResidue) + V.sum2xMod(V, ZZ) + f.c0.c0.redc2x(V) + + t0.sum(l0.c, l0.b) + t1.sum(l1.c, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, YY) + f.c0.c1.redc2x(V) + + f.c0.c2.redc2x(YY) + + # r1 = (0, zx'+xz', yx'+xy') + # r10 = 0 + # r11 = zx'+xz' = (z+x)(z'+x') - zz' - xx' + # r12 = xy'+yx' = (x+y)(x'+y') - xx' - yy' + f.c1.c0.setZero() + + t0.sum(l0.c, l0.a) + t1.sum(l1.c, l1.a) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, XX) + f.c1.c1.redc2x(V) + + t0.sum(l0.a, l0.b) + t1.sum(l1.a, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, XX) + V.diff2xMod(V, YY) + f.c1.c2.redc2x(V) # ############################################################ # @@ -556,16 +794,16 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin ## Multiply 2 lines together ## The result is sparse in f.c1.c1 # In the following equations (taken from cubic extension implementation) - # a0 = (x0, z0) - # a1 = (y0, 0) - # a2 = ( 0, 0) - # b0 = (x1, z1) - # b1 = (y1, 0) - # b2 = ( 0, 0) + # a0 = ( x, z) + # a1 = ( y, 0) + # a2 = ( 0, 0) + # b0 = (x', z') + # b1 = (y', 0) + # b2 = ( 0, 0) # - # v0 = a0 b0 = (x0, z0).(x1, z1) - # v1 = a1 b1 = (y0, 0).(y1, 0) - # v2 = a2 b2 = ( 0, 0).( 0, 0) + # v0 = a0 b0 = (x, z).(x', z') + # v1 = a1 b1 = (y, 0).(y', 0) + # v2 = a2 b2 = (0, 0).( 0, 0) # # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 # = ξ (a1 b1 + a2 b1 - v1) + v0 @@ -575,106 +813,65 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 # = a0 b0 - v0 + v1 # = v1 + # + # r1 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field) + # + # If we dive deeper: + # r1 = a0b1 + a1b0 = (xy'+yx', zy'+yz') (Schoolbook - 4 muls) + # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy' + # r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy' + # + # The substraction are all computed as part of v0 and v1 + # hence r1 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only static: doAssert Fpk.C.getSexticTwist() == D_Twist doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" - type Fpkdiv3 = typeof(f.c0) + var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6) - var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3) - var V1{.noInit.}: doublePrec(Fpkdiv6) + XX.prod2x(l0.a, l1.a) + YY.prod2x(l0.b, l1.b) + ZZ.prod2x(l0.c, l1.c) - V0.prod2x_disjoint(l0.a, l0.c, l1.a, l1.c) # a0 b0 = (x0, z0).(x1, z1) - V1.prod2x(l0.b, l1.b) # a1 b1 = (y0, 0).(y1, 0) + var V{.noInit.}: doublePrec(Fpkdiv6) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 - # r1 = (a0 + a1) * (b0 + b1) - v0 - v1 - f.c1.c0.sum(l0.a, l0.b) # x0 + y0 - f.c1.c1.sum(l1.a, l1.b) # x1 + y1 - f2x.prod2x_disjoint(f.c1.c0, l0.c, f.c1.c1, l1.c) # (x0 + y0, z0)(x1 + y1, z1) = (a0 + a1) * (b0 + b1) - f2x.diff2xMod(f2x, V0) - f2x.c0.diff2xMod(f2x.c0, V1) - f.c1.redc2x(f2x) + # r0 = a0 b0 = (x, z).(x', z') + # r00 = xx' + βzz' + # r01 = (x+z)(x'+z') - zz' - xx' + V.prod2x(ZZ, NonResidue) + V.sum2xMod(V, XX) + f.c0.c0.redc2x(V) + + t0.sum(l0.a, l0.c) + t1.sum(l1.a, l1.c) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, XX) + f.c0.c1.redc2x(V) - # r0 = v0 - f.c0.redc2x(V0) + # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy' + # r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy' + t0.sum(l0.a, l0.b) + t1.sum(l1.a, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, XX) + V.diff2xMod(V, YY) + f.c1.c0.redc2x(V) - # r2 = v1 - f.c2.c0.redc2x(V1) + t0.sum(l0.c, l0.b) + t1.sum(l1.c, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, YY) + f.c1.c1.redc2x(V) + + # r2 = v2 = (y, 0).(y', 0) = (yy', 0) + f.c2.c0.redc2x(YY) f.c2.c1.setZero() -func mul_sparse_by_abcdefghij00*[Fpk]( - a: var Fpk, b: Fpk) = - ## Sparse multiplication of an 𝔽pᵏ element - ## by a sparse 𝔽pᵏ element abcdefghij00 - ## with each representing 𝔽pᵏᐟ⁶ coordinate - - static: - doAssert Fpk.C.getSexticTwist() == D_Twist - doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" - doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" - - type Fpkdiv3 = typeof(a.c0) - - # In the following equations (taken from cubic extension implementation) - # b0 = (b00, b01) - # b1 = (b10, b11) - # b2 = (b20, 0) - # - # v0 = a0 b0 = (f00, f01).(b00, b01) - # v1 = a1 b1 = (f10, f11).(b10, b11) - # v2 = a2 b2 = (f20, f21).(b20, 0) - # - # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ - # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂ - # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁ - - var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3) - var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3 - var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3) - - V0.prod2x(a.c0, b.c0) - V1.prod2x(a.c1, b.c1) - V2.mul2x_sparse_by_x0(a.c2, b.c2) - - # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ - t0.sum(a.c1, a.c2) - t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0) - f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂) - f2x.diff2xMod(f2x, V1) - f2x.diff2xMod(f2x, V2) - f2x.prod2x(f2x, NonResidue) - f2x.sum2xMod(f2x, V0) - - # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ - t0.sum(a.c0, a.c1) - t1.sum(b.c0, b.c1) - g2x.prod2x(t0, t1) - g2x.diff2xMod(g2x, V0) - g2x.diff2xMod(g2x, V1) - - # r₂ = (a₀ + a₂) and (b₀ + b₂) - t0.sum(a.c0, a.c2) - t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0) - - # Now we are aliasing free - - # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ - a.c0.redc2x(f2x) - - # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂ - f2x.prod2x(V2, NonResidue) - g2x.sum2xMod(g2x, f2x) - a.c1.redc2x(g2x) - - # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁ - f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1) - f2x.diff2xMod(f2x, V0) - f2x.diff2xMod(f2x, V2) - f2x.sum2xMod(f2x, V1) - a.c2.redc2x(f2x) - # M-Twist # ------------------------------------------------------------ @@ -769,16 +966,16 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin ## Multiply 2 lines together ## The result is sparse in f.c1.c1 # In the following equations (taken from cubic extension implementation) - # a0 = (z0, x0) + # a0 = ( z, x) # a1 = ( 0, 0) - # a2 = (y0, 0) - # b0 = (z1, x1) + # a2 = ( y, 0) + # b0 = (z', x') # b1 = ( 0, 0) - # b2 = (y0, 0) + # b2 = (y', 0) # - # v0 = a0 b0 = (z0, x0).(z1, x1) - # v1 = a1 b1 = ( 0, 0).( 0, 0) - # v2 = a2 b2 = (y0, 0).(y1, 0) + # v0 = a0 b0 = (z, x).(z', x') + # v1 = a1 b1 = (0, 0).(0 , 0) + # v2 = a2 b2 = (y, 0).(y', 0) # # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 # = ξ (a1 b2 + a2 b2 - v2) + v0 @@ -788,36 +985,221 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin # = ξ v2 # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 # = (a0 + a2) * (b0 + b2) - v0 - v2 + # + # r2 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field) + # + # If we dive deeper: + # r2 = a0b2 + a2b0 = (zy'+yz', xy'+x'y) (Schoolbook - 4 muls) + # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy' + # r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy' + # + # The substraction are all computed as part of v0 and v2 + # hence r2 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only static: doAssert Fpk.C.getSexticTwist() == M_Twist doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" - type Fpkdiv3 = typeof(f.c0) + var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6) - var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3) - var V2{.noInit.}: doublePrec(Fpkdiv6) + XX.prod2x(l0.a, l1.a) + YY.prod2x(l0.b, l1.b) + ZZ.prod2x(l0.c, l1.c) - V0.prod2x_disjoint(l0.c, l0.a, l1.c, l1.a) # a0 b0 = (z0, x0).(z1, x1) - V2.prod2x(l0.b, l1.b) # a2 b2 = (y0, 0).(y1, 0) + var V{.noInit.}: doublePrec(Fpkdiv6) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv6 - # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 - f.c2.c0.sum(l0.b, l0.c) # y0 + z0 - f.c2.c1.sum(l1.b, l1.c) # y1 + z1 - f2x.prod2x_disjoint(f.c2.c0, l0.a, f.c2.c1, l1.a) # (z0 + y0, x0).(z1 + y1, x1) = (a0 + a2) * (b0 + b2) - f2x.diff2xMod(f2x, V0) # (a0 + a2) * (b0 + b2) - v0 - f2x.c0.diff2xMod(f2x.c0, V2) # (a0 + a2) * (b0 + b2) - v0 - v2 - f.c2.redc2x(f2x) + # r0 = a0 b0 = (z, x).(z', x') + # r00 = zz' + βxx' + # r01 = (z+x)(z'+x') - zz' - xx' + V.prod2x(XX, NonResidue) + V.sum2xMod(V, ZZ) + f.c0.c0.redc2x(V) + + t0.sum(l0.c, l0.a) + t1.sum(l1.c, l1.a) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, XX) + f.c0.c1.redc2x(V) - # r1 = ξ v2 - f.c1.c1.redc2x(V2) + # r1 = ξ v2 = ξ(y, 0).(y', 0) = ξ(yy', 0) = (0, yy') f.c1.c0.setZero() + f.c1.c1.redc2x(YY) - # r0 = v0 - f.c0.redc2x(V0) + # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy' + # r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy' + t0.sum(l0.c, l0.b) + t1.sum(l1.c, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, ZZ) + V.diff2xMod(V, YY) + f.c2.c0.redc2x(V) -func mul_sparse_by_abcd00efghij*[Fpk]( + t0.sum(l0.a, l0.b) + t1.sum(l1.a, l1.b) + V.prod2x(t0, t1) + V.diff2xMod(V, XX) + V.diff2xMod(V, YY) + f.c2.c1.redc2x(V) + +# ############################################################ +# +# Sparse multiplication +# +# ############################################################ + +func mul_sparse_by_abcdefghij00_quad_over_cube*[Fpk]( + a: var Fpk, b: Fpk) = + ## Sparse multiplication of an 𝔽pᵏ element + ## by a sparse 𝔽pᵏ element abcdefghij00 + ## with each representing 𝔽pᵏᐟ⁶ coordinate + + static: + doAssert Fpk.C.getSexticTwist() == D_Twist + doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²" + doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶" + + type Fpkdiv2 = typeof(a.c0) + + # In the following equations (taken from quadratic extension implementation) + # b0 = (b00, b01, b02) + # b1 = (b10, b11, 0) + # + # v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02) + # v1 = a1 b1 = (a10, a11, a12).(b10, b11, 0) + # + # r0 = a0 b0 + ξ a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 + + when Fpk.C.has_large_field_elem(): + var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2 + + # v2 <- (a0 + a1)(b0 + b1) + v0.sum(a.c0, a.c1) + v1.c0.sum(b.c0.c0, b.c1.c0) + v1.c1.sum(b.c0.c1, b.c1.c1) + v1.c2 = b.c0.c2 + v2.prod(v0, v1) + + # v0 <- a0 b0 + # v1 <- a1 b1 + v0.prod(a.c0, b.c0) + v1.mul_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1) + + # aliasing: a and b unneeded now + + # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 + a.c1.diff(v2, v0) + a.c1 -= v1 + + # r0 <- a0 b0 + β a1 b1 + v1 *= NonResidue + a.c0.sum(v0, v1) + + else: # Lazy reduction + var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv2 + + # v2 <- (a0 + a1)(b0 + b1) + t0.sum(a.c0, a.c1) + t1.c0.sum(b.c0.c0, b.c1.c0) + t1.c1.sum(b.c0.c1, b.c1.c1) + t1.c2 = b.c0.c2 + V2.prod2x(t0, t1) + + # v0 <- a0 b0 + # v1 <- a1 b1 + V0.prod2x(a.c0, b.c0) + V1.mul2x_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1) + + # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 + V2.diff2xMod(V2, V0) + V2.diff2xMod(V2, V1) + a.c1.redc2x(V2) + + # r0 <- a0 b0 + β a1 b1 + V1.prod2x(V1, NonResidue) + V2.sum2xMod(V0, V1) + a.c0.redc2x(V2) + +func mul_sparse_by_abcdef00ghij_quad_over_cube*[Fpk]( + a: var Fpk, b: Fpk) = + ## Sparse multiplication of an 𝔽pᵏ element + ## by a sparse 𝔽pᵏ element abcdef00ghij + ## with each representing 𝔽pᵏᐟ⁶ coordinate + + static: + doAssert Fpk.C.getSexticTwist() == M_Twist + doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²" + doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶" + + type Fpkdiv2 = typeof(a.c0) + + # In the following equations (taken from quadratic extension implementation) + # b0 = (b00, b01, b02) + # b1 = ( 0, b11, b12) + # + # v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02) + # v1 = a1 b1 = (a10, a11, a12).( 0, b11, b12) + # + # r0 = a0 b0 + ξ a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 + + when Fpk.C.has_large_field_elem(): + var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2 + + # v2 <- (a0 + a1)(b0 + b1) + v0.sum(a.c0, a.c1) + v1.c0 = b.c0.c0 + v1.c1.sum(b.c0.c1, b.c1.c1) + v1.c2.sum(b.c0.c2, b.c1.c2) + v2.prod(v0, v1) + + # v0 <- a0 b0 + # v1 <- a1 b1 + v0.prod(a.c0, b.c0) + v1.mul_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2) + + # aliasing: a and b unneeded now + + # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 + a.c1.diff(v2, v0) + a.c1 -= v1 + + # r0 <- a0 b0 + β a1 b1 + v1 *= NonResidue + a.c0.sum(v0, v1) + + else: # Lazy reduction + var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2) + var t0{.noInit.}, t1{.noInit.}: Fpkdiv2 + + # v2 <- (a0 + a1)(b0 + b1) + t0.sum(a.c0, a.c1) + t1.c0 = b.c0.c0 + t1.c1.sum(b.c0.c1, b.c1.c1) + t1.c2.sum(b.c0.c2, b.c1.c2) + V2.prod2x(t0, t1) + + # v0 <- a0 b0 + # v1 <- a1 b1 + V0.prod2x(a.c0, b.c0) + V1.mul2x_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2) + + # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 + V2.diff2xMod(V2, V0) + V2.diff2xMod(V2, V1) + a.c1.redc2x(V2) + + # r0 <- a0 b0 + β a1 b1 + V1.prod2x(V1, NonResidue) + V2.sum2xMod(V0, V1) + a.c0.redc2x(V2) + + +func mul_sparse_by_abcd00efghij_cube_over_quad*[Fpk]( a: var Fpk, b: Fpk) = ## Sparse multiplication of an 𝔽pᵏ element ## by a sparse 𝔽pᵏ element abcd00efghij @@ -888,15 +1270,92 @@ func mul_sparse_by_abcd00efghij*[Fpk]( f2x.sum2xMod(f2x, V1) a.c2.redc2x(f2x) +func mul_sparse_by_abcdefghij00_cube_over_quad*[Fpk]( + a: var Fpk, b: Fpk) = + ## Sparse multiplication of an 𝔽pᵏ element + ## by a sparse 𝔽pᵏ element abcdefghij00 + ## with each representing 𝔽pᵏᐟ⁶ coordinate + + static: + doAssert Fpk.C.getSexticTwist() == D_Twist + doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" + doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" + + type Fpkdiv3 = typeof(a.c0) + + # In the following equations (taken from cubic extension implementation) + # b0 = (b00, b01) + # b1 = (b10, b11) + # b2 = (b20, 0) + # + # v0 = a0 b0 = (f00, f01).(b00, b01) + # v1 = a1 b1 = (f10, f11).(b10, b11) + # v2 = a2 b2 = (f20, f21).(b20, 0) + # + # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ + # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂ + # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁ + + var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3) + var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3 + var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3) + + V0.prod2x(a.c0, b.c0) + V1.prod2x(a.c1, b.c1) + V2.mul2x_sparse_by_x0(a.c2, b.c2) + + # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ + t0.sum(a.c1, a.c2) + t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0) + f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂) + f2x.diff2xMod(f2x, V1) + f2x.diff2xMod(f2x, V2) + f2x.prod2x(f2x, NonResidue) + f2x.sum2xMod(f2x, V0) + + # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + t0.sum(a.c0, a.c1) + t1.sum(b.c0, b.c1) + g2x.prod2x(t0, t1) + g2x.diff2xMod(g2x, V0) + g2x.diff2xMod(g2x, V1) + + # r₂ = (a₀ + a₂) and (b₀ + b₂) + t0.sum(a.c0, a.c2) + t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0) + + # Now we are aliasing free + + # r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ + a.c0.redc2x(f2x) + + # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂ + f2x.prod2x(V2, NonResidue) + g2x.sum2xMod(g2x, f2x) + a.c1.redc2x(g2x) + + # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁ + f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1) + f2x.diff2xMod(f2x, V0) + f2x.diff2xMod(f2x, V2) + f2x.sum2xMod(f2x, V1) + a.c2.redc2x(f2x) + # Dispatch # ------------------------------------------------------------ func mul_by_line*[Fpk, Fpkdiv6](f: var Fpk, line: Line[Fpkdiv6]) {.inline.} = ## Multiply an element of Fp12 by a sparse line function when Fpk.C.getSexticTwist() == D_Twist: - f.mul_sparse_by_line_acb000(line) + when f is CubicExt: + f.mul_sparse_by_line_acb000(line) + else: + f.mul_sparse_by_line_a00bc0(line) elif Fpk.C.getSexticTwist() == M_Twist: - f.mul_sparse_by_line_ca00b0(line) + when f is CubicExt: + f.mul_sparse_by_line_ca00b0(line) + else: + f.mul_sparse_by_line_cb00a0(line) else: {.error: "A line function assumes that the curve has a twist".} @@ -905,18 +1364,30 @@ func prod_from_2_lines*[Fpk, Fpkdiv6](f: var Fpk, line0, line1: Line[Fpkdiv6]) { ## and store the result in f ## f is overwritten when Fpk.C.getSexticTwist() == D_Twist: - f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1) + when f is CubicExt: + f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1) + else: + f.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1) elif Fpk.C.getSexticTwist() == M_Twist: - f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1) + when f is CubicExt: + f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1) + else: + f.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1) else: {.error: "A line function assumes that the curve has a twist".} func mul_by_prod_of_2_lines*[Fpk](f: var Fpk, g: Fpk) {.inline.} = ## Multiply f by the somewhat sparse product of 2 lines when Fpk.C.getSexticTwist() == D_Twist: - f.mul_sparse_by_abcdefghij00(g) + when f is CubicExt: + f.mul_sparse_by_abcdefghij00_cube_over_quad(g) + else: + f.mul_sparse_by_abcdefghij00_quad_over_cube(g) elif Fpk.C.getSexticTwist() == M_Twist: - f.mul_sparse_by_abcd00efghij(g) + when f is CubicExt: + f.mul_sparse_by_abcd00efghij_cube_over_quad(g) + else: + f.mul_sparse_by_abcdef00ghij_quad_over_cube(g) else: {.error: "A line function assumes that the curve has a twist".} diff --git a/constantine/math/pairing/pairing_bls12.nim b/constantine/math/pairing/pairing_bls12.nim index 63e903a..bbd3f67 100644 --- a/constantine/math/pairing/pairing_bls12.nim +++ b/constantine/math/pairing/pairing_bls12.nim @@ -15,7 +15,7 @@ import ec_shortweierstrass_projective ], ../isogenies/frobenius, - ../curves/zoo_pairings, + ../constants/zoo_pairings, ../arithmetic, ./cyclotomic_subgroup, ./lines_eval, diff --git a/constantine/math/pairing/pairing_bn.nim b/constantine/math/pairing/pairing_bn.nim index 4cfcee7..128ef4e 100644 --- a/constantine/math/pairing/pairing_bn.nim +++ b/constantine/math/pairing/pairing_bn.nim @@ -15,7 +15,7 @@ import ec_shortweierstrass_projective ], ../isogenies/frobenius, - ../curves/zoo_pairings, + ../constants/zoo_pairings, ./lines_eval, ./cyclotomic_subgroup, ./miller_loops diff --git a/constantine/math/pairing/pairing_bw6_761.nim b/constantine/math/pairing/pairing_bw6_761.nim index d5b1a05..7ee4a55 100644 --- a/constantine/math/pairing/pairing_bw6_761.nim +++ b/constantine/math/pairing/pairing_bw6_761.nim @@ -15,7 +15,7 @@ import ec_shortweierstrass_projective ], ../isogenies/frobenius, - ../curves/zoo_pairings, + ../constants/zoo_pairings, ./lines_eval, ./miller_loops diff --git a/constantine/platforms/isa/macro_assembler_x86.nim b/constantine/platforms/isa/macro_assembler_x86.nim index 8552d23..d9061fc 100644 --- a/constantine/platforms/isa/macro_assembler_x86.nim +++ b/constantine/platforms/isa/macro_assembler_x86.nim @@ -74,6 +74,7 @@ type kRegister kFromArray kArrayAddr + k2dArrayAddr Operand* = object desc*: OperandDesc @@ -84,6 +85,9 @@ type offset: int of kArrayAddr: buf: seq[Operand] + of k2dArrayAddr: + dims: array[2, int] + buf2d: seq[Operand] OperandDesc* = ref object asmId*: string # [a] - ASM id @@ -141,11 +145,17 @@ proc `[]`*(opArray: OperandArray, index: int): Operand = func `[]`*(opArray: var OperandArray, index: int): var Operand = opArray.buf[index] -func `[]`*(arrayAddr: Operand, index: int): Operand = - arrayAddr.buf[index] +func `[]`*(arrAddr: Operand, index: int): Operand = + arrAddr.buf[index] -func `[]`*(arrayAddr: var Operand, index: int): var Operand = - arrayAddr.buf[index] +func `[]`*(arrAddr: var Operand, index: int): var Operand = + arrAddr.buf[index] + +func `[]`*(arr2dAddr: Operand, i, j: int): Operand = + arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j] + +func `[]`*(arr2dAddr: var Operand, i, j: int): var Operand = + arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j] func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x86 = result.wordSize = sizeof(Word) @@ -236,6 +246,22 @@ func asArrayAddr*(op: Register, len: int): Operand = offset: i ) +func as2dArrayAddr*(op: Operand, rows, cols: int): Operand = + ## Use the value stored in an operand as an array address + doAssert op.desc.rm in {Reg, PointerInReg, ElemsInReg}+SpecificRegisters + result = Operand( + kind: k2dArrayAddr, + desc: nil, + dims: [rows, cols], + buf2d: newSeq[Operand](rows*cols) + ) + for i in 0 ..< rows*cols: + result.buf2d[i] = Operand( + desc: op.desc, + kind: kFromArray, + offset: i + ) + # Code generation # ------------------------------------------------------------------------------------------------------------ @@ -344,7 +370,7 @@ func generate*(a: Assembler_x86): NimNode = func getStrOffset(a: Assembler_x86, op: Operand): string = if op.kind != kFromArray: - if op.kind == kArrayAddr: + if op.kind in {kArrayAddr, k2dArrayAddr}: # We are operating on an array pointer # instead of array elements if op.buf[0].desc.constraint == ClobberedRegister: diff --git a/constantine/signatures/bls_signatures.nim b/constantine/signatures/bls_signatures.nim index 3db0061..bd374dc 100644 --- a/constantine/signatures/bls_signatures.nim +++ b/constantine/signatures/bls_signatures.nim @@ -8,7 +8,7 @@ import ../math/[ec_shortweierstrass, pairings], - ../math/curves/zoo_generators, + ../math/constants/zoo_generators, ../hash_to_curve/hash_to_curve, ../hashes diff --git a/metering/m_pairings.nim b/metering/m_pairings.nim index 86c1a7c..e999261 100644 --- a/metering/m_pairings.nim +++ b/metering/m_pairings.nim @@ -12,7 +12,7 @@ import ../constantine/math/config/[common, curves], ../constantine/math/[arithmetic, extension_fields], ../constantine/math/elliptic/ec_shortweierstrass_projective, - ../constantine/math/curves/zoo_subgroups, + ../constantine/math/constants/zoo_subgroups, ../constantine/math/pairing/pairing_bls12, # Helpers ../helpers/prng_unsafe diff --git a/tests/math/t_ec_template.nim b/tests/math/t_ec_template.nim index 5e15dab..fcd3479 100644 --- a/tests/math/t_ec_template.nim +++ b/tests/math/t_ec_template.nim @@ -27,7 +27,7 @@ import ec_twistededwards_projective, ec_scalar_mul], ../../constantine/math/io/[io_bigints, io_fields, io_ec], - ../../constantine/math/curves/zoo_subgroups, + ../../constantine/math/constants/zoo_subgroups, # Test utilities ../../helpers/prng_unsafe, ./support/ec_reference_scalar_mult diff --git a/tests/math/t_finite_fields_mulsquare.nim b/tests/math/t_finite_fields_mulsquare.nim index b85447d..52b473f 100644 --- a/tests/math/t_finite_fields_mulsquare.nim +++ b/tests/math/t_finite_fields_mulsquare.nim @@ -28,7 +28,7 @@ echo "test_finite_fields_mulsquare xoshiro512** seed: ", seed static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option" proc sanity(C: static Curve) = - test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]": + test "Squaring 0,1,2 with " & $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]": block: # 0² mod var n: Fp[C] @@ -310,3 +310,74 @@ suite "Modular squaring - bugs highlighted by property-based testing": check: bool(a2mul == expected) bool(a2sqr == expected) + + +proc random_sumprod(C: static Curve, N: static int) = + template sumprod_test(random_instancer: untyped) = + block: + var a: array[N, Fp[C]] + var b: array[N, Fp[C]] + + for i in 0 ..< N: + a[i] = rng.random_instancer(Fp[C]) + b[i] = rng.random_instancer(Fp[C]) + + var r, r_ref, t: Fp[C] + + r_ref.prod(a[0], b[0]) + for i in 1 ..< N: + t.prod(a[i], b[i]) + r_ref += t + + r.sumprod(a, b) + + doAssert bool(r == r_ref) + + template sumProdMax() = + block: + var a: array[N, Fp[C]] + var b: array[N, Fp[C]] + + for i in 0 ..< N: + a[i].setMinusOne() + b[i].setMinusOne() + + var r, r_ref, t: Fp[C] + + r_ref.prod(a[0], b[0]) + for i in 1 ..< N: + t.prod(a[i], b[i]) + r_ref += t + + r.sumprod(a, b) + + doAssert bool(r == r_ref) + + sumprod_test(random_unsafe) + sumprod_test(randomHighHammingWeight) + sumprod_test(random_long01Seq) + sumProdMax() + +suite "Random sum products is consistent with naive " & " [" & $WordBitwidth & "-bit mode]": + + const MaxLength = 8 + test "Random sum products mod P-224]": + for _ in 0 ..< Iters: + staticFor N, 2, MaxLength: + random_sumprod(P224, N) + test "Random sum products mod BN254_Nogami]": + for _ in 0 ..< Iters: + staticFor N, 2, MaxLength: + random_sumprod(BN254_Nogami, N) + test "Random sum products mod BN254_Snarks]": + for _ in 0 ..< Iters: + staticFor N, 2, MaxLength: + random_sumprod(BN254_Snarks, N) + test "Random sum products mod BLS12_377]": + for _ in 0 ..< Iters: + staticFor N, 2, MaxLength: + random_sumprod(BLS12_377, N) + test "Random sum products mod BLS12_381]": + for _ in 0 ..< Iters: + staticFor N, 2, MaxLength: + random_sumprod(BLS12_381, N) \ No newline at end of file diff --git a/tests/math/t_fp12_bn254_nogami.nim b/tests/math/t_fp12_bn254_nogami.nim new file mode 100644 index 0000000..89a7fe6 --- /dev/null +++ b/tests/math/t_fp12_bn254_nogami.nim @@ -0,0 +1,26 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internals + ../../constantine/math/extension_fields, + ../../constantine/math/config/curves, + # Test utilities + ./t_fp_tower_template + +const TestCurves = [ + BN254_Nogami, + ] + +runTowerTests( + ExtDegree = 12, + Iters = 12, + TestCurves = TestCurves, + moduleName = "test_fp12_" & $BN254_Nogami, + testSuiteDesc = "𝔽p12 = 𝔽p6[w] " & $BN254_Nogami +) diff --git a/tests/math/t_fp6_bn254_nogami.nim b/tests/math/t_fp6_bn254_nogami.nim new file mode 100644 index 0000000..11fc55c --- /dev/null +++ b/tests/math/t_fp6_bn254_nogami.nim @@ -0,0 +1,26 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internals + ../../constantine/math/extension_fields, + ../../constantine/math/config/curves, + # Test utilities + ./t_fp_tower_template + +const TestCurves = [ + BN254_Nogami, + ] + +runTowerTests( + ExtDegree = 6, + Iters = 12, + TestCurves = TestCurves, + moduleName = "test_fp6_" & $BN254_Nogami, + testSuiteDesc = "𝔽p6 = 𝔽p2[w] " & $BN254_Nogami +) diff --git a/tests/math/t_fp_cubic_root.nim b/tests/math/t_fp_cubic_root.nim index ce817b3..0948378 100644 --- a/tests/math/t_fp_cubic_root.nim +++ b/tests/math/t_fp_cubic_root.nim @@ -10,7 +10,7 @@ import std/unittest, ../../constantine/platforms/abstractions, ../../constantine/math/arithmetic, ../../constantine/math/config/curves, - ../../constantine/math/curves/zoo_endomorphisms + ../../constantine/math/constants/zoo_endomorphisms echo "\n------------------------------------------------------\n" diff --git a/tests/math/t_fp_tower_template.nim b/tests/math/t_fp_tower_template.nim index 1cf48c3..9bd6143 100644 --- a/tests/math/t_fp_tower_template.nim +++ b/tests/math/t_fp_tower_template.nim @@ -260,11 +260,11 @@ proc runTowerTests*[N]( test(ExtField(ExtDegree, curve)): r.prod(x, Z) doAssert bool(r == Z), - "\nExpected zero but got (" & $ExtField(ExtDegree, curve) & "): " & x.toHex() + "\nExpected zero but got \n(" & $ExtField(ExtDegree, curve) & "): " & x.toHex() test(ExtField(ExtDegree, curve)): r.prod(Z, x) doAssert bool(r == Z), - "\nExpected zero but got (" & $ExtField(ExtDegree, curve) & "): " & x.toHex() + "\nExpected zero but got \n(" & $ExtField(ExtDegree, curve) & "): " & x.toHex() test(ExtField(ExtDegree, curve)): r.prod(x, O) doAssert bool(r == x), @@ -285,7 +285,7 @@ proc runTowerTests*[N]( rMul.prod(a, a) rSqr.square(a) - doAssert bool(rMul == rSqr), "Failure with a (" & $Field & "): " & a.toHex() & "\n" & + doAssert bool(rMul == rSqr), "Failure with a (" & $Field & "): \nInput:" & a.toHex() & "\n" & "Mul: " & rMul.toHex() & "\n" & "Sqr: " & rSqr.toHex() & "\n" @@ -306,7 +306,7 @@ proc runTowerTests*[N]( rSqr.square(a) rNegSqr.square(na) - doAssert bool(rSqr == rNegSqr), "Failure with a (" & $Field & "): " & a.toHex() & "\n" & + doAssert bool(rSqr == rNegSqr), "Failure with a \n(" & $Field & "): " & a.toHex() & "\n" & "Sqr: " & rSqr.toHex() & "\n" & "SqrNeg: " & rNegSqr.toHex() & "\n" diff --git a/tests/math/t_pairing_mul_fp12_by_lines.nim b/tests/math/t_pairing_mul_fp12_by_lines.nim index 61eec8d..1593b06 100644 --- a/tests/math/t_pairing_mul_fp12_by_lines.nim +++ b/tests/math/t_pairing_mul_fp12_by_lines.nim @@ -56,7 +56,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi let y = rng.random_elem(Fp2[C], gen) let b = Fp4[C](coords: [Fp2[C](), y]) - var r {.noInit.}, r2 {.noInit.}: Fp4[C] + var r, r2: Fp4[C] r.prod(a, b) r2.mul_sparse_by_0y(a, y) @@ -75,7 +75,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi let y = rng.random_elem(Fp2[C], gen) let b = Fp6[C](coords: [Fp2[C](), y, Fp2[C]()]) - var r {.noInit.}, r2 {.noInit.}: Fp6[C] + var r, r2: Fp6[C] r.prod(a, b) r2.mul_sparse_by_0y0(a, y) @@ -95,10 +95,10 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi let y = rng.random_elem(Fp2[C], gen) let b = Fp6[C](coords: [x, y, Fp2[C]()]) - var r {.noInit.}, r2 {.noInit.}: Fp6[C] + var r, r2: Fp6[C] r.prod(a, b) - r2.mul_by_line_xy0(a, x, y) + r2.mul_sparse_by_xy0(a, x, y) check: bool(r == r2) @@ -107,58 +107,247 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi test_fp6_xy0(curve, gen = HighHammingWeight) test_fp6_xy0(curve, gen = Long01Sequence) + test "Dense 𝔽p6 by Sparse 0yz": + proc test_fp6_0yz(C: static Curve, gen: static RandomGen) = + for _ in 0 ..< Iters: + let a = rng.random_elem(Fp6[C], gen) + let y = rng.random_elem(Fp2[C], gen) + let z = rng.random_elem(Fp2[C], gen) + let b = Fp6[C](coords: [Fp2[C](), y, z]) + + var r, r2: Fp6[C] + + r.prod(a, b) + r2.mul_sparse_by_0yz(a, y, z) + + check: bool(r == r2) + + staticFor(curve, TestCurves): + test_fp6_0yz(curve, gen = Uniform) + test_fp6_0yz(curve, gen = HighHammingWeight) + test_fp6_0yz(curve, gen = Long01Sequence) + when Fp12[BN254_Snarks]().c0.typeof is Fp6: - test "Sparse 𝔽p12/𝔽p6 resulting from ab00c0 line function": - proc test_fp12_ab00c0(C: static Curve, gen: static RandomGen) = - for _ in 0 ..< Iters: - var a = rng.random_elem(Fp12[C], gen) - var a2 = a + # =========== Towering 𝔽p12/𝔽p6 ====================================== - var x = rng.random_elem(Fp2[C], gen) - var y = rng.random_elem(Fp2[C], gen) - var z = rng.random_elem(Fp2[C], gen) + test "Sparse 𝔽p12/𝔽p6 resulting from a00bc0 line function": + proc test_fp12_a00bc0(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == D_Twist: + for _ in 0 ..< Iters: + var a = rng.random_elem(Fp12[C], gen) + var a2 = a - let line = Line[Fp2[C]](a: x, b: y, c: z) - let b = Fp12[C]( - c0: Fp6[C](coords: [ x, y, Fp2[C]()]), - c1: Fp6[C](coords: [Fp2[C](), z, Fp2[C]()]) - ) + var x = rng.random_elem(Fp2[C], gen) + var y = rng.random_elem(Fp2[C], gen) + var z = rng.random_elem(Fp2[C], gen) - a *= b - a2.mul_sparse_by_line_ab00c0(line) + let line = Line[Fp2[C]](a: x, b: y, c: z) + let b = Fp12[C]( coords: [ + Fp6[C](coords: [ x, Fp2[C](), Fp2[C]()]), + Fp6[C](coords: [ y, z, Fp2[C]()]) + ]) - check: bool(a == a2) + a *= b + a2.mul_sparse_by_line_a00bc0(line) + + check: bool(a == a2) + + staticFor(curve, TestCurves): + test_fp12_a00bc0(curve, gen = Uniform) + test_fp12_a00bc0(curve, gen = HighHammingWeight) + test_fp12_a00bc0(curve, gen = Long01Sequence) + + test "Sparse 𝔽p12/𝔽p6 resulting from cb00a0 line function": + proc test_fp12_cb00a0(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == M_Twist: + for _ in 0 ..< Iters: + var a = rng.random_elem(Fp12[C], gen) + var a2 = a + + var x = rng.random_elem(Fp2[C], gen) + var y = rng.random_elem(Fp2[C], gen) + var z = rng.random_elem(Fp2[C], gen) + + let line = Line[Fp2[C]](a: x, b: y, c: z) + let b = Fp12[C](coords: [ + Fp6[C](coords: [ z, y, Fp2[C]()]), + Fp6[C](coords: [Fp2[C](), x, Fp2[C]()]) + ]) + + a *= b + a2.mul_sparse_by_line_cb00a0(line) + + check: bool(a == a2) + + staticFor(curve, TestCurves): + test_fp12_cb00a0(curve, gen = Uniform) + test_fp12_cb00a0(curve, gen = HighHammingWeight) + test_fp12_cb00a0(curve, gen = Long01Sequence) + + test "Somewhat-sparse 𝔽p12/𝔽p6 resulting from a00bc0*a00bc0 line functions (D-twist only)": + proc test_fp12_a00bc0_a00bc0(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == D_Twist: + for _ in 0 ..< Iters: + var x0 = rng.random_elem(Fp2[C], gen) + var y0 = rng.random_elem(Fp2[C], gen) + var z0 = rng.random_elem(Fp2[C], gen) + + let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0) + let f0 = Fp12[C]( coords: [ + Fp6[C](coords: [ x0, Fp2[C](), Fp2[C]()]), + Fp6[C](coords: [ y0, z0, Fp2[C]()]) + ]) + + + var x1 = rng.random_elem(Fp2[C], gen) + var y1 = rng.random_elem(Fp2[C], gen) + var z1 = rng.random_elem(Fp2[C], gen) + + let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1) + let f1 = Fp12[C]( coords: [ + Fp6[C](coords: [ x1, Fp2[C](), Fp2[C]()]), + Fp6[C](coords: [ y1, z1, Fp2[C]()]) + ]) + + var r: Fp12[C] + r.prod(f0, f1) + + var rl: Fp12[C] + rl.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1) + + check: bool(r == rl) staticFor(curve, TestCurves): - test_fp12_xy00z0(curve, gen = Uniform) - test_fp12_xy00z0(curve, gen = HighHammingWeight) - test_fp12_xy00z0(curve, gen = Long01Sequence) + test_fp12_a00bc0_a00bc0(curve, gen = Uniform) + test_fp12_a00bc0_a00bc0(curve, gen = HighHammingWeight) + test_fp12_a00bc0_a00bc0(curve, gen = Long01Sequence) - test "Sparse 𝔽p12/𝔽p6 resulting from acb000 line function": - proc test_fp12_acb000(C: static Curve, gen: static RandomGen) = - for _ in 0 ..< Iters: - var a = rng.random_elem(Fp12[C], gen) - var a2 = a + test "Somewhat-sparse 𝔽p12/𝔽p6 resulting from cb00a0*cb00a0 line functions (M-twist only)": + proc test_fp12_cb00a0_cb00a0(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == M_Twist: + for _ in 0 ..< Iters: + var x0 = rng.random_elem(Fp2[C], gen) + var y0 = rng.random_elem(Fp2[C], gen) + var z0 = rng.random_elem(Fp2[C], gen) - var x = rng.random_elem(Fp2[C], gen) - var y = rng.random_elem(Fp2[C], gen) - var z = rng.random_elem(Fp2[C], gen) + let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0) + let f0 = Fp12[C](coords: [ + Fp6[C](coords: [ z0, y0, Fp2[C]()]), + Fp6[C](coords: [Fp2[C](), x0, Fp2[C]()]) + ]) - let line = Line[Fp2[C]](a: x, b: y, c: z) - let b = Fp12[C]( - c0: Fp6[C](coords: [x, z, y]) - ) + var x1 = rng.random_elem(Fp2[C], gen) + var y1 = rng.random_elem(Fp2[C], gen) + var z1 = rng.random_elem(Fp2[C], gen) - a *= b - a2.mul_sparse_by_line_xyz000(line) + let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1) + let f1 = Fp12[C](coords: [ + Fp6[C](coords: [ z1, y1, Fp2[C]()]), + Fp6[C](coords: [Fp2[C](), x1, Fp2[C]()]) + ]) - check: bool(a == a2) + var r: Fp12[C] + r.prod(f0, f1) + + var rl: Fp12[C] + rl.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1) + + check: bool(r == rl) staticFor(curve, TestCurves): - test_fp12_acb000(curve, gen = Uniform) - test_fp12_acb000(curve, gen = HighHammingWeight) - test_fp12_acb000(curve, gen = Long01Sequence) - else: + test_fp12_cb00a0_cb00a0(curve, gen = Uniform) + test_fp12_cb00a0_cb00a0(curve, gen = HighHammingWeight) + test_fp12_cb00a0_cb00a0(curve, gen = Long01Sequence) + + test "Somewhat-sparse 𝔽p12/𝔽p6 mul by the product a00bc0*a00bc0 of line functions (D-twist only)": + proc test_fp12_abcdefghij00(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == D_Twist: + for _ in 0 ..< Iters: + var x0 = rng.random_elem(Fp2[C], gen) + var y0 = rng.random_elem(Fp2[C], gen) + var z0 = rng.random_elem(Fp2[C], gen) + + let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0) + let f0 = Fp12[C]( coords: [ + Fp6[C](coords: [ x0, Fp2[C](), Fp2[C]()]), + Fp6[C](coords: [ y0, z0, Fp2[C]()]) + ]) + + + var x1 = rng.random_elem(Fp2[C], gen) + var y1 = rng.random_elem(Fp2[C], gen) + var z1 = rng.random_elem(Fp2[C], gen) + + let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1) + let f1 = Fp12[C]( coords: [ + Fp6[C](coords: [ x1, Fp2[C](), Fp2[C]()]), + Fp6[C](coords: [ y1, z1, Fp2[C]()]) + ]) + + + var r: Fp12[C] + r.prod(f0, f1) + + var rl: Fp12[C] + rl.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1) + + var f = rng.random_elem(Fp12[C], gen) + var f2 = f + + f *= rl + f2.mul_sparse_by_abcdefghij00_quad_over_cube(rl) + + check: bool(f == f2) + + staticFor(curve, TestCurves): + test_fp12_abcdefghij00(curve, gen = Uniform) + test_fp12_abcdefghij00(curve, gen = HighHammingWeight) + test_fp12_abcdefghij00(curve, gen = Long01Sequence) + + test "Somewhat-sparse 𝔽p12/𝔽p6 mul by the product (cb00a0*cb00a0) of line functions (M-twist only)": + proc test_fp12_abcdef00ghij(C: static Curve, gen: static RandomGen) = + when C.getSexticTwist() == M_Twist: + for _ in 0 ..< Iters: + var x0 = rng.random_elem(Fp2[C], gen) + var y0 = rng.random_elem(Fp2[C], gen) + var z0 = rng.random_elem(Fp2[C], gen) + + let line0 = Line[Fp2[C]](a: x0, b: y0, c: z0) + let f0 = Fp12[C](coords: [ + Fp6[C](coords: [ z0, y0, Fp2[C]()]), + Fp6[C](coords: [Fp2[C](), x0, Fp2[C]()]) + ]) + + var x1 = rng.random_elem(Fp2[C], gen) + var y1 = rng.random_elem(Fp2[C], gen) + var z1 = rng.random_elem(Fp2[C], gen) + + let line1 = Line[Fp2[C]](a: x1, b: y1, c: z1) + let f1 = Fp12[C](coords: [ + Fp6[C](coords: [ z1, y1, Fp2[C]()]), + Fp6[C](coords: [Fp2[C](), x1, Fp2[C]()]) + ]) + + var r: Fp12[C] + r.prod(f0, f1) + + var rl: Fp12[C] + rl.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1) + + var f = rng.random_elem(Fp12[C], gen) + var f2 = f + + f *= rl + f2.mul_sparse_by_abcdef00ghij_quad_over_cube(rl) + + check: bool(f == f2) + + staticFor(curve, TestCurves): + test_fp12_abcdef00ghij(curve, gen = Uniform) + test_fp12_abcdef00ghij(curve, gen = HighHammingWeight) + test_fp12_abcdef00ghij(curve, gen = Long01Sequence) + + else: # =========== Towering 𝔽p12/𝔽p4 ====================================== static: doAssert Fp12[BN254_Snarks]().c0.typeof is Fp4 test "Sparse 𝔽p12/𝔽p4 resulting from ca00b0 line function (M-twist only)": @@ -347,7 +536,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi var f2 = f f *= rl - f2.mul_sparse_by_abcdefghij00(rl) + f2.mul_sparse_by_abcdefghij00_cube_over_quad(rl) check: bool(f == f2) @@ -396,7 +585,7 @@ suite "Pairing - Sparse 𝔽p12 multiplication by line function is consistent wi var f2 = f f *= rl - f2.mul_sparse_by_abcd00efghij(rl) + f2.mul_sparse_by_abcd00efghij_cube_over_quad(rl) check: bool(f == f2) diff --git a/tests/math/t_pairing_template.nim b/tests/math/t_pairing_template.nim index 2ca9848..cdc1816 100644 --- a/tests/math/t_pairing_template.nim +++ b/tests/math/t_pairing_template.nim @@ -15,7 +15,7 @@ import ../../constantine/math/extension_fields, ../../constantine/math/config/curves, ../../constantine/math/elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective], - ../../constantine/math/curves/[zoo_subgroups, zoo_pairings], + ../../constantine/math/constants/[zoo_subgroups, zoo_pairings], ../../constantine/math/pairing/cyclotomic_subgroup, ../../constantine/math/io/io_extfields, diff --git a/tests/t_hash_to_curve_random.nim b/tests/t_hash_to_curve_random.nim index b529120..cc13bb1 100644 --- a/tests/t_hash_to_curve_random.nim +++ b/tests/t_hash_to_curve_random.nim @@ -15,7 +15,7 @@ import ../constantine/math/ec_shortweierstrass, ../constantine/hash_to_curve/hash_to_curve, ../constantine/hashes, - ../constantine/math/curves/zoo_subgroups, + ../constantine/math/constants/zoo_subgroups, # Test utilities ../helpers/prng_unsafe