From 82819b1b105c010e46aae0267e014beb1a993cc7 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sat, 23 Jan 2021 20:55:40 +0100 Subject: [PATCH] Square Root & Inversion addition chains - 20% perf increase (#132) * Addition chain for sqrt BLS12-381: 20% perf improvement * sqrt addchain for BN254_Snarks - 20% perf improvement as well * Fix operation count [skip ci] * BLS12-377 sqrt - 10% perf improvement * sqrt addition chain for BW6-761 - 6% speedup * BN254_Nogami inversion addchain * sqrt addchain for BN254_Nogami * Inversion addchain for BLS12-377 * inversion addition chain for BW6-761 --- benchmarks/bench_fields_template.nim | 40 +- benchmarks/bench_fp.nim | 18 +- constantine.nimble | 1 + constantine/arithmetic/finite_fields.nim | 6 + .../arithmetic/finite_fields_inversion.nim | 7 +- .../arithmetic/finite_fields_square_root.nim | 193 +++++++-- constantine/curves/bls12_377_inversion.nim | 204 ++++++++++ constantine/curves/bls12_377_sqrt.nim | 188 ++++++++- constantine/curves/bls12_381_inversion.nim | 5 +- constantine/curves/bls12_381_sqrt.nim | 223 +++++++++++ constantine/curves/bn254_nogami_inversion.nim | 98 +++++ constantine/curves/bn254_nogami_sqrt.nim | 89 +++++ constantine/curves/bn254_snarks_pairing.nim | 62 +-- constantine/curves/bn254_snarks_sqrt.nim | 158 ++++++++ constantine/curves/bw6_761_inversion.nim | 376 ++++++++++++++++++ constantine/curves/bw6_761_sqrt.nim | 373 +++++++++++++++++ constantine/curves/zoo_inversions.nim | 16 + constantine/curves/zoo_square_roots.nim | 27 +- tests/t_finite_fields_sqrt.nim | 1 + 19 files changed, 1988 insertions(+), 97 deletions(-) create mode 100644 constantine/curves/bls12_377_inversion.nim create mode 100644 constantine/curves/bls12_381_sqrt.nim create mode 100644 constantine/curves/bn254_nogami_inversion.nim create mode 100644 constantine/curves/bn254_nogami_sqrt.nim create mode 100644 constantine/curves/bn254_snarks_sqrt.nim create mode 100644 constantine/curves/bw6_761_inversion.nim create mode 100644
constantine/curves/bw6_761_sqrt.nim diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim index a1b9d28..334e7ab 100644 --- a/benchmarks/bench_fields_template.nim +++ b/benchmarks/bench_fields_template.nim @@ -22,15 +22,15 @@ import ./bench_blueprint export notes -proc separator*() = separator(145) +proc separator*() = separator(165) proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = let ns = inNanoseconds((stop-start) div iters) let throughput = 1e9 / float64(ns) when SupportsGetTicks: - echo &"{op:<50} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + echo &"{op:<70} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" else: - echo &"{op:<50} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op" + echo &"{op:<70} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op" macro fixFieldDisplay(T: typedesc): untyped = # At compile-time, enums are integers and their display is buggy @@ -93,20 +93,20 @@ proc invBench*(T: typedesc, iters: int) = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) - bench("Inversion (constant-time default method)", T, iters): + bench("Inversion (constant-time default impl)", T, iters): r.inv(x) proc invEuclidBench*(T: typedesc, iters: int) = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) - bench("Inversion via constant-time Euclid", T, iters): + bench("Inversion (constant-time Euclid)", T, iters): r.inv_euclid(x) proc invPowFermatBench*(T: typedesc, iters: int) = let x = rng.random_unsafe(T) const exponent = T.getInvModExponent() - bench("Inversion via exponentiation p-2 (Little Fermat)", T, iters): + bench("Inversion (exponentiation p-2, Little Fermat)", T, iters): var r = x r.powUnsafeExponent(exponent) @@ -114,15 +114,39 @@ proc invAddChainBench*(T: typedesc, iters: int) = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) - 
bench("Inversion via addition chain", T, iters): + bench("Inversion (addition chain)", T, iters): r.inv_addchain(x) proc sqrtBench*(T: typedesc, iters: int) = let x = rng.random_unsafe(T) - bench("Square Root + square check (constant-time)", T, iters): + bench("Square Root + isSquare (constant-time default impl)", T, iters): var r = x discard r.sqrt_if_square() +proc sqrtP3mod4Bench*(T: typedesc, iters: int) = + let x = rng.random_unsafe(T) + bench("SquareRoot + isSquare (p ≡ 3 (mod 4) exponentiation)", T, iters): + var r = x + discard r.sqrt_if_square_p3mod4() + +proc sqrtAddChainBench*(T: typedesc, iters: int) = + let x = rng.random_unsafe(T) + bench("SquareRoot + isSquare (addition chain)", T, iters): + var r = x + discard r.sqrt_if_square_addchain() + +proc sqrtTonelliBench*(T: typedesc, iters: int) = + let x = rng.random_unsafe(T) + bench("SquareRoot + isSquare (constant-time Tonelli-Shanks exponentiation)", T, iters): + var r = x + discard r.sqrt_if_square_tonelli_shanks(useAddChain = false) + +proc sqrtTonelliAddChainBench*(T: typedesc, iters: int) = + let x = rng.random_unsafe(T) + bench("SquareRoot + isSquare (constant-time Tonelli-Shanks addchain)", T, iters): + var r = x + discard r.sqrt_if_square_tonelli_shanks(useAddChain = true) + proc powBench*(T: typedesc, iters: int) = let x = rng.random_unsafe(T) let exponent = rng.random_unsafe(BigInt[T.C.getCurveOrderBitwidth()]) diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim index c76c3f6..8b9d53c 100644 --- a/benchmarks/bench_fp.nim +++ b/benchmarks/bench_fp.nim @@ -8,9 +8,10 @@ import # Internals - ../constantine/config/curves, + ../constantine/config/[curves, common], ../constantine/arithmetic, ../constantine/io/io_bigints, + ../constantine/curves/[zoo_inversions, zoo_square_roots], # Helpers ../helpers/static_for, ./bench_fields_template, @@ -24,8 +25,8 @@ import # ############################################################ -const Iters = 1_000_000 -const ExponentIters = 1000 +const Iters = 
100_000 +const ExponentIters = 100 const AvailableCurves = [ # P224, BN254_Nogami, @@ -35,6 +36,7 @@ const AvailableCurves = [ # Secp256k1, BLS12_377, BLS12_381, + BW6_761 ] proc main() = @@ -50,9 +52,15 @@ proc main() = sqrBench(Fp[curve], Iters) invEuclidBench(Fp[curve], ExponentIters) invPowFermatBench(Fp[curve], ExponentIters) - when curve in {BN254_Snarks, BLS12_381}: + when curve.hasInversionAddchain(): invAddChainBench(Fp[curve], ExponentIters) - sqrtBench(Fp[curve], ExponentIters) + when (BaseType(curve.Mod.limbs[0]) and 3) == 3: + sqrtP3mod4Bench(Fp[curve], ExponentIters) + when curve.hasSqrtAddchain(): + sqrtAddChainBench(Fp[curve], ExponentIters) + when curve in {BLS12_377}: + sqrtTonelliBench(Fp[curve], ExponentIters) + sqrtTonelliAddChainBench(Fp[curve], ExponentIters) # Exponentiation by a "secret" of size ~the curve order powBench(Fp[curve], ExponentIters) powUnsafeBench(Fp[curve], ExponentIters) diff --git a/constantine.nimble b/constantine.nimble index 7b81384..f664d7f 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -218,6 +218,7 @@ proc test(flags, path: string, commandFile = false) = exec command else: exec "echo \'" & command & "\' >> " & buildParallel + exec "echo \"------------------------------------------------------\"" proc buildBench(benchName: string, compiler = "", useAsm = true, run = false) = if not dirExists "build": diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim index 4597183..2933064 100644 --- a/constantine/arithmetic/finite_fields.nim +++ b/constantine/arithmetic/finite_fields.nim @@ -386,6 +386,12 @@ func square_repeated*(r: var FF, num: int) {.inline.} = for _ in 0 ..< num: r.square() +func square_repeated*(r: var FF, a: FF, num: int) {.inline.} = + ## Repeated squarings + r.square(a) + for _ in 1 ..< num: + r.square() + func `*=`*(a: var FF, b: static int) {.inline.} = ## Multiplication by a small integer known at compile-time # Implementation: diff --git 
a/constantine/arithmetic/finite_fields_inversion.nim b/constantine/arithmetic/finite_fields_inversion.nim index 51ee7b2..f26c803 100644 --- a/constantine/arithmetic/finite_fields_inversion.nim +++ b/constantine/arithmetic/finite_fields_inversion.nim @@ -36,7 +36,7 @@ func inv*(r: var Fp, a: Fp) {.inline.} = # neither for Secp256k1 nor BN curves # Performance is slower than GCD # To be revisited with faster squaring/multiplications - when Fp.C in {BN254_Snarks, BLS12_381}: + when Fp.C.hasInversionAddchain(): r.inv_addchain(a) else: r.inv_euclid(a) @@ -48,10 +48,7 @@ func inv*(a: var Fp) {.inline.} = ## Incidentally this avoids extra check ## to convert Jacobian and Projective coordinates ## to affine for elliptic curve - # For now we don't activate the addition chains - # for Secp256k1 nor BN curves - # Performance is slower than GCD - when Fp.C in {BN254_Snarks, BLS12_381}: + when Fp.C.hasInversionAddchain(): a.inv_addchain(a) else: a.inv_euclid(a) diff --git a/constantine/arithmetic/finite_fields_square_root.nim b/constantine/arithmetic/finite_fields_square_root.nim index c022b3f..5c66803 100644 --- a/constantine/arithmetic/finite_fields_square_root.nim +++ b/constantine/arithmetic/finite_fields_square_root.nim @@ -46,6 +46,10 @@ func isSquare*(a: Fp): SecretBool {.inline.} = # Specialized routine for p ≡ 3 (mod 4) # ------------------------------------------------------------ +func hasP3mod4_primeModulus(C: static Curve): static bool = + ## Returns true iff p ≡ 3 (mod 4) + (BaseType(C.Mod.limbs[0]) and 3) == 3 + func sqrt_p3mod4(a: var Fp) {.inline.} = ## Compute the square root of ``a`` ## @@ -93,7 +97,7 @@ func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.i test.square(sqrt) result = test == a -func sqrt_if_square_p3mod4(a: var Fp): SecretBool {.inline.} = +func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} = ## If ``a`` is a square, compute the square root of ``a`` ## if not, ``a`` is unmodified. 
## @@ -108,14 +112,60 @@ func sqrt_if_square_p3mod4(a: var Fp): SecretBool {.inline.} = result = sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt, a) a.ccopy(sqrt, result) +# Specialized routines for addchain-based square roots +# ------------------------------------------------------------ + +func sqrt_addchain(a: var Fp) {.inline.} = + ## Compute the square root of ``a`` + ## + ## This requires ``a`` to be a square + ## The result is undefined otherwise + ## + ## The square root, if it exist is multivalued, + ## i.e. both x² == (-x)² + ## This procedure returns a deterministic result + var invsqrt {.noInit.}: Fp + invsqrt.invsqrt_addchain(a) + a *= invsqrt + +func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) {.inline.} = + ## If ``a`` is a square, compute the square root of ``a`` in sqrt + ## and the inverse square root of a in invsqrt + invsqrt.invsqrt_addchain(a) + sqrt.prod(invsqrt, a) + +func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} = + ## If ``a`` is a square, compute the square root of ``a`` in sqrt + ## and the inverse square root of a in invsqrt + ## + ## If a is not square, sqrt and invsqrt are undefined + sqrt_invsqrt_addchain(sqrt, invsqrt, a) + var test {.noInit.}: Fp + test.square(sqrt) + result = test == a + +func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} = + ## If ``a`` is a square, compute the square root of ``a`` + ## if not, ``a`` is unmodified. + ## + ## The square root, if it exist is multivalued, + ## i.e. 
both x² == (-x)² + ## This procedure returns a deterministic result + var sqrt {.noInit.}, invsqrt {.noInit.}: Fp + result = sqrt_invsqrt_if_square_addchain(sqrt, invsqrt, a) + a.ccopy(sqrt, result) + # Tonelli Shanks for any prime # ------------------------------------------------------------ func precompute_tonelli_shanks( a_pre_exp: var Fp, - a: Fp) = + a: Fp, useAddChain: static bool) = a_pre_exp = a - a_pre_exp.powUnsafeExponent(Fp.C.tonelliShanks(exponent)) + when useAddChain: + a_pre_exp.precompute_tonelli_shanks_addchain(a) + else: + a_pre_exp.powUnsafeExponent(Fp.C.tonelliShanks(exponent)) func isSquare_tonelli_shanks( a, a_pre_exp: Fp): SecretBool = @@ -126,10 +176,9 @@ func isSquare_tonelli_shanks( ## a^((p-1-2^e)/(2*2^e)) const e = Fp.C.tonelliShanks(twoAdicity) var r {.noInit.}: Fp - r.square(a_pre_exp) # a^(2(q-1-2^e)/(2*2^e)) = a^((q-1)/2^e - 1) - r *= a # a^((q-1)/2^e) - for _ in 0 ..< e-1: - r.square() # a^((q-1)/2) + r.square(a_pre_exp) # a^(2(q-1-2^e)/(2*2^e)) = a^((q-1)/2^e - 1) + r *= a # a^((q-1)/2^e) + r.square_repeated(e-1) # a^((q-1)/2) result = not(r.isMinusOne()) # r can be: @@ -143,14 +192,14 @@ func isSquare_tonelli_shanks( r.isMinusOne() ) -func sqrt_invsqrt_tonelli_shanks( +func sqrt_invsqrt_tonelli_shanks_pre( sqrt, invsqrt: var Fp, a, a_pre_exp: Fp) = ## Compute the square_root and inverse_square_root ## of `a` via constant-time Tonelli-Shanks ## ## a_pre_exp is a precomputation a^((p-1-2^e)/(2*2^e)) - ## ThItat is shared with the simultaneous isSquare routine + ## That is shared with the simultaneous isSquare routine template z: untyped = a_pre_exp template r: untyped = invsqrt var t {.noInit.}: Fp @@ -165,8 +214,7 @@ func sqrt_invsqrt_tonelli_shanks( var buf {.noInit.}: Fp for i in countdown(e, 2, 1): - for j in 1 .. 
i-2: - b.square() + b.square_repeated(i-2) let bNotOne = not b.isOne() buf.prod(r, root) @@ -178,8 +226,72 @@ func sqrt_invsqrt_tonelli_shanks( sqrt.prod(invsqrt, a) +# ---------------------------------------------- + +func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} = + ## Compute the square root of ``a`` + ## + ## This requires ``a`` to be a square + ## + ## The result is undefined otherwise + ## + ## The square root, if it exist is multivalued, + ## i.e. both x² == (-x)² + ## This procedure returns a deterministic result + ## This procedure is constant-time + var a_pre_exp{.noInit.}, sqrt{.noInit.}, invsqrt{.noInit.}: Fp + a_pre_exp.precompute_tonelli_shanks(a, useAddChain) + sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) + a = sqrt + +func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) {.inline.} = + ## Compute the square root and inverse square root of ``a`` + ## + ## This requires ``a`` to be a square + ## + ## The result is undefined otherwise + ## + ## The square root, if it exist is multivalued, + ## i.e. both x² == (-x)² + ## This procedure returns a deterministic result + var a_pre_exp{.noInit.}: Fp + a_pre_exp.precompute_tonelli_shanks(a, useAddChain) + sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) + +func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool {.inline.} = + ## Compute the square root and inverse square root of ``a`` + ## + ## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root + ## + ## The result is undefined otherwise + ## + ## The square root, if it exist is multivalued, + ## i.e.
both x² == (-x)² + ## This procedure returns a deterministic result + var a_pre_exp{.noInit.}: Fp + a_pre_exp.precompute_tonelli_shanks(a, useAddChain) + result = isSquare_tonelli_shanks(a, a_pre_exp) + sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) + # `a` is a read-only input: the root is returned via `sqrt`, not written back + +func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool {.inline.} = + ## If ``a`` is a square, compute the square root of ``a`` + ## if not, ``a`` is unmodified. + ## + ## The square root, if it exist is multivalued, + ## i.e. both x² == (-x)² + ## This procedure returns a deterministic result + ## This procedure is constant-time + var a_pre_exp{.noInit.}, sqrt{.noInit.}, invsqrt{.noInit.}: Fp + a_pre_exp.precompute_tonelli_shanks(a, useAddChain) + result = isSquare_tonelli_shanks(a, a_pre_exp) + sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) + a.ccopy(sqrt, result) + # Public routines # ------------------------------------------------------------ +# Note: we export the inner sqrt_invsqrt_IMPL +# for benchmarking purposes. func sqrt*[C](a: var Fp[C]) {.inline.} = ## Compute the square root of ``a`` @@ -192,30 +304,12 @@ func sqrt*[C](a: var Fp[C]) {.inline.} = ## i.e. both x² == (-x)² ## This procedure returns a deterministic result ## This procedure is constant-time - when (BaseType(C.Mod.limbs[0]) and 3) == 3: + when C.hasSqrtAddchain(): + sqrt_addchain(a) + elif C.hasP3mod4_primeModulus(): sqrt_p3mod4(a) else: - var a_pre_exp{.noInit.}, sqrt{.noInit.}, invsqrt{.noInit.}: Fp[C] - a_pre_exp.precompute_tonelli_shanks(a) - sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, a_pre_exp) - a = sqrt - -func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} = - ## If ``a`` is a square, compute the square root of ``a`` - ## if not, ``a`` is unmodified. - ## - ## The square root, if it exist is multivalued, - ## i.e.
both x² == (-x)² - ## This procedure returns a deterministic result - ## This procedure is constant-time - when (BaseType(C.Mod.limbs[0]) and 3) == 3: - result = sqrt_if_square_p3mod4(a) - else: - var a_pre_exp{.noInit.}, sqrt{.noInit.}, invsqrt{.noInit.}: Fp[C] - a_pre_exp.precompute_tonelli_shanks(a) - result = isSquare_tonelli_shanks(a, a_pre_exp) - sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, a_pre_exp) - a = sqrt + sqrt_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain()) func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} = ## Compute the square root and inverse square root of ``a`` @@ -227,12 +321,12 @@ func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} = ## The square root, if it exist is multivalued, ## i.e. both x² == (-x)² ## This procedure returns a deterministic result - when (BaseType(C.Mod.limbs[0]) and 3) == 3: + when C.hasSqrtAddchain(): + sqrt_invsqrt_addchain(sqrt, invsqrt, a) + elif C.hasP3mod4_primeModulus(): sqrt_invsqrt_p3mod4(sqrt, invsqrt, a) else: - var a_pre_exp{.noInit.}: Fp[C] - a_pre_exp.precompute_tonelli_shanks(a) - sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, a_pre_exp) + sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain()) func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool {.inline.} = ## Compute the square root and ivnerse square root of ``a`` @@ -244,11 +338,24 @@ func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool ## The square root, if it exist is multivalued, ## i.e. 
both x² == (-x)² ## This procedure returns a deterministic result - when (BaseType(C.Mod.limbs[0]) and 3) == 3: + when C.hasSqrtAddchain(): + result = sqrt_invsqrt_if_square_addchain(sqrt, invsqrt, a) + elif C.hasP3mod4_primeModulus(): result = sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt, a) else: - var a_pre_exp{.noInit.}: Fp[C] - a_pre_exp.precompute_tonelli_shanks(a) - result = isSquare_tonelli_shanks(a, a_pre_exp) - sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, a_pre_exp) - a = sqrt + result = sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain()) + +func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} = + ## If ``a`` is a square, compute the square root of ``a`` + ## if not, ``a`` is unmodified. + ## + ## The square root, if it exist is multivalued, + ## i.e. both x² == (-x)² + ## This procedure returns a deterministic result + ## This procedure is constant-time + when C.hasSqrtAddchain(): + result = sqrt_if_square_addchain(a) + elif C.hasP3mod4_primeModulus(): + result = sqrt_if_square_p3mod4(a) + else: + result = sqrt_if_square_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain()) diff --git a/constantine/curves/bls12_377_inversion.nim b/constantine/curves/bls12_377_inversion.nim new file mode 100644 index 0000000..c4c682f --- /dev/null +++ b/constantine/curves/bls12_377_inversion.nim @@ -0,0 +1,204 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized inversion for BLS12-377 +# +# ############################################################ + +func inv_addchain*(r: var Fp[BLS12_377], a: Fp[BLS12_377]) = + let a = a # ensure a.inv_addchain(a) is OK + + var + x10 {.noInit.}: Fp[BLS12_377] + x11 {.noInit.}: Fp[BLS12_377] + x100 {.noInit.}: Fp[BLS12_377] + x101 {.noInit.}: Fp[BLS12_377] + x111 {.noInit.}: Fp[BLS12_377] + x1001 {.noInit.}: Fp[BLS12_377] + x1011 {.noInit.}: Fp[BLS12_377] + x1111 {.noInit.}: Fp[BLS12_377] + x10001 {.noInit.}: Fp[BLS12_377] + x10011 {.noInit.}: Fp[BLS12_377] + x10111 {.noInit.}: Fp[BLS12_377] + x11011 {.noInit.}: Fp[BLS12_377] + x11101 {.noInit.}: Fp[BLS12_377] + x11111 {.noInit.}: Fp[BLS12_377] + x110100 {.noInit.}: Fp[BLS12_377] + x11010000 {.noInit.}: Fp[BLS12_377] + x11010111 {.noInit.}: Fp[BLS12_377] + + x10 .square(a) + x11 .prod(a, x10) + x100 .prod(a, x11) + x101 .prod(a, x100) + x111 .prod(x10, x101) + x1001 .prod(x10, x111) + x1011 .prod(x10, x1001) + x1111 .prod(x100, x1011) + x10001 .prod(x10, x1111) + x10011 .prod(x10, x10001) + x10111 .prod(x100, x10011) + x11011 .prod(x100, x10111) + x11101 .prod(x10, x11011) + x11111 .prod(x10, x11101) + x110100 .prod(x10111, x11101) + x11010000 .square_repeated(x110100, 2) + x11010111 .prod(x111, x11010000) + # 18 operations + + # TODO: we can accumulate in a partially reduced + # doubled-size `r` to avoid the final substractions. + # and only reduce at the end. 
+ # This requires the number of op to be less than log2(p) == 377 + + # 18 + 18 = 36 operations + r.square_repeated(x11010111, 8) + r *= x11101 + r.square_repeated(7) + r *= x10001 + r.square() + + # 36 + 14 = 50 operations + r *= a + r.square_repeated(9) + r *= x10111 + r.square_repeated(2) + r *= x11 + + # 50 + 21 = 71 operations + r.square_repeated(6) + r *= x101 + r.square_repeated(4) + r *= a + r.square_repeated(9) + + # 71 + 13 = 84 operations + r *= x11101 + r.square_repeated(5) + r *= x1011 + r.square_repeated(5) + r *= x11 + + # 84 + 21 = 105 operations + r.square_repeated(8) + r *= x11101 + r.square() + r *= a + r.square_repeated(10) + + # 105 + 20 = 125 operations + r *= x10111 + r.square_repeated(12) + r *= x11011 + r.square_repeated(5) + r *= x101 + + # 125 + 22 = 147 operations + r.square_repeated(7) + r *= x101 + r.square_repeated(6) + r *= x1001 + r.square_repeated(7) + + # 147 + 11 = 158 operations + r *= x11101 + r.square_repeated(5) + r *= x10001 + r.square_repeated(3) + r *= x101 + + # 158 + 23 = 181 operations + r.square_repeated(8) + r *= x10001 + r.square_repeated(6) + r *= x11011 + r.square_repeated(7) + + # 181 + 19 = 200 operations + r *= x11111 + r.square_repeated(4) + r *= x11 + r.square_repeated(12) + r *= x1111 + + # 200 + 19 = 219 operations + r.square_repeated(4) + r *= x101 + r.square_repeated(8) + r *= x10011 + r.square_repeated(5) + + # 219 + 13 = 232 operations + r *= x10001 + r.square_repeated(3) + r *= x111 + r.square_repeated(7) + r *= x1111 + + # 232 + 22 = 254 operations + r.square_repeated(5) + r *= x1111 + r.square_repeated(7) + r *= x11011 + r.square_repeated(8) + + # 254 + 15 = 269 operations + r *= x10001 + r.square_repeated(6) + r *= x11111 + r.square_repeated(6) + r *= x11101 + + # 269 + 35 = 304 operations + r.square_repeated(9) + r *= x1001 + r.square_repeated(5) + r *= x1001 + r.square_repeated(19) + + # 304 + 17 = 321 operations + r *= x10111 + r.square_repeated(8) + r *= x1011 + r.square_repeated(6) + r *= x10111
+ + # 321 + 16 = 337 operations + r.square_repeated(4) + r *= x101 + r.square_repeated(4) + r *= a + r.square_repeated(6) + + # 337 + 39 = 376 operations + r *= x11 + r.square_repeated(29) + r *= a + r.square_repeated(7) + r *= x101 + + # 376 + 16 = 392 operations + r.square_repeated(9) + r *= x10001 + r.square_repeated(6) + + # 392 + 8*6 = 440 operations + for _ in 0 ..< 8: + r *= x11111 + r.square_repeated(5) + + r *= x11111 + r.square() + r *= a + # Total 443 operations diff --git a/constantine/curves/bls12_377_sqrt.nim b/constantine/curves/bls12_377_sqrt.nim index f374990..a68267a 100644 --- a/constantine/curves/bls12_377_sqrt.nim +++ b/constantine/curves/bls12_377_sqrt.nim @@ -8,7 +8,8 @@ import ../config/[curves, type_bigint, type_ff], - ../io/[io_bigints, io_fields] + ../io/[io_bigints, io_fields], + ../arithmetic/finite_fields const # with e = 2adicity @@ -18,3 +19,188 @@ const BLS12_377_TonelliShanks_exponent* = BigInt[330].fromHex"0x35c748c2f8a21d58c760b80d94292763445b3e601ea271e3de6c45f741290002e16ba88600000010a11" BLS12_377_TonelliShanks_twoAdicity* = 46 BLS12_377_TonelliShanks_root_of_unity* = Fp[BLS12_377].fromHex"0x382d3d99cdbc5d8fe9dee6aa914b0ad14fcaca7022110ec6eaa2bc56228ac41ea03d28cc795186ba6b5ef26b00bbe8" + +# ############################################################ +# +# Specialized Tonelli-Shanks for BLS12-377 +# +# ############################################################ + +func precompute_tonelli_shanks_addchain*( + r: var Fp[BLS12_377], + a: Fp[BLS12_377]) = + ## Does a^BLS12_377_TonelliShanks_exponent + ## via an addition-chain + + var + x10 {.noInit.}: Fp[BLS12_377] + x11 {.noInit.}: Fp[BLS12_377] + x100 {.noInit.}: Fp[BLS12_377] + x101 {.noInit.}: Fp[BLS12_377] + x111 {.noInit.}: Fp[BLS12_377] + x1001 {.noInit.}: Fp[BLS12_377] + x1011 {.noInit.}: Fp[BLS12_377] + x1111 {.noInit.}: Fp[BLS12_377] + x10001 {.noInit.}: Fp[BLS12_377] + x10011 {.noInit.}: Fp[BLS12_377] + x10111 {.noInit.}: Fp[BLS12_377] + x11011 {.noInit.}: Fp[BLS12_377]
+ x11101 {.noInit.}: Fp[BLS12_377] + x11111 {.noInit.}: Fp[BLS12_377] + x110100 {.noInit.}: Fp[BLS12_377] + x11010000 {.noInit.}: Fp[BLS12_377] + x11010111 {.noInit.}: Fp[BLS12_377] + + x10 .square(a) + x11 .prod(a, x10) + x100 .prod(a, x11) + x101 .prod(a, x100) + x111 .prod(x10, x101) + x1001 .prod(x10, x111) + x1011 .prod(x10, x1001) + x1111 .prod(x100, x1011) + x10001 .prod(x10, x1111) + x10011 .prod(x10, x10001) + x10111 .prod(x100, x10011) + x11011 .prod(x100, x10111) + x11101 .prod(x10, x11011) + x11111 .prod(x10, x11101) + x110100 .prod(x10111, x11101) + x11010000 .square_repeated(x110100, 2) + x11010111 .prod(x111, x11010000) + # 18 operations + + # TODO: we can accumulate in a partially reduced + # doubled-size `r` to avoid the final subtractions. + # and only reduce at the end. + # This requires the number of op to be less than log2(p) == 377 + + # 18 + 18 = 36 operations + r.square_repeated(x11010111, 8) + r *= x11101 + r.square_repeated(7) + r *= x10001 + r.square() + + # 36 + 14 = 50 operations + r *= a + r.square_repeated(9) + r *= x10111 + r.square_repeated(2) + r *= x11 + + # 50 + 21 = 71 operations + r.square_repeated(6) + r *= x101 + r.square_repeated(4) + r *= a + r.square_repeated(9) + + # 71 + 13 = 84 operations + r *= x11101 + r.square_repeated(5) + r *= x1011 + r.square_repeated(5) + r *= x11 + + # 84 + 21 = 105 operations + r.square_repeated(8) + r *= x11101 + r.square() + r *= a + r.square_repeated(10) + + # 105 + 20 = 125 operations + r *= x10111 + r.square_repeated(12) + r *= x11011 + r.square_repeated(5) + r *= x101 + + # 125 + 22 = 147 operations + r.square_repeated(7) + r *= x101 + r.square_repeated(6) + r *= x1001 + r.square_repeated(7) + + # 147 + 11 = 158 operations + r *= x11101 + r.square_repeated(5) + r *= x10001 + r.square_repeated(3) + r *= x101 + + # 158 + 23 = 181 operations + r.square_repeated(8) + r *= x10001 + r.square_repeated(6) + r *= x11011 + r.square_repeated(7) + + # 181 + 19 = 200 operations + r *= x11111 +
r.square_repeated(4) + r *= x11 + r.square_repeated(12) + r *= x1111 + + # 200 + 19 = 219 operations + r.square_repeated(4) + r *= x101 + r.square_repeated(8) + r *= x10011 + r.square_repeated(5) + + # 219 + 13 = 232 operations + r *= x10001 + r.square_repeated(3) + r *= x111 + r.square_repeated(7) + r *= x1111 + + # 232 + 22 = 254 operations + r.square_repeated(5) + r *= x1111 + r.square_repeated(7) + r *= x11011 + r.square_repeated(8) + + # 254 + 15 = 269 operations + r *= x10001 + r.square_repeated(6) + r *= x11111 + r.square_repeated(6) + r *= x11101 + + # 269 + 35 = 304 operations + r.square_repeated(9) + r *= x1001 + r.square_repeated(5) + r *= x1001 + r.square_repeated(19) + + # 304 + 17 = 321 operations + r *= x10111 + r.square_repeated(8) + r *= x1011 + r.square_repeated(6) + r *= x10111 + + # 321 + 16 = 337 operations + r.square_repeated(4) + r *= x101 + r.square_repeated(4) + r *= a + r.square_repeated(6) + + # 337 + 39 = 376 operations + r *= x11 + r.square_repeated(29) + r *= a + r.square_repeated(7) + r *= x101 + + # 376 + 10 = 386 operations + r.square_repeated(9) + r *= x10001 diff --git a/constantine/curves/bls12_381_inversion.nim b/constantine/curves/bls12_381_inversion.nim index eb555aa..590e627 100644 --- a/constantine/curves/bls12_381_inversion.nim +++ b/constantine/curves/bls12_381_inversion.nim @@ -88,7 +88,8 @@ func inv_addchain*(r: var Fp[BLS12_381], a: Fp[BLS12_381]) = x11100101 .prod(x100, x11100001) x11101011 .prod(x10100, x11010111) x11110101 .prod(x10100, x11100001) - x11111111 .prod(x10100, x11101011) # 35 operations + x11111111 .prod(x10100, x11101011) + # 35 operations # TODO: we can accumulate in a partially reduced # doubled-size `r` to avoid the final substractions.
@@ -109,7 +110,7 @@ func inv_addchain*(r: var Fp[BLS12_381], a: Fp[BLS12_381]) = r *= x11111111 r.square_repeated(7) - # 88 + 22 = 107 operations + # 85 + 22 = 107 operations r *= x1001101 r.square_repeated(9) r *= x1101001 diff --git a/constantine/curves/bls12_381_sqrt.nim b/constantine/curves/bls12_381_sqrt.nim new file mode 100644 index 0000000..0269bf6 --- /dev/null +++ b/constantine/curves/bls12_381_sqrt.nim @@ -0,0 +1,223 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized invsqrt for BLS12-381 +# +# ############################################################ + +func invsqrt_addchain*(r: var Fp[BLS12_381], a: Fp[BLS12_381]) = + var + x10 {.noInit.}: Fp[BLS12_381] + x100 {.noInit.}: Fp[BLS12_381] + x1000 {.noInit.}: Fp[BLS12_381] + x1001 {.noInit.}: Fp[BLS12_381] + x1011 {.noInit.}: Fp[BLS12_381] + x1101 {.noInit.}: Fp[BLS12_381] + x10001 {.noInit.}: Fp[BLS12_381] + x10100 {.noInit.}: Fp[BLS12_381] + x10101 {.noInit.}: Fp[BLS12_381] + x11001 {.noInit.}: Fp[BLS12_381] + x11010 {.noInit.}: Fp[BLS12_381] + x110100 {.noInit.}: Fp[BLS12_381] + x110110 {.noInit.}: Fp[BLS12_381] + x110111 {.noInit.}: Fp[BLS12_381] + x1001101 {.noInit.}: Fp[BLS12_381] + x1001111 {.noInit.}: Fp[BLS12_381] + x1010101 {.noInit.}: Fp[BLS12_381] + x1011101 {.noInit.}: Fp[BLS12_381] + x1100111 {.noInit.}: Fp[BLS12_381] + x1101001 {.noInit.}: Fp[BLS12_381] + x1110111 {.noInit.}: Fp[BLS12_381] + x1111011 {.noInit.}: 
Fp[BLS12_381] + x10001001 {.noInit.}: Fp[BLS12_381] + x10010101 {.noInit.}: Fp[BLS12_381] + x10010111 {.noInit.}: Fp[BLS12_381] + x10101001 {.noInit.}: Fp[BLS12_381] + x10110001 {.noInit.}: Fp[BLS12_381] + x10111111 {.noInit.}: Fp[BLS12_381] + x11000011 {.noInit.}: Fp[BLS12_381] + x11010000 {.noInit.}: Fp[BLS12_381] + x11010111 {.noInit.}: Fp[BLS12_381] + x11100001 {.noInit.}: Fp[BLS12_381] + x11100101 {.noInit.}: Fp[BLS12_381] + x11101011 {.noInit.}: Fp[BLS12_381] + x11110101 {.noInit.}: Fp[BLS12_381] + x11111111 {.noInit.}: Fp[BLS12_381] + + x10 .square(a) + x100 .square(x10) + x1000 .square(x100) + x1001 .prod(a, x1000) + x1011 .prod(x10, x1001) + x1101 .prod(x10, x1011) + x10001 .prod(x100, x1101) + x10100 .prod(x1001, x1011) + x10101 .prod(a, x10100) + x11001 .prod(x100, x10101) + x11010 .prod(a, x11001) + x110100 .square(x11010) + x110110 .prod(x10, x110100) + x110111 .prod(a, x110110) + x1001101 .prod(x11001, x110100) + x1001111 .prod(x10, x1001101) + x1010101 .prod(x1000, x1001101) + x1011101 .prod(x1000, x1010101) + x1100111 .prod(x11010, x1001101) + x1101001 .prod(x10, x1100111) + x1110111 .prod(x11010, x1011101) + x1111011 .prod(x100, x1110111) + x10001001 .prod(x110100, x1010101) + x10010101 .prod(x11010, x1111011) + x10010111 .prod(x10, x10010101) + x10101001 .prod(x10100, x10010101) + x10110001 .prod(x1000, x10101001) + x10111111 .prod(x110110, x10001001) + x11000011 .prod(x100, x10111111) + x11010000 .prod(x1101, x11000011) + x11010111 .prod(x10100, x11000011) + x11100001 .prod(x10001, x11010000) + x11100101 .prod(x100, x11100001) + x11101011 .prod(x10100, x11010111) + x11110101 .prod(x10100, x11100001) + x11111111 .prod(x10100, x11101011) + # 36 operations + + # TODO: we can accumulate in a partially reduced + # doubled-size `r` to avoid the final substractions. + # and only reduce at the end. 
+ # This requires the number of op to be less than log2(p) == 381 + + # 36 + 22 = 58 operations + r.prod(x10111111, x11100001) + r.square_repeated(8) + r *= x10001 + r.square_repeated(11) + r *= x11110101 + + # 58 + 28 = 86 operations + r.square_repeated(11) + r *= x11100101 + r.square_repeated(8) + r *= x11111111 + r.square_repeated(7) + + # 86 + 22 = 108 operations + r *= x1001101 + r.square_repeated(9) + r *= x1101001 + r.square_repeated(10) + r *= x10110001 + + # 108+24 = 132 operations + r.square_repeated(7) + r *= x1011101 + r.square_repeated(9) + r *= x1111011 + r.square_repeated(6) + + # 132+23 = 155 operations + r *= x11001 + r.square_repeated(11) + r *= x1101001 + r.square_repeated(9) + r *= x11101011 + + # 155+28 = 183 operations + r.square_repeated(10) + r *= x11010111 + r.square_repeated(6) + r *= x11001 + r.square_repeated(10) + + # 183+23 = 206 operations + r *= x1110111 + r.square_repeated(9) + r *= x10010111 + r.square_repeated(11) + r *= x1001111 + + # 206+30 = 236 operations + r.square_repeated(10) + r *= x11100001 + r.square_repeated(9) + r *= x10001001 + r.square_repeated(9) + + # 236+21 = 257 operations + r *= x10111111 + r.square_repeated(8) + r *= x1100111 + r.square_repeated(10) + r *= x11000011 + + # 257+28 = 285 operations + r.square_repeated(9) + r *= x10010101 + r.square_repeated(12) + r *= x1111011 + r.square_repeated(5) + + # 285 + 21 = 306 operations + r *= x1011 + r.square_repeated(11) + r *= x1111011 + r.square_repeated(7) + r *= x1001 + + # 306+32 = 338 operations + r.square_repeated(13) + r *= x11110101 + r.square_repeated(9) + r *= x10111111 + r.square_repeated(8) + + # 338+22 = 360 operations + r *= x11111111 + r.square_repeated(8) + r *= x11101011 + r.square_repeated(11) + r *= x10101001 + + # 360+24 = 384 operations + r.square_repeated(8) + r *= x11111111 + r.square_repeated(8) + r *= x11111111 + r.square_repeated(6) + + # 384+22 = 406 operations + r *= x110111 + r.square_repeated(10) + r *= x11111111 + r.square_repeated(9) + 
r *= x11111111 + + # 406+26 = 432 operations + r.square_repeated(8) + r *= x11111111 + r.square_repeated(8) + r *= x11111111 + r.square_repeated(8) + + # 432+17 = 449 operations + r *= x11111111 + r.square_repeated(7) + r *= x1010101 + r.square_repeated(6) + r *= x10101 + r.square() + + # Total 449 operations: + # - 75 multiplications + # - 374 squarings diff --git a/constantine/curves/bn254_nogami_inversion.nim b/constantine/curves/bn254_nogami_inversion.nim new file mode 100644 index 0000000..b43925d --- /dev/null +++ b/constantine/curves/bn254_nogami_inversion.nim @@ -0,0 +1,98 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized inversion for BN254-Nogami +# +# ############################################################ + +func inv_addchain*(r: var Fp[BN254_Nogami], a: Fp[BN254_Nogami]) = + var + x100 {.noInit.}: Fp[BN254_Nogami] + x1000 {.noInit.}: Fp[BN254_Nogami] + x1100 {.noInit.}: Fp[BN254_Nogami] + x1101 {.noInit.}: Fp[BN254_Nogami] + x10001 {.noInit.}: Fp[BN254_Nogami] + x100010 {.noInit.}: Fp[BN254_Nogami] + x1000100 {.noInit.}: Fp[BN254_Nogami] + x1010101 {.noInit.}: Fp[BN254_Nogami] + + x100 .square_repeated(a, 2) + x1000 .square(x100) + x1100 .prod(x100, x1000) + x1101 .prod(a, x1100) + x10001 .prod(x100, x1101) + x100010 .square(x10001) + x1000100 .square(x100010) + x1010101 .prod(x10001, x1000100) + # 9 operations + + var + r13 {.noInit.}: Fp[BN254_Nogami] + r17 {.noInit.}: Fp[BN254_Nogami] + r18 {.noInit.}: Fp[BN254_Nogami] + r23 {.noInit.}: Fp[BN254_Nogami] + r26 {.noInit.}: Fp[BN254_Nogami] + r27 {.noInit.}: Fp[BN254_Nogami] + r28 {.noInit.}: Fp[BN254_Nogami] + r36 {.noInit.}: Fp[BN254_Nogami] + r38 {.noInit.}: Fp[BN254_Nogami] + r39 {.noInit.}: Fp[BN254_Nogami] + r40 {.noInit.}: Fp[BN254_Nogami] + + r13.square_repeated(x1010101, 2) + r13 *= x100010 + r13 *= x1101 + + r17.square(r13) + r17 *= r13 + r17.square_repeated(2) + + r18.prod(r13, r17) + + r23.square_repeated(r18, 3) + r23 *= r18 + r23 *= r17 + + r26.square_repeated(r23, 2) + r26 *= r23 + + r27.prod(r23, r26) + r28.prod(r26, r27) + + r36.square(r28) + r36 *= r28 + r36.square_repeated(2) + r36 *= r28 + r36.square_repeated(3) + + r38.prod(r28, r36) + r38 *= r27 + r39.square(r38) + r40.prod(r38, r39) + + r.prod(r39, r40) + r.square_repeated(3) + r *= r40 + r.square_repeated(55) + r *= r38 + + r.square_repeated(55) + r *= r28 + r.square_repeated(56) + r *= r18 + r.square_repeated(56) + + r *= x10001 + + # Total 271 operations diff --git 
a/constantine/curves/bn254_nogami_sqrt.nim b/constantine/curves/bn254_nogami_sqrt.nim new file mode 100644 index 0000000..4ae7a1e --- /dev/null +++ b/constantine/curves/bn254_nogami_sqrt.nim @@ -0,0 +1,89 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized invsqrt for BN254-Nogami +# +# ############################################################ + +func invsqrt_addchain*(r: var Fp[BN254_Nogami], a: Fp[BN254_Nogami]) = + var + x10 {.noInit.}: Fp[BN254_Nogami] + x11 {.noInit.}: Fp[BN254_Nogami] + + x10 .square(a) + x11 .prod(a, x10) + # 2 operations + + var + r10 {.noInit.}: Fp[BN254_Nogami] + r14 {.noInit.}: Fp[BN254_Nogami] + r15 {.noInit.}: Fp[BN254_Nogami] + r20 {.noInit.}: Fp[BN254_Nogami] + r23 {.noInit.}: Fp[BN254_Nogami] + r24 {.noInit.}: Fp[BN254_Nogami] + r25 {.noInit.}: Fp[BN254_Nogami] + r33 {.noInit.}: Fp[BN254_Nogami] + r35 {.noInit.}: Fp[BN254_Nogami] + r36 {.noInit.}: Fp[BN254_Nogami] + r37 {.noInit.}: Fp[BN254_Nogami] + r98 {.noInit.}: Fp[BN254_Nogami] + r263 {.noInit.}: Fp[BN254_Nogami] + + r10.square_repeated(x11, 7) + r10 *= x11 + + r14.square(r10) + r14 *= r10 + r14.square_repeated(2) + + r15.prod(r10, r14) + + r20.square_repeated(r15, 3) + r20 *= r15 + r20 *= r14 + + r23.square_repeated(r20, 2) + r23 *= r20 + + r24.prod(r20, r23) + r25.prod(r23, r24) + + r33.square(r25) + r33 *= r25 + r33.square_repeated(2) + r33 *= r25 + r33.square_repeated(3) + + r35.prod(r25, r33) +
r35 *= r24 + + r36.square(r35) + r37.prod(r35, r36) + + r.prod(r36, r37) + r.square_repeated(3) + r *= r37 + r.square_repeated(55) + r *= r35 + + r.square_repeated(55) + r *= r25 + r.square_repeated(56) + r *= r15 + r.square_repeated(52) + + r *= a + r.square_repeated(2) + + # Total 266 operations diff --git a/constantine/curves/bn254_snarks_pairing.nim b/constantine/curves/bn254_snarks_pairing.nim index 7145eef..c9b3669 100644 --- a/constantine/curves/bn254_snarks_pairing.nim +++ b/constantine/curves/bn254_snarks_pairing.nim @@ -71,44 +71,44 @@ func pow_u*(r: var Fp12[BN254_Snarks], a: Fp12[BN254_Snarks], invert = BN254_Sna x10001110 .prod(x10110, x1111000) var - i15 {.noInit.}: Fp12[BN254_Snarks] - i16 {.noInit.}: Fp12[BN254_Snarks] - i17 {.noInit.}: Fp12[BN254_Snarks] - i18 {.noInit.}: Fp12[BN254_Snarks] - i20 {.noInit.}: Fp12[BN254_Snarks] - i21 {.noInit.}: Fp12[BN254_Snarks] - i22 {.noInit.}: Fp12[BN254_Snarks] - i26 {.noInit.}: Fp12[BN254_Snarks] - i27 {.noInit.}: Fp12[BN254_Snarks] - i61 {.noInit.}: Fp12[BN254_Snarks] + r15 {.noInit.}: Fp12[BN254_Snarks] + r16 {.noInit.}: Fp12[BN254_Snarks] + r17 {.noInit.}: Fp12[BN254_Snarks] + r18 {.noInit.}: Fp12[BN254_Snarks] + r20 {.noInit.}: Fp12[BN254_Snarks] + r21 {.noInit.}: Fp12[BN254_Snarks] + r22 {.noInit.}: Fp12[BN254_Snarks] + r26 {.noInit.}: Fp12[BN254_Snarks] + r27 {.noInit.}: Fp12[BN254_Snarks] + r61 {.noInit.}: Fp12[BN254_Snarks] - i15.cyclotomic_square(x10001110) - i15 *= x1001010 - i16.prod(x10001110, i15) - i17.prod(x1111, i16) - i18.prod(i16, i17) + r15.cyclotomic_square(x10001110) + r15 *= x1001010 + r16.prod(x10001110, r15) + r17.prod(x1111, r16) + r18.prod(r16, r17) - i20.cyclotomic_square(i18) - i20 *= i17 - i21.prod(x1111000, i20) - i22.prod(i15, i21) + r20.cyclotomic_square(r18) + r20 *= r17 + r21.prod(x1111000, r20) + r22.prod(r15, r21) - i26.cyclotomic_square(i22) - i26.cyclotomic_square() - i26 *= i22 - i26 *= i18
- i27.prod(i22, i26) + r27.prod(r22, r26) - i61.prod(i26, i27) - i61.cycl_sqr_repeated(17) - i61 *= i27 - i61.cycl_sqr_repeated(14) - i61 *= i21 + r61.prod(r26, r27) + r61.cycl_sqr_repeated(17) + r61 *= r27 + r61.cycl_sqr_repeated(14) + r61 *= r21 - r = i61 + r = r61 r.cycl_sqr_repeated(16) - r *= i20 + r *= r20 if invert: r.cyclotomic_inv() diff --git a/constantine/curves/bn254_snarks_sqrt.nim b/constantine/curves/bn254_snarks_sqrt.nim new file mode 100644 index 0000000..f88021e --- /dev/null +++ b/constantine/curves/bn254_snarks_sqrt.nim @@ -0,0 +1,158 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ + import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized invsqrt for BN254-Snarks +# +# ############################################################ + +func invsqrt_addchain*(r: var Fp[BN254_Snarks], a: Fp[BN254_Snarks]) = + var + x10 {.noInit.}: Fp[BN254_Snarks] + x11 {.noInit.}: Fp[BN254_Snarks] + x101 {.noInit.}: Fp[BN254_Snarks] + x110 {.noInit.}: Fp[BN254_Snarks] + x1000 {.noInit.}: Fp[BN254_Snarks] + x1101 {.noInit.}: Fp[BN254_Snarks] + x10010 {.noInit.}: Fp[BN254_Snarks] + x10011 {.noInit.}: Fp[BN254_Snarks] + x10100 {.noInit.}: Fp[BN254_Snarks] + x10111 {.noInit.}: Fp[BN254_Snarks] + x11100 {.noInit.}: Fp[BN254_Snarks] + x100000 {.noInit.}: Fp[BN254_Snarks] + x100011 {.noInit.}: Fp[BN254_Snarks] + x101011 {.noInit.}: Fp[BN254_Snarks] + x101111 {.noInit.}: Fp[BN254_Snarks] + x1000001 {.noInit.}: Fp[BN254_Snarks] + x1010011 {.noInit.}: Fp[BN254_Snarks] + x1011011 {.noInit.}: Fp[BN254_Snarks] + x1100001 {.noInit.}: Fp[BN254_Snarks] + x1110101 {.noInit.}: Fp[BN254_Snarks] + x10010001 {.noInit.}: Fp[BN254_Snarks] + x10010101 {.noInit.}: Fp[BN254_Snarks] + x10110101 {.noInit.}: Fp[BN254_Snarks] + x10111011 {.noInit.}: Fp[BN254_Snarks] + x11000001 {.noInit.}: Fp[BN254_Snarks] + x11000011 {.noInit.}: Fp[BN254_Snarks] + x11010011 {.noInit.}: Fp[BN254_Snarks] + x11100001 {.noInit.}: Fp[BN254_Snarks] + x11100011 {.noInit.}: Fp[BN254_Snarks] + x11100111 {.noInit.}: Fp[BN254_Snarks] + + x10 .square(a) + x11 .prod(x10, a) + x101 .prod(x10, x11) + x110 .prod(x101, a) + x1000 .prod(x10, x110) + x1101 .prod(x101, x1000) + x10010 .prod(x101, x1101) + x10011 .prod(x10010, a) + x10100 .prod(x10011, a) + x10111 .prod(x11, x10100) + x11100 .prod(x101, x10111) + x100000 .prod(x1101, x10011) + x100011 .prod(x11, x100000) + x101011 .prod(x1000, x100011) + x101111 .prod(x10011, x11100) + x1000001 .prod(x10010, x101111) + x1010011 .prod(x10010, x1000001) + x1011011 .prod(x1000, x1010011) +
x1100001 .prod(x110, x1011011) + x1110101 .prod(x10100, x1100001) + x10010001 .prod(x11100, x1110101) + x10010101 .prod(x100000, x1110101) + x10110101 .prod(x100000, x10010101) + x10111011 .prod(x110, x10110101) + x11000001 .prod(x110, x10111011) + x11000011 .prod(x10, x11000001) + x11010011 .prod(x10010, x11000001) + x11100001 .prod(x100000, x11000001) + x11100011 .prod(x10, x11100001) + x11100111 .prod(x110, x11100001) # 30 operations + + # 30 + 27 = 57 operations + r.square(x11000001) + r.square_repeated(7) + r *= x10010001 + r.square_repeated(10) + r *= x11100111 + r.square_repeated(7) + + # 57 + 19 = 76 operations + r *= x10111 + r.square_repeated(9) + r *= x10011 + r.square_repeated(7) + r *= x1101 + + # 76 + 33 = 109 operations + r.square_repeated(14) + r *= x1010011 + r.square_repeated(9) + r *= x11100001 + r.square_repeated(8) + + # 109 + 18 = 127 operations + r *= x1000001 + r.square_repeated(10) + r *= x1011011 + r.square_repeated(5) + r *= x1101 + + # 127 + 34 = 161 operations + r.square_repeated(8) + r *= x11 + r.square_repeated(12) + r *= x101011 + r.square_repeated(12) + + # 161 + 25 = 186 operations + r *= x10111011 + r.square_repeated(8) + r *= x101111 + r.square_repeated(14) + r *= x10110101 + + # 186 + 28 = 214 + r.square_repeated(9) + r *= x10010001 + r.square_repeated(5) + r *= x1101 + r.square_repeated(12) + + # 214 + 22 = 236 + r *= x11100011 + r.square_repeated(8) + r *= x10010101 + r.square_repeated(11) + r *= x11010011 + + # 236 + 32 = 268 + r.square_repeated(7) + r *= x1100001 + r.square_repeated(11) + r *= x100011 + r.square_repeated(12) + + # 268 + 20 = 288 + r *= x1011011 + r.square_repeated(9) + r *= x11000011 + r.square_repeated(8) + r *= x11100111 + + # 288 + 13 = 301 + r.square_repeated(7) + r *= x1110101 + r.square_repeated(4) + r *= a diff --git a/constantine/curves/bw6_761_inversion.nim b/constantine/curves/bw6_761_inversion.nim new file mode 100644 index 0000000..b62c05b --- /dev/null +++ 
b/constantine/curves/bw6_761_inversion.nim @@ -0,0 +1,376 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized inversion for BW6-761 +# +# ############################################################ + +func inv_addchain*(r: var Fp[BW6_761], a: Fp[BW6_761]) = + let a = a # ensure a.inv_addchain(a) is OK + + var + x10 {.noInit.}: Fp[BW6_761] + x11 {.noInit.}: Fp[BW6_761] + x101 {.noInit.}: Fp[BW6_761] + x111 {.noInit.}: Fp[BW6_761] + x1001 {.noInit.}: Fp[BW6_761] + x1011 {.noInit.}: Fp[BW6_761] + x1101 {.noInit.}: Fp[BW6_761] + x1111 {.noInit.}: Fp[BW6_761] + x10001 {.noInit.}: Fp[BW6_761] + x10010 {.noInit.}: Fp[BW6_761] + x10011 {.noInit.}: Fp[BW6_761] + x10111 {.noInit.}: Fp[BW6_761] + x11001 {.noInit.}: Fp[BW6_761] + x11011 {.noInit.}: Fp[BW6_761] + x11101 {.noInit.}: Fp[BW6_761] + x11111 {.noInit.}: Fp[BW6_761] + x100001 {.noInit.}: Fp[BW6_761] + x100011 {.noInit.}: Fp[BW6_761] + x100101 {.noInit.}: Fp[BW6_761] + x100111 {.noInit.}: Fp[BW6_761] + x101001 {.noInit.}: Fp[BW6_761] + x101011 {.noInit.}: Fp[BW6_761] + x101101 {.noInit.}: Fp[BW6_761] + x101111 {.noInit.}: Fp[BW6_761] + x110001 {.noInit.}: Fp[BW6_761] + x110011 {.noInit.}: Fp[BW6_761] + x110101 {.noInit.}: Fp[BW6_761] + x110111 {.noInit.}: Fp[BW6_761] + x111001 {.noInit.}: Fp[BW6_761] + x111011 {.noInit.}: Fp[BW6_761] + x111101 {.noInit.}: Fp[BW6_761] + x1111010 {.noInit.}: Fp[BW6_761] + x1111111 {.noInit.}: Fp[BW6_761] + 
x11111110 {.noInit.}: Fp[BW6_761] + x11111111 {.noInit.}: Fp[BW6_761] + + x10 .square(a) + x11 .prod(a, x10) + x101 .prod(x10, x11) + x111 .prod(x10, x101) + x1001 .prod(x10, x111) + x1011 .prod(x10, x1001) + x1101 .prod(x10, x1011) + x1111 .prod(x10, x1101) + x10001 .prod(x10, x1111) + x10010 .prod(a, x10001) + x10011 .prod(a, x10010) + x10111 .prod(x101, x10010) + x11001 .prod(x10, x10111) + x11011 .prod(x10, x11001) + x11101 .prod(x10, x11011) + x11111 .prod(x10, x11101) + x100001 .prod(x10, x11111) + x100011 .prod(x10, x100001) + x100101 .prod(x10, x100011) + x100111 .prod(x10, x100101) + x101001 .prod(x10, x100111) + x101011 .prod(x10, x101001) + x101101 .prod(x10, x101011) + x101111 .prod(x10, x101101) + x110001 .prod(x10, x101111) + x110011 .prod(x10, x110001) + x110101 .prod(x10, x110011) + x110111 .prod(x10, x110101) + x111001 .prod(x10, x110111) + x111011 .prod(x10, x111001) + x111101 .prod(x10, x111011) + x1111010 .square(x111101) + x1111111 .prod(x101, x1111010) + x11111110 .square(x1111111) + x11111111 .prod(a, x11111110) + # 35 operations + + # TODO: we can accumulate in a partially reduced + # doubled-size `r` to avoid the final substractions. + # and only reduce at the end. 
+ # This requires the number of op to be less than log2(p) == 761 + + # 35 + 8 = 43 operations + r.prod(x100001, x11111111) + r.square_repeated(3) + r *= x10111 + r.square_repeated(2) + r *= a + + # 43 + 22 = 65 operations + r.square_repeated(9) + r *= x1001 + r.square_repeated(7) + r *= x11111 + r.square_repeated(4) + + # 65 + 17 = 82 operations + r *= x111 + r.square_repeated(9) + r *= x1111 + r.square_repeated(5) + r *= x111 + + # 82 + 29 = 111 operations + r.square_repeated(11) + r *= x101011 + r.square_repeated(7) + r *= x100011 + r.square_repeated(9) + + # 111 + 28 = 139 operations + r *= x11111 + r.square_repeated(8) + r *= x100101 + r.square_repeated(17) + r *= x100111 + + # 139 + 22 = 161 operations + r.square_repeated(4) + r *= x1101 + r.square_repeated(9) + r *= x11111111 + r.square_repeated(7) + + # 161 + 15 = 176 operations + r *= x11111 + r.square_repeated(6) + r *= x10111 + r.square_repeated(6) + r *= x1001 + + # 176 + 22 = 198 operations + r.square_repeated(4) + r *= x11 + r.square_repeated(6) + r *= x11 + r.square_repeated(10) + + # 198 + 16 = 214 operations + r *= x110101 + r.square_repeated(2) + r *= a + r.square_repeated(11) + r *= x11101 + + # 214 + 24 = 238 operations + r.square_repeated(6) + r *= x101 + r.square_repeated(7) + r *= x1101 + r.square_repeated(9) + + # 238 + 21 = 259 operations + r *= x100001 + r.square_repeated(7) + r *= x100101 + r.square_repeated(11) + r *= x100111 + + # 259 + 28 = 287 operations + r.square_repeated(7) + r *= x101111 + r.square_repeated(6) + r *= x11111 + r.square_repeated(13) + + # 287 + 15 = 302 operations + r *= x100001 + r.square_repeated(6) + r *= x111011 + r.square_repeated(6) + r *= x111001 + + # 302 + 27 = 329 operations + r.square_repeated(10) + r *= x10111 + r.square_repeated(11) + r *= x111101 + r.square_repeated(4) + + # 329 + 17 = 346 operations + r *= x1101 + r.square_repeated(8) + r *= x110001 + r.square_repeated(6) + r *= x110001 + + # 346 + 20 = 366 operations + r.square_repeated(5) + r *=
x11001 + r.square_repeated(3) + r *= x11 + r.square_repeated(10) + + # 366 + 16 = 382 operations + r *= x100111 + r.square_repeated(5) + r *= x1001 + r.square_repeated(8) + r *= x11001 + + # 382 + 25 = 407 operations + r.square_repeated(10) + r *= x1111 + r.square_repeated(7) + r *= x11101 + r.square_repeated(6) + + # 407 + 20 = 427 operations + r *= x11101 + r.square_repeated(9) + r *= x11111111 + r.square_repeated(8) + r *= x100101 + + # 427 + 27 = 454 operations + r.square_repeated(6) + r *= x101101 + r.square_repeated(10) + r *= x100011 + r.square_repeated(9) + + # 454 + 20 = 474 operations + r *= x1001 + r.square_repeated(8) + r *= x1101 + r.square_repeated(9) + r *= x100111 + + # 474 + 25 = 499 operations + r.square_repeated(8) + r *= x100011 + r.square_repeated(6) + r *= x101101 + r.square_repeated(9) + + # 499 + 16 = 515 operations + r *= x100101 + r.square_repeated(4) + r *= x1111 + r.square_repeated(9) + r *= x1111111 + + # 515 + 25 = 540 operations + r.square_repeated(6) + r *= x11001 + r.square_repeated(8) + r *= x111 + r.square_repeated(9) + + # 540 + 15 = 555 operations + r *= x111011 + r.square_repeated(5) + r *= x10011 + r.square_repeated(7) + r *= x100111 + + # 555 + 22 = 577 operations + r.square_repeated(5) + r *= x10111 + r.square_repeated(9) + r *= x111001 + r.square_repeated(6) + + # 577 + 14 = 591 operations + r *= x111101 + r.square_repeated(9) + r *= x11111111 + r.square_repeated(2) + r *= x11 + + # 591 + 21 = 612 operations + r.square_repeated(7) + r *= x10111 + r.square_repeated(6) + r *= x10011 + r.square_repeated(6) + + # 612 + 18 = 630 operations + r *= x101 + r.square_repeated(9) + r *= x10001 + r.square_repeated(6) + r *= x11011 + + # 630 + 27 = 657 operations + r.square_repeated(10) + r *= x100101 + r.square_repeated(7) + r *= x110011 + r.square_repeated(8) + + # 657 + 13 = 670 operations + r *= x111101 + r.square_repeated(7) + r *= x100011 + r.square_repeated(3) + r *= x111 + + # 670 + 26 = 696 operations + r.square_repeated(10) + 
r *= x1011 + r.square_repeated(11) + r *= x110011 + r.square_repeated(3) + + # 696 + 17 = 713 operations + r *= x111 + r.square_repeated(9) + r *= x101011 + r.square_repeated(5) + r *= x10111 + + # 713 + 21 = 734 operations + r.square_repeated(7) + r *= x101011 + r.square_repeated(2) + r *= x11 + r.square_repeated(10) + + # 734 + 19 = 753 operations + r *= x101001 + r.square_repeated(10) + r *= x110111 + r.square_repeated(6) + r *= x111001 + + # 753 + 23 = 776 operations + r.square_repeated(6) + r *= x101001 + r.square_repeated(9) + r *= x100111 + r.square_repeated(6) + + # 776 + 12 = 788 operations + r *= x110011 + r.square_repeated(7) + r *= x100001 + r.square_repeated(2) + r *= x11 + + # 788 + 39 = 827 operations + r.square_repeated(21) + r *= a + r.square_repeated(11) + r *= x101111 + r.square_repeated(5) + + # 827 + 55 = 882 operations + r *= x1001 + r.square_repeated(7) + r *= x11101 + r.square_repeated(45) + r *= x10001 + + # 882 + 4 = 886 operations + r.square_repeated(3) + r *= a diff --git a/constantine/curves/bw6_761_sqrt.nim b/constantine/curves/bw6_761_sqrt.nim new file mode 100644 index 0000000..b8e94a6 --- /dev/null +++ b/constantine/curves/bw6_761_sqrt.nim @@ -0,0 +1,373 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +import + ../config/curves, + ../arithmetic/finite_fields + +# ############################################################ +# +# Specialized invsqrt for BW6-761 +# +# ############################################################ + +func invsqrt_addchain*(r: var Fp[BW6_761], a: Fp[BW6_761]) = + var + x10 {.noInit.}: Fp[BW6_761] + x11 {.noInit.}: Fp[BW6_761] + x101 {.noInit.}: Fp[BW6_761] + x111 {.noInit.}: Fp[BW6_761] + x1001 {.noInit.}: Fp[BW6_761] + x1011 {.noInit.}: Fp[BW6_761] + x1101 {.noInit.}: Fp[BW6_761] + x1111 {.noInit.}: Fp[BW6_761] + x10001 {.noInit.}: Fp[BW6_761] + x10010 {.noInit.}: Fp[BW6_761] + x10011 {.noInit.}: Fp[BW6_761] + x10111 {.noInit.}: Fp[BW6_761] + x11001 {.noInit.}: Fp[BW6_761] + x11011 {.noInit.}: Fp[BW6_761] + x11101 {.noInit.}: Fp[BW6_761] + x11111 {.noInit.}: Fp[BW6_761] + x100001 {.noInit.}: Fp[BW6_761] + x100011 {.noInit.}: Fp[BW6_761] + x100101 {.noInit.}: Fp[BW6_761] + x100111 {.noInit.}: Fp[BW6_761] + x101001 {.noInit.}: Fp[BW6_761] + x101011 {.noInit.}: Fp[BW6_761] + x101101 {.noInit.}: Fp[BW6_761] + x101111 {.noInit.}: Fp[BW6_761] + x110001 {.noInit.}: Fp[BW6_761] + x110011 {.noInit.}: Fp[BW6_761] + x110101 {.noInit.}: Fp[BW6_761] + x110111 {.noInit.}: Fp[BW6_761] + x111001 {.noInit.}: Fp[BW6_761] + x111011 {.noInit.}: Fp[BW6_761] + x111101 {.noInit.}: Fp[BW6_761] + x1111010 {.noInit.}: Fp[BW6_761] + x1111111 {.noInit.}: Fp[BW6_761] + x11111110 {.noInit.}: Fp[BW6_761] + x11111111 {.noInit.}: Fp[BW6_761] + + x10 .square(a) + x11 .prod(a, x10) + x101 .prod(x10, x11) + x111 .prod(x10, x101) + x1001 .prod(x10, x111) + x1011 .prod(x10, x1001) + x1101 .prod(x10, x1011) + x1111 .prod(x10, x1101) + x10001 .prod(x10, x1111) + x10010 .prod(a, x10001) + x10011 .prod(a, x10010) + x10111 .prod(x101, x10010) + x11001 .prod(x10, x10111) + x11011 .prod(x10, x11001) + x11101 .prod(x10, x11011) + x11111 .prod(x10, x11101) + x100001 .prod(x10, x11111) + x100011 .prod(x10, x100001) + x100101 .prod(x10, x100011) + x100111 .prod(x10, x100101) + 
x101001 .prod(x10, x100111) + x101011 .prod(x10, x101001) + x101101 .prod(x10, x101011) + x101111 .prod(x10, x101101) + x110001 .prod(x10, x101111) + x110011 .prod(x10, x110001) + x110101 .prod(x10, x110011) + x110111 .prod(x10, x110101) + x111001 .prod(x10, x110111) + x111011 .prod(x10, x111001) + x111101 .prod(x10, x111011) + x1111010 .square(x111101) + x1111111 .prod(x101, x1111010) + x11111110 .square(x1111111) + x11111111 .prod(a, x11111110) + # 35 operations + + # TODO: we can accumulate in a partially reduced + # doubled-size `r` to avoid the final subtractions. + # and only reduce at the end. + # This requires the number of op to be less than log2(p) == 761 + + # 35 + 8 = 43 operations + r.prod(x100001, x11111111) + r.square_repeated(3) + r *= x10111 + r.square_repeated(2) + r *= a + + # 43 + 22 = 65 operations + r.square_repeated(9) + r *= x1001 + r.square_repeated(7) + r *= x11111 + r.square_repeated(4) + + # 65 + 17 = 82 operations + r *= x111 + r.square_repeated(9) + r *= x1111 + r.square_repeated(5) + r *= x111 + + # 82 + 29 = 111 operations + r.square_repeated(11) + r *= x101011 + r.square_repeated(7) + r *= x100011 + r.square_repeated(9) + + # 111 + 28 = 139 operations + r *= x11111 + r.square_repeated(8) + r *= x100101 + r.square_repeated(17) + r *= x100111 + + # 139 + 22 = 161 operations + r.square_repeated(4) + r *= x1101 + r.square_repeated(9) + r *= x11111111 + r.square_repeated(7) + + # 161 + 15 = 176 operations + r *= x11111 + r.square_repeated(6) + r *= x10111 + r.square_repeated(6) + r *= x1001 + + # 176 + 22 = 198 operations + r.square_repeated(4) + r *= x11 + r.square_repeated(6) + r *= x11 + r.square_repeated(10) + + # 198 + 16 = 214 operations + r *= x110101 + r.square_repeated(2) + r *= a + r.square_repeated(11) + r *= x11101 + + # 214 + 24 = 238 operations + r.square_repeated(6) + r *= x101 + r.square_repeated(7) + r *= x1101 + r.square_repeated(9) + + # 238 + 21 = 259 operations + r *= x100001 + r.square_repeated(7) + r *= x100101 +
r.square_repeated(11) + r *= x100111 + + # 259 + 28 = 287 operations + r.square_repeated(7) + r *= x101111 + r.square_repeated(6) + r *= x11111 + r.square_repeated(13) + + # 287 + 15 = 302 operations + r *= x100001 + r.square_repeated(6) + r *= x111011 + r.square_repeated(6) + r *= x111001 + + # 302 + 27 = 329 operations + r.square_repeated(10) + r *= x10111 + r.square_repeated(11) + r *= x111101 + r.square_repeated(4) + + # 329 + 17 = 346 operations + r *= x1101 + r.square_repeated(8) + r *= x110001 + r.square_repeated(6) + r *= x110001 + + # 346 + 20 = 366 operations + r.square_repeated(5) + r *= x11001 + r.square_repeated(3) + r *= x11 + r.square_repeated(10) + + # 366 + 16 = 382 operations + r *= x100111 + r.square_repeated(5) + r *= x1001 + r.square_repeated(8) + r *= x11001 + + # 382 + 25 = 407 operations + r.square_repeated(10) + r *= x1111 + r.square_repeated(7) + r *= x11101 + r.square_repeated(6) + + # 407 + 20 = 427 operations + r *= x11101 + r.square_repeated(9) + r *= x11111111 + r.square_repeated(8) + r *= x100101 + + # 427 + 27 = 454 operations + r.square_repeated(6) + r *= x101101 + r.square_repeated(10) + r *= x100011 + r.square_repeated(9) + + # 454 + 20 = 474 operations + r *= x1001 + r.square_repeated(8) + r *= x1101 + r.square_repeated(9) + r *= x100111 + + # 474 + 25 = 499 operations + r.square_repeated(8) + r *= x100011 + r.square_repeated(6) + r *= x101101 + r.square_repeated(9) + + # 499 + 16 = 515 operations + r *= x100101 + r.square_repeated(4) + r *= x1111 + r.square_repeated(9) + r *= x1111111 + + # 515 + 25 = 540 operations + r.square_repeated(6) + r *= x11001 + r.square_repeated(8) + r *= x111 + r.square_repeated(9) + + # 540 + 15 = 555 operations + r *= x111011 + r.square_repeated(5) + r *= x10011 + r.square_repeated(7) + r *= x100111 + + # 555 + 22 = 577 operations + r.square_repeated(5) + r *= x10111 + r.square_repeated(9) + r *= x111001 + r.square_repeated(6) + + # 577 + 14 = 591 operations + r *= x111101 + r.square_repeated(9) +
r *= x11111111 + r.square_repeated(2) + r *= x11 + + # 591 + 21 = 612 operations + r.square_repeated(7) + r *= x10111 + r.square_repeated(6) + r *= x10011 + r.square_repeated(6) + + # 612 + 18 = 630 operations + r *= x101 + r.square_repeated(9) + r *= x10001 + r.square_repeated(6) + r *= x11011 + + # 630 + 27 = 657 operations + r.square_repeated(10) + r *= x100101 + r.square_repeated(7) + r *= x110011 + r.square_repeated(8) + + # 657 + 13 = 670 operations + r *= x111101 + r.square_repeated(7) + r *= x100011 + r.square_repeated(3) + r *= x111 + + # 670 + 26 = 696 operations + r.square_repeated(10) + r *= x1011 + r.square_repeated(11) + r *= x110011 + r.square_repeated(3) + + # 696 + 17 = 713 operations + r *= x111 + r.square_repeated(9) + r *= x101011 + r.square_repeated(5) + r *= x10111 + + # 713 + 21 = 734 operations + r.square_repeated(7) + r *= x101011 + r.square_repeated(2) + r *= x11 + r.square_repeated(10) + + # 734 + 19 = 753 operations + r *= x101001 + r.square_repeated(10) + r *= x110111 + r.square_repeated(6) + r *= x111001 + + # 753 + 23 = 776 operations + r.square_repeated(6) + r *= x101001 + r.square_repeated(9) + r *= x100111 + r.square_repeated(6) + + # 776 + 12 = 788 operations + r *= x110011 + r.square_repeated(7) + r *= x100001 + r.square_repeated(2) + r *= x11 + + # 788 + 39 = 827 operations + r.square_repeated(21) + r *= a + r.square_repeated(11) + r *= x101111 + r.square_repeated(5) + + # 827 + 55 = 882 operations + r *= x1001 + r.square_repeated(7) + r *= x11101 + r.square_repeated(45) + r *= x10001 + + # 882 + 1 = 883 operations + r.square() diff --git a/constantine/curves/zoo_inversions.nim b/constantine/curves/zoo_inversions.nim index b9e20d8..3619cba 100644 --- a/constantine/curves/zoo_inversions.nim +++ b/constantine/curves/zoo_inversions.nim @@ -7,11 +7,27 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. 
import + ../config/[curves, type_ff], + ./bls12_377_inversion, ./bls12_381_inversion, + ./bn254_nogami_inversion, ./bn254_snarks_inversion, + ./bw6_761_inversion, ./secp256k1_inversion export + bls12_377_inversion, bls12_381_inversion, + bn254_nogami_inversion, bn254_snarks_inversion, + bw6_761_inversion, secp256k1_inversion + +func hasInversionAddchain*(C: static Curve): static bool = + # TODO: For now we don't activate the addition chains + # for Secp256k1 + # Performance is slower than GCD + when C in {BN254_Nogami, BN254_Snarks, BLS12_377, BLS12_381, BW6_761}: + true + else: + false diff --git a/constantine/curves/zoo_square_roots.nim b/constantine/curves/zoo_square_roots.nim index be937b3..d6ff897 100644 --- a/constantine/curves/zoo_square_roots.nim +++ b/constantine/curves/zoo_square_roots.nim @@ -8,11 +8,34 @@ import std/macros, - ../config/curves, - ./bls12_377_sqrt + ../config/[curves, type_ff], + ./bls12_377_sqrt, + ./bls12_381_sqrt, + ./bn254_nogami_sqrt, + ./bn254_snarks_sqrt, + ./bw6_761_sqrt + +export + bls12_377_sqrt, + bls12_381_sqrt, + bn254_nogami_sqrt, + bn254_snarks_sqrt, + bw6_761_sqrt + +func hasSqrtAddchain*(C: static Curve): static bool = + when C in {BLS12_381, BN254_Nogami, BN254_Snarks, BW6_761}: + true + else: + false {.experimental: "dynamicBindSym".} macro tonelliShanks*(C: static Curve, value: untyped): untyped = ## Get Square Root via Tonelli-Shanks related constants return bindSym($C & "_TonelliShanks_" & $value) + +func hasTonelliShanksAddchain*(C: static Curve): static bool = + when C in {BLS12_377}: + true + else: + false diff --git a/tests/t_finite_fields_sqrt.nim b/tests/t_finite_fields_sqrt.nim index 074f8dd..8664df5 100644 --- a/tests/t_finite_fields_sqrt.nim +++ b/tests/t_finite_fields_sqrt.nim @@ -125,6 +125,7 @@ proc main() = randomSqrtCheck BN254_Snarks randomSqrtCheck BLS12_377 # p ≢ 3 (mod 4) randomSqrtCheck BLS12_381 + randomSqrtCheck BW6_761 suite "Modular square root - 32-bit bugs highlighted by property-based 
testing " & " [" & $WordBitwidth & "-bit mode]": # test "FKM12_447 - #30": - Deactivated, we don't support the curve as no one uses it.