From 2613356281ed33626bd9cc8b2cbaf68ae11a0867 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sun, 14 Jun 2020 15:39:06 +0200 Subject: [PATCH] Endomorphism acceleration for Scalar Multiplication (#44) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add MultiScalar recoding from "Efficient and Secure Algorithms for GLV-Based Scalar Multiplication" by Faz et al * precompute cube root of unity - Add VM precomputation of Fp - workaround upstream bug https://github.com/nim-lang/Nim/issues/14585 * Add the φ-accelerated lookup table builder * Add a dedicated bithacks file * cosmetic import consistency * Build the φ precompute table with n-1 EC additions instead of 2^(n-1) additions * remove binary * Add the GLV precomputations to the sage scripts * You can't avoid it, bigint multiplication is needed at one point * Add bigint multiplication discarding some low words * Implement the lattice decomposition in sage * Proper decomposition for BN254 * Prepare the code for a new scalar mul * We compile, and now debugging hunt * More helpers to debug GLV scalar Mul * Fix conditional negation * Endomorphism accelerated scalar mul working for BN254 curve * Implement endomorphism acceleration for BLS12-381 (needed cofactor clearing of the point) * fix nimble test script after bench rename --- ...ch_ec_swei_proj_g1.nim => bench_ec_g1.nim} | 12 +- benchmarks/bench_elliptic_template.nim | 59 +- constantine.nimble | 40 +- constantine/arithmetic.nim | 2 - constantine/arithmetic/bigints.nim | 130 ++-- constantine/arithmetic/finite_fields.nim | 17 +- .../arithmetic/finite_fields_inversion.nim | 4 +- constantine/arithmetic/limbs.nim | 146 ++++- constantine/arithmetic/limbs_montgomery.nim | 49 +- constantine/config/curves.nim | 20 +- constantine/config/curves_declaration.nim | 4 + constantine/config/curves_derived.nim | 40 +- constantine/config/curves_parser.nim | 25 +- .../precomputed.nim => config/precompute.nim} | 134 +++- constantine/config/type_bigint.nim | 53 ++ constantine/config/type_fp.nim | 28 + .../elliptic/ec_endomorphism_accel.nim | 600 ++++++++++++++++++ .../elliptic/ec_endomorphism_params.nim | 135 ++++ constantine/elliptic/ec_scalar_mul.nim | 229 +++++++ .../elliptic/ec_weierstrass_projective.nim | 215 +------ constantine/io/io_bigints.nim | 3 +- constantine/io/io_ec.nim | 13 +- constantine/io/io_fields.nim | 2 +- constantine/primitives.nim | 6 +- constantine/primitives/bithacks.nim | 77 +++ constantine/primitives/constant_time.nim | 56 +- constantine/primitives/extended_precision.nim | 16 +- .../research/addcarry_subborrow_compiler.nim | 4 +- .../quadratic_extensions.nim | 1 - formal_verification/bls12_381_q_64.nim | 2 +- helpers/static_for.nim | 2 +- sage/curve_family_bls12.sage | 43 ++ sage/curve_family_bn.sage | 50 +- sage/lattice_decomposition_bls12_381_g1.sage | 202 ++++++ ...lattice_decomposition_bn254_snarks_g1.sage | 193 ++++++ sage/testgen_bls12_381.sage | 8 +- sage/testgen_bn254_snarks.sage | 1 + tests/test_bigints.nim | 127 +++- ...vs_gmp.nim => test_bigints_mod_vs_gmp.nim} | 4 +- tests/test_bigints_mul_high_words_vs_gmp.nim | 151 +++++ tests/test_bigints_mul_vs_gmp.nim | 140 ++++ tests/test_bigints_multimod.nim | 2 +- tests/test_ec_bls12_381.nim | 93 +-- tests/test_ec_bn254.nim | 13 +- tests/test_ec_weierstrass_projective_g1.nim | 20 +- tests/test_finite_fields.nim | 4 +- tests/test_finite_fields_mulsquare.nim | 15 +- tests/test_finite_fields_powinv.nim | 17 +- tests/test_finite_fields_sqrt.nim | 18 +- tests/test_finite_fields_vs_gmp.nim | 2 +- tests/test_precomputed.nim | 37 ++ 51 files changed, 2760 insertions(+), 504 deletions(-) rename benchmarks/{bench_ec_swei_proj_g1.nim => bench_ec_g1.nim} (81%) rename constantine/{arithmetic/precomputed.nim => config/precompute.nim} (78%) create mode 100644 constantine/config/type_bigint.nim create mode 100644 constantine/config/type_fp.nim create mode 100644 constantine/elliptic/ec_endomorphism_accel.nim create mode 100644 constantine/elliptic/ec_endomorphism_params.nim create mode 100644 constantine/elliptic/ec_scalar_mul.nim create mode 100644 constantine/primitives/bithacks.nim create mode 100644 sage/lattice_decomposition_bls12_381_g1.sage create mode 100644 sage/lattice_decomposition_bn254_snarks_g1.sage rename tests/{test_bigints_vs_gmp.nim => test_bigints_mod_vs_gmp.nim} (98%) create mode 100644 tests/test_bigints_mul_high_words_vs_gmp.nim create mode 100644 tests/test_bigints_mul_vs_gmp.nim create mode 100644 tests/test_precomputed.nim diff --git a/benchmarks/bench_ec_swei_proj_g1.nim b/benchmarks/bench_ec_g1.nim similarity index 81% rename from benchmarks/bench_ec_swei_proj_g1.nim rename to benchmarks/bench_ec_g1.nim index 5d5c3d5..175e226 100644 --- a/benchmarks/bench_ec_swei_proj_g1.nim +++ b/benchmarks/bench_ec_g1.nim @@ -51,14 +51,16 @@ proc main() = separator() doublingBench(ECP_SWei_Proj[Fp[curve]], Iters) separator() - scalarMulBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 2, MulIters) + scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp[curve]], MulIters) separator() - scalarMulBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 3, MulIters) + scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 2, MulIters) separator() - scalarMulBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 4, MulIters) + scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 3, MulIters) + separator() + scalarMulGenericBench(ECP_SWei_Proj[Fp[curve]], scratchSpaceSize = 1 shl 4, MulIters) + separator() + scalarMulGLV(ECP_SWei_Proj[Fp[curve]], MulIters) separator() - # scalarMulUnsafeDoubleAddBench(ECP_SWei_Proj[Fp[curve]], MulIters) - # separator() separator() main() diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim index d1ccf73..a654b5b 100644 --- a/benchmarks/bench_elliptic_template.nim +++ b/benchmarks/bench_elliptic_template.nim @@ -17,11 +17,14 @@ import ../constantine/config/curves, ../constantine/arithmetic, ../constantine/io/io_bigints, + ../constantine/elliptic/[ec_weierstrass_projective, ec_scalar_mul, ec_endomorphism_accel], # Helpers ../helpers/[prng_unsafe, static_for], ./platforms, # Standard library - std/[monotimes, times, strformat, strutils, macros] + std/[monotimes, times, strformat, strutils, macros], + # Reference unsafe scalar multiplication + ../tests/support/ec_reference_scalar_mult var rng: RngState let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 @@ -71,15 +74,15 @@ when SupportsGetTicks: echo "\n=================================================================================================================\n" proc separator*() = - echo "-".repeat(157) + echo "-".repeat(177) proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = let ns = inNanoseconds((stop-start) div iters) let throughput = 1e9 / float64(ns) when SupportsGetTicks: - echo &"{op:<40} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" else: - echo &"{op:<40} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op" + echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op" macro fixEllipticDisplay(T: typedesc): untyped = # At compile-time, enums are integers and their display is buggy @@ -120,11 +123,11 @@ proc doublingBench*(T: typedesc, iters: int) = bench("EC Double G1", T, iters): r.double(P) -proc scalarMulBench*(T: typedesc, scratchSpaceSize: static int, iters: int) = +proc scalarMulGenericBench*(T: typedesc, scratchSpaceSize: static int, iters: int) = const bits = T.F.C.getCurveOrderBitwidth() var r {.noInit.}: T - let P = rng.random_unsafe(T) + let P = rng.random_unsafe(T) # TODO: clear cofactor let exponent = rng.random_unsafe(BigInt[bits]) var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte] @@ -132,22 +135,32 @@ proc scalarMulBench*(T: typedesc, scratchSpaceSize: static int, iters: int) = var scratchSpace{.noInit.}: array[scratchSpaceSize, T] - bench("EC ScalarMul G1 (scratchsize = " & $scratchSpaceSize & ')', T, iters): + bench("EC ScalarMul Generic G1 (scratchsize = " & $scratchSpaceSize & ')', T, iters): r = P - r.scalarMul(exponentCanonical, scratchSpace) + r.scalarMulGeneric(exponentCanonical, scratchSpace) -# import ../tests/support/ec_reference_scalar_mult -# -# proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) = -# const bits = T.F.C.getCurveOrderBitwidth() -# -# var r {.noInit.}: T -# let P = rng.random_unsafe(T) -# -# let exponent = rng.random_unsafe(BigInt[bits]) -# var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte] -# exponentCanonical.exportRawUint(exponent, bigEndian) -# -# bench("EC ScalarMul G1 (unsafe DoubleAdd)", T, iters): -# r = P -# r.unsafe_ECmul_double_add(exponentCanonical) +proc scalarMulGLV*(T: typedesc, iters: int) = + const bits = T.F.C.getCurveOrderBitwidth() + + var r {.noInit.}: T + let P = rng.random_unsafe(T) # TODO: clear cofactor + + let exponent = rng.random_unsafe(BigInt[bits]) + + bench("EC ScalarMul G1 (GLV endomorphism accelerated)", T, iters): + r = P + r.scalarMulGLV(exponent) + +proc scalarMulUnsafeDoubleAddBench*(T: typedesc, iters: int) = + const bits = T.F.C.getCurveOrderBitwidth() + + var r {.noInit.}: T + let P = rng.random_unsafe(T) # TODO: clear cofactor + + let exponent = rng.random_unsafe(BigInt[bits]) + var exponentCanonical{.noInit.}: array[(bits+7) div 8, byte] + exponentCanonical.exportRawUint(exponent, bigEndian) + + bench("EC ScalarMul G1 (unsafe reference DoubleAdd)", T, iters): + r = P + r.unsafe_ECmul_double_add(exponentCanonical) diff --git a/constantine.nimble b/constantine.nimble index e718117..d21c12f 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -59,7 +59,7 @@ task test, "Run all tests": test "", "tests/test_bigints.nim" test "", "tests/test_bigints_multimod.nim" - test "", "tests/test_bigints_vs_gmp.nim" + test "", "tests/test_bigints_mod_vs_gmp.nim" # Field test "", "tests/test_io_fields" @@ -70,6 +70,9 @@ task test, "Run all tests": test "", "tests/test_finite_fields_vs_gmp.nim" + # Precompute + test "", "tests/test_precomputed" + # Towers of extension fields test "", "tests/test_fp2.nim" test "", "tests/test_fp6.nim" @@ -89,7 +92,7 @@ task test, "Run all tests": test "-d:Constantine32", "tests/test_bigints.nim" test "-d:Constantine32", "tests/test_bigints_multimod.nim" - test "-d:Constantine32", "tests/test_bigints_vs_gmp.nim" + test "-d:Constantine32", "tests/test_bigints_mod_vs_gmp.nim" # Field test "-d:Constantine32", "tests/test_io_fields" @@ -100,6 +103,9 @@ task test, "Run all tests": test "-d:Constantine32", "tests/test_finite_fields_vs_gmp.nim" + # Precompute + test "-d:Constantine32", "tests/test_precomputed" + # Towers of extension fields test "-d:Constantine32", "tests/test_fp2.nim" test "-d:Constantine32", "tests/test_fp6.nim" @@ -118,7 +124,7 @@ task test, "Run all tests": runBench("bench_fp2") runBench("bench_fp6") runBench("bench_fp12") - runBench("bench_ec_swei_proj_g1") + runBench("bench_ec_g1") task test_no_gmp, "Run tests that don't require GMP": # -d:testingCurves is configured in a *.nim.cfg for convenience @@ -138,6 +144,9 @@ task test_no_gmp, "Run tests that don't require GMP": test "", "tests/test_finite_fields_sqrt.nim" test "", "tests/test_finite_fields_powinv.nim" + # Precompute + test "", "tests/test_precomputed" + # Towers of extension fields test "", "tests/test_fp2.nim" test "", "tests/test_fp6.nim" @@ -164,6 +173,9 @@ task test_no_gmp, "Run tests that don't require GMP": test "-d:Constantine32", "tests/test_finite_fields_sqrt.nim" test "-d:Constantine32", "tests/test_finite_fields_powinv.nim" + # Precompute + test "-d:Constantine32", "tests/test_precomputed" + # Towers of extension fields test "-d:Constantine32", "tests/test_fp2.nim" test "-d:Constantine32", "tests/test_fp6.nim" @@ -182,7 +194,7 @@ task test_no_gmp, "Run tests that don't require GMP": runBench("bench_fp2") runBench("bench_fp6") runBench("bench_fp12") - runBench("bench_ec_swei_proj_g1") + runBench("bench_ec_g1") task test_parallel, "Run all tests in parallel (via GNU parallel)": # -d:testingCurves is configured in a *.nim.cfg for convenience @@ -197,7 +209,8 @@ task test_parallel, "Run all tests in parallel (via GNU parallel)": test "", "tests/test_bigints.nim", cmdFile test "", "tests/test_bigints_multimod.nim", cmdFile - test "", "tests/test_bigints_vs_gmp.nim", cmdFile + test "", "tests/test_bigints_mul_vs_gmp.nim", cmdFile + test "", "tests/test_bigints_mod_vs_gmp.nim", cmdFile # Field test "", "tests/test_io_fields", cmdFile @@ -233,7 +246,8 @@ task test_parallel, "Run all tests in parallel (via GNU parallel)": test "-d:Constantine32", "tests/test_bigints.nim", cmdFile test "-d:Constantine32", "tests/test_bigints_multimod.nim", cmdFile - test "-d:Constantine32", "tests/test_bigints_vs_gmp.nim", cmdFile + test "-d:Constantine32", "tests/test_bigints_mul_vs_gmp.nim", cmdFile + test "-d:Constantine32", "tests/test_bigints_mod_vs_gmp.nim", cmdFile # Field test "-d:Constantine32", "tests/test_io_fields", cmdFile @@ -268,7 +282,7 @@ task test_parallel, "Run all tests in parallel (via GNU parallel)": runBench("bench_fp2") runBench("bench_fp6") runBench("bench_fp12") - runBench("bench_ec_swei_proj_g1") + runBench("bench_ec_g1") task bench_fp, "Run benchmark 𝔽p with your default compiler": runBench("bench_fp") @@ -306,11 +320,11 @@ task bench_fp12_gcc, "Run benchmark 𝔽p12 with gcc": task bench_fp12_clang, "Run benchmark 𝔽p12 with clang": runBench("bench_fp12", "clang") -task bench_ec_swei_proj_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC": - runBench("bench_ec_swei_proj_g1") +task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC": + runBench("bench_ec_g1") -task bench_ec_swei_proj_g1_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC": - runBench("bench_ec_swei_proj_g1", "gcc") +task bench_ec_gcc, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - GCC": + runBench("bench_ec_g1", "gcc") -task bench_ec_swei_proj_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang": - runBench("bench_ec_swei_proj_g1", "clang") +task bench_ec_g1_clang, "Run benchmark on Elliptic Curve group 𝔾1 - Short Weierstrass with Projective Coordinates - Clang": + runBench("bench_ec_g1", "clang") diff --git a/constantine/arithmetic.nim b/constantine/arithmetic.nim index 7d2c6ca..5a85716 100644 --- a/constantine/arithmetic.nim +++ b/constantine/arithmetic.nim @@ -7,11 +7,9 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import - arithmetic/[limbs, limbs_modular, limbs_montgomery], arithmetic/bigints, arithmetic/[finite_fields, finite_fields_inversion] export - limbs, limbs_modular, limbs_montgomery, bigints, finite_fields, finite_fields_inversion diff --git a/constantine/arithmetic/bigints.nim b/constantine/arithmetic/bigints.nim index 26280d0..9b682ea 100644 --- a/constantine/arithmetic/bigints.nim +++ b/constantine/arithmetic/bigints.nim @@ -7,10 +7,12 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import - ../config/common, + ../config/[common, type_bigint], ../primitives, ./limbs, ./limbs_montgomery, ./limbs_modular +export BigInt + # ############################################################ # # BigInts @@ -55,41 +57,6 @@ import # having redundant representations that forces costly reductions before multiplications. # https://github.com/mratsim/constantine/issues/15 -func wordsRequired(bits: int): int {.compileTime.} = - ## Compute the number of limbs required - # from the **announced** bit length - (bits + WordBitWidth - 1) div WordBitWidth - -type - BigInt*[bits: static int] = object - ## Fixed-precision big integer - ## - ## - "bits" is the announced bit-length of the BigInt - ## This is public data, usually equal to the curve prime bitlength. - ## - ## - "limbs" is an internal field that holds the internal representation - ## of the big integer. Least-significant limb first. Within limbs words are native-endian. - ## - ## This internal representation can be changed - ## without notice and should not be used by external applications or libraries. - limbs*: array[bits.wordsRequired, SecretWord] - -# For unknown reason, `bits` doesn't semcheck if -# `limbs: Limbs[bits.wordsRequired]` -# with -# `Limbs[N: static int] = distinct array[N, SecretWord]` -# so we don't set Limbs as a distinct type - -debug: - import strutils - - func `$`*(a: BigInt): string = - result = "BigInt[" - result.add $BigInt.bits - result.add "](limbs: " - result.add a.limbs.toString() - result.add ")" - # No exceptions allowed {.push raises: [].} {.push inline.} @@ -123,6 +90,14 @@ func cswap*(a, b: var BigInt, ctl: CTBool) = ## memory accesses are done (unless the compiler tries to be clever) cswap(a.limbs, b.limbs, ctl) +func copyTruncatedFrom*[dBits, sBits: static int](dst: var BigInt[dBits], src: BigInt[sBits]) = + ## Copy `src` into `dst` + ## if `dst` is not big enough, only the low words are copied + ## if `src` is smaller than `dst` the higher words of `dst` will NOT be overwritten + + for wordIdx in 0 ..< min(dst.limbs.len, src.limbs.len): + dst.limbs[wordIdx] = src.limbs[wordIdx] + # Comparison # ------------------------------------------------------------ @@ -161,11 +136,17 @@ func cadd*(a: var BigInt, b: BigInt, ctl: SecretBool): SecretBool = (SecretBool) cadd(a.limbs, b.limbs, ctl) func csub*(a: var BigInt, b: BigInt, ctl: SecretBool): SecretBool = - ## Constant-time in-place conditional addition - ## The addition is only performed if ctl is "true" - ## The result carry is always computed. + ## Constant-time in-place conditional substraction + ## The substraction is only performed if ctl is "true" + ## The result borrow is always computed. (SecretBool) csub(a.limbs, b.limbs, ctl) +func csub*(a: var BigInt, b: SecretWord, ctl: SecretBool): SecretBool = + ## Constant-time in-place conditional substraction + ## The substraction is only performed if ctl is "true" + ## The result borrow is always computed. + (SecretBool) csub(a.limbs, b, ctl) + func cdouble*(a: var BigInt, ctl: SecretBool): SecretBool = ## Constant-time in-place conditional doubling ## The doubling is only performed if ctl is "true" @@ -182,11 +163,36 @@ func add*(a: var BigInt, b: SecretWord): SecretBool = ## Returns the carry (SecretBool) add(a.limbs, b) +func `+=`*(a: var BigInt, b: BigInt) = + ## Constant-time in-place addition + ## Discards the carry + discard add(a.limbs, b.limbs) + +func `+=`*(a: var BigInt, b: SecretWord) = + ## Constant-time in-place addition + ## Discards the carry + discard add(a.limbs, b) + func sub*(a: var BigInt, b: BigInt): SecretBool = ## Constant-time in-place substraction ## Returns the borrow (SecretBool) sub(a.limbs, b.limbs) +func sub*(a: var BigInt, b: SecretWord): SecretBool = + ## Constant-time in-place substraction + ## Returns the borrow + (SecretBool) sub(a.limbs, b) + +func `-=`*(a: var BigInt, b: BigInt) = + ## Constant-time in-place substraction + ## Discards the borrow + discard sub(a.limbs, b.limbs) + +func `-=`*(a: var BigInt, b: SecretWord) = + ## Constant-time in-place substraction + ## Discards the borrow + discard sub(a.limbs, b) + func double*(a: var BigInt): SecretBool = ## Constant-time in-place doubling ## Returns the carry @@ -222,6 +228,34 @@ func cneg*(a: var BigInt, ctl: CTBool) = ## Negate if ``ctl`` is true a.limbs.cneg(ctl) +func prod*[rBits, aBits, bBits](r: var BigInt[rBits], a: BigInt[aBits], b: BigInt[bBits]) = + ## Multi-precision multiplication + ## r <- a*b + ## `a`, `b`, `r` can have different sizes + ## if r.bits < a.bits + b.bits + ## the multiplication will overflow. + ## It will be truncated if it cannot fit in r limbs. + r.limbs.prod(a.limbs, b.limbs) + +func prod_high_words*[rBits, aBits, bBits](r: var BigInt[rBits], a: BigInt[aBits], b: BigInt[bBits], lowestWordIndex: static int) = + ## Multi-precision multiplication keeping only high words + ## r <- a*b >> (2^WordBitWidth)^lowestWordIndex + ## + ## `a`, `b`, `r` can have a different number of limbs + ## if `r`.limbs.len < a.limbs.len + b.limbs.len - lowestWordIndex + ## The result will be truncated, i.e. it will be + ## a * b >> (2^WordBitWidth)^lowestWordIndex (mod (2^WordBitwidth)^r.limbs.len) + ## + # This is useful for + # - Barret reduction + # - Approximating multiplication by a fractional constant in the form f(a) = K/C * a + # with K and C known at compile-time. + # We can instead find a well chosen M = (2^WordBitWidth)^w, with M > C (i.e. M is a power of 2 bigger than C) + # Precompute P = K*M/C at compile-time + # and at runtime do P*a/M <=> P*a >> WordBitWidth*w + # i.e. prod_high_words(result, P, a, w) + r.limbs.prod_high_words(a.limbs, b.limbs, lowestWordIndex) + # Bit Manipulation # ------------------------------------------------------------ @@ -231,6 +265,24 @@ func shiftRight*(a: var BigInt, k: int) = ## k MUST be less than the base word size (2^31 or 2^63) a.limbs.shiftRight(k) +func bit*[bits: static int](a: BigInt[bits], index: int): Ct[uint8] = + ## Access an individual bit of `a` + ## Bits are accessed as-if the bit representation is bigEndian + ## for a 8-bit "big-integer" we have + ## (b7, b6, b5, b4, b3, b2, b1, b0) + ## for a 256-bit big-integer + ## (b255, b254, ..., b1, b0) + const SlotShift = log2(WordBitWidth.uint32) + const SelectMask = WordBitWidth - 1 + const BitMask = SecretWord 1 + + let slot = a.limbs[index shr SlotShift] # LimbEndianness is littleEndian + result = ct(slot shr (index and SelectMask) and BitMask, uint8) + +func bit0*(a: BigInt): Ct[uint8] = + ## Access the least significant bit + ct(a.limbs[0] and SecretWord(1), uint8) + # ############################################################ # # Modular BigInt diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim index e46bcc2..ecab650 100644 --- a/constantine/arithmetic/finite_fields.nim +++ b/constantine/arithmetic/finite_fields.nim @@ -26,23 +26,10 @@ import ../primitives, - ../config/[common, curves], + ../config/[common, type_fp, curves], ./bigints, ./limbs_montgomery -type - Fp*[C: static Curve] = object - ## All operations on a field are modulo P - ## P being the prime modulus of the Curve C - ## Internally, data is stored in Montgomery n-residue form - ## with the magic constant chosen for convenient division (a power of 2 depending on P bitsize) - mres*: matchingBigInt(C) - -debug: - func `$`*[C: static Curve](a: Fp[C]): string = - result = "Fp[" & $C - result.add "](" - result.add $a.mres - result.add ')' +export Fp # No exceptions allowed {.push raises: [].} diff --git a/constantine/arithmetic/finite_fields_inversion.nim b/constantine/arithmetic/finite_fields_inversion.nim index 661a884..a59706b 100644 --- a/constantine/arithmetic/finite_fields_inversion.nim +++ b/constantine/arithmetic/finite_fields_inversion.nim @@ -24,7 +24,7 @@ template repeat(num: int, body: untyped) = # Secp256k1 # ------------------------------------------------------------ -func invmod_addchain(r: var Fp[Secp256k1], a: Fp[Secp256k1]) = +func invmod_addchain(r: var Fp[Secp256k1], a: Fp[Secp256k1]) {.used.}= ## We invert via Little Fermat's theorem ## a^(-1) ≡ a^(p-2) (mod p) ## with p = "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F" @@ -120,7 +120,7 @@ func invmod_addchain(r: var Fp[Secp256k1], a: Fp[Secp256k1]) = # Note: it only works for u positive, in particular BN254 doesn't work :/ # Is there a way to only use a^-u or even powers? -func invmod_addchain_bn[C](r: var Fp[C], a: Fp[C]) = +func invmod_addchain_bn[C](r: var Fp[C], a: Fp[C]) {.used.}= ## Inversion on BN prime fields with positive base parameter `u` ## via Little Fermat theorem and leveraging the prime low Hamming weight ## diff --git a/constantine/arithmetic/limbs.nim b/constantine/arithmetic/limbs.nim index 53a36b1..a25822c 100644 --- a/constantine/arithmetic/limbs.nim +++ b/constantine/arithmetic/limbs.nim @@ -8,7 +8,8 @@ import ../config/common, - ../primitives + ../primitives, + ../../helpers/static_for # ############################################################ # @@ -43,21 +44,6 @@ type Limbs*[N: static int] = array[N, SecretWord] ## ## but for unknown reason, it prevents semchecking `bits` -debug: - import strutils - - func toString*(a: Limbs): string = - result = "[" - result.add " 0x" & toHex(BaseType(a[0])) - for i in 1 ..< a.len: - result.add ", 0x" & toHex(BaseType(a[i])) - result.add "])" - - func toHex*(a: Limbs): string = - result = "0x" - for i in countdown(a.len-1, 0): - result.add toHex(BaseType(a[i])) - # No exceptions allowed {.push raises: [].} @@ -180,7 +166,28 @@ func isOdd*(a: Limbs): SecretBool = ## Returns true if a is odd SecretBool(a[0] and SecretWord(1)) -# Arithmetic +# Bit manipulation +# ------------------------------------------------------------ + +func shiftRight*(a: var Limbs, k: int) {.inline.}= + ## Shift right by k. + ## + ## k MUST be less than the base word size (2^32 or 2^64) + # We don't reuse shr as this is an in-place operation + # Do we need to return the shifted out part? + # + # Note: for speed, loading a[i] and a[i+1] + # instead of a[i-1] and a[i] + # is probably easier to parallelize for the compiler + # (antidependence WAR vs loop-carried dependence RAW) + + # checkWordShift(k) + + for i in 0 ..< a.len-1: + a[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k)) + a[a.len-1] = a[a.len-1] shr k + +# Basic Arithmetic # ------------------------------------------------------------ func add*(a: var Limbs, b: Limbs): Carry = @@ -229,6 +236,14 @@ func sub*(a: var Limbs, b: Limbs): Borrow = for i in 0 ..< a.len: subB(result, a[i], a[i], b[i], result) +func sub*(a: var Limbs, w: SecretWord): Borrow = + ## Limbs substraction, sub a number that fits in a word + ## Returns the borrow + result = Borrow(0) + subB(result, a[0], a[0], w, result) + for i in 1 ..< a.len: + subB(result, a[i], a[i], Zero, result) + func csub*(a: var Limbs, b: Limbs, ctl: SecretBool): Borrow = ## Limbs conditional substraction ## Returns the borrow @@ -244,6 +259,17 @@ func csub*(a: var Limbs, b: Limbs, ctl: SecretBool): Borrow = subB(result, diff, a[i], b[i], result) ctl.ccopy(a[i], diff) +func csub*(a: var Limbs, w: SecretWord, ctl: SecretBool): Borrow = + ## Limbs conditional substraction, sub a number that fits in a word + ## Returns the borrow + result = Carry(0) + var diff: SecretWord + subB(result, diff, a[0], w, result) + ctl.ccopy(a[0], diff) + for i in 1 ..< a.len: + subB(result, diff, a[i], Zero, result) + ctl.ccopy(a[i], diff) + func diff*(r: var Limbs, a, b: Limbs): Borrow = ## Diff `a` and `b` into `r` ## `r` is initialized/overwritten @@ -266,33 +292,85 @@ func cneg*(a: var Limbs, ctl: CTBool) = # So we need to xor all words and then add 1 # The "+1" might carry # So we fuse the 2 steps - let mask = -SecretWord(ctl) # Obtain a 0xFF... or 0x00... mask + let mask = -SecretWord(ctl) # Obtain a 0xFF... or 0x00... mask var carry = SecretWord(ctl) for i in 0 ..< a.len: let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively - carry = SecretWord(t < carry) # Carry on + carry = SecretWord(t < carry) # Carry on a[i] = t -# Bit manipulation +{.pop.} # inline + +# Multiplication # ------------------------------------------------------------ -func shiftRight*(a: var Limbs, k: int) = - ## Shift right by k. +func prod*[rLen, aLen, bLen](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) = + ## Multi-precision multiplication + ## r <- a*b ## - ## k MUST be less than the base word size (2^32 or 2^64) - # We don't reuse shr as this is an in-place operation - # Do we need to return the shifted out part? + ## `a`, `b`, `r` can have a different number of limbs + ## if `r`.limbs.len < a.limbs.len + b.limbs.len + ## The result will be truncated, i.e. it will be + ## a * b (mod (2^WordBitwidth)^r.limbs.len) + + # We use Product Scanning / Comba multiplication + var t, u, v = SecretWord(0) + var z: Limbs[rLen] # zero-init, ensure on stack and removes in-place problems + + staticFor i, 0, min(a.len+b.len, r.len): + const ib = min(b.len-1, i) + const ia = i - ib + staticFor j, 0, min(a.len - ia, ib+1): + mulAcc(t, u, v, a[ia+j], b[ib-j]) + + z[i] = v + v = u + u = t + t = SecretWord(0) + + r = z + +func prod_high_words*[rLen, aLen, bLen]( + r: var Limbs[rLen], + a: Limbs[aLen], b: Limbs[bLen], + lowestWordIndex: static int) = + ## Multi-precision multiplication keeping only high words + ## r <- a*b >> (2^WordBitWidth)^lowestWordIndex + ## + ## `a`, `b`, `r` can have a different number of limbs + ## if `r`.limbs.len < a.limbs.len + b.limbs.len - lowestWordIndex + ## The result will be truncated, i.e. it will be + ## a * b >> (2^WordBitWidth)^lowestWordIndex (mod (2^WordBitwidth)^r.limbs.len) # - # Note: for speed, loading a[i] and a[i+1] - # instead of a[i-1] and a[i] - # is probably easier to parallelize for the compiler - # (antidependence WAR vs loop-carried dependence RAW) + # This is useful for + # - Barret reduction + # - Approximating multiplication by a fractional constant in the form f(a) = K/C * a + # with K and C known at compile-time. + # We can instead find a well chosen M = (2^WordBitWidth)^w, with M > C (i.e. M is a power of 2 bigger than C) + # Precompute P = K*M/C at compile-time + # and at runtime do P*a/M <=> P*a >> (WordBitWidth*w) + # i.e. prod_high_words(result, P, a, w) - # checkWordShift(k) + # We use Product Scanning / Comba multiplication + var t, u, v = SecretWord(0) # Will raise warning on empty iterations + var z: Limbs[rLen] # zero-init, ensure on stack and removes in-place problems - for i in 0 ..< a.len-1: - a[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k)) - a[a.len-1] = a[a.len-1] shr k + # The previous 2 columns can affect the lowest word due to carries + # but not the ones before (we accumulate in 3 words (t, u, v)) + const w = lowestWordIndex - 2 + + staticFor i, max(0, w), min(a.len+b.len, r.len+lowestWordIndex): + const ib = min(b.len-1, i) + const ia = i - ib + staticFor j, 0, min(a.len - ia, ib+1): + mulAcc(t, u, v, a[ia+j], b[ib-j]) + + when i >= lowestWordIndex: + z[i-lowestWordIndex] = v + v = u + u = t + t = SecretWord(0) + + r = z -{.pop.} # inline {.pop.} # raises no exceptions diff --git a/constantine/arithmetic/limbs_montgomery.nim b/constantine/arithmetic/limbs_montgomery.nim index 683d065..76edde4 100644 --- a/constantine/arithmetic/limbs_montgomery.nim +++ b/constantine/arithmetic/limbs_montgomery.nim @@ -7,10 +7,12 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import + # Stadard library + std/macros, + # Internal ../config/common, ../primitives, - ./limbs, - macros + ./limbs # ############################################################ # @@ -118,7 +120,7 @@ func montyMul_CIOS_nocarry(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) = discard t.csub(M, not(t < M)) r = t -func montyMul_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) = +func montyMul_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} = ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS) # - Analyzing and Comparing Montgomery Multiplication Algorithms # Cetin Kaya Koc and Tolga Acar and Burton S. Kaliski Jr. @@ -166,6 +168,43 @@ func montyMul_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) = discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)^w r = t +func montyMul_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) = + ## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS) + # - Architectural Enhancements for Montgomery + # Multiplication on Embedded RISC Processors + # Johann Großschädl and Guy-Armand Kamendje, 2003 + # https://pure.tugraz.at/ws/portalfiles/portal/2887154/ACNS2003_AEM.pdf + # + # - New Speed Records for Montgomery Modular + # Multiplication on 8-bit AVR Microcontrollers + # Zhe Liu and Johann Großschädl, 2013 + # https://eprint.iacr.org/2013/882.pdf + var z: typeof(r) # zero-init, ensure on stack and removes in-place problems in tower fields + const L = r.len + var t, u, v = SecretWord(0) + + staticFor i, 0, L: + staticFor j, 0, i: + mulAcc(t, u, v, a[j], b[i-j]) + mulAcc(t, u, v, z[j], M[i-j]) + mulAcc(t, u, v, a[i], b[0]) + z[i] = v * SecretWord(m0ninv) + mulAcc(t, u, v, z[i], M[0]) + v = u + u = t + t = SecretWord(0) + staticFor i, L, 2*L: + staticFor j, i-L+1, L: + mulAcc(t, u, v, a[j], b[i-j]) + mulAcc(t, u, v, z[j], M[i-j]) + z[i-L] = v + v = u + u = t + t = SecretWord(0) + + discard z.csub(M, v.isNonZero() or not(z < M)) + r = z + func montySquare_CIOS_nocarry(r: var Limbs, a, M: Limbs, m0ninv: BaseType) = ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS) ## and no-carry optimization. @@ -253,7 +292,7 @@ func montySquare_CIOS(r: var Limbs, a, M: Limbs, m0ninv: BaseType) = # (_, t[N]) <- t[N+1] + C var carryR: Carry addC(carryR, t[N-1], tN, C, Carry(0)) - addC(carryR, tN, SecretWord(tNp1), Zero, carryR) + addC(carryR, tN, tNp1, Zero, carryR) discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)^w r = t @@ -293,7 +332,7 @@ func montyMul*( when canUseNoCarryMontyMul: montyMul_CIOS_nocarry(r, a, b, M, m0ninv) else: - montyMul_CIOS(r, a, b, M, m0ninv) + montyMul_FIPS(r, a, b, M, m0ninv) func montySquare*(r: var Limbs, a, M: Limbs, m0ninv: static BaseType, canUseNoCarryMontySquare: static bool) = diff --git a/constantine/config/curves.nim b/constantine/config/curves.nim index f5896c1..cc6ecdb 100644 --- a/constantine/config/curves.nim +++ b/constantine/config/curves.nim @@ -8,10 +8,10 @@ import # Standard library - macros, + std/macros, # Internal - ./curves_declaration, ./curves_derived, ./curves_parser, - ../arithmetic/bigints + ./type_bigint, ./common, + ./curves_declaration, ./curves_derived, ./curves_parser export CurveFamily, Curve, SexticTwist @@ -170,6 +170,16 @@ macro getBN_param_6u_minus_1_BE*(C: static Curve): untyped = ## of a BN curve in canonical big-endian representation result = bindSym($C & "_BN_6u_minus_1_BE") +# Endomorphism +# ------------------------------------------------------- +macro getCubicRootOfUnity_mod_p*(C: static Curve): untyped = + ## Get a non-trivial cubic root of unity (mod p) with p the prime field + result = bindSym($C & "_cubicRootOfUnity_mod_p") + +macro getCubicRootOfUnity_mod_r*(C: static Curve): untyped = + ## Get a non-trivial cubic root of unity (mod r) with r the curve order + result = bindSym($C & "_cubicRootOfUnity_mod_r") + # ############################################################ # # Debug info printed at compile-time @@ -187,12 +197,16 @@ macro debugConsts(): untyped {.used.} = let modulus = bindSym(curveName & "_Modulus") let r2modp = bindSym(curveName & "_R2modP") let negInvModWord = bindSym(curveName & "_NegInvModWord") + let cubeRootOfUnity = ident(curveName & "_cubicRootOfUnity") result.add quote do: echo "Curve ", `curveName`,':' echo " Field Modulus: ", `modulus` echo " Montgomery R² (mod P): ", `r2modp` echo " Montgomery -1/P[0] (mod 2^", WordBitWidth, "): ", `negInvModWord` + when declared(`cubeRootOfUnity`): + echo " Cube root of unity: ", `cubeRootOfUnity` + result.add quote do: echo "----------------------------------------------------------------------------" diff --git a/constantine/config/curves_declaration.nim b/constantine/config/curves_declaration.nim index 25ccd75..3efdfc7 100644 --- a/constantine/config/curves_declaration.nim +++ b/constantine/config/curves_declaration.nim @@ -91,6 +91,9 @@ declareCurves: family: BarretoNaehrig bn_u_bitwidth: 63 bn_u: "0x44E992B44A6909F1" # u: 4965661367192848881 + cubicRootOfUnity_modP: "0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48" + # For sanity checks + cubicRootOfUnity_modR: "0x30644e72e131a029048b6e193fd84104cc37a73fec2bc5e9b8ca0b2d36636f23" # G1 Equation: Y^2 = X^3 + 3 # G2 Equation: Y^2 = X^3 + 3/(9+𝑖) @@ -141,6 +144,7 @@ declareCurves: modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab" family: BarretoLynnScott # u: -(2^63 + 2^62 + 2^60 + 2^57 + 2^48 + 2^16) + cubicRootOfUnity_mod_p: "0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac" # G1 Equation: y² = x³ + 4 # G2 Equation: y² = x³ + 4 (1+i) diff --git a/constantine/config/curves_derived.nim b/constantine/config/curves_derived.nim index a055114..93805d4 100644 --- a/constantine/config/curves_derived.nim +++ b/constantine/config/curves_derived.nim @@ -8,10 +8,12 @@ import # Standard library - macros, + std/macros, # Internal - ../arithmetic/precomputed, - ./curves_declaration + ./precompute, + ./curves_declaration, + ./type_fp, + ../io/io_bigints {.experimental: "dynamicBindSym".} @@ -30,8 +32,6 @@ macro genDerivedConstants*(): untyped = # "for curve in low(Curve) .. high(Curve):" # As an ugly workaround, we count # The item at position 0 is a pragma - let curveList = Curve.getType[1].getType - result = newStmtList() template used(name: string): NimNode = @@ -124,6 +124,34 @@ macro genDerivedConstants*(): untyped = ) ) + # const MyCurve_cubicRootOfUnity_mod_p + block: + let cubicHex = ident(curve & "_cubicRootOfUnity_modP_Hex") + let cubic = used(curve & "_cubicRootOfUnity_mod_p") + let M = bindSym(curve & "_Modulus") + let r2modM = ident(curve & "_R2modP") + let m0ninv = ident(curve & "_NegInvModWord") + result.add quote do: + when declared(`cubicHex`): + const `cubic` = block: + var cubic: Fp[Curve(`curveSym`)] + montyResidue_precompute( + cubic.mres, + fromHex(cubic.mres.typeof, `cubicHex`), + `M`, `r2modM`, `m0ninv` + ) + cubic + # const MyCurve_cubicRootOfUnity_mod_r + block: # For scalar decomposition sanity checks + let cubicHex = ident(curve & "_cubicRootOfUnity_modR_Hex") + let cubic = used(curve & "_cubicRootOfUnity_mod_r") + let getCurveOrderBitwidth = ident"getCurveOrderBitwidth" + result.add quote do: + when declared(`cubicHex`): + const `cubic` = fromHex(BigInt[ + `getCurveOrderBitwidth`(Curve(`curveSym`)) + ], `cubicHex`) + if CurveFamilies[curveSym] == BarretoNaehrig: # when declared(MyCurve_BN_param_u): # const MyCurve_BN_u_BE = toCanonicalIntRepr(MyCurve_BN_param_u) @@ -148,3 +176,5 @@ macro genDerivedConstants*(): untyped = bnStmts ) ) + + # echo result.toStrLit() diff --git a/constantine/config/curves_parser.nim b/constantine/config/curves_parser.nim index c85a70f..98162e2 100644 --- a/constantine/config/curves_parser.nim +++ b/constantine/config/curves_parser.nim @@ -10,7 +10,8 @@ import # Standard library std/[macros, strutils], # Internal - ../io/io_bigints, ../arithmetic/[bigints, precomputed] + ../io/io_bigints, + ./type_bigint # Parsing is done in 2 steps: # 1. All declared parameters are collected in a {.compileTime.} seq[CurveParams] @@ -108,6 +109,10 @@ type sexticTwist: SexticTwist sexticNonResidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit) + # Endomorphisms + cubicRootOfUnity_modP: NimNode # nnkStrLit + cubicRootOfUnity_modR: NimNode # nnkStrLit + family: CurveFamily # BN family # ------------------------ @@ -181,6 +186,10 @@ proc parseCurveDecls(defs: var seq[CurveParams], curves: NimNode) = params.bn_u_bitwidth = sectionVal elif sectionId.eqIdent"bn_u": params.bn_u = sectionVal + elif sectionId.eqident"cubicRootOfUnity_modP": + params.cubicRootOfUnity_modP = sectionVal + elif sectionId.eqident"cubicRootOfUnity_modR": + params.cubicRootOfUnity_modR = sectionVal elif sectionId.eqIdent"eq_form": params.eq_form = parseEnum[CurveEquationForm]($sectionVal) elif sectionId.eqIdent"coef_a": @@ -271,6 +280,7 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode = MapCurveFamily.add nnkExprColonExpr.newTree( curve, newLit(family) ) + # Curve equation # ----------------------------------------------- curveEllipticStmts.add newConstStmt( @@ -313,6 +323,19 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode = curveDef.sexticNonResidue_fp2 ) + # Endomorphisms + # ----------------------------------------------- + if not curveDef.cubicRootOfUnity_modP.isNil: + curveExtraStmts.add newConstStmt( + exported($curve & "_cubicRootOfUnity_modP_Hex"), + curveDef.cubicRootOfUnity_modP + ) + if not curveDef.cubicRootOfUnity_modR.isNil: + curveExtraStmts.add newConstStmt( + exported($curve & "_cubicRootOfUnity_modR_Hex"), + curveDef.cubicRootOfUnity_modR + ) + # BN curves # ----------------------------------------------- if family == BarretoNaehrig: diff --git a/constantine/arithmetic/precomputed.nim b/constantine/config/precompute.nim similarity index 78% rename from constantine/arithmetic/precomputed.nim rename to constantine/config/precompute.nim index a2c1f41..b6c7f6b 100644 --- a/constantine/arithmetic/precomputed.nim +++ b/constantine/config/precompute.nim @@ -7,17 +7,18 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import - ./bigints, - ../primitives/constant_time, - ../config/common, + ./type_bigint, ./common, + ../primitives, ../io/io_bigints # Precomputed constants -# ############################################################ +# We need alternate code paths for the VM +# for various reasons +# ------------------------------------------------------------ # ############################################################ # -# Modular primitives +# BigInt primitives # # ############################################################ # @@ -25,7 +26,7 @@ import # Those are NOT tagged compile-time, using CTBool seems to confuse the VM # We don't use distinct types here, they confuse the VM -# Similarly, using addC / subB confuses the VM +# Similarly, using addC / subB from primitives confuses the VM # As we choose to use the full 32/64 bits of the integers and there is no carry flag # in the compile-time VM we need a portable (and slow) "adc" and "sbb". @@ -36,9 +37,15 @@ const HalfBase = (BaseType(1) shl HalfWidth) HalfMask = HalfBase - 1 +func hi(n: BaseType): BaseType = + result = n shr HalfWidth + +func lo(n: BaseType): BaseType = + result = n and HalfMask + func split(n: BaseType): tuple[hi, lo: BaseType] = - result.hi = n shr HalfWidth - result.lo = n and HalfMask + result.hi = n.hi + result.lo = n.lo func merge(hi, lo: BaseType): BaseType = (hi shl HalfWidth) or lo @@ -104,6 +111,56 @@ func sub(a: var BigInt, w: BaseType): bool = result = bool(borrow) +func mul(hi, lo: var BaseType, u, v: BaseType) = + ## Extended precision multiplication + ## (hi, lo) <- u * v + var x0, x1, x2, x3: BaseType + + let + (uh, ul) = u.split() + (vh, vl) = v.split() + + x0 = ul * vl + x1 = ul * vh + x2 = uh * vl + x3 = uh * vh + + x1 += hi(x0) # This can't carry + x1 += x2 # but this can + if x1 < x2: # if carry, add it to x3 + x3 += HalfBase + + hi = x3 + hi(x1) + lo = merge(x1, lo(x0)) + +func muladd1(hi, lo: var BaseType, a, b, c: BaseType) {.inline.} = + ## Extended precision multiplication + addition + ## (hi, lo) <- a*b + c + ## + ## Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFFFFFFFFFE, lo: 0x0000000000000001) + ## so adding any c cannot overflow + var carry: BaseType + mul(hi, lo, a, b) + addC(carry, lo, lo, c, 0) + addC(carry, hi, hi, 0, carry) + +func muladd2(hi, lo: var BaseType, a, b, c1, c2: BaseType) {.inline.}= + ## Extended precision multiplication + addition + addition + ## (hi, lo) <- a*b + c1 + c2 + ## + ## Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFFFFFFFFFE, lo: 0x0000000000000001) + ## so adding 0xFFFFFFFFFFFFFFFF leads to (hi: 0xFFFFFFFFFFFFFFFF, lo: 0x0000000000000000) + ## and we have enough space to add again 0xFFFFFFFFFFFFFFFF without overflowing + var carry1, carry2: BaseType + + mul(hi, lo, a, b) + # Carry chain 1 + addC(carry1, lo, lo, c1, 0) + addC(carry1, hi, hi, 0, carry1) + # Carry chain 2 + addC(carry2, lo, lo, c2, 0) + addC(carry2, hi, hi, 0, carry2) + func cadd(a: var BigInt, b: BigInt, ctl: bool): bool = ## In-place optional addition ## @@ -147,6 +204,22 @@ func doubleMod(a: var BigInt, M: BigInt) = ctl = ctl or not a.csub(M, false) discard csub(a, M, ctl) +func `<`(a, b: BigInt): bool = + var diff, borrow: BaseType + for i in 0 ..< a.limbs.len: + subB(borrow, diff, BaseType(a.limbs[i]), BaseType(b.limbs[i]), borrow) + + result = bool borrow + +func shiftRight*(a: var BigInt, k: int) = + ## Shift right by k. + ## + ## k MUST be less than the base word size (2^32 or 2^64) + + for i in 0 ..< a.limbs.len-1: + a.limbs[i] = (a.limbs[i] shr k) or (a.limbs[i+1] shl (WordBitWidth - k)) + a.limbs[a.limbs.len-1] = a.limbs[a.limbs.len-1] shr k + # ############################################################ # # Montgomery Magic Constants precomputation @@ -413,3 +486,48 @@ func bn_6u_minus_1_BE*[bits: static int]( # Export result.exportRawUint(u_ext, bigEndian) + +# ############################################################ +# +# Compile-time Conversion to Montgomery domain +# +# ############################################################ +# This is needed to avoid recursive dependencies + +func montyMul_precompute(r: var BigInt, a, b, M: BigInt, m0ninv: BaseType) = + ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS) + var t: typeof(M) # zero-init + const N = t.limbs.len + var tN: BaseType + var tNp1: BaseType + + var tmp: BaseType # Distinct types bug in the VM are a huge pain ... + + for i in 0 ..< N: + var A: BaseType + for j in 0 ..< N: + muladd2(A, tmp, BaseType(a.limbs[j]), BaseType(b.limbs[i]), BaseType(t.limbs[j]), A) + t.limbs[j] = SecretWord(tmp) + addC(tNp1, tN, tN, A, 0) + + var C, lo: BaseType + let m = BaseType(t.limbs[0]) * m0ninv + muladd1(C, lo, m, BaseType(M.limbs[0]), BaseType(t.limbs[0])) + for j in 1 ..< N: + muladd2(C, tmp, m, BaseType(M.limbs[j]), BaseType(t.limbs[j]), C) + t.limbs[j-1] = SecretWord(tmp) + + var carry: BaseType + addC(carry, tmp, tN, C, 0) + t.limbs[N-1] = SecretWord(tmp) + addC(carry, tN, tNp1, 0, carry) + + discard t.csub(M, (tN != 0) or not(t < M)) + r = t + +func montyResidue_precompute*(r: var BigInt, a, M, r2modM: BigInt, + m0ninv: BaseType) = + ## Transform a bigint ``a`` from it's natural representation (mod N) + ## to a the Montgomery n-residue representation + ## This is intended for compile-time precomputations-only + montyMul_precompute(r, a, r2ModM, M, m0ninv) diff --git a/constantine/config/type_bigint.nim b/constantine/config/type_bigint.nim new file mode 100644 index 0000000..340cfff --- /dev/null +++ b/constantine/config/type_bigint.nim @@ -0,0 +1,53 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import ./common + +func wordsRequired*(bits: int): int {.compileTime.} = + ## Compute the number of limbs required + # from the **announced** bit length + (bits + WordBitWidth - 1) div WordBitWidth + +type + BigInt*[bits: static int] = object + ## Fixed-precision big integer + ## + ## - "bits" is the announced bit-length of the BigInt + ## This is public data, usually equal to the curve prime bitlength. + ## + ## - "limbs" is an internal field that holds the internal representation + ## of the big integer. Least-significant limb first. Within limbs words are native-endian. + ## + ## This internal representation can be changed + ## without notice and should not be used by external applications or libraries. + limbs*: array[bits.wordsRequired, SecretWord] + + +debug: + import std/strutils + + type Limbs[N: static int] = array[N, SecretWord] + + func toString*(a: Limbs): string = + result = "[" + result.add " 0x" & toHex(BaseType(a[0])) + for i in 1 ..< a.len: + result.add ", 0x" & toHex(BaseType(a[i])) + result.add "])" + + func toHex*(a: Limbs): string = + result = "0x" + for i in countdown(a.len-1, 0): + result.add toHex(BaseType(a[i])) + + func `$`*(a: BigInt): string = + result = "BigInt[" + result.add $BigInt.bits + result.add "](limbs: " + result.add a.limbs.toString() + result.add ")" diff --git a/constantine/config/type_fp.nim b/constantine/config/type_fp.nim new file mode 100644 index 0000000..a206021 --- /dev/null +++ b/constantine/config/type_fp.nim @@ -0,0 +1,28 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ./common, + ./curves_declaration + +type + Fp*[C: static Curve] = object + ## All operations on a field are modulo P + ## P being the prime modulus of the Curve C + ## Internally, data is stored in Montgomery n-residue form + ## with the magic constant chosen for convenient division (a power of 2 depending on P bitsize) + mres*: matchingBigInt(C) + +debug: + import ./type_bigint + + func `$`*[C: static Curve](a: Fp[C]): string = + result = "Fp[" & $C + result.add "](" + result.add $a.mres + result.add ')' diff --git a/constantine/elliptic/ec_endomorphism_accel.nim b/constantine/elliptic/ec_endomorphism_accel.nim new file mode 100644 index 0000000..eece904 --- /dev/null +++ b/constantine/elliptic/ec_endomorphism_accel.nim @@ -0,0 +1,600 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Standard Library + std/typetraits, + # Internal + ../../helpers/static_for, + ../primitives, + ../config/[common, curves, type_bigint], + ../arithmetic, + ../io/io_bigints, + ../towers, + ./ec_weierstrass_affine, + ./ec_weierstrass_projective, + ./ec_endomorphism_params + +# ############################################################ +# +# Endomorphism acceleration for +# Scalar Multiplication +# +# ############################################################ +# +# This files implements endomorphism-acceleration of scalar multiplication +# using: +# - GLV endomorphism on G1 (Gallant-Lambert-Vanstone) +# - GLV and GLS endomorphisms on G2 (Galbraith-Lin-Scott) +# - NAF recoding (windowed Non-Adjacent-Form) + + +# Secret scalar + dynamic point +# ---------------------------------------------------------------- +# +# This section targets the case where the scalar multiplication [k]P +# involves: +# - a secret scalar `k`, hence requiring constant-time operations +# - a dynamic `P` +# +# For example signing a message +# +# When P is known ahead of time (for example it's the generator) +# We can precompute the point decomposition with plain scalar multiplication +# and not require a fast endomorphism. +# (For example generating a public-key) + +type + Recoded[LengthInDigits: static int] = distinct array[LengthInDigits, byte] + GLV_SAC[M, LengthInDigits: static int] = array[M, Recoded[LengthInDigits]] + ## GLV-Based Sign-Aligned-Column representation + ## see Faz-Hernandez, 2013 + ## + ## (i) Length of every sub-scalar is fixed and given by + ## l = ⌈log2 r/m⌉ + 1 where r is the prime subgroup order + ## and m the number of dimensions of the GLV endomorphism + ## (ii) Exactly one subscalar which should be odd + ## is expressed by a signed nonzero representation + ## with all digits ∈ {1, −1} + ## (iii) Other subscalars have digits ∈ {0, 1, −1} + ## + ## We pack the representation, using 2 bits per digit: + ## 0 = 0b00 + ## 1 = 0b01 + ## -1 = 0b11 + ## + ## This means that GLV_SAC uses twice the size of a canonical integer + ## + ## Digit-Endianness is bigEndian + +const + BitSize = 2 + Shift = 2 # log2(4) - we can store 4 digit per byte + ByteMask = 3 # we need (mod 4) to access a packed bytearray + DigitMask = 0b11 # Digits take 2-bit + +# template signExtend_2bit(recoded: byte): int8 = +# ## We need to extend: +# ## - 0b00 to 0b0000_0000 ( 0) +# ## - 0b01 to 0b0000_0001 ( 1) +# ## - 0b11 to 0b1111_1111 (-1) +# ## +# ## This can be done by shifting left to have +# ## - 0b00 to 0b0000_0000 +# ## - 0b01 to 0b0100_0000 +# ## - 0b11 to 0b1100_0000 +# ## +# ## And then an arithmetic right shift (SAR) +# ## +# ## However there is no builtin SAR +# ## we can get it in C by right-shifting +# ## with the main compilers/platforms +# ## (GCC, Clang, MSVC, ...) +# ## but this is implementation defined behavior +# ## Nim `ashr` uses C signed right shifting +# ## +# ## We could check the compiler to ensure we only use +# ## well documented behaviors: https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation +# ## but if we can avoid that altogether in a crypto library +# ## +# ## Instead we use signed bitfield which are automatically sign-extended +# ## in a portable way as sign extension is automatic for builtin types + +type + SignExtender = object + ## Uses C builtin types sign extension to sign extend 2-bit to 8-bit + ## in a portable way as sign extension is automatic for builtin types + ## http://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend + digit {.bitsize:2.}: int8 + +# TODO: use unsigned to avoid checks and potentially leaking secrets +# or push checks off (or prove that checks can be elided once Nim has Z3 in the compiler) +proc `[]`(recoding: Recoded, + digitIdx: int): int8 {.inline.}= + ## 0 <= digitIdx < LengthInDigits + ## returns digit ∈ {0, 1, −1} + const len = Recoded.LengthInDigits + assert digitIdx < len + + let slot = distinctBase(recoding)[ + len-1 - (digitIdx shr Shift) + ] + let recoded = slot shr (BitSize*(digitIdx and ByteMask)) and DigitMask + var signExtender: SignExtender + # Hack with C assignment that return values + {.emit: [result, " = ", signExtender, ".digit = ", recoded, ";"].} + # " # Fix highlighting bug in VScode + + +proc `[]=`(recoding: var Recoded, + digitIdx: int, value: int8) {.inline.}= + ## 0 <= digitIdx < LengthInDigits + ## returns digit ∈ {0, 1, −1} + ## This is write-once + const len = Recoded.LengthInDigits + assert digitIdx < Recoded.LengthInDigits + + let slot = distinctBase(recoding)[ + len-1 - (digitIdx shr Shift) + ].addr + + let shifted = byte((value and DigitMask) shl (BitSize*(digitIdx and ByteMask))) + slot[] = slot[] or shifted + + +func nDimMultiScalarRecoding[M, L: static int]( + dst: var GLV_SAC[M, L], + src: MultiScalar[M, L] + ) = + ## This recodes N scalar for GLV multi-scalar multiplication + ## with side-channel resistance. + ## + ## Precondition src[0] is odd + # + # - Efficient and Secure Algorithms for GLV-Based Scalar + # Multiplication and their Implementation on GLV-GLS + # Curves (Extended Version) + # Armando Faz-Hernández, Patrick Longa, Ana H. Sánchez, 2013 + # https://eprint.iacr.org/2013/158.pdf + # + # Algorithm 1 Protected Recoding Algorithm for the GLV-SAC Representation. + # ------------------------------------------------------------------------ + # + # Input: m l-bit positive integers kj = (kj_l−1, ..., kj_0)_2 for + # 0 ≤ j < m, an odd “sign-aligner” kJ ∈ {kj}^m, where + # l = ⌈log2 r/m⌉ + 1, m is the GLV dimension and r is + # the prime subgroup order. + # Output: (bj_l−1 , ..., bj_0)GLV-SAC for 0 ≤ j < m, where + # bJ_i ∈ {1, −1}, and bj_i ∈ {0, bJ_i} for 0 ≤ j < m with + # j != J. + # ------------------------------------------------------------------------ + # + # 1: bJ_l-1 = 1 + # 2: for i = 0 to (l − 2) do + # 3: bJ_i = 2kJ_i+1 - 1 + # 4: for j = 0 to (m − 1), j != J do + # 5: for i = 0 to (l − 1) do + # 6: bj_i = bJ_i kj_0 + # 7: kj = ⌊kj/2⌋ − ⌊bj_i/2⌋ + # 8: return (bj_l−1 , . . . , bj_0)_GLV-SAC for 0 ≤ j < m. + # + # - Guide to Pairing-based Cryptography + # Chapter 6: Scalar Multiplication and Exponentiation in Pairing Groups + # Joppe Bos, Craig Costello, Michael Naehrig + # + # We choose kJ = k0 + # + # Implementation strategy and points of attention + # - The subscalars kj must support extracting the least significant bit + # - The subscalars kj must support floor division by 2 + # For that floored division, kj is 0 or positive + # - The subscalars kj must support individual bit accesses + # - The subscalars kj must support addition by a small value (0 or 1) + # Hence we choose to use our own BigInt representation. + # + # - The digit bji must support floor division by 2 + # For that floored division, bji may be negative!!! + # In particular floored division of -1 is -1 not 0. + # This means that arithmetic right shift must be used instead of logical right shift + + # assert src[0].isOdd - Only happen on implementation error, we don't want to leak a single bit + + var k = src # Keep the source multiscalar in registers + template b: untyped {.dirty.} = dst + + b[0][L-1] = 1 + for i in 0 .. L-2: + b[0][i] = 2 * k[0].bit(i+1).int8 - 1 + for j in 1 .. M-1: + for i in 0 .. L-1: + let bji = b[0][i] * k[j].bit0.int8 + b[j][i] = bji + # In the following equation + # kj = ⌊kj/2⌋ − ⌊bj_i/2⌋ + # We have ⌊bj_i/2⌋ (floor division) + # = -1 if bj_i == -1 + # = 0 if bj_i ∈ {0, 1} + # So we turn that statement in an addition + # by the opposite + k[j].div2() + k[j] += SecretWord -bji.ashr(1) + +func buildLookupTable[M: static int, F]( + P: ECP_SWei_Proj[F], + endomorphisms: array[M-1, ECP_SWei_Proj[F]], + lut: var array[1 shl (M-1), ECP_SWei_Proj[F]], + ) = + ## Build the lookup table from the base point P + ## and the curve endomorphism + # + # Algorithm + # Compute P[u] = P0 + u0 P1 +...+ um−2 Pm−1 for all 0≤u<2^m−1, where + # u= (um−2,...,u0)_2. + # + # Traduction: + # for u in 0 ..< 2^(m-1) + # lut[u] = P0 + # iterate on the bit representation of u + # if the bit is set, add the matching endomorphism to lut[u] + # + # Note: This is for variable/unknown point P + # when P is fixed at compile-time (for example is the generator point) + # alternative algorithms are more efficient. + # + # Implementation: + # We optimize the basic algorithm to reuse already computed table entries + # by noticing for example that: + # - 6 represented as 0b0110 requires P0 + P2 + P3 + # - 2 represented as 0b0010 already required P0 + P2 + # To find the already computed table entry, we can index + # the table with the current `u` with the MSB unset + # and add to it the endormorphism at the index matching the MSB position + # + # This scheme ensures 1 addition per table entry instead of a number + # of addition dependent on `u` Hamming Weight + # + # TODO: + # 1. Window method for M == 2 + # 2. Have P in affine coordinate and build the table with mixed addition + # assuming endomorphism φi(P) do not affect the Z coordinates + # (if table is big enough/inversion cost is amortized) + # 3. Use Montgomery simultaneous inversion to have the table in + # affine coordinate so that we can use mixed addition in teh main loop + lut[0] = P + for u in 1'u32 ..< 1 shl (M-1): + # The recoding allows usage of 2^(n-1) table instead of the usual 2^n with NAF + let msb = u.log2() # No undefined, u != 0 + lut[u].sum(lut[u.clearBit(msb)], endomorphisms[msb]) + # } # highlight bug, ... + +func tableIndex(glv: GLV_SAC, bit: int): SecretWord = + ## Compose the secret table index from + ## the GLV-SAC representation and the "bit" accessed + # TODO: + # We are currently storing 2-bit for 0, 1, -1 in the GLV-SAC representation + # but since columns have all the same sign, determined by k0, + # we only need 0 and 1 dividing storage per 2 + staticFor i, 1, GLV_SAC.M: + result = result or SecretWord((glv[i][bit] and 1) shl (i-1)) + +func isNeg(glv: GLV_SAC, bit: int): SecretBool = + ## Returns true if the bit requires substraction + SecretBool(glv[0][bit] < 0) + +func secretLookup[T](dst: var T, table: openArray[T], index: SecretWord) = + ## Load a table[index] into `dst` + ## This is constant-time, whatever the `index`, its value is not leaked + ## This is also protected against cache-timing attack by always scanning the whole table + for i in 0 ..< table.len: + let selector = SecretWord(i) == index + dst.ccopy(table[i], selector) + +func scalarMulGLV*[scalBits]( + P: var ECP_SWei_Proj, + scalar: BigInt[scalBits] + ) = + ## Elliptic Curve Scalar Multiplication + ## + ## P <- [k] P + ## + ## This is a scalar multiplication accelerated by an endomorphism + ## via the GLV (Gallant-lambert-Vanstone) decomposition. + const C = P.F.C # curve + static: doAssert: scalBits == C.getCurveOrderBitwidth() + when P.F is Fp: + const M = 2 + + # 1. Compute endomorphisms + var endomorphisms: array[M-1, typeof(P)] # TODO: zero-init not required + endomorphisms[0] = P + endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p() + + # 2. Decompose scalar into mini-scalars + const L = (C.getCurveOrderBitwidth() + M - 1) div M + 1 + var miniScalars: array[M, BigInt[L]] # TODO: zero-init not required + when C == BN254_Snarks: + scalar.decomposeScalar_BN254_Snarks_G1( + miniScalars + ) + elif C == BLS12_381: + scalar.decomposeScalar_BLS12_381_G1( + miniScalars + ) + else: + {.error: "Unsupported curve for GLV acceleration".} + + # 3. TODO: handle negative mini-scalars + # Either negate the associated base and the scalar (in the `endomorphisms` array) + # Or use Algorithm 3 from Faz et al which can encode the sign + # in the GLV representation at the low low price of 1 bit + + # 4. Precompute lookup table + var lut: array[1 shl (M-1), ECP_SWei_Proj] # TODO: zero-init not required + buildLookupTable(P, endomorphisms, lut) + # TODO: Montgomery simultaneous inversion (or other simultaneous inversion techniques) + # so that we use mixed addition formulas in the main loop + + # 5. Recode the miniscalars + # we need the base miniscalar (that encodes the sign) + # to be odd, and this in constant-time to protect the secret least-significant bit. + var k0isOdd = miniScalars[0].isOdd() + discard miniScalars[0].csub(SecretWord(1), not k0isOdd) + + var recoded: GLV_SAC[2, L] # zero-init required + recoded.nDimMultiScalarRecoding(miniScalars) + + # 6. Proceed to GLV accelerated scalar multiplication + var Q: typeof(P) # TODO: zero-init not required + Q.secretLookup(lut, recoded.tableIndex(L-1)) + Q.cneg(recoded.isNeg(L-1)) + + for i in countdown(L-2, 0): + Q.double() + var tmp: typeof(Q) # TODO: zero-init not required + tmp.secretLookup(lut, recoded.tableIndex(i)) + tmp.cneg(recoded.isNeg(i)) + Q += tmp + + # Now we need to correct if the sign miniscalar was not odd + # we missed an addition + lut[0] += Q # Contains Q + P0 + P = Q + P.ccopy(lut[0], not k0isOdd) + +# Sanity checks +# ---------------------------------------------------------------- +# See page 7 of +# +# - Efficient and Secure Algorithms for GLV-Based Scalar +# Multiplication and their Implementation on GLV-GLS +# Curves (Extended Version) +# Armando Faz-Hernández, Patrick Longa, Ana H. Sánchez, 2013 +# https://eprint.iacr.org/2013/158.pdf + +when isMainModule: + import ../io/io_bigints + + proc toString(glvSac: GLV_SAC): string = + for j in 0 ..< glvSac.M: + result.add "k" & $j & ": [" + for i in countdown(glvSac.LengthInDigits-1, 0): + result.add " " & (block: + case glvSac[j][i] + of -1: "1\u{0305}" + of 0: "0" + of 1: "1" + else: + raise newException(ValueError, "Unexpected encoded value: " & $glvSac[j][i]) + ) # " # Unbreak VSCode highlighting bug + result.add " ]\n" + + + iterator bits(u: SomeInteger): tuple[bitIndex: int32, bitValue: uint8] = + ## bit iterator, starts from the least significant bit + var u = u + var idx = 0'i32 + while u != 0: + yield (idx, uint8(u and 1)) + u = u shr 1 + inc idx + + func buildLookupTable_naive[M: static int]( + P: string, + endomorphisms: array[M-1, string], + lut: var array[1 shl (M-1), string], + ) = + ## Checking the LUT by building strings of endomorphisms additions + ## This naively translates the lookup table algorithm + ## Compute P[u] = P0 + u0 P1 +...+ um−2 Pm−1 for all 0≤u<2m−1, where + ## u= (um−2,...,u0)_2. + ## The number of additions done per entries is equal to the + ## iteration variable `u` Hamming Weight + for u in 0 ..< 1 shl (M-1): + lut[u] = P + for u in 0 ..< 1 shl (M-1): + for idx, bit in bits(u): + if bit == 1: + lut[u] &= " + " & endomorphisms[idx] + + func buildLookupTable_reuse[M: static int]( + P: string, + endomorphisms: array[M-1, string], + lut: var array[1 shl (M-1), string], + ) = + ## Checking the LUT by building strings of endomorphisms additions + ## This reuses previous table entries so that only one addition is done + ## per new entries + lut[0] = P + for u in 1'u32 ..< 1 shl (M-1): + let msb = u.log2() # No undefined, u != 0 + lut[u] = lut[u.clearBit(msb)] & " + " & endomorphisms[msb] + + proc main_lut() = + const M = 4 # GLS-4 decomposition + const miniBitwidth = 4 # Bitwidth of the miniscalars resulting from scalar decomposition + + var k: MultiScalar[M, miniBitwidth] + var kRecoded: GLV_SAC[M, miniBitwidth+1] + + k[0].fromUint(11) + k[1].fromUint(6) + k[2].fromuint(14) + k[3].fromUint(3) + + kRecoded.nDimMultiScalarRecoding(k) + + echo kRecoded.toString() + + var lut: array[1 shl (M-1), string] + let + P = "P0" + endomorphisms = ["P1", "P2", "P3"] + + buildLookupTable_naive(P, endomorphisms, lut) + echo lut + doAssert lut[0] == "P0" + doAssert lut[1] == "P0 + P1" + doAssert lut[2] == "P0 + P2" + doAssert lut[3] == "P0 + P1 + P2" + doAssert lut[4] == "P0 + P3" + doAssert lut[5] == "P0 + P1 + P3" + doAssert lut[6] == "P0 + P2 + P3" + doAssert lut[7] == "P0 + P1 + P2 + P3" + + var lut_reuse: array[1 shl (M-1), string] + buildLookupTable_reuse(P, endomorphisms, lut_reuse) + echo lut_reuse + doAssert lut == lut_reuse + + main_lut() + echo "---------------------------------------------" + + proc main_decomp() = + const M = 2 + const scalBits = BN254_Snarks.getCurveOrderBitwidth() + const miniBits = (scalBits+M-1) div M + + block: + let scalar = BigInt[scalBits].fromHex( + "0x24a0b87203c7a8def0018c95d7fab106373aebf920265c696f0ae08f8229b3f3" + ) + + var decomp: MultiScalar[M, miniBits] + decomposeScalar_BN254_Snarks_G1(scalar, decomp) + + doAssert: bool(decomp[0] == BigInt[127].fromHex"14928105460c820ccc9a25d0d953dbfe") + doAssert: bool(decomp[1] == BigInt[127].fromHex"13a2f911eb48a578844b901de6f41660") + + block: + let scalar = BigInt[scalBits].fromHex( + "24554fa6d0c06f6dc51c551dea8b058cd737fc8d83f7692fcebdd1842b3092c4" + ) + + var decomp: MultiScalar[M, miniBits] + decomposeScalar_BN254_Snarks_G1(scalar, decomp) + + doAssert: bool(decomp[0] == BigInt[127].fromHex"28cf7429c3ff8f7e82fc419e90cc3a2") + doAssert: bool(decomp[1] == BigInt[127].fromHex"457efc201bdb3d2e6087df36430a6db6") + + block: + let scalar = BigInt[scalBits].fromHex( + "288c20b297b9808f4e56aeb70eabf269e75d055567ff4e05fe5fb709881e6717" + ) + + var decomp: MultiScalar[M, miniBits] + decomposeScalar_BN254_Snarks_G1(scalar, decomp) + + doAssert: bool(decomp[0] == BigInt[127].fromHex"4da8c411566c77e00c902eb542aaa66b") + doAssert: bool(decomp[1] == BigInt[127].fromHex"5aa8f2f15afc3217f06677702bd4e41a") + + + main_decomp() + + echo "---------------------------------------------" + + # This tests the multiplication against the Table 1 + # of the paper + + # Coef Decimal Binary GLV-SAC recoded + # | k0 | | 11 | | 0 1 0 1 1 | | 1 -1 1 -1 1 | + # | k1 | = | 6 | = | 0 0 1 1 0 | = | 1 -1 0 -1 0 | + # | k2 | | 14 | | 0 1 1 1 0 | | 1 0 0 -1 0 | + # | k3 | | 3 | | 0 0 0 1 1 | | 0 0 1 -1 1 | + + # i | 3 2 1 0 + # -------------------+---------------------------------------------------------------------- + # 2Q | 2P0+2P1+2P2 2P0+2P1+4P2 6P0+4P1+8P2+2P3 10P0+6P1+14P2+2P3 + # Q + sign_i T[ki] | P0+P1+2P2 3P0+2P1+4P2+P3 5P0+3P1+7P2+P3 11P0+6P1+14P2+3P3 + + type Endo = enum + P0 + P1 + P2 + P3 + + func buildLookupTable_reuse[M: static int]( + P: Endo, + endomorphisms: array[M-1, Endo], + lut: var array[1 shl (M-1), set[Endo]], + ) = + ## Checking the LUT by building strings of endomorphisms additions + ## This reuses previous table entries so that only one addition is done + ## per new entries + lut[0].incl P + for u in 1'u32 ..< 1 shl (M-1): + let msb = u.log2() # No undefined, u != 0 + lut[u] = lut[u.clearBit(msb)] + {endomorphisms[msb]} + + + proc mainFullMul() = + const M = 4 # GLS-4 decomposition + const miniBitwidth = 4 # Bitwidth of the miniscalars resulting from scalar decomposition + const L = miniBitwidth + 1 # Bitwidth of the recoded scalars + + var k: MultiScalar[M, miniBitwidth] + var kRecoded: GLV_SAC[M, L] + + k[0].fromUint(11) + k[1].fromUint(6) + k[2].fromuint(14) + k[3].fromUint(3) + + kRecoded.nDimMultiScalarRecoding(k) + + echo kRecoded.toString() + + var lut: array[1 shl (M-1), set[Endo]] + let + P = P0 + endomorphisms = [P1, P2, P3] + + buildLookupTable_reuse(P, endomorphisms, lut) + echo lut + + var Q: array[Endo, int] + + # Multiplication + assert bool k[0].isOdd() + # Q = sign_l-1 P[K_l-1] + let idx = kRecoded.tableIndex(L-1) + for p in lut[int(idx)]: + Q[p] = if kRecoded.isNeg(L-1).bool: -1 else: 1 + # Loop + for i in countdown(L-2, 0): + # Q = 2Q + for val in Q.mitems: val *= 2 + echo "2Q: ", Q + # Q = Q + sign_l-1 P[K_l-1] + let idx = kRecoded.tableIndex(i) + for p in lut[int(idx)]: + Q[p] += (if kRecoded.isNeg(i).bool: -1 else: 1) + echo "Q + sign_l-1 P[K_l-1]: ", Q + + echo Q + + mainFullMul() diff --git a/constantine/elliptic/ec_endomorphism_params.nim b/constantine/elliptic/ec_endomorphism_params.nim new file mode 100644 index 0000000..1f00a75 --- /dev/null +++ b/constantine/elliptic/ec_endomorphism_params.nim @@ -0,0 +1,135 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internal + ../../helpers/static_for, + ../primitives, + ../config/[common, curves, type_bigint], + ../arithmetic, + ../io/io_bigints, + ../towers, + ./ec_weierstrass_projective + +# Parameters for GLV endomorphisms acceleration +# ---------------------------------------------------------------------------------------- +# TODO: cleanup, those should be derived in the config folder +# and stored in a constant + +type + MultiScalar*[M, LengthInBits: static int] = array[M, BigInt[LengthInBits]] + ## Decomposition of a secret scalar in multiple scalars + +# Chapter 6.3.1 - Guide to Pairing-based Cryptography +const Lattice_BN254_Snarks_G1: array[2, array[2, tuple[b: BigInt[127], isNeg: bool]]] = [ + # Curve of order 254 -> mini scalars of size 127 + # u = 0x44E992B44A6909F1 + [(BigInt[127].fromHex"0x89d3256894d213e3", false), # 2u + 1 + (BigInt[127].fromHex"0x6f4d8248eeb859fd0be4e1541221250b", false)], # 6u² + 4u + 1 + [(BigInt[127].fromHex"0x6f4d8248eeb859fc8211bbeb7d4f1128", false), # 6u² + 2u + (BigInt[127].fromHex"0x89d3256894d213e3", true)] # -2u - 1 +] + +const Babai_BN254_Snarks_G1 = [ + # Vector for Babai rounding + BigInt[127].fromHex"0x89d3256894d213e3", # 2u + 1 + BigInt[127].fromHex"0x6f4d8248eeb859fd0be4e1541221250b" # 6u² + 4u + 1 +] + +func decomposeScalar_BN254_Snarks_G1*[M, scalBits, L: static int]( + scalar: BigInt[scalBits], + miniScalars: var MultiScalar[M, L] + ) = + ## Decompose a secret scalar into mini-scalar exploiting + ## BN254_Snarks specificities. + ## + ## TODO: Generalize to all BN curves + ## - needs a Lattice type + ## - needs to better support negative bigints, (extra bit for sign?) + + static: doAssert L == (scalBits + M - 1) div M + 1 + # 𝛼0 = (0x2d91d232ec7e0b3d7 * s) >> 256 + # 𝛼1 = (0x24ccef014a773d2d25398fd0300ff6565 * s) >> 256 + const + w = BN254_Snarks.getCurveOrderBitwidth().wordsRequired() + alphaHats = (BigInt[66].fromHex"0x2d91d232ec7e0b3d7", + BigInt[130].fromHex"0x24ccef014a773d2d25398fd0300ff6565") + + var alphas{.noInit.}: array[M, BigInt[scalBits]] # TODO size 66+254 and 130+254 + + staticFor i, 0, M: + alphas[i].prod_high_words(alphaHats[i], scalar, w) + + # We have k0 = s - 𝛼0 b00 - 𝛼1 b10 + # and kj = 0 - 𝛼j b0j - 𝛼1 b1j + var k: array[M, BigInt[scalBits]] + k[0] = scalar + for miniScalarIdx in 0 ..< M: + for basisIdx in 0 ..< M: + var alphaB {.noInit.}: BigInt[scalBits] + alphaB.prod(alphas[basisIdx], Lattice_BN254_Snarks_G1[basisIdx][miniScalarIdx].b) # TODO small lattice size + if Lattice_BN254_Snarks_G1[basisIdx][miniScalarIdx].isNeg: + k[miniScalarIdx] += alphaB + else: + k[miniScalarIdx] -= alphaB + + miniScalars[miniScalarIdx].copyTruncatedFrom(k[miniScalarIdx]) + +const Lattice_BLS12_381_G1: array[2, array[2, tuple[b: BigInt[128], isNeg: bool]]] = [ + # Curve of order 254 -> mini scalars of size 127 + # u = 0x44E992B44A6909F1 + [(BigInt[128].fromHex"0xac45a4010001a40200000000ffffffff", false), # u² - 1 + (BigInt[128].fromHex"0x1", true)], # -1 + [(BigInt[128].fromHex"0x1", false), # 1 + (BigInt[128].fromHex"0xac45a4010001a4020000000100000000", false)] # u² +] + +const Babai_BLS12_381_G1 = [ + # Vector for Babai rounding + BigInt[128].fromHex"0xac45a4010001a4020000000100000000", + BigInt[128].fromHex"0x1" +] + +func decomposeScalar_BLS12_381_G1*[M, scalBits, L: static int]( + scalar: BigInt[scalBits], + miniScalars: var MultiScalar[M, L] + ) = + ## Decompose a secret scalar into mini-scalar exploiting + ## BLS12_381 specificities. + ## + ## TODO: Generalize to all BLS curves + ## - needs a Lattice type + ## - needs to better support negative bigints, (extra bit for sign?) + + static: doAssert L == (scalBits + M - 1) div M + 1 + # 𝛼0 = (0x2d91d232ec7e0b3d7 * s) >> 256 + # 𝛼1 = (0x24ccef014a773d2d25398fd0300ff6565 * s) >> 256 + const + w = BLS12_381.getCurveOrderBitwidth().wordsRequired() + alphaHats = (BigInt[129].fromHex"0x17c6becf1e01faadd63f6e522f6cfee30", + BigInt[2].fromHex"0x2") + + var alphas{.noInit.}: array[M, BigInt[scalBits]] # TODO size 256+255 and 132+255 + + staticFor i, 0, M: + alphas[i].prod_high_words(alphaHats[i], scalar, w) + + # We have k0 = s - 𝛼0 b00 - 𝛼1 b10 + # and kj = 0 - 𝛼j b0j - 𝛼1 b1j + var k: array[M, BigInt[scalBits]] + k[0] = scalar + for miniScalarIdx in 0 ..< M: + for basisIdx in 0 ..< M: + var alphaB {.noInit.}: BigInt[scalBits] + alphaB.prod(alphas[basisIdx], Lattice_BLS12_381_G1[basisIdx][miniScalarIdx].b) # TODO small lattice size + if Lattice_BLS12_381_G1[basisIdx][miniScalarIdx].isNeg: + k[miniScalarIdx] += alphaB + else: + k[miniScalarIdx] -= alphaB + + miniScalars[miniScalarIdx].copyTruncatedFrom(k[miniScalarIdx]) diff --git a/constantine/elliptic/ec_scalar_mul.nim b/constantine/elliptic/ec_scalar_mul.nim new file mode 100644 index 0000000..3fc8cfd --- /dev/null +++ b/constantine/elliptic/ec_scalar_mul.nim @@ -0,0 +1,229 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../primitives, + ../config/[common, curves], + ../arithmetic, + ../towers, + ./ec_weierstrass_projective, + ./ec_endomorphism_accel + +# ############################################################ +# # +# Scalar Multiplication # +# # +# ############################################################ +# +# Scalar multiplication is a key algorithm for cryptographic protocols: +# - it is slow, +# - it is performance critical as it is used to generate signatures and authenticate messages +# - it is a high-value target as the "scalar" is very often the user secret key +# +# A safe scalar multiplication MUST: +# - Use no branching (to prevent timing and simple power analysis attacks) +# - Always do the same memory accesses (in particular for table lookups) (to prevent cache-timing attacks) +# - Not expose the bitlength of the exponent (use the curve order bitlength instead) +# +# Constantine does not make an extra effort to defend against the smart-cards +# and embedded device attacks: +# - Differential Power-Analysis which may allow for example retrieving bit content depending on the cost of writing 0 or 1 +# (Address-bit DPA by Itoh, Izu and Takenaka) +# - Electro-Magnetic which can be used in a similar way to power analysis but based on EM waves +# - Fault Attacks which can be used by actively introducing faults (via a laser for example) in an algorithm +# +# The current security efforts are focused on preventing attacks +# that are effective remotely including through the network, +# a colocated VM or a malicious process on your phone. +# +# - Survey for Performance & Security Problems of Passive Side-channel Attacks Countermeasures in ECC\ +# Rodrigo Abarúa, Claudio Valencia, and Julio López, 2019\ +# https://eprint.iacr.org/2019/010 +# +# - State-of-the-art of secure ECC implementations:a survey on known side-channel attacks and countermeasures\ +# Junfeng Fan,XuGuo, Elke De Mulder, Patrick Schaumont, Bart Preneel and Ingrid Verbauwhede, 2010 +# https://www.esat.kuleuven.be/cosic/publications/article-1461.pdf + +template checkScalarMulScratchspaceLen(len: int) = + ## CHeck that there is a minimum of scratchspace to hold the temporaries + debug: + assert len >= 2, "Internal Error: the scratchspace for scalar multiplication should be equal or greater than 2" + +func getWindowLen(bufLen: int): uint = + ## Compute the maximum window size that fits in the scratchspace buffer + checkScalarMulScratchspaceLen(bufLen) + result = 4 + while (1 shl result) + 1 > bufLen: + dec result + +func scalarMulPrologue( + P: var ECP_SWei_Proj, + scratchspace: var openarray[ECP_SWei_Proj] + ): uint = + ## Setup the scratchspace + ## Returns the fixed-window size for scalar mul with window optimization + result = scratchspace.len.getWindowLen() + # Precompute window content, special case for window = 1 + # (i.e scratchspace has only space for 2 temporaries) + # The content scratchspace[2+k] is set at [k]P + # with scratchspace[0] untouched + if result == 1: + scratchspace[1] = P + else: + scratchspace[2] = P + for k in 2 ..< 1 shl result: + scratchspace[k+1].sum(scratchspace[k], P) + + # Set a to infinity + P.setInf() + +func scalarMulDoubling( + P: var ECP_SWei_Proj, + exponent: openArray[byte], + tmp: var ECP_SWei_Proj, + window: uint, + acc, acc_len: var uint, + e: var int + ): tuple[k, bits: uint] {.inline.} = + ## Doubling steps of doubling and add for scalar multiplication + ## Get the next k bits in range [1, window) + ## and double k times + ## Returns the number of doubling done and the corresponding bits. + ## + ## Updates iteration variables and accumulators + # + # ⚠️: Extreme care should be used to not leak + # the exponent bits nor its real bitlength + # i.e. if the exponent is zero but encoded in a + # 256-bit integer, only "256" should leak + # as for most applications like ECDSA or BLS signature schemes + # the scalar is the user secret key. + + # Get the next bits + # acc/acc_len must be uint to avoid Nim runtime checks leaking bits + # e is public + var k = window + if acc_len < window: + if e < exponent.len: + acc = (acc shl 8) or exponent[e].uint + inc e + acc_len += 8 + else: # Drained all exponent bits + k = acc_len + + let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1) + acc_len -= k + + # We have k bits and can do k doublings + for i in 0 ..< k: + tmp.double(P) + P = tmp + + return (k, bits) + + +func scalarMulGeneric*( + P: var ECP_SWei_Proj, + scalar: openArray[byte], + scratchspace: var openArray[ECP_SWei_Proj] + ) = + ## Elliptic Curve Scalar Multiplication + ## + ## P <- [k] P + ## + ## This uses fixed-window optimization if possible + ## `scratchspace` MUST be of size 2 .. 2^4 + ## + ## This is suitable to use with secret `scalar`, in particular + ## to derive a public key from a private key or + ## to sign a message. + ## + ## Particular care has been given to defend against the following side-channel attacks: + ## - timing attacks: all exponents of the same length + ## will take the same time including + ## a "zero" exponent of length 256-bit + ## - cache-timing attacks: Constantine does use a precomputed table + ## but when extracting a value from the table + ## the whole table is always accessed with the same pattern + ## preventing malicious attacks through CPU cache delay analysis. + ## - simple power-analysis and electromagnetic attacks: Constantine always do the same + ## double and add sequences and those cannot be analyzed to distinguish + ## the exponent 0 and 1. + ## + ## I.e. As far as the author know, Constantine implements all countermeasures to the known + ## **remote** attacks on ECC implementations. + ## + ## Disclaimer: + ## Constantine is provided as-is without any guarantees. + ## Use at your own risks. + ## Thorough evaluation of your threat model, the security of any cryptographic library you are considering, + ## and the secrets you put in jeopardy is strongly advised before putting data at risk. + ## The author would like to remind users that the best code can only mitigate + ## but not protect against human failures which are the weakest links and largest + ## backdoors to secrets exploited today. + ## + ## Constantine is resistant to + ## - Fault Injection attacks: Constantine does not have branches that could + ## be used to skip some additions and reveal which were dummy and which were real. + ## Dummy operations are like the double-and-add-always timing attack countermeasure. + ## + ## + ## Constantine DOES NOT defend against Address-Bit Differential Power Analysis attacks by default, + ## which allow differentiating between writing a 0 or a 1 to a memory cell. + ## This is a threat for smart-cards and embedded devices (for example to handle authentication to a cable or satellite service) + ## Constantine can be extended to use randomized projective coordinates to foil this attack. + + let window = scalarMulPrologue(P, scratchspace) + + # We process bits with from most to least significant. + # At each loop iteration with have acc_len bits in acc. + # To maintain constant-time the number of iterations + # or the number of operations or memory accesses should be the same + # regardless of acc & acc_len + var + acc, acc_len: uint + e = 0 + while acc_len > 0 or e < scalar.len: + let (k, bits) = scalarMulDoubling( + P, scalar, scratchspace[0], + window, acc, acc_len, e + ) + + # Window lookup: we set scratchspace[1] to the lookup value + # If the window length is 1 it's already set. + if window > 1: + # otherwise we need a constant-time lookup + # in particular we need the same memory accesses, we can't + # just index the openarray with the bits to avoid cache attacks. + for i in 1 ..< 1 shl k: + let ctl = SecretWord(i) == SecretWord(bits) + scratchspace[1].ccopy(scratchspace[1+i], ctl) + + # Multiply with the looked-up value + # we need to keep the product only ig the exponent bits are not all zeroes + scratchspace[0].sum(P, scratchspace[1]) + P.ccopy(scratchspace[0], SecretWord(bits).isNonZero()) + +func scalarMul*( + P: var ECP_SWei_Proj, + scalar: BigInt + ) {.inline.} = + ## Elliptic Curve Scalar Multiplication + ## + ## P <- [k] P + # This calls endomorphism accelerated scalar mul if available + # or the generic scalar mul otherwise + when ECP_SWei_Proj.F.C in {BN254_Snarks, BLS12_381}: + # ⚠️ This requires the cofactor to be cleared + scalarMulGLV(P, scalar) + else: + var + scratchSpace: array[1 shl 4, ECP_SWei_Proj] + scalarCanonicalBE: array[(scalar.bits+7)div 8, byte] # canonical big endian representation + scalarCanonicalBE.exportRawUint(scalar, bigEndian) # Export is constant-time + P.scalarMulGeneric(scratchSpace) diff --git a/constantine/elliptic/ec_weierstrass_projective.nim b/constantine/elliptic/ec_weierstrass_projective.nim index bf61f47..d818b8a 100644 --- a/constantine/elliptic/ec_weierstrass_projective.nim +++ b/constantine/elliptic/ec_weierstrass_projective.nim @@ -108,6 +108,15 @@ func neg*(P: var ECP_SWei_Proj) = ## Negate ``P`` P.y.neg(P.y) +func cneg*(P: var ECP_SWei_Proj, ctl: CTBool) = + ## Conditional negation. + ## Negate if ``ctl`` is true + var Q{.noInit.}: typeof(P) + Q.x = P.x + Q.y.neg(P.y) + Q.z = P.z + P.ccopy(Q, ctl) + func sum*[F]( r: var ECP_SWei_Proj[F], P, Q: ECP_SWei_Proj[F] @@ -274,197 +283,23 @@ func double*[F]( else: {.error: "Not implemented.".} -# ############################################################ -# # -# Scalar Multiplication # -# # -# ############################################################ -# -# Scalar multiplication is a key algorithm for cryptographic protocols: -# - it is slow, -# - it is performance critical as it is used to generate signatures and authenticate messages -# - it is a high-value target as the "scalar" is very often the user secret key -# -# A safe scalar multiplication MUST: -# - Use no branching (to prevent timing and simple power analysis attacks) -# - Always do the same memory accesses (in particular for table lookups) (to prevent cache-timing attacks) -# - Not expose the bitlength of the exponent (use the curve order bitlength instead) -# -# Constantine does not make an extra effort to defend against the smart-cards -# and embedded device attacks: -# - Differential Power-Analysis which may allow for example retrieving bit content depending on the cost of writing 0 or 1 -# (Address-bit DPA by Itoh, Izu and Takenaka) -# - Electro-Magnetic which can be used in a similar way to power analysis but based on EM waves -# - Fault Attacks which can be used by actively introducing faults (via a laser for example) in an algorithm -# -# The current security efforts are focused on preventing attacks -# that are effective remotely including through the network, -# a colocated VM or a malicious process on your phone. -# -# - Survey for Performance & Security Problems of Passive Side-channel Attacks Countermeasures in ECC\ -# Rodrigo Abarúa, Claudio Valencia, and Julio López, 2019\ -# https://eprint.iacr.org/2019/010 -# -# - State-of-the-art of secure ECC implementations:a survey on known side-channel attacks and countermeasures\ -# Junfeng Fan,XuGuo, Elke De Mulder, Patrick Schaumont, Bart Preneel and Ingrid Verbauwhede, 2010 -# https://www.esat.kuleuven.be/cosic/publications/article-1461.pdf +func `+=`*[F](P: var ECP_SWei_Proj[F], Q: ECP_SWei_Proj[F]) = + var tmp {.noInit.}: ECP_SWei_Proj[F] + tmp.sum(P, Q) + P = tmp -template checkScalarMulScratchspaceLen(len: int) = - ## CHeck that there is a minimum of scratchspace to hold the temporaries - debug: - assert len >= 2, "Internal Error: the scratchspace for scalar multiplication should be equal or greater than 2" +func double*[F](P: var ECP_SWei_Proj[F]) = + var tmp {.noInit.}: ECP_SWei_Proj[F] + tmp.double(P) + P = tmp -func getWindowLen(bufLen: int): uint = - ## Compute the maximum window size that fits in the scratchspace buffer - checkScalarMulScratchspaceLen(bufLen) - result = 4 - while (1 shl result) + 1 > bufLen: - dec result +func affineFromProjective*[F](aff: var ECP_SWei_Proj[F], proj: ECP_SWei_Proj) = + # TODO: for now we reuse projective coordinate backend + # TODO: simultaneous inversion -func scalarMulPrologue( - P: var ECP_SWei_Proj, - scratchspace: var openarray[ECP_SWei_Proj] - ): uint = - ## Setup the scratchspace - ## Returns the fixed-window size for scalar mul with window optimization - result = scratchspace.len.getWindowLen() - # Precompute window content, special case for window = 1 - # (i.e scratchspace has only space for 2 temporaries) - # The content scratchspace[2+k] is set at [k]P - # with scratchspace[0] untouched - if result == 1: - scratchspace[1] = P - else: - scratchspace[2] = P - for k in 2 ..< 1 shl result: - scratchspace[k+1].sum(scratchspace[k], P) + var invZ {.noInit.}: F + invZ.inv(proj.z) - # Set a to infinity - P.setInf() - -func scalarMulDoubling( - P: var ECP_SWei_Proj, - exponent: openArray[byte], - tmp: var ECP_SWei_Proj, - window: uint, - acc, acc_len: var uint, - e: var int - ): tuple[k, bits: uint] {.inline.} = - ## Doubling steps of doubling and add for scalar multiplication - ## Get the next k bits in range [1, window) - ## and double k times - ## Returns the number of doubling done and the corresponding bits. - ## - ## Updates iteration variables and accumulators - # - # ⚠️: Extreme care should be used to not leak - # the exponent bits nor its real bitlength - # i.e. if the exponent is zero but encoded in a - # 256-bit integer, only "256" should leak - # as for most applications like ECDSA or BLS signature schemes - # the scalar is the user secret key. - - # Get the next bits - # acc/acc_len must be uint to avoid Nim runtime checks leaking bits - # e is public - var k = window - if acc_len < window: - if e < exponent.len: - acc = (acc shl 8) or exponent[e].uint - inc e - acc_len += 8 - else: # Drained all exponent bits - k = acc_len - - let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1) - acc_len -= k - - # We have k bits and can do k doublings - for i in 0 ..< k: - tmp.double(P) - P = tmp - - return (k, bits) - - -func scalarMul*( - P: var ECP_SWei_Proj, - scalar: openArray[byte], - scratchspace: var openArray[ECP_SWei_Proj] - ) = - ## Elliptic Curve Scalar Multiplication - ## - ## P <- [k] P - ## - ## This uses fixed-window optimization if possible - ## `scratchspace` MUST be of size 2 .. 2^4 - ## - ## This is suitable to use with secret `scalar`, in particular - ## to derive a public key from a private key or - ## to sign a message. - ## - ## Particular care has been given to defend against the following side-channel attacks: - ## - timing attacks: all exponents of the same length - ## will take the same time including - ## a "zero" exponent of length 256-bit - ## - cache-timing attacks: Constantine does use a precomputed table - ## but when extracting a value from the table - ## the whole table is always accessed with the same pattern - ## preventing malicious attacks through CPU cache delay analysis. - ## - simple power-analysis and electromagnetic attacks: Constantine always do the same - ## double and add sequences and those cannot be analyzed to distinguish - ## the exponent 0 and 1. - ## - ## I.e. As far as the author know, Constantine implements all countermeasures to the known - ## **remote** attacks on ECC implementations. - ## - ## Disclaimer: - ## Constantine is provided as-is without any guarantees. - ## Use at your own risks. - ## Thorough evaluation of your threat model, the security of any cryptographic library you are considering, - ## and the secrets you put in jeopardy is strongly advised before putting data at risk. - ## The author would like to remind users that the best code can only mitigate - ## but not protect against human failures which are the weakest links and largest - ## backdoors to secrets exploited today. - ## - ## Constantine is resistant to - ## - Fault Injection attacks: Constantine does not have branches that could - ## be used to skip some additions and reveal which were dummy and which were real. - ## Dummy operations are like the double-and-add-always timing attack countermeasure. - ## - ## - ## Constantine DOES NOT defend against Address-Bit Differential Power Analysis attacks by default, - ## which allow differentiating between writing a 0 or a 1 to a memory cell. - ## This is a threat for smart-cards and embedded devices (for example to handle authentication to a cable or satellite service) - ## Constantine can be extended to use randomized projective coordinates to foil this attack. - - let window = scalarMulPrologue(P, scratchspace) - - # We process bits with from most to least significant. - # At each loop iteration with have acc_len bits in acc. - # To maintain constant-time the number of iterations - # or the number of operations or memory accesses should be the same - # regardless of acc & acc_len - var - acc, acc_len: uint - e = 0 - while acc_len > 0 or e < scalar.len: - let (k, bits) = scalarMulDoubling( - P, scalar, scratchspace[0], - window, acc, acc_len, e - ) - - # Window lookup: we set scratchspace[1] to the lookup value - # If the window length is 1 it's already set. - if window > 1: - # otherwise we need a constant-time lookup - # in particular we need the same memory accesses, we can't - # just index the openarray with the bits to avoid cache attacks. - for i in 1 ..< 1 shl k: - let ctl = SecretWord(i) == SecretWord(bits) - scratchspace[1].ccopy(scratchspace[1+i], ctl) - - # Multiply with the looked-up value - # we need to keep the product only ig the exponent bits are not all zeroes - scratchspace[0].sum(P, scratchspace[1]) - P.ccopy(scratchspace[0], SecretWord(bits).isNonZero()) + aff.z.setOne() + aff.x.prod(proj.x, invZ) + aff.y.prod(proj.y, invZ) diff --git a/constantine/io/io_bigints.nim b/constantine/io/io_bigints.nim index 8e12306..47b2075 100644 --- a/constantine/io/io_bigints.nim +++ b/constantine/io/io_bigints.nim @@ -12,8 +12,7 @@ import ../primitives/constant_time, - ../arithmetic/bigints, - ../config/common + ../config/[common, type_bigint] # ############################################################ # diff --git a/constantine/io/io_ec.nim b/constantine/io/io_ec.nim index ca2c57d..d487a5e 100644 --- a/constantine/io/io_ec.nim +++ b/constantine/io/io_ec.nim @@ -9,7 +9,6 @@ import ./io_bigints, ./io_fields, ../config/curves, - ../arithmetic/[bigints, finite_fields], ../elliptic/[ ec_weierstrass_affine, ec_weierstrass_projective @@ -38,12 +37,14 @@ func toHex*(P: ECP_SWei_Proj): string = ## TODO: only normalize and don't display the Z coordinate ## ## This proc output may change format in the future - result = $P.F.C & "(x: " - result &= P.x.tohex(bigEndian) + + var aff {.noInit.}: typeof(P) + aff.affineFromProjective(P) + + result = $aff.F.C & "(x: " + result &= aff.x.tohex(bigEndian) result &= ", y: " - result &= P.y.tohex(bigEndian) - result &= ", z: " - result &= P.y.tohex(bigEndian) + result &= aff.y.tohex(bigEndian) result &= ')' func fromHex*(dst: var ECP_SWei_Proj, x, y: string): bool {.raises: [ValueError].}= diff --git a/constantine/io/io_fields.nim b/constantine/io/io_fields.nim index dbc1c43..cf132a2 100644 --- a/constantine/io/io_fields.nim +++ b/constantine/io/io_fields.nim @@ -9,7 +9,7 @@ import ./io_bigints, ../config/curves, - ../arithmetic/[bigints, finite_fields] + ../arithmetic/finite_fields # No exceptions allowed {.push raises: [].} diff --git a/constantine/primitives.nim b/constantine/primitives.nim index d311f84..0bb449c 100644 --- a/constantine/primitives.nim +++ b/constantine/primitives.nim @@ -11,11 +11,13 @@ import primitives/constant_time, primitives/multiplexers, primitives/addcarry_subborrow, - primitives/extended_precision + primitives/extended_precision, + primitives/bithacks export constant_time_types, constant_time, multiplexers, addcarry_subborrow, - extended_precision + extended_precision, + bithacks diff --git a/constantine/primitives/bithacks.nim b/constantine/primitives/bithacks.nim new file mode 100644 index 0000000..160ff2c --- /dev/null +++ b/constantine/primitives/bithacks.nim @@ -0,0 +1,77 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# Bit hacks +# +# ############################################################ + +# Bithacks +# ------------------------------------------------------------ +# TODO: Nim std/bitops is unsatisfactory +# in particular the "noUndefined" flag +# for countLeadingZeroBits/countTrailingZeroBits +# is returning zero instead of the integer bitwidth +# +# Furthermore it is not guaranteed constant-time +# And lastly, even compiler builtin may be slightly inefficient +# for example when doing fastLog2 +# which is "31 - builtin_clz" we get +# `bsr + xor (from clz) + sub` +# instead of plain `bsr` +# +# At the moment we don't need them to operate on secret data +# +# See: https://www.chessprogramming.org/BitScan +# https://www.chessprogramming.org/General_Setwise_Operations +# and https://graphics.stanford.edu/%7Eseander/bithacks.html +# for compendiums of bit manipulation + +proc clearMask[T: SomeInteger](v: T, mask: T): T {.inline.} = + ## Returns ``v``, with all the ``1`` bits from ``mask`` set to 0 + v and not mask + +proc clearBit*[T: SomeInteger](v: T, bit: T): T {.inline.} = + ## Returns ``v``, with the bit at position ``bit`` set to 0 + v.clearMask(1.T shl bit) + +func log2*(x: uint32): uint32 = + ## Find the log base 2 of a 32-bit or less integer. + ## using De Bruijn multiplication + ## Works at compile-time, guaranteed constant-time. + ## TODO: at runtime BitScanReverse or CountLeadingZero are more efficient + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, + 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] + var v = x + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + lookup[(v * 0x07C4ACDD'u32) shr 27] + +func log2*(x: uint64): uint64 {.inline, noSideEffect.} = + ## Find the log base 2 of a 32-bit or less integer. + ## using De Bruijn multiplication + ## Works at compile-time, guaranteed constant-time. + ## TODO: at runtime BitScanReverse or CountLeadingZero are more efficient + # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn + const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, + 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, + 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, + 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63] + var v = x + v = v or v shr 1 # first round down to one less than a power of 2 + v = v or v shr 2 + v = v or v shr 4 + v = v or v shr 8 + v = v or v shr 16 + v = v or v shr 32 + lookup[(v * 0x03F6EAF2CD271461'u64) shr 58] diff --git a/constantine/primitives/constant_time.nim b/constantine/primitives/constant_time.nim index 1507ef9..715f8c6 100644 --- a/constantine/primitives/constant_time.nim +++ b/constantine/primitives/constant_time.nim @@ -40,6 +40,9 @@ template cfalse*(T: typedesc[Ct or BaseUint]): auto = template ct*[T: BaseUint](x: T): Ct[T] = (Ct[T])(x) +template ct*(x: auto, T: typedesc[BaseUint]): Ct[T] = + (Ct[T])(x) + template `$`*[T](x: Ct[T]): string = $T(x) @@ -119,60 +122,17 @@ template `-`*[T: Ct](x: T): T = {.emit:[neg, " = -", x, ";"].} neg -# ############################################################ -# -# Bit hacks -# -# ############################################################ - -template isMsbSet*[T: Ct](x: T): CTBool[T] = - ## Returns the most significant bit of an integer - const msb_pos = T.sizeof * 8 - 1 - (CTBool[T])(x shr msb_pos) - -func log2*(x: uint32): uint32 = - ## Find the log base 2 of a 32-bit or less integer. - ## using De Bruijn multiplication - ## Works at compile-time, guaranteed constant-time. - ## Note: at runtime BitScanReverse or CountLeadingZero are more efficient - ## but log2 is never needed at runtime. - # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn - const lookup: array[32, uint8] = [0'u8, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, - 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31] - var v = x - v = v or v shr 1 # first round down to one less than a power of 2 - v = v or v shr 2 - v = v or v shr 4 - v = v or v shr 8 - v = v or v shr 16 - lookup[(v * 0x07C4ACDD'u32) shr 27] - -func log2*(x: uint64): uint64 {.inline, noSideEffect.} = - ## Find the log base 2 of a 32-bit or less integer. - ## using De Bruijn multiplication - ## Works at compile-time, guaranteed constant-time. - ## Note: at runtime BitScanReverse or CountLeadingZero are more efficient - ## but log2 is never needed at runtime. - # https://graphics.stanford.edu/%7Eseander/bithacks.html#IntegerLogDeBruijn - const lookup: array[64, uint8] = [0'u8, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, - 33, 42, 3, 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, 62, - 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, 56, 45, 25, 31, - 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5, 63] - var v = x - v = v or v shr 1 # first round down to one less than a power of 2 - v = v or v shr 2 - v = v or v shr 4 - v = v or v shr 8 - v = v or v shr 16 - v = v or v shr 32 - lookup[(v * 0x03F6EAF2CD271461'u64) shr 58] - # ############################################################ # # Hardened Boolean primitives # # ############################################################ +template isMsbSet[T: Ct](x: T): CTBool[T] = + ## Returns the most significant bit of an integer + const msb_pos = T.sizeof * 8 - 1 + (CTBool[T])(x shr msb_pos) + template fmap[T: Ct](x: CTBool[T], op: untyped, y: CTBool[T]): CTBool[T] = CTBool[T](op(T(x), T(y))) diff --git a/constantine/primitives/extended_precision.nim b/constantine/primitives/extended_precision.nim index 22a91f6..ae0bc6f 100644 --- a/constantine/primitives/extended_precision.nim +++ b/constantine/primitives/extended_precision.nim @@ -12,7 +12,10 @@ # # ############################################################ -import ./constant_time_types, ./addcarry_subborrow +import + ./constant_time_types, + ./addcarry_subborrow, + ./constant_time # ############################################################ # @@ -88,7 +91,7 @@ when sizeof(int) == 8: from ./extended_precision_64bit_uint128 import mul, muladd1, muladd2 else: from ./extended_precision_64bit_uint128 import unsafeDiv2n1n, mul, muladd1, muladd2 - export unsafeDiv2n1n, muladd1, muladd2 + export unsafeDiv2n1n, mul, muladd1, muladd2 # ############################################################ # @@ -126,3 +129,12 @@ func mulDoubleAdd2*[T: Ct[uint32]|Ct[uint64]](r2: var Carry, r1, r0: var T, a, b # result at most in (carry: 1, hi: 0xFFFFFFFF_FFFFFFFF, lo: 0x00000000_00000000) addC(carry, r0, r0, dLo, Carry(0)) addC(carry, r1, r1, T(dHi), carry) + +func mulAcc*[T: Ct[uint32]|Ct[uint64]](t, u, v: var T, a, b: T) {.inline.} = + ## (t, u, v) <- (t, u, v) + a * b + var UV: array[2, T] + var carry: Carry + mul(UV[1], UV[0], a, b) + addC(carry, v, v, UV[0], Carry(0)) + addC(carry, u, u, UV[1], carry) + t += T(carry) diff --git a/constantine/primitives/research/addcarry_subborrow_compiler.nim b/constantine/primitives/research/addcarry_subborrow_compiler.nim index 63d2523..3391577 100644 --- a/constantine/primitives/research/addcarry_subborrow_compiler.nim +++ b/constantine/primitives/research/addcarry_subborrow_compiler.nim @@ -17,7 +17,7 @@ # # This overcome the bad GCC codegen aven with addcary_u64 intrinsic. -import macros +import std/macros func wordsRequired(bits: int): int {.compileTime.} = ## Compute the number of limbs required @@ -86,7 +86,7 @@ func `+=`(a: var BigInt, b: BigInt) {.noinline.}= # ############################################# when isMainModule: - import random + import std/random proc rand(T: typedesc[BigInt]): T = for i in 0 ..< result.limbs.len: result.limbs[i] = uint64(rand(high(int))) diff --git a/constantine/tower_field_extensions/quadratic_extensions.nim b/constantine/tower_field_extensions/quadratic_extensions.nim index f7a6a12..f0b1538 100644 --- a/constantine/tower_field_extensions/quadratic_extensions.nim +++ b/constantine/tower_field_extensions/quadratic_extensions.nim @@ -8,7 +8,6 @@ import ../arithmetic, - ../config/common, ../primitives, ./tower_common diff --git a/formal_verification/bls12_381_q_64.nim b/formal_verification/bls12_381_q_64.nim index b5da75e..07cb51c 100644 --- a/formal_verification/bls12_381_q_64.nim +++ b/formal_verification/bls12_381_q_64.nim @@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne # ------------------------------------------------------------------------- when isMainModule: - import random, std/monotimes, times, strformat, ../benchmarks/platforms + import std/[random, monotimes, times, strformat], ../benchmarks/platforms const Iters = 1_000_000 const InvIters = 1000 diff --git a/helpers/static_for.nim b/helpers/static_for.nim index 0232664..ffedcad 100644 --- a/helpers/static_for.nim +++ b/helpers/static_for.nim @@ -1,4 +1,4 @@ -import macros +import std/macros proc replaceNodes(ast: NimNode, what: NimNode, by: NimNode): NimNode = # Replace "what" ident node by "by" diff --git a/sage/curve_family_bls12.sage b/sage/curve_family_bls12.sage index a8324a0..bda3566 100644 --- a/sage/curve_family_bls12.sage +++ b/sage/curve_family_bls12.sage @@ -30,11 +30,54 @@ def compute_curve_characteristic(u_str): else: print(' Parameter u (hex): 0x' + u.hex()) + print(f' p mod 3: ' + str(p % 3)) print(f' p mod 4: ' + str(p % 4)) print(f' p mod 8: ' + str(p % 8)) print(f' p mod 12: ' + str(p % 12)) print(f' p mod 16: ' + str(p % 16)) + print() + + print(f' Endomorphism-based acceleration when p mod 3 == 1') + print(f' Endomorphism can be field multiplication by one of the non-trivial cube root of unity 𝜑') + print(f' Rationale:') + print(f' curve equation is y² = x³ + b, and y² = (x𝜑)³ + b <=> y² = x³ + b (with 𝜑³ == 1) so we are still on the curve') + print(f' this means that multiplying by 𝜑 the x-coordinate is equivalent to a scalar multiplication by some λᵩ') + print(f' with λᵩ² + λᵩ + 1 ≡ 0 (mod CurveOrder), see below. Hence we have a 2 dimensional decomposition of the scalar multiplication') + print(f' i.e. For any [s]P, we can find a corresponding [k1]P + [k2][λᵩ]P with [λᵩ]P being a simple field multiplication by 𝜑') + print(f' Finding cube roots:') + print(f' x³−1=0 <=> (x−1)(x²+x+1) = 0, if x != 1, x solves (x²+x+1) = 0 <=> x = (-1±√3)/2') + print(f' cube roots of unity: ' + str(['0x' + Integer(root).hex() for root in GF(p)(1).nth_root(3, all=True)])) + print(f' GLV-2 decomposition of s into (k1, k2) on G1') + print(f' (k1, k2) = (s, 0) - 𝛼1 b1 - 𝛼2 b2') + print(f' 𝛼i = 𝛼\u0302i * s / r') + print(f' Lattice b1: ' + str(['0x' + b.hex() for b in [u^2-1, -1]])) + print(f' Lattice b2: ' + str(['0x' + b.hex() for b in [1, u^2]])) + + # Babai rounding + ahat1 = u^2 + ahat2 = 1 + # We want a1 = ahat1 * s/r with m = 2 (for a 2-dim decomposition) and r the curve order + # To handle rounding errors we instead multiply by + # 𝜈 = (2^WordBitWidth)^w (i.e. the same as the R magic constant for Montgomery arithmetic) + # with 𝜈 > r and w minimal so that 𝜈 > r + # a1 = ahat1*𝜈/r * s/𝜈 + v = int(r).bit_length() + print(f' r.bit_length(): {v}') + v = int(((v + 64 - 1) // 64) * 64) # round to next multiple of 64 + print(f' 𝜈 > r, 𝜈: 2^{v}') + print(f' Babai roundings') + print(f' 𝛼\u03021: ' + '0x' + ahat1.hex()) + print(f' 𝛼\u03022: ' + '0x' + ahat2.hex()) + print(f' Handle rounding errors') + print(f' 𝛼\u03051 = 𝛼\u03021 * s / r with 𝛼1 = (𝛼\u03021 * 𝜈/r) * s/𝜈') + print(f' 𝛼\u03052 = 𝛼\u03022 * s / r with 𝛼2 = (𝛼\u03022 * 𝜈/r) * s/𝜈') + print(f' -----------------------------------------------------') + l1 = Integer(ahat1 << v) // r + l2 = Integer(ahat2 << v) // r + print(f' 𝛼1 = (0x{l1.hex()} * s) >> {v}') + print(f' 𝛼2 = (0x{l2.hex()} * s) >> {v}') + if __name__ == "__main__": # Usage # sage sage/curve_family_bls12.sage '-(2^63 + 2^62 + 2^60 + 2^57 + 2^48 + 2^16)' diff --git a/sage/curve_family_bn.sage b/sage/curve_family_bn.sage index 2b81286..a546487 100644 --- a/sage/curve_family_bn.sage +++ b/sage/curve_family_bn.sage @@ -22,22 +22,68 @@ def compute_curve_characteristic(u_str): r = 36*u^4 + 36*u^3 + 18*u^2 + 6*u + 1 print(f'BN family - {p.nbits()} bits') - print(' Prime modulus: 0x' + p.hex()) - print(' Curve order: 0x' + r.hex()) + print(' Prime modulus p: 0x' + p.hex()) + print(' Curve order r: 0x' + r.hex()) print(' Parameter u: ' + u_str) if u < 0: print(' Parameter u (hex): -0x' + (-u).hex()) else: print(' Parameter u (hex): 0x' + u.hex()) + print(f' p mod 3: ' + str(p % 3)) print(f' p mod 4: ' + str(p % 4)) print(f' p mod 8: ' + str(p % 8)) print(f' p mod 12: ' + str(p % 12)) print(f' p mod 16: ' + str(p % 16)) + print() + + print(f' Endomorphism-based acceleration when p mod 3 == 1') + print(f' Endomorphism can be field multiplication by one of the non-trivial cube root of unity 𝜑') + print(f' Rationale:') + print(f' curve equation is y² = x³ + b, and y² = (x𝜑)³ + b <=> y² = x³ + b (with 𝜑³ == 1) so we are still on the curve') + print(f' this means that multiplying by 𝜑 the x-coordinate is equivalent to a scalar multiplication by some λᵩ') + print(f' with λᵩ² + λᵩ + 1 ≡ 0 (mod r) and 𝜑² + 𝜑 + 1 ≡ 0 (mod p), see below.') + print(f' Hence we have a 2 dimensional decomposition of the scalar multiplication') + print(f' i.e. For any [s]P, we can find a corresponding [k1]P + [k2][λᵩ]P with [λᵩ]P being a simple field multiplication by 𝜑') + print(f' Finding cube roots:') + print(f' x³−1=0 <=> (x−1)(x²+x+1) = 0, if x != 1, x solves (x²+x+1) = 0 <=> x = (-1±√3)/2') + print(f' cube roots of unity 𝜑 (mod p): ' + str(['0x' + Integer(root).hex() for root in GF(p)(1).nth_root(3, all=True)])) + print(f' cube roots of unity λᵩ (mod r): ' + str(['0x' + Integer(root).hex() for root in GF(r)(1).nth_root(3, all=True)])) + print(f' GLV-2 decomposition of s into (k1, k2) on G1') + print(f' (k1, k2) = (s, 0) - 𝛼1 b1 - 𝛼2 b2') + print(f' 𝛼i = 𝛼\u0302i * s / r') + print(f' Lattice b1: ' + str(['0x' + b.hex() for b in [2*u+1, 6*u^2+4*u+1]])) + print(f' Lattice b2: ' + str(['0x' + b.hex() for b in [6*u^2+2*u, -2*u-1]])) + + # Babai rounding + ahat1 = 2*u+1 + ahat2 = 6*u^2+4*u+1 + # We want a1 = ahat1 * s/r with m = 2 (for a 2-dim decomposition) and r the curve order + # To handle rounding errors we instead multiply by + # 𝜈 = (2^WordBitWidth)^w (i.e. the same as the R magic constant for Montgomery arithmetic) + # with 𝜈 > r and w minimal so that 𝜈 > r + # a1 = ahat1*𝜈/r * s/𝜈 + v = int(r).bit_length() + print(f' r.bit_length(): {v}') + v = int(((v + 64 - 1) // 64) * 64) # round to next multiple of 64 + print(f' 𝜈 > r, 𝜈: 2^{v}') + print(f' Babai roundings') + print(f' 𝛼\u03021: ' + '0x' + ahat1.hex()) + print(f' 𝛼\u03022: ' + '0x' + ahat2.hex()) + print(f' Handle rounding errors') + print(f' 𝛼1 = 𝛼\u03021 * s / r with 𝛼1 = (𝛼\u03021 * 𝜈/r) * s/𝜈') + print(f' 𝛼2 = 𝛼\u03022 * s / r with 𝛼2 = (𝛼\u03022 * 𝜈/r) * s/𝜈') + print(f' -----------------------------------------------------') + l1 = Integer(ahat1 << v) // r + l2 = Integer(ahat2 << v) // r + print(f' 𝛼1 = (0x{l1.hex()} * s) >> {v}') + print(f' 𝛼2 = (0x{l2.hex()} * s) >> {v}') + if __name__ == "__main__": # Usage # sage sage/curve_family_bn.sage '-(2^62 + 2^55 + 1)' + # sage sage/curve_family_bn.sage 4965661367192848881 from argparse import ArgumentParser diff --git a/sage/lattice_decomposition_bls12_381_g1.sage b/sage/lattice_decomposition_bls12_381_g1.sage new file mode 100644 index 0000000..3d082c8 --- /dev/null +++ b/sage/lattice_decomposition_bls12_381_g1.sage @@ -0,0 +1,202 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# BN254 GLV Endomorphism +# Lattice Decomposition +# +# ############################################################ + +# Parameters +u = -(2^63 + 2^62 + 2^60 + 2^57 + 2^48 + 2^16) +p = (u - 1)^2 * (u^4 - u^2 + 1)//3 + u +r = u^4 - u^2 + 1 +cofactor = Integer('0x396c8c005555e1568c00aaab0000aaab') +print('p : ' + p.hex()) +print('r : ' + r.hex()) + +# Cube root of unity (mod r) formula for any BLS12 curves +lambda1_r = u^2 - 1 +assert lambda1_r^3 % r == 1 +print('λᵩ1 : ' + lambda1_r.hex()) +print('λᵩ1+r: ' + (lambda1_r+r).hex()) + +lambda2_r = u^4 +assert lambda2_r^3 % r == 1 +print('λᵩ2 : ' + lambda2_r.hex()) + +# Finite fields +F = GF(p) +# K2. = PolynomialRing(F) +# F2. = F.extension(u^2+9) +# K6. = PolynomialRing(F2) +# F6. = F2.extension(v^3-beta) +# K12. = PolynomialRing(F6) +# K12. = F6.extension(w^2-eta) + +# Curves +b = 4 +G1 = EllipticCurve(F, [0, b]) +# G2 = EllipticCurve(F2, [0, b*beta]) + +(phi1, phi2) = (root for root in GF(p)(1).nth_root(3, all=True) if root != 1) +print('𝜑1 :' + Integer(phi1).hex()) +print('𝜑2 :' + Integer(phi2).hex()) +assert phi1^3 % p == 1 +assert phi2^3 % p == 1 + +# Test generator +set_random_seed(1337) + +# Check +def checkEndo(): + Prand = G1.random_point() + assert Prand != G1([0, 1, 0]) # Infinity + + # Clear cofactor + P = Prand * cofactor + + (Px, Py, Pz) = P + Qendo1 = G1([Px*phi1 % p, Py, Pz]) + Qendo2 = G1([Px*phi2 % p, Py, Pz]) + + Q1 = lambda1_r * P + Q2 = lambda2_r * P + + assert P != Q1 + assert P != Q2 + + assert (F(Px)*F(phi1))^3 == F(Px)^3 + assert (F(Px)*F(phi2))^3 == F(Px)^3 + + assert Q1 == Qendo2 + assert Q2 == Qendo2 + + print('Endomorphism OK with 𝜑2') + +checkEndo() + +# Lattice +b = [ + [u^2-1, -1], + [1, u^2] +] +# Babai rounding +ahat = [u^2, 1] +v = int(r).bit_length() +v = int(((v + 64 - 1) // 64) * 64) # round to next multiple of 64 + +l = [Integer(a << v) // r for a in ahat] +print('𝛼\u03051: ' + l[0].hex()) +print('𝛼\u03052: ' + l[1].hex()) + +def getGLV2_decomp(scalar): + + a0 = (l[0] * scalar) >> v + a1 = (l[1] * scalar) >> v + + k0 = scalar - a0 * b[0][0] - a1 * b[1][0] + k1 = 0 - a0 * b[0][1] - a1 * b[1][1] + + assert int(k0).bit_length() <= (int(r).bit_length() + 1) // 2 + assert int(k1).bit_length() <= (int(r).bit_length() + 1) // 2 + + assert scalar == (k0 + k1 * (lambda1_r % r)) % r + assert scalar == (k0 + k1 * (lambda2_r % r)) % r + + return k0, k1 + +def recodeScalars(k): + m = 2 + L = ((int(r).bit_length() + m-1) // m) + 1 # l = ⌈log2 r/m⌉ + 1 + + b = [[0] * L, [0] * L] + b[0][L-1] = 1 + for i in range(0, L-1): # l-2 inclusive + b[0][i] = 2 * ((k[0] >> (i+1)) & 1) - 1 + for j in range(1, m): + for i in range(0, L): + b[j][i] = b[0][i] * (k[j] & 1) + k[j] = (k[j]//2) - (b[j][i] // 2) + + return b + +def buildLut(P0, P1): + m = 2 + lut = [0] * (1 << (m-1)) + lut[0] = P0 + lut[1] = P0 + P1 + return lut + +def pointToString(P): + (Px, Py, Pz) = P + return '(x: ' + Integer(Px).hex() + ', y: ' + Integer(Py).hex() + ', z: ' + Integer(Pz).hex() + ')' + +def scalarMulGLV(scalar, P0): + m = 2 + L = ((int(r).bit_length() + m-1) // m) + 1 # l = ⌈log2 r/m⌉ + 1 + + print('L: ' + str(L)) + + print('scalar: ' + Integer(scalar).hex()) + + k0, k1 = getGLV2_decomp(scalar) + print('k0: ' + k0.hex()) + print('k1: ' + k1.hex()) + + P1 = (lambda1_r % r) * P0 + (Px, Py, Pz) = P0 + P1_endo = G1([Px*phi2 % p, Py, Pz]) + assert P1 == P1_endo + + expected = scalar * P0 + decomp = k0*P0 + k1*P1 + assert expected == decomp + + print('------ recode scalar -----------') + even = k0 & 1 == 1 + if even: + k0 -= 1 + + b = recodeScalars([k0, k1]) + print('b0: ' + str(list(reversed(b[0])))) + print('b1: ' + str(list(reversed(b[1])))) + + print('------------ lut ---------------') + + lut = buildLut(P0, P1) + + print('------------ mul ---------------') + print('b0 L-1: ' + str(b[0][L-1])) + Q = b[0][L-1] * lut[b[1][L-1] & 1] + for i in range(L-2, -1, -1): + Q *= 2 + Q += b[0][i] * lut[b[1][i] & 1] + + if even: + Q += P0 + + print('final Q: ' + pointToString(Q)) + print('expected: ' + pointToString(expected)) + assert Q == expected # TODO debug + +# Test generator +set_random_seed(1337) + +for i in range(1): + print('---------------------------------------') + # scalar = randrange(r) # Pick an integer below curve order + # P = G1.random_point() + scalar = Integer('0xf7e60a832eb77ac47374bc93251360d6c81c21add62767ff816caf11a20d8db') + P = G1([ + Integer('0xf9679bb02ee7f352fff6a6467a5e563ec8dd38c86a48abd9e8f7f241f1cdd29d54bc3ddea3a33b62e0d7ce22f3d244a'), + Integer('0x50189b992cf856846b30e52205ff9ef72dc081e9680726586231cbc29a81a162120082585f401e00382d5c86fb1083f'), + Integer(1) + ]) + scalarMulGLV(scalar, P) diff --git a/sage/lattice_decomposition_bn254_snarks_g1.sage b/sage/lattice_decomposition_bn254_snarks_g1.sage new file mode 100644 index 0000000..ab2fb49 --- /dev/null +++ b/sage/lattice_decomposition_bn254_snarks_g1.sage @@ -0,0 +1,193 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# BN254 GLV Endomorphism +# Lattice Decomposition +# +# ############################################################ + +# Parameters +u = Integer('0x44E992B44A6909F1') +p = 36*u^4 + 36*u^3 + 24*u^2 + 6*u + 1 +r = 36*u^4 + 36*u^3 + 18*u^2 + 6*u + 1 +cofactor = 1 + +# Cube root of unity (mod r) formula for any BN curves +lambda1_r = (-(36*u^3+18*u^2+6*u+2)) +assert lambda1_r^3 % r == 1 +print('λᵩ1 : ' + lambda1_r.hex()) +print('λᵩ1+r: ' + (lambda1_r+r).hex()) + +lambda2_r = (36*u^4-1) +assert lambda2_r^3 % r == 1 +print('λᵩ2 : ' + lambda2_r.hex()) + +# Finite fields +F = GF(p) +# K2. = PolynomialRing(F) +# F2. = F.extension(u^2+9) +# K6. = PolynomialRing(F2) +# F6. = F2.extension(v^3-beta) +# K12. = PolynomialRing(F6) +# K12. = F6.extension(w^2-eta) + +# Curves +b = 3 +G1 = EllipticCurve(F, [0, b]) +# G2 = EllipticCurve(F2, [0, b/beta]) + +(phi1, phi2) = (root for root in GF(p)(1).nth_root(3, all=True) if root != 1) +print('𝜑1 :' + Integer(phi1).hex()) +print('𝜑2 :' + Integer(phi2).hex()) + +# Test generator +set_random_seed(1337) + +# Check +def checkEndo(): + P = G1.random_point() + assert P != G1([0, 1, 0]) # Infinity + + (Px, Py, Pz) = P + Qendo1 = G1([Px*phi1 % p, Py, Pz]) + Qendo2 = G1([Px*phi2 % p, Py, Pz]) + + Q1 = lambda1_r * P + Q2 = lambda2_r * P + + assert P != Q1 + assert P != Q2 + + assert (F(Px)*F(phi1))^3 == F(Px)^3 + assert (F(Px)*F(phi2))^3 == F(Px)^3 + + assert Q1 == Qendo1 + assert Q2 == Qendo1 + + print('Endomorphism OK with 𝜑1') + +checkEndo() + +# Lattice +b = [ + [2*u+1, 6*u^2+4*u+1], + [6*u^2+2*u, -2*u-1] +] +# Babai rounding +ahat = [2*u+1, 6*u^2+4*u+1] +v = int(r).bit_length() +v = int(((v + 64 - 1) // 64) * 64) # round to next multiple of 64 + +l = [Integer(a << v) // r for a in ahat] + +def getGLV2_decomp(scalar): + + a0 = (l[0] * scalar) >> v + a1 = (l[1] * scalar) >> v + + k0 = scalar - a0 * b[0][0] - a1 * b[1][0] + k1 = 0 - a0 * b[0][1] - a1 * b[1][1] + + assert int(k0).bit_length() <= (int(r).bit_length() + 1) // 2 + assert int(k1).bit_length() <= (int(r).bit_length() + 1) // 2 + + assert scalar == (k0 + k1 * (lambda1_r % r)) % r + assert scalar == (k0 + k1 * (lambda2_r % r)) % r + + return k0, k1 + +def recodeScalars(k): + m = 2 + l = ((int(r).bit_length() + m-1) // m) + 1 # l = ⌈log2 r/m⌉ + 1 + + b = [[0] * l, [0] * l] + b[0][l-1] = 1 + for i in range(0, l-1): # l-2 inclusive + b[0][i] = 2 * ((k[0] >> (i+1)) & 1) - 1 + for j in range(1, m): + for i in range(0, l): + b[j][i] = b[0][i] * (k[j] & 1) + k[j] = (k[j]//2) - (b[j][i] // 2) + + return b + +def buildLut(P0, P1): + m = 2 + lut = [0] * (1 << (m-1)) + lut[0] = P0 + lut[1] = P0 + P1 + return lut + +def pointToString(P): + (Px, Py, Pz) = P + return '(x: ' + Integer(Px).hex() + ', y: ' + Integer(Py).hex() + ', z: ' + Integer(Pz).hex() + ')' + +def scalarMulGLV(scalar, P0): + m = 2 + L = ((int(r).bit_length() + m-1) // m) + 1 # l = ⌈log2 r/m⌉ + 1 + + print('L: ' + str(L)) + + print('scalar: ' + Integer(scalar).hex()) + + k0, k1 = getGLV2_decomp(scalar) + print('k0: ' + k0.hex()) + print('k1: ' + k1.hex()) + + P1 = (lambda1_r % r) * P0 + (Px, Py, Pz) = P0 + P1_endo = G1([Px*phi1 % p, Py, Pz]) + assert P1 == P1_endo + + expected = scalar * P0 + decomp = k0*P0 + k1*P1 + assert expected == decomp + + print('------ recode scalar -----------') + even = k0 & 1 == 1 + if even: + k0 -= 1 + + b = recodeScalars([k0, k1]) + print('b0: ' + str(list(reversed(b[0])))) + print('b1: ' + str(list(reversed(b[1])))) + + print('------------ lut ---------------') + + lut = buildLut(P0, P1) + + print('------------ mul ---------------') + print('b0 L-1: ' + str(b[0][L-1])) + Q = b[0][L-1] * lut[b[1][L-1] & 1] + for i in range(L-2, -1, -1): + Q *= 2 + Q += b[0][i] * lut[b[1][i] & 1] + + if even: + Q += P0 + + print('final Q: ' + pointToString(Q)) + print('expected: ' + pointToString(expected)) + assert Q == expected # TODO debug + +# Test generator +set_random_seed(1337) + +for i in range(1): + print('---------------------------------------') + # scalar = randrange(r) # Pick an integer below curve order + # P = G1.random_point() + scalar = Integer('0x0e08a292f940cfb361cc82bc24ca564f51453708c9745a9cf8707b11c84bc448') + P = G1([ + Integer('0x22d3af0f3ee310df7fc1a2a204369ac13eb4a48d969a27fcd2861506b2dc0cd7'), + Integer('0x1c994169687886ccd28dd587c29c307fb3cab55d796d73a5be0bbf9aab69912e'), + Integer(1) + ]) + scalarMulGLV(scalar, P) diff --git a/sage/testgen_bls12_381.sage b/sage/testgen_bls12_381.sage index ee96098..d681f64 100644 --- a/sage/testgen_bls12_381.sage +++ b/sage/testgen_bls12_381.sage @@ -37,7 +37,11 @@ set_random_seed(1337) for i in range(10): print('---------------------------------------') - P = G1.random_point() + Prand = G1.random_point() + + # Clear cofactor + P = Prand * cofactor + (Px, Py, Pz) = P print('Px: ' + Integer(Px).hex()) print('Py: ' + Integer(Py).hex()) @@ -50,7 +54,7 @@ for i in range(10): print('Qx: ' + Integer(Qx).hex()) print('Qy: ' + Integer(Qy).hex()) print('Qz: ' + Integer(Qz).hex()) - +print('=========================================') # CurveOrder sanity check # diff --git a/sage/testgen_bn254_snarks.sage b/sage/testgen_bn254_snarks.sage index 0e9a5f0..9afd828 100644 --- a/sage/testgen_bn254_snarks.sage +++ b/sage/testgen_bn254_snarks.sage @@ -50,6 +50,7 @@ for i in range(10): print('Qx: ' + Integer(Qx).hex()) print('Qy: ' + Integer(Qy).hex()) print('Qz: ' + Integer(Qz).hex()) +print('=========================================') # CurveOrder sanity check # diff --git a/tests/test_bigints.nim b/tests/test_bigints.nim index ed4f50f..45726bb 100644 --- a/tests/test_bigints.nim +++ b/tests/test_bigints.nim @@ -6,7 +6,7 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import unittest, +import std/unittest, ../constantine/io/io_bigints, ../constantine/arithmetic, ../constantine/config/common, @@ -138,6 +138,131 @@ proc mainArith() = discard a.add(SecretWord 1) check: bool(a == expected) + suite "Multi-precision multiplication": + test "Same size operand into double size result": + block: + var r: BigInt[256] + let a = BigInt[128].fromHex"0x12345678_FF11FFAA_00321321_CAFECAFE" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + let expected = BigInt[256].fromHex"fd5bdef43d64113f371ab5d8843beca889c07fd549b84d8a5001a8f102e0722" + + r.prod(a, b) + check: bool(r == expected) + r.prod(b, a) + check: bool(r == expected) + + test "Different size into large result": + block: + var r: BigInt[200] + let a = BigInt[29].fromHex"0x12345678" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + let expected = BigInt[200].fromHex"fd5bdee65f787f665f787f665f787f65621ca08" + + r.prod(a, b) + check: bool(r == expected) + r.prod(b, a) + check: bool(r == expected) + + test "Destination is properly zero-padded if multiplicands are too short": + block: + var r = BigInt[200].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DE" + let a = BigInt[29].fromHex"0x12345678" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + let expected = BigInt[200].fromHex"fd5bdee65f787f665f787f665f787f65621ca08" + + r.prod(a, b) + check: bool(r == expected) + r.prod(b, a) + check: bool(r == expected) + + suite "Multi-precision multiplication keeping only high words": + test "Same size operand into double size result - discard first word": + block: + var r: BigInt[256] + let a = BigInt[128].fromHex"0x12345678_FF11FFAA_00321321_CAFECAFE" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + when WordBitWidth == 32: + let expected = BigInt[256].fromHex"fd5bdef43d64113f371ab5d8843beca889c07fd549b84d8a5001a8f" + else: + let expected = BigInt[256].fromHex"fd5bdef43d64113f371ab5d8843beca889c07fd549b84d8" + + r.prod_high_words(a, b, 1) + check: bool(r == expected) + r.prod_high_words(b, a, 1) + check: bool(r == expected) + + test "Same size operand into double size result - discard first 3 words": + block: + var r: BigInt[256] + let a = BigInt[128].fromHex"0x12345678_FF11FFAA_00321321_CAFECAFE" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + when WordBitWidth == 32: + let expected = BigInt[256].fromHex"fd5bdef43d64113f371ab5d8843beca889c07fd" + else: + let expected = BigInt[256].fromHex"fd5bdef43d64113" + + r.prod_high_words(a, b, 3) + check: bool(r == expected) + r.prod_high_words(b, a, 3) + check: bool(r == expected) + + test "All lower words trigger a carry": + block: + var r: BigInt[256] + let a = BigInt[256].fromHex"0xFFFFF000_FFFFF111_FFFFFFFA_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF" + let b = BigInt[256].fromHex"0xFFFFFFFF_FFFFF222_FFFFFFFB_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF" + + # Full product: + # fffff000_ffffe335_00ddc21a_00cf3972_00008109_00000013_ffffffff_fffffffe + # 00000fff_00001ccb_00000009_00000000_00000000_00000000_00000000_00000001 + let expected = BigInt[256].fromHex"0xfffff000_ffffe335_00ddc21a_00cf3972_00008109_00000013_ffffffff_fffffffe" + when WordBitWidth == 32: + const startWord = 8 + else: + const startWord = 4 + + r.prod_high_words(a, b, startWord) + check: bool(r == expected) + r.prod_high_words(b, a, startWord) + check: bool(r == expected) + + test "Different size into large result": + block: + var r: BigInt[200] + let a = BigInt[29].fromHex"0x12345678" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + when WordBitWidth == 32: + let expected = BigInt[200].fromHex"fd5bdee65f787f665f787f6" + else: + let expected = BigInt[200].fromHex"fd5bdee" + + r.prod_high_words(a, b, 2) + check: bool(r == expected) + r.prod_high_words(b, a, 2) + check: bool(r == expected) + + test "Destination is properly zero-padded if multiplicands are too short": + block: + var r = BigInt[200].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF_DE" + let a = BigInt[29].fromHex"0x12345678" + let b = BigInt[128].fromHex"0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF" + + when WordBitWidth == 32: + let expected = BigInt[200].fromHex"fd5bdee65f787f665f787f6" + else: + let expected = BigInt[200].fromHex"fd5bdee" + + r.prod_high_words(a, b, 2) + check: bool(r == expected) + r.prod_high_words(b, a, 2) + check: bool(r == expected) + suite "Modular operations - small modulus": # Vectors taken from Stint - https://github.com/status-im/nim-stint test "100 mod 13": diff --git a/tests/test_bigints_vs_gmp.nim b/tests/test_bigints_mod_vs_gmp.nim similarity index 98% rename from tests/test_bigints_vs_gmp.nim rename to tests/test_bigints_mod_vs_gmp.nim index fe71145..de7fb01 100644 --- a/tests/test_bigints_vs_gmp.nim +++ b/tests/test_bigints_mod_vs_gmp.nim @@ -8,7 +8,7 @@ import # Standard library - random, macros, times, strutils, + std/[random, macros, times, strutils], # Third-party gmp, stew/byteutils, # Internal @@ -148,7 +148,7 @@ proc main() = # Reexport as bigEndian for debugging discard mpz_export(aBuf[0].addr, aW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, a) discard mpz_export(mBuf[0].addr, mW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, m) - "\nModulus with operand\n" & + "\nModulus with operands\n" & " a (" & align($aBits, 4) & "-bit): " & aBuf.toHex & "\n" & " m (" & align($mBits, 4) & "-bit): " & mBuf.toHex & "\n" & "failed:" & "\n" & diff --git a/tests/test_bigints_mul_high_words_vs_gmp.nim b/tests/test_bigints_mul_high_words_vs_gmp.nim new file mode 100644 index 0000000..85c48f2 --- /dev/null +++ b/tests/test_bigints_mul_high_words_vs_gmp.nim @@ -0,0 +1,151 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Standard library + std/[random, macros, times, strutils], + # Third-party + gmp, stew/byteutils, + # Internal + ../constantine/io/io_bigints, + ../constantine/arithmetic, + ../constantine/primitives, + ../constantine/config/[common, type_bigint] + +# We test up to 1024-bit, more is really slow + +var bitSizeRNG {.compileTime.} = initRand(1234) + +macro testRandomModSizes(numSizes: static int, rBits, aBits, bBits, wordsStartIndex, body: untyped): untyped = + ## Generate `numSizes` random bit sizes known at compile-time to test against GMP + ## for A mod M + result = newStmtList() + + for _ in 0 ..< numSizes: + let aBitsVal = bitSizeRNG.rand(126 .. 2048) + let bBitsVal = bitSizeRNG.rand(126 .. 2048) + let rBitsVal = bitSizeRNG.rand(62 .. 4096+128) + let wordsStartIndexVal = bitSizeRNG.rand(1 .. wordsRequired(4096+128)) + + result.add quote do: + block: + const `aBits` = `aBitsVal` + const `bBits` = `bBitsVal` + const `rBits` = `rBitsVal` + const `wordsStartIndex` = `wordsStartIndexVal` + + block: + `body` + +const # https://gmplib.org/manual/Integer-Import-and-Export.html + GMP_WordLittleEndian {.used.} = -1'i32 + GMP_WordNativeEndian {.used.} = 0'i32 + GMP_WordBigEndian {.used.} = 1'i32 + + GMP_MostSignificantWordFirst = 1'i32 + GMP_LeastSignificantWordFirst {.used.} = -1'i32 + +proc main() = + var gmpRng: gmp_randstate_t + gmp_randinit_mt(gmpRng) + # The GMP seed varies between run so that + # test coverage increases as the library gets tested. + # This requires to dump the seed in the console or the function inputs + # to be able to reproduce a bug + let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + echo "GMP seed: ", seed + gmp_randseed_ui(gmpRng, seed) + + var r, a, b: mpz_t + mpz_init(r) + mpz_init(a) + mpz_init(b) + + testRandomModSizes(128, rBits, aBits, bBits, wordsStartIndex): + # echo "--------------------------------------------------------------------------------" + echo "Testing: random mul_high_words r (", align($rBits, 4), + "-bit, keeping from ", wordsStartIndex, + " word index) <- a (", align($aBits, 4), + "-bit) * b (", align($bBits, 4), "-bit) (full mul bits: ", align($(aBits+bBits), 4), + "), r large enough? ", wordsRequired(rBits) >= wordsRequired(aBits+bBits) - wordsStartIndex + + # Generate random value in the range 0 ..< 2^aBits + mpz_urandomb(a, gmpRng, aBits) + # Generate random modulus and ensure the MSB is set + mpz_urandomb(b, gmpRng, bBits) + mpz_setbit(r, aBits+bBits) + + # discard gmp_printf(" -- %#Zx mod %#Zx\n", a.addr, m.addr) + + ######################################################### + # Conversion buffers + const aLen = (aBits + 7) div 8 + const bLen = (bBits + 7) div 8 + + var aBuf: array[aLen, byte] + var bBuf: array[bLen, byte] + + {.push warnings: off.} # deprecated csize + var aW, bW: csize # Word written by GMP + {.pop.} + + discard mpz_export(aBuf[0].addr, aW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, a) + discard mpz_export(bBuf[0].addr, bW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, b) + + # Since the modulus is using all bits, it's we can test for exact amount copy + doAssert aLen >= aW, "Expected at most " & $aLen & " bytes but wrote " & $aW & " for " & toHex(aBuf) & " (big-endian)" + doAssert bLen >= bW, "Expected at most " & $bLen & " bytes but wrote " & $bW & " for " & toHex(bBuf) & " (big-endian)" + + # Build the bigint + let aTest = BigInt[aBits].fromRawUint(aBuf.toOpenArray(0, aW-1), bigEndian) + let bTest = BigInt[bBits].fromRawUint(bBuf.toOpenArray(0, bW-1), bigEndian) + + ######################################################### + # Multiplication + drop low words + mpz_mul(r, a, b) + var shift: mpz_t + mpz_init(shift) + r.mpz_tdiv_q_2exp(r, WordBitwidth * wordsStartIndex) + + # If a*b overflow the result size we truncate + const numWords = wordsRequired(rBits) + when numWords < wordsRequired(aBits+bBits): + echo " truncating from ", wordsRequired(aBits+bBits), " words to ", numWords, " (2^", WordBitwidth * numWords, ")" + r.mpz_tdiv_r_2exp(r, WordBitwidth * numWords) + + # Constantine + var rTest: BigInt[rBits] + rTest.prod_high_words(aTest, bTest, wordsStartIndex) + + ######################################################### + # Check + const rLen = numWords * WordBitWidth + var rGMP: array[rLen, byte] + {.push warnings: off.} # deprecated csize + var rW: csize # Word written by GMP + {.pop.} + discard mpz_export(rGMP[0].addr, rW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, r) + + var rConstantine: array[rLen, byte] + exportRawUint(rConstantine, rTest, bigEndian) + + # Note: in bigEndian, GMP aligns left while constantine aligns right + doAssert rGMP.toOpenArray(0, rW-1) == rConstantine.toOpenArray(rLen-rW, rLen-1), block: + # Reexport as bigEndian for debugging + discard mpz_export(aBuf[0].addr, aW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, a) + discard mpz_export(bBuf[0].addr, bW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, b) + "\nMultiplication with operands\n" & + " a (" & align($aBits, 4) & "-bit): " & aBuf.toHex & "\n" & + " b (" & align($bBits, 4) & "-bit): " & bBuf.toHex & "\n" & + " keeping words starting from: " & $wordsStartIndex & "\n" & + "into r of size " & align($rBits, 4) & "-bit failed:" & "\n" & + " GMP: " & rGMP.toHex() & "\n" & + " Constantine: " & rConstantine.toHex() & "\n" & + "(Note that GMP aligns bytes left while constantine aligns bytes right)" + +main() diff --git a/tests/test_bigints_mul_vs_gmp.nim b/tests/test_bigints_mul_vs_gmp.nim new file mode 100644 index 0000000..9b34fb6 --- /dev/null +++ b/tests/test_bigints_mul_vs_gmp.nim @@ -0,0 +1,140 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Standard library + std/[random, macros, times, strutils], + # Third-party + gmp, stew/byteutils, + # Internal + ../constantine/io/io_bigints, + ../constantine/arithmetic, + ../constantine/primitives, + ../constantine/config/[common, type_bigint] + +# We test up to 1024-bit, more is really slow + +var bitSizeRNG {.compileTime.} = initRand(1234) + +macro testRandomModSizes(numSizes: static int, rBits, aBits, bBits, body: untyped): untyped = + ## Generate `numSizes` random bit sizes known at compile-time to test against GMP + ## for A mod M + result = newStmtList() + + for _ in 0 ..< numSizes: + let aBitsVal = bitSizeRNG.rand(126 .. 2048) + let bBitsVal = bitSizeRNG.rand(126 .. 2048) + let rBitsVal = bitSizeRNG.rand(62 .. 4096+128) + + result.add quote do: + block: + const `aBits` = `aBitsVal` + const `bBits` = `bBitsVal` + const `rBits` = `rBitsVal` + block: + `body` + +const # https://gmplib.org/manual/Integer-Import-and-Export.html + GMP_WordLittleEndian {.used.} = -1'i32 + GMP_WordNativeEndian {.used.} = 0'i32 + GMP_WordBigEndian {.used.} = 1'i32 + + GMP_MostSignificantWordFirst = 1'i32 + GMP_LeastSignificantWordFirst {.used.} = -1'i32 + +proc main() = + var gmpRng: gmp_randstate_t + gmp_randinit_mt(gmpRng) + # The GMP seed varies between run so that + # test coverage increases as the library gets tested. + # This requires to dump the seed in the console or the function inputs + # to be able to reproduce a bug + let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + echo "GMP seed: ", seed + gmp_randseed_ui(gmpRng, seed) + + var r, a, b: mpz_t + mpz_init(r) + mpz_init(a) + mpz_init(b) + + testRandomModSizes(128, rBits, aBits, bBits): + # echo "--------------------------------------------------------------------------------" + echo "Testing: random mul r (", align($rBits, 4), "-bit) <- a (", align($aBits, 4), "-bit) * b (", align($bBits, 4), "-bit) (full mul bits: ", align($(aBits+bBits), 4), "), r large enough? ", rBits >= aBits+bBits + + # Generate random value in the range 0 ..< 2^aBits + mpz_urandomb(a, gmpRng, aBits) + # Generate random modulus and ensure the MSB is set + mpz_urandomb(b, gmpRng, bBits) + mpz_setbit(r, aBits+bBits) + + # discard gmp_printf(" -- %#Zx mod %#Zx\n", a.addr, m.addr) + + ######################################################### + # Conversion buffers + const aLen = (aBits + 7) div 8 + const bLen = (bBits + 7) div 8 + + var aBuf: array[aLen, byte] + var bBuf: array[bLen, byte] + + {.push warnings: off.} # deprecated csize + var aW, bW: csize # Word written by GMP + {.pop.} + + discard mpz_export(aBuf[0].addr, aW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, a) + discard mpz_export(bBuf[0].addr, bW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, b) + + # Since the modulus is using all bits, it's we can test for exact amount copy + doAssert aLen >= aW, "Expected at most " & $aLen & " bytes but wrote " & $aW & " for " & toHex(aBuf) & " (big-endian)" + doAssert bLen >= bW, "Expected at most " & $bLen & " bytes but wrote " & $bW & " for " & toHex(bBuf) & " (big-endian)" + + # Build the bigint + let aTest = BigInt[aBits].fromRawUint(aBuf.toOpenArray(0, aW-1), bigEndian) + let bTest = BigInt[bBits].fromRawUint(bBuf.toOpenArray(0, bW-1), bigEndian) + + ######################################################### + # Multiplication + mpz_mul(r, a, b) + + # If a*b overflow the result size we truncate + const numWords = wordsRequired(rBits) + when numWords < wordsRequired(aBits+bBits): + echo " truncating from ", wordsRequired(aBits+bBits), " words to ", numWords, " (2^", WordBitwidth * numWords, ")" + r.mpz_tdiv_r_2exp(r, WordBitwidth * numWords) + + # Constantine + var rTest: BigInt[rBits] + rTest.prod(aTest, bTest) + + ######################################################### + # Check + const rLen = numWords * WordBitWidth + var rGMP: array[rLen, byte] + {.push warnings: off.} # deprecated csize + var rW: csize # Word written by GMP + {.pop.} + discard mpz_export(rGMP[0].addr, rW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, r) + + var rConstantine: array[rLen, byte] + exportRawUint(rConstantine, rTest, bigEndian) + + # Note: in bigEndian, GMP aligns left while constantine aligns right + doAssert rGMP.toOpenArray(0, rW-1) == rConstantine.toOpenArray(rLen-rW, rLen-1), block: + # Reexport as bigEndian for debugging + discard mpz_export(aBuf[0].addr, aW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, a) + discard mpz_export(bBuf[0].addr, bW.addr, GMP_MostSignificantWordFirst, 1, GMP_WordNativeEndian, 0, b) + "\nMultiplication with operands\n" & + " a (" & align($aBits, 4) & "-bit): " & aBuf.toHex & "\n" & + " b (" & align($bBits, 4) & "-bit): " & bBuf.toHex & "\n" & + "into r of size " & align($rBits, 4) & "-bit failed:" & "\n" & + " GMP: " & rGMP.toHex() & "\n" & + " Constantine: " & rConstantine.toHex() & "\n" & + "(Note that GMP aligns bytes left while constantine aligns bytes right)" + +main() diff --git a/tests/test_bigints_multimod.nim b/tests/test_bigints_multimod.nim index e020b4c..3c5e5e8 100644 --- a/tests/test_bigints_multimod.nim +++ b/tests/test_bigints_multimod.nim @@ -8,7 +8,7 @@ import # Standard library - unittest, + std/unittest, # Third-party ../constantine/io/io_bigints, ../constantine/arithmetic, diff --git a/tests/test_ec_bls12_381.nim b/tests/test_ec_bls12_381.nim index b487965..ffe8740 100644 --- a/tests/test_ec_bls12_381.nim +++ b/tests/test_ec_bls12_381.nim @@ -8,12 +8,12 @@ import # Standard library - unittest, times, + std/[unittest, times], # Internals ../constantine/config/[common, curves], ../constantine/arithmetic, ../constantine/io/[io_bigints, io_ec], - ../constantine/elliptic/[ec_weierstrass_projective], + ../constantine/elliptic/[ec_weierstrass_projective, ec_scalar_mul, ec_endomorphism_accel], # Test utilities ./support/ec_reference_scalar_mult @@ -33,119 +33,122 @@ proc test( var Q: EC let qOK = Q.fromHex(Qx, Qy) - let exponent = EC.F.C.matchingBigInt.fromHex(scalar) + let exponent = BigInt[EC.F.C.getCurveOrderBitwidth()].fromHex(scalar) var exponentCanonical: array[(exponent.bits+7) div 8, byte] exponentCanonical.exportRawUint(exponent, bigEndian) var impl = P reference = P + endo = P scratchSpace: array[1 shl 4, EC] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) + endo.scalarMulGLV(exponent) doAssert: bool(Q == reference) doAssert: bool(Q == impl) + doAssert: bool(Q == endo) -suite "BLS12_381 implementation (and unsafe reference impl) vs SageMath": +suite "Scalar Multiplication (cofactor cleared): BLS12_381 implementation (and unsafe reference impl) vs SageMath": # Generated via sage sage/testgen_bls12_381.sage test( id = 1, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "f21eda282230f72b855d48055e68ab3825da87831fa5147a64fa071bade4c26bddd45e8b602e62df4d907414a6ec1b4", - Py = "531b38866cb35c19951f4a1ac62242f11fa714a1b99c6116a630fa75e7f4407fcd1ae9770a821c5899a777d341c915a", + Px = "f9679bb02ee7f352fff6a6467a5e563ec8dd38c86a48abd9e8f7f241f1cdd29d54bc3ddea3a33b62e0d7ce22f3d244a", + Py = "50189b992cf856846b30e52205ff9ef72dc081e9680726586231cbc29a81a162120082585f401e00382d5c86fb1083f", scalar = "f7e60a832eb77ac47374bc93251360d6c81c21add62767ff816caf11a20d8db", - Qx = "18d7ca3fb93d7300a0484233f3bac9bca00b45595a4b9caf66aa0b2237f6fd51559a24a634f3876451332c5f754438b2", - Qy = "edbb203999303fc99ef04368412da4b3555f999c703b425dedff3fdc799317c292751c46275b27990c53d933de2db63" + Qx = "c344f3bcc86df380186311fa502b7943a436a629380f8ee1960515522eedc58fe67ddd47615487668bcf12842c524d8", + Qy = "189e0c154f2631ad26e24ca73d84fb60a21d385fe205df04cf9f2f6fc0c3aa72afe9fbea71a930fa71d9bbfddb2fa571" ) test( id = 2, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "9ca8e33d8a330b04b052af6cf44bf2ed08cc93d83a4eb48cbb0cabfe02ffb2ef910df44862b271354352f15b70e45b5", - Py = "102f6d07ef45f51de9a4ecef5ec34eae16833f4761c2ddfbe2b414173c3580721135e5bbb74269ab85ba83cb03020d9b", + Px = "17d71835ff84f150fabf5c77ac90bf7f6249143abd1f5d8a46a76f243d424d82e1e258fc7983ba8af97a2462adebe090", + Py = "d3e108ee1332067cbe4f4193eae10381acb69f493b40e53d9dee59506b49c6564c9056494a7f987982eb4069512c1c6", scalar = "5f10367bdae7aa872d90b5ac209321ce5a15181ce22848d032a8d452055cbfd0", - Qx = "a50d49e3d8757f994aae312dedd55205687c432bc9d97efbe69e87bef4256b87af1b665a669d06657cda6ff01ee42df", - Qy = "160d50aaa21f9d5b4faada77e4f91d8d4f152a0fcca4d30d271d74b20c1bba8638128f99f52d9603d4a24f8e27219bcd" + Qx = "21073bee733a07b15d83afcd4e6ee11b01e6137fd5ad4589c5045e12d79a9a9490a3ebc59f30633a60fc3635a3c1e51", + Qy = "eb7a97a9d3dfff1667b8fa559bdcdf37c7767e6afb8ca93ad9dd44feb93761e10aa2c4c1a79728a21cd4a6f705398b5" ) test( id = 3, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "173c28687d23de83c950131e548485e8e09d4053d32b814d13b618ee4159e8b61bf6320148ddabcedf2b04d3c9787cd4", - Py = "277f935b4e0a90155915960c617f395dcadead1c7297cf92916add07308fc3f0493aa6dabf31d1f15953f56ac37d3d9", + Px = "f92c9572692e8f3d450483a7a9bb4694e3b54c9cd09441a4dd7f579b0a6984e47f8090c31c172b33d87f3de186d6b58", + Py = "286ede4cb2ae19ead4932d5550c5d3ec8ce3a3ada5e1ed6d202e93dd1b16d3513f0f9b62adc6323f18e272a426ee955", scalar = "4c321d72220c098fc0fd52306de98f8be9446bf854cf1e4d8dbae62375d18faf", - Qx = "16259e878b5921bbe1e5672cccea0f29fedbb93b8ce1bae4d4b602b6dd5708c6d4e5d82ff92868828c46fd333aadf82d", - Qy = "16d09713f4fe5705f2e3491aa9a1d5827fb3b280f5a1fdde0b01a2b75f5803d528d5f5603cc0e9da29d6a07b8e14df7c" + Qx = "4bb385e937582ae32aa7ba89632fcef2eace3f7b57309d979cf35298a430de9ef4d9ac5ba2335c1a4b6e7e5c38d0036", + Qy = "1801154d3a7b0daea772345b7f72a4c88c9677743f267da63490dad4dece2ecc9ec02d4d4d063086ee5d356aa2db914e" ) test( id = 4, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "177d32dfa6e97daf5ea8aee520bc5c33b7bee37cba114fda52dd948030ad81abdffbdda402c930791e1048ad531b2c80", - Py = "14e9f915698dadd2220586a8bcc921b1f855273c3c0e41a88569e5a9fd2a4e886eeff9a7a11b02ec286987c5a52d55ce", + Px = "ec23ff3435b8ebd5e8e0a879d432e11eb974161664b1341fd28f1ffc4c228bf6ada2ae4a565f18c9b66f67a7573502d", + Py = "10c4b647be08db0b49b75320ae891f9f9c5d7bb7c798947e800d681d205d1b24b12e4dfa993d1bd16851b00356627cc1", scalar = "1738857afb76c55f615c2a20b44ca90dcb3267d804ec23fddea431dbee4eb37f", - Qx = "a4bfcfc65eb16562752f5c164349ef673477e19fe020de84eddbc2958f6d40bbbba39fc67ee8c8fdf007922fec97f79", - Qy = "106ccd382d15773e6097f8ea6f012cbec15184d6f4ea08bac2842ed419f0e555f1a43f7434b2e017f9e02971d07eb59d" + Qx = "dc7ae7801152918ee3c13590407b4242a80d0b855a0bf585d3dc30719601d2d5d9e01e99ae735003ecb7c20ef48265", + Qy = "142c01a6aa390426a4ce2f36df43f86442732c35d4e05e5b67f3623832944f0ea5a29138624cb939330652a3cfb282b5" ) test( id = 5, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "1bc38e70e62770489063d3b7a3bebbc6735ab8dc389576ff03272c2883045aa051d74f8407920530b4c719b140cda81", - Py = "bd24e4fb09ed4098d61e3d2cb456f03d7818ded79dfba9cfe7956829797b12e10f1766c46c1a2e1cf2957295124c782", + Px = "df127083c2a5ef2388b02af913c0e4002a52a82db9e5ecbf23ee4f557d3b61c91ebcfe9d4973070b46bc5ea6897bca1", + Py = "318960aeea262ec23ffdd42ec1ba72ae6fa2186a1e2a0fc2659073fb7b5adfb50d581a4d998a94d1accf78b1b3a0163", scalar = "19c47811813444020c999a2b263940b5054cf45bb8ad8e086ff126bfcd5507e1", - Qx = "b310d4688f2c9f8cd4c030b62ed27341f4c71341fe9c56858a949a2d51670eb6ebe1339163bdb833e692b0ee0cf4e92", - Qy = "c92300561e1acb1e1ae6a1b75f83b9d2d2cb5f07c3f8ea945990ceb75e7ea12c4aec115227c13a05be92f5caed9268e" + Qx = "5f93c42fd76a29063efa2ee92607e0b3ae7edc4e419b3914661e5162d6beaeb96a34d2007ff817bc102651f61dca8d1", + Qy = "18dde8666bb1d0a379719d7d1b1512de809b70e49d9553303274ea872e56f7f39da551d6bcb7c57ae88ec7dc1fb354a4" ) test( id = 6, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "48cddafaca93d33caf910a7a6f74dc3489d53da9fa2f940b70b6dcf538cc08da1a369809ab86a8ee49cead0ed6bfef6", - Py = "173f8dfb384aea011bed89aaca625085dc2940d0775e5f2647fc7574ce822643d0d7b1b39e9a51b9f5a0dca7486bddd0", + Px = "101123de23c0f240c583c2368c4118dc942db219c55f58cf54acd500c1fcfa06f651ad75319ebf840cbdb6bddea7fde4", + Py = "5268587d4b844b0708e0336d1bbf48da185aaf5b948eccc3b565d00a856dd55882b9bb31c52af0e275b168cb35eb7b0", scalar = "43ffcda71e45a3e90b7502d92b30a0b06c54c95a91aa21e0438677b1c2714ecb", - Qx = "ef1e4967a3eb19318a66d092eada9810bebf301c168cea7c73fad9d98f7d4c2bde1071fd142c3da90830509f22a82b5", - Qy = "da537922dcb6bf79e4d09237c1a3c5804e3a83b6f18ccb26991d50d77c81bef76139fa73d39c684c7c1616151b1058b" + Qx = "f9871b682c1c76c7f4f0a7ca57ad876c10dc108b65b76987264873278d9f54db95101c173aed06d07062efc7d47ca0c", + Qy = "20d9628d611e72a4251a1f2357d4f53e68e4915383b6a0d126273d216b1a8c5e2cb7b2688ad702ef1682f4c5228fcd9" ) test( id = 7, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "9d56cb273bdeef945078066192b74d2f3077f00f5bd1a50b338c44f7c640005a614f9c6fc89cb4678140b2a721c69a8", - Py = "107b42b9a0c22b9e9cd2191b90fede2ab280532ea26806338a5b28533cf9431bde1a8010677a5078c63482953d4f2451", + Px = "1457ba1bae6eb3afae3261941c65c93e3ae7d784907d15b8d559100da5e13fd29e4a4d6e3103b781a95237b7b2d80a8e", + Py = "6a869a47cb48d01e7d29660932afd7617720262b55de5f430b8aa3d74f9fd2b9d3a07ce192425da58014764fc9532cd", scalar = "64ad0d6c36dba5368e71f0010aebf860288f54611e5aaf18082bae7a404ebfd8", - Qx = "e0c78d1e1ed993fdeb14e4872965bc90014aa39c728c457a720bf3123ebcdcb17ac553a619b9b7073ada436565d4bb4", - Qy = "c2d9ba441ed90bae4f1597da90e434f1668fda320e4fa04cddcdce0eacb3bc54185d5f7cde826f5bd0e3d59b2424906" + Qx = "93e540e26190e161038d985d40f2ab897cbc2346be7d8f2b201a689b59d4020a8740e252606f2f79ba0e121ccc9976d", + Qy = "10568d68f1b993aa1eded3869eda14e509f1cb4d8553bdf97feee175467cea4c0c1316fdb4e5a68440ad04b96b2d3bfc" ) test( id = 8, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "150a83a868fa6a74dbc5658445ea99ec47009572f303ce1d3c76370804c5a8c26d40c8b4b35a6585612d704c5fb090cb", - Py = "31e73ed0aedebcf0b58d60c16f2e5ddd2d4eb2a6e34177939efcca0767cde241966b5950c3333c62ccddee51de26fe6", + Px = "2615f843e8fe68d4c337bcf83b2cf13cbae638edd0740f1eac520dc2146afa3b8d36c540878c1d207ef913634b1e593", + Py = "1787d6eeeceb6e7793073f0bbe7bae522529c126b650c43d5d41e732c581a57df1bfb818061b7b4e6c9145da5df2c43e", scalar = "b0ac3d0e685583075aa46c03a00859dfbec24ccb36e2cae3806d82275adcc03", - Qx = "9c5e69fbd492a64e5811af7cc69e42bc14d8626f6d384d3f479d8e06c20ec5f460a1e3839f33899b4a9e0ada876ac6e", - Qy = "16990d7d308897c74b87368f847df3ac0bb6609091c8d39b22d5778a4229f0bb92fea385d27db41e237dcfb0d05bd0e7" + Qx = "d95ed29c2e15fd2205d83a71478341d6022deb93af4d49f704437678a72ce141d2f6043aa0e34e26f60d17e16b97053", + Qy = "b37cbded112c84116b74ff311b10d148f3e203cb88d4a011b096c74cd2bfdb27255727de4aa8299ae10b32d661d48a7" ) test( id = 9, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "69498486a06c18f836a8e9ed507bbb563d6d03545e03e08f628e8fbd2e5d098e58950071d516ffd044d92b3a8b07184", - Py = "18a169f06fc94f40cd131bdcd23e48e95b1276c0c8daacf56c3a5e278e89ee1094c94aa516113aa4a2455da149f0f989", + Px = "10bc0c4e1ed87246a9d4d7d38546369f275a245f6e1d3b882e8c9a7f05bc6ee8ff97a96a54084c2bef15ed8bfefb1465", + Py = "1782377e5f588576b5ab42fea224e88873dda957202f0c6d72ce8728c2d58dc654be77226fbda385d5f269354e4a176a", scalar = "23941bb3c3659423d6fdafb7cff52e0e02de0ac91e64c537c6203d64905b63d0", - Qx = "482e085550f5e514dd98f2d9b119c284ac165514d228c8f7a179f2b442968984873223af2255a499dc931c63543c0ba", - Qy = "151ce80ca51dd09243d2b1a7937096d6b7494e89190da5ab7604cd913dc4105c871e48c815fefadee2906b8b401e7e71" + Qx = "83f1e7e8bd963c1ccd837dae7bc9336531aaf0aee717537a9a7e2712e220f74cdb73a99f331c0eb6b377be3dafc211f", + Qy = "cd87773d072b1305dfc85c2983aecae2ab316e5e8f31306c32d58d6ce2e431b12685d18c58b6a35ad2113c5b689eeb" ) test( id = 10, EC = ECP_SWei_Proj[Fp[BLS12_381]], - Px = "98cc20aa561769b7ee569304503a94752e236bba52938fed7f3093d5867f65361dc8b48c83bd7db490c26736196e20e", - Py = "10a68394358903122bd649bd30b473f4d3b4f0830bfe7da1c48ae87d9429d8fd26f5b4be8d8fd8e4214017044696da29", + Px = "be4f9f721d98a761a5562bd80ea06f369e9cbb7d33bbb2f0191d4b77d0fd2a10c4083b54157b525f36c522ca3a6ca09", + Py = "166c315ecdd20acb3c5efcc7e038b17d0b37a06ffbf77873f15fc0cd091a1e4102a8b8bf5507919453759e744391b04d", scalar = "4203156dcf70582ea8cbd0388104f47fd5a18ae336b2fed8458e1e4e74d7baf5", - Qx = "18ff1dfd96799b7d0bffaa7480121c3a719047815ae41419f1bd1fdd593288bed8827b3d9e45a3a1e01bf7d603b5ba0", - Qy = "49b95ca2c0f75dfb15fc07e5692d23f8eb38cb1cc9c48cd0e93a80adbff135a3945cc7a5d53d2b7510d6ee7cf97308d" + Qx = "c72bc7087cd22993b7f6d2e49026abfde678a384073ed373b95df722b1ab658eb5ae42211e5528af606e38b59511bc6", + Qy = "96d80593b42fe44e64793e490b1257af0aa26b36773aac93c3686fdb14975917cf60a1a19e32623218d0722dbb88a85" ) diff --git a/tests/test_ec_bn254.nim b/tests/test_ec_bn254.nim index d553eff..6044477 100644 --- a/tests/test_ec_bn254.nim +++ b/tests/test_ec_bn254.nim @@ -8,12 +8,12 @@ import # Standard library - unittest, times, + std/[unittest, times], # Internals ../constantine/config/[common, curves], ../constantine/arithmetic, ../constantine/io/[io_bigints, io_ec], - ../constantine/elliptic/[ec_weierstrass_projective], + ../constantine/elliptic/[ec_weierstrass_projective, ec_scalar_mul, ec_endomorphism_accel], # Test utilities ./support/ec_reference_scalar_mult @@ -33,22 +33,25 @@ proc test( var Q: EC let qOK = Q.fromHex(Qx, Qy) - let exponent = EC.F.C.matchingBigInt.fromHex(scalar) + let exponent = BigInt[EC.F.C.getCurveOrderBitwidth()].fromHex(scalar) var exponentCanonical: array[(exponent.bits+7) div 8, byte] exponentCanonical.exportRawUint(exponent, bigEndian) var impl = P reference = P + endo = P scratchSpace: array[1 shl 4, EC] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) + endo.scalarMulGLV(exponent) doAssert: bool(Q == reference) doAssert: bool(Q == impl) + doAssert: bool(Q == endo) -suite "BN254 implementation (and unsafe reference impl) vs SageMath": +suite "Scalar Multiplication: BN254 implementation (and unsafe reference impl) vs SageMath": # Generated via sage sage/testgen_bn254_snarks.sage test( id = 1, diff --git a/tests/test_ec_weierstrass_projective_g1.nim b/tests/test_ec_weierstrass_projective_g1.nim index ef0ac03..d901aa8 100644 --- a/tests/test_ec_weierstrass_projective_g1.nim +++ b/tests/test_ec_weierstrass_projective_g1.nim @@ -8,12 +8,12 @@ import # Standard library - unittest, times, + std/[unittest, times], # Internals ../constantine/config/[common, curves], ../constantine/arithmetic, ../constantine/io/io_bigints, - ../constantine/elliptic/[ec_weierstrass_affine, ec_weierstrass_projective], + ../constantine/elliptic/[ec_weierstrass_affine, ec_weierstrass_projective, ec_scalar_mul], # Test utilities ../helpers/prng_unsafe, ./support/ec_reference_scalar_mult @@ -194,7 +194,7 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project reference = a scratchSpace{.noInit.}: array[1 shl 4, ECP_SWei_Proj[F]] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) check: @@ -223,7 +223,7 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project reference = a scratchSpace{.noInit.}: array[1 shl 4, ECP_SWei_Proj[F]] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) check: @@ -256,7 +256,7 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project reference = a scratchSpace{.noInit.}: array[1 shl 4, ECP_SWei_Proj[F]] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) check: @@ -288,7 +288,7 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project reference = a scratchSpace{.noInit.}: array[1 shl 4, ECP_SWei_Proj[F]] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) check: @@ -323,20 +323,20 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project fImpl.sum(a, b) fReference.sum(a, b) - fImpl.scalarMul(exponentCanonical, scratchSpace) + fImpl.scalarMulGeneric(exponentCanonical, scratchSpace) fReference.unsafe_ECmul_double_add(exponentCanonical) # [k]a + [k]b - Distributed var kaImpl = a var kaRef = a - kaImpl.scalarMul(exponentCanonical, scratchSpace) + kaImpl.scalarMulGeneric(exponentCanonical, scratchSpace) kaRef.unsafe_ECmul_double_add(exponentCanonical) var kbImpl = b var kbRef = b - kbImpl.scalarMul(exponentCanonical, scratchSpace) + kbImpl.scalarMulGeneric(exponentCanonical, scratchSpace) kbRef.unsafe_ECmul_double_add(exponentCanonical) var kakbImpl{.noInit.}, kakbRef{.noInit.}: ECP_SWei_Proj[F] @@ -370,7 +370,7 @@ suite "Elliptic curve in Short Weierstrass form y² = x³ + a x + b with project reference = a scratchSpace{.noInit.}: array[1 shl 4, ECP_SWei_Proj[F]] - impl.scalarMul(exponentCanonical, scratchSpace) + impl.scalarMulGeneric(exponentCanonical, scratchSpace) reference.unsafe_ECmul_double_add(exponentCanonical) check: bool(impl == reference) diff --git a/tests/test_finite_fields.nim b/tests/test_finite_fields.nim index f85f70b..fd169a4 100644 --- a/tests/test_finite_fields.nim +++ b/tests/test_finite_fields.nim @@ -6,13 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import unittest, +import std/unittest, ../constantine/arithmetic, ../constantine/io/io_fields, ../constantine/config/curves -import ../constantine/io/io_bigints - static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option" proc main() = diff --git a/tests/test_finite_fields_mulsquare.nim b/tests/test_finite_fields_mulsquare.nim index d44bf49..d04923c 100644 --- a/tests/test_finite_fields_mulsquare.nim +++ b/tests/test_finite_fields_mulsquare.nim @@ -6,12 +6,15 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import std/unittest, std/times, - ../constantine/arithmetic, - ../constantine/io/[io_bigints, io_fields], - ../constantine/config/[curves, common], - # Test utilities - ../helpers/prng_unsafe +import + # Standard library + std/[unittest, times], + # Internal + ../constantine/arithmetic, + ../constantine/io/[io_bigints, io_fields], + ../constantine/config/[curves, common], + # Test utilities + ../helpers/prng_unsafe const Iters = 128 diff --git a/tests/test_finite_fields_powinv.nim b/tests/test_finite_fields_powinv.nim index bb59f24..9d29980 100644 --- a/tests/test_finite_fields_powinv.nim +++ b/tests/test_finite_fields_powinv.nim @@ -6,13 +6,16 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import ../constantine/arithmetic, - ../constantine/io/[io_bigints, io_fields], - ../constantine/config/curves, - # Test utilities - ../helpers/prng_unsafe, - # Standard library - std/unittest, std/times +import + # Standard library + std/[unittest, times], + # Internal + ../constantine/arithmetic, + ../constantine/io/[io_bigints, io_fields], + ../constantine/config/curves, + # Test utilities + ../helpers/prng_unsafe + static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option" diff --git a/tests/test_finite_fields_sqrt.nim b/tests/test_finite_fields_sqrt.nim index c6b8150..4bffb71 100644 --- a/tests/test_finite_fields_sqrt.nim +++ b/tests/test_finite_fields_sqrt.nim @@ -6,14 +6,16 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import ../constantine/[arithmetic, primitives], - ../constantine/io/[io_fields], - ../constantine/config/[curves, common], - # Test utilities - ../helpers/prng_unsafe, - # Standard library - std/tables, - std/unittest, std/times +import + # Standard library + std/[tables, unittest, times], + # Internal + ../constantine/[arithmetic, primitives], + ../constantine/io/[io_fields], + ../constantine/config/[curves, common], + # Test utilities + ../helpers/prng_unsafe + const Iters = 128 diff --git a/tests/test_finite_fields_vs_gmp.nim b/tests/test_finite_fields_vs_gmp.nim index 518730f..63fa2ff 100644 --- a/tests/test_finite_fields_vs_gmp.nim +++ b/tests/test_finite_fields_vs_gmp.nim @@ -8,7 +8,7 @@ import # Standard library - random, macros, times, strutils, + std/[random, macros, times, strutils], # Third-party gmp, stew/byteutils, # Internal diff --git a/tests/test_precomputed.nim b/tests/test_precomputed.nim new file mode 100644 index 0000000..b787af7 --- /dev/null +++ b/tests/test_precomputed.nim @@ -0,0 +1,37 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import std/unittest, + ../constantine/arithmetic, + ../constantine/config/curves, + ../constantine/io/[io_bigints, io_fields] + +proc checkCubeRootOfUnity(curve: static Curve) = + test $curve & " cube root of unity (mod p)": + var cru = curve.getCubicRootOfUnity_mod_p() + cru.square() + cru *= curve.getCubicRootOfUnity_mod_p() + + check: bool cru.isOne() + + test $curve & " cube root of unity (mod r)": + var cru: BigInt[3 * curve.getCurveOrderBitwidth()] + cru.prod(curve.getCubicRootOfUnity_mod_r(), curve.getCubicRootOfUnity_mod_r()) + cru.prod(cru, curve.getCubicRootOfUnity_mod_r()) + + var r: BigInt[curve.getCurveOrderBitwidth()] + r.reduce(cru, curve.getCurveOrder) + + check: bool r.isOne() + +proc main() = + suite "Sanity checks on precomputed values": + checkCubeRootOfUnity(BN254_Snarks) + # checkCubeRootOfUnity(BLS12_381) + +main()