diff --git a/constantine/arithmetic/bigints.nim b/constantine/arithmetic/bigints.nim index c7a35d8..9236076 100644 --- a/constantine/arithmetic/bigints.nim +++ b/constantine/arithmetic/bigints.nim @@ -10,7 +10,7 @@ import ../config/[common, type_bigint], ../primitives, ./limbs, - ./limbs_double_width, + ./limbs_extmul, ./limbs_modular, ./limbs_montgomery @@ -347,7 +347,7 @@ func bit0*(a: BigInt): Ct[uint8] = # Multiplication by small cosntants # ------------------------------------------------------------ -func `*=`*(a: var BigInt, b: static int) {.inline.} = +func `*=`*(a: var BigInt, b: static int) = ## Multiplication by a small integer known at compile-time # Implementation: # @@ -420,11 +420,6 @@ func `*=`*(a: var BigInt, b: static int) {.inline.} = else: {.error: "Multiplication by this small int not implemented".} -func `*`*(b: static int, a: BigInt): BigInt {.noinit, inline.} = - ## Multiplication by a small integer known at compile-time - result = a - result *= b - # Division by constants # ------------------------------------------------------------ diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim index b3d0209..3ed4c12 100644 --- a/constantine/arithmetic/finite_fields.nim +++ b/constantine/arithmetic/finite_fields.nim @@ -43,6 +43,7 @@ export Fp, Fr, FF # No exceptions allowed {.push raises: [].} +{.push inline.} # ############################################################ # @@ -50,14 +51,14 @@ export Fp, Fr, FF # # ############################################################ -func fromBig*(dst: var FF, src: BigInt) {.inline.}= +func fromBig*(dst: var FF, src: BigInt) = ## Convert a BigInt to its Montgomery form when nimvm: dst.mres.montyResidue_precompute(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord()) else: dst.mres.montyResidue(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul()) -func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit, inline.} = +func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit.} = ## Convert a BigInt to its Montgomery form result.fromBig(src) @@ -70,14 +71,14 @@ func toBig*(src: FF): auto {.noInit, inline.} = # Copy # ------------------------------------------------------------ -func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline, meter.} = +func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.meter.} = ## Constant-time conditional copy ## If ctl is true: b is copied into a ## if ctl is false: b is not copied and a is unmodified ## Time and memory accesses are the same whether a copy occurs or not ccopy(a.mres, b.mres, ctl) -func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} = +func cswap*(a, b: var FF, ctl: CTBool) {.meter.} = ## Swap ``a`` and ``b`` if ``ctl`` is true ## ## Constant-time: @@ -105,34 +106,34 @@ func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} = # In practice I'm not aware of such prime being using in elliptic curves. # 2^127 - 1 and 2^521 - 1 are used but 127 and 521 are not multiple of 32/64 -func `==`*(a, b: FF): SecretBool {.inline.} = +func `==`*(a, b: FF): SecretBool = ## Constant-time equality check a.mres == b.mres -func isZero*(a: FF): SecretBool {.inline.} = +func isZero*(a: FF): SecretBool = ## Constant-time check if zero a.mres.isZero() -func isOne*(a: FF): SecretBool {.inline.} = +func isOne*(a: FF): SecretBool = ## Constant-time check if one a.mres == FF.getMontyOne() -func isMinusOne*(a: FF): SecretBool {.inline.} = +func isMinusOne*(a: FF): SecretBool = ## Constant-time check if -1 (mod p) a.mres == FF.getMontyPrimeMinus1() -func setZero*(a: var FF) {.inline.} = +func setZero*(a: var FF) = ## Set ``a`` to zero a.mres.setZero() -func setOne*(a: var FF) {.inline.} = +func setOne*(a: var FF) = ## Set ``a`` to one # Note: we need 1 in Montgomery residue form # TODO: Nim codegen is not optimal it uses a temporary # Check if the compiler optimizes it away a.mres = FF.getMontyOne() -func `+=`*(a: var FF, b: FF) {.inline, meter.} = +func `+=`*(a: var FF, b: FF) {.meter.} = ## In-place addition modulo p when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs) @@ -141,7 +142,7 @@ func `+=`*(a: var FF, b: FF) {.inline, meter.} = overflowed = overflowed or not(a.mres < FF.fieldMod()) discard csub(a.mres, FF.fieldMod(), overflowed) -func `-=`*(a: var FF, b: FF) {.inline, meter.} = +func `-=`*(a: var FF, b: FF) {.meter.} = ## In-place substraction modulo p when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling submod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs) @@ -149,7 +150,7 @@ func `-=`*(a: var FF, b: FF) {.inline, meter.} = let underflowed = sub(a.mres, b.mres) discard cadd(a.mres, FF.fieldMod(), underflowed) -func double*(a: var FF) {.inline, meter.} = +func double*(a: var FF) {.meter.} = ## Double ``a`` modulo p when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs) @@ -158,7 +159,7 @@ func double*(a: var FF) {.inline, meter.} = overflowed = overflowed or not(a.mres < FF.fieldMod()) discard csub(a.mres, FF.fieldMod(), overflowed) -func sum*(r: var FF, a, b: FF) {.inline, meter.} = +func sum*(r: var FF, a, b: FF) {.meter.} = ## Sum ``a`` and ``b`` into ``r`` modulo p ## r is initialized/overwritten when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling @@ -168,11 +169,11 @@ func sum*(r: var FF, a, b: FF) {.inline, meter.} = overflowed = overflowed or not(r.mres < FF.fieldMod()) discard csub(r.mres, FF.fieldMod(), overflowed) -func sumNoReduce*(r: var FF, a, b: FF) {.inline, meter.} = +func sumNoReduce*(r: var FF, a, b: FF) {.meter.} = ## Sum ``a`` and ``b`` into ``r`` without reduction discard r.mres.sum(a.mres, b.mres) -func diff*(r: var FF, a, b: FF) {.inline, meter.} = +func diff*(r: var FF, a, b: FF) {.meter.} = ## Substract `b` from `a` and store the result into `r`. ## `r` is initialized/overwritten ## Requires r != b @@ -182,12 +183,12 @@ func diff*(r: var FF, a, b: FF) {.inline, meter.} = var underflowed = r.mres.diff(a.mres, b.mres) discard cadd(r.mres, FF.fieldMod(), underflowed) -func diffNoReduce*(r: var FF, a, b: FF) {.inline, meter.} = +func diffNoReduce*(r: var FF, a, b: FF) {.meter.} = ## Substract `b` from `a` and store the result into `r` ## without reduction discard r.mres.diff(a.mres, b.mres) -func double*(r: var FF, a: FF) {.inline, meter.} = +func double*(r: var FF, a: FF) {.meter.} = ## Double ``a`` into ``r`` ## `r` is initialized/overwritten when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling @@ -197,16 +198,16 @@ func double*(r: var FF, a: FF) {.inline, meter.} = overflowed = overflowed or not(r.mres < FF.fieldMod()) discard csub(r.mres, FF.fieldMod(), overflowed) -func prod*(r: var FF, a, b: FF) {.inline, meter.} = +func prod*(r: var FF, a, b: FF) {.meter.} = ## Store the product of ``a`` by ``b`` modulo p into ``r`` ## ``r`` is initialized / overwritten r.mres.montyMul(a.mres, b.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul()) -func square*(r: var FF, a: FF) {.inline, meter.} = +func square*(r: var FF, a: FF) {.meter.} = ## Squaring modulo p r.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare()) -func neg*(r: var FF, a: FF) {.inline, meter.} = +func neg*(r: var FF, a: FF) {.meter.} = ## Negate modulo p when UseASM_X86_64: negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs) @@ -221,11 +222,11 @@ func neg*(r: var FF, a: FF) {.inline, meter.} = t.mres.czero(isZero) r = t -func neg*(a: var FF) {.inline, meter.} = +func neg*(a: var FF) {.meter.} = ## Negate modulo p a.neg(a) -func div2*(a: var FF) {.inline, meter.} = +func div2*(a: var FF) {.meter.} = ## Modular division by 2 a.mres.div2_modular(FF.getPrimePlus1div2()) @@ -269,7 +270,7 @@ func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} = # # Internally those procedures will allocate extra scratchspace on the stack -func pow*(a: var FF, exponent: BigInt) {.inline.} = +func pow*(a: var FF, exponent: BigInt) = ## Exponentiation modulo p ## ``a``: a field element to be exponentiated ## ``exponent``: a big integer @@ -282,7 +283,7 @@ func pow*(a: var FF, exponent: BigInt) {.inline.} = FF.canUseNoCarryMontySquare() ) -func pow*(a: var FF, exponent: openarray[byte]) {.inline.} = +func pow*(a: var FF, exponent: openarray[byte]) = ## Exponentiation modulo p ## ``a``: a field element to be exponentiated ## ``exponent``: a big integer in canonical big endian representation @@ -295,7 +296,7 @@ func pow*(a: var FF, exponent: openarray[byte]) {.inline.} = FF.canUseNoCarryMontySquare() ) -func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} = +func powUnsafeExponent*(a: var FF, exponent: BigInt) = ## Exponentiation modulo p ## ``a``: a field element to be exponentiated ## ``exponent``: a big integer @@ -315,7 +316,7 @@ func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} = FF.canUseNoCarryMontySquare() ) -func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} = +func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) = ## Exponentiation modulo p ## ``a``: a field element to be exponentiated ## ``exponent``: a big integer a big integer in canonical big endian representation @@ -342,47 +343,27 @@ func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} = # ############################################################ # # This implements extra primitives for ergonomics. -# The in-place ones should be preferred as they avoid copies on assignment -# Two kinds: -# - Those that return a field element -# - Those that internally allocate a temporary field element -func `+`*(a, b: FF): FF {.noInit, inline, meter.} = - ## Addition modulo p - result.sum(a, b) - -func `-`*(a, b: FF): FF {.noInit, inline, meter.} = - ## Substraction modulo p - result.diff(a, b) - -func `*`*(a, b: FF): FF {.noInit, inline, meter.} = - ## Multiplication modulo p - ## - ## It is recommended to assign with {.noInit.} - ## as FF elements are usually large and this - ## routine will zero init internally the result. - result.prod(a, b) - -func `*=`*(a: var FF, b: FF) {.inline, meter.} = +func `*=`*(a: var FF, b: FF) {.meter.} = ## Multiplication modulo p a.prod(a, b) -func square*(a: var FF) {.inline, meter.} = +func square*(a: var FF) {.meter.} = ## Squaring modulo p a.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare()) -func square_repeated*(r: var FF, num: int) {.inline, meter.} = +func square_repeated*(r: var FF, num: int) {.meter.} = ## Repeated squarings for _ in 0 ..< num: r.square() -func square_repeated*(r: var FF, a: FF, num: int) {.inline, meter.} = +func square_repeated*(r: var FF, a: FF, num: int) {.meter.} = ## Repeated squarings r.square(a) for _ in 1 ..< num: r.square() -func `*=`*(a: var FF, b: static int) {.inline.} = +func `*=`*(a: var FF, b: static int) = ## Multiplication by a small integer known at compile-time # Implementation: # We don't want to go convert the integer to the Montgomery domain (O(n²)) @@ -464,7 +445,27 @@ func `*=`*(a: var FF, b: static int) {.inline.} = else: {.error: "Multiplication by this small int not implemented".} -func `*`*(b: static int, a: FF): FF {.noinit, inline.} = +func prod*(r: var FF, a: FF, b: static int) = ## Multiplication by a small integer known at compile-time - result = a - result *= b + const negate = b < 0 + const b = if negate: -b + else: b + when negate: + r.neg(a) + else: + r = a + r *= b + +template mulCheckSparse*(a: var Fp, b: Fp) = + ## Multiplication with optimization for sparse inputs + when b.isOne().bool: + discard + elif b.isZero().bool: + a.setZero() + elif b.isMinusOne().bool: + a.neg() + else: + a *= b + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/finite_fields_double_width.nim b/constantine/arithmetic/finite_fields_double_width.nim index 1762c0e..a9a94db 100644 --- a/constantine/arithmetic/finite_fields_double_width.nim +++ b/constantine/arithmetic/finite_fields_double_width.nim @@ -12,7 +12,7 @@ import ./bigints, ./finite_fields, ./limbs, - ./limbs_double_width, + ./limbs_extmul, ./limbs_montgomery when UseASM_X86_64: @@ -28,18 +28,22 @@ template doubleWidth*(T: typedesc[Fp]): typedesc = ## Return the double-width type matching with Fp FpDbl[T.C] -func `==`*(a, b: FpDbl): SecretBool {.inline.} = +# No exceptions allowed +{.push raises: [].} +{.push inline.} + +func `==`*(a, b: FpDbl): SecretBool = a.limbs2x == b.limbs2x -func mulNoReduce*(r: var FpDbl, a, b: Fp) {.inline.} = +func mulNoReduce*(r: var FpDbl, a, b: Fp) = ## Store the product of ``a`` by ``b`` into ``r`` r.limbs2x.prod(a.mres.limbs, b.mres.limbs) -func squareNoReduce*(r: var FpDbl, a: Fp) {.inline.} = +func squareNoReduce*(r: var FpDbl, a: Fp) = ## Store the square of ``a`` into ``r`` r.limbs2x.square(a.mres.limbs) -func reduce*(r: var Fp, a: FpDbl) {.inline.} = +func reduce*(r: var Fp, a: FpDbl) = ## Reduce a double-width field element into r const N = r.mres.limbs.len montyRed( @@ -54,7 +58,7 @@ func diffNoReduce*(r: var FpDbl, a, b: FpDbl) = ## Double-width substraction without reduction discard r.limbs2x.diff(a.limbs2x, b.limbs2x) -func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}= +func diff*(r: var FpDbl, a, b: FpDbl) = ## Double-width modular substraction when UseASM_X86_64: sub2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs) @@ -69,6 +73,9 @@ func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}= addC(carry, sum, r.limbs2x[i+N], M.limbs[i], carry) underflowed.ccopy(r.limbs2x[i+N], sum) -func `-=`*(a: var FpDbl, b: FpDbl) {.inline.}= +func `-=`*(a: var FpDbl, b: FpDbl) = ## Double-width modular substraction a.diff(a, b) + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/finite_fields_inversion.nim b/constantine/arithmetic/finite_fields_inversion.nim index f26c803..6f04bb9 100644 --- a/constantine/arithmetic/finite_fields_inversion.nim +++ b/constantine/arithmetic/finite_fields_inversion.nim @@ -19,13 +19,17 @@ export zoo_inversions # # ############################################################ -func inv_euclid*(r: var Fp, a: Fp) {.inline.} = +# No exceptions allowed +{.push raises: [].} +{.push inline.} + +func inv_euclid*(r: var Fp, a: Fp) = ## Inversion modulo p via ## Niels Moller constant-time version of ## Stein's GCD derived from extended binary Euclid algorithm r.mres.steinsGCD(a.mres, Fp.getR2modP(), Fp.C.Mod, Fp.getPrimePlus1div2()) -func inv*(r: var Fp, a: Fp) {.inline.} = +func inv*(r: var Fp, a: Fp) = ## Inversion modulo p ## ## The inverse of 0 is 0. @@ -41,7 +45,7 @@ func inv*(r: var Fp, a: Fp) {.inline.} = else: r.inv_euclid(a) -func inv*(a: var Fp) {.inline.} = +func inv*(a: var Fp) = ## Inversion modulo p ## ## The inverse of 0 is 0. @@ -52,3 +56,6 @@ func inv*(a: var Fp) {.inline.} = a.inv_addchain(a) else: a.inv_euclid(a) + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/finite_fields_square_root.nim b/constantine/arithmetic/finite_fields_square_root.nim index 5c66803..2428151 100644 --- a/constantine/arithmetic/finite_fields_square_root.nim +++ b/constantine/arithmetic/finite_fields_square_root.nim @@ -18,10 +18,14 @@ import # # ############################################################ +# No exceptions allowed +{.push raises: [].} +{.push inline.} + # Legendre symbol / Euler's Criterion / Kronecker's symbol # ------------------------------------------------------------ -func isSquare*(a: Fp): SecretBool {.inline.} = +func isSquare*(a: Fp): SecretBool = ## Returns true if ``a`` is a square (quadratic residue) in 𝔽p ## ## Assumes that the prime modulus ``p`` is public. @@ -50,7 +54,7 @@ func hasP3mod4_primeModulus(C: static Curve): static bool = ## Returns true iff p ≡ 3 (mod 4) (BaseType(C.Mod.limbs[0]) and 3) == 3 -func sqrt_p3mod4(a: var Fp) {.inline.} = +func sqrt_p3mod4(a: var Fp) = ## Compute the square root of ``a`` ## ## This requires ``a`` to be a square @@ -64,7 +68,7 @@ func sqrt_p3mod4(a: var Fp) {.inline.} = static: doAssert BaseType(Fp.C.Mod.limbs[0]) mod 4 == 3 a.powUnsafeExponent(Fp.getPrimePlus1div4_BE()) -func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} = +func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) = ## If ``a`` is a square, compute the square root of ``a`` in sqrt ## and the inverse square root of a in invsqrt ## @@ -85,7 +89,7 @@ func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} = # √a ≡ a * 1/√a ≡ a^((p+1)/4) (mod p) sqrt.prod(invsqrt, a) -func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} = +func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` in sqrt ## and the inverse square root of a in invsqrt ## @@ -97,7 +101,7 @@ func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.i test.square(sqrt) result = test == a -func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} = +func sqrt_if_square_p3mod4*(a: var Fp): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` ## if not, ``a`` is unmodified. ## @@ -115,7 +119,7 @@ func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} = # Specialized routines for addchain-based square roots # ------------------------------------------------------------ -func sqrt_addchain(a: var Fp) {.inline.} = +func sqrt_addchain(a: var Fp) = ## Compute the square root of ``a`` ## ## This requires ``a`` to be a square @@ -128,13 +132,13 @@ func sqrt_addchain(a: var Fp) {.inline.} = invsqrt.invsqrt_addchain(a) a *= invsqrt -func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) {.inline.} = +func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) = ## If ``a`` is a square, compute the square root of ``a`` in sqrt ## and the inverse square root of a in invsqrt invsqrt.invsqrt_addchain(a) sqrt.prod(invsqrt, a) -func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} = +func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` in sqrt ## and the inverse square root of a in invsqrt ## @@ -144,7 +148,7 @@ func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool { test.square(sqrt) result = test == a -func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} = +func sqrt_if_square_addchain*(a: var Fp): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` ## if not, ``a`` is unmodified. ## @@ -155,6 +159,8 @@ func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} = result = sqrt_invsqrt_if_square_addchain(sqrt, invsqrt, a) a.ccopy(sqrt, result) +{.pop.} # inline + # Tonelli Shanks for any prime # ------------------------------------------------------------ @@ -228,7 +234,7 @@ func sqrt_invsqrt_tonelli_shanks_pre( # ---------------------------------------------- -func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} = +func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) = ## Compute the square root of ``a`` ## ## This requires ``a`` to be a square @@ -244,7 +250,7 @@ func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} = sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) a = sqrt -func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) {.inline.} = +func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) = ## Compute the square root and inverse square root of ``a`` ## ## This requires ``a`` to be a square @@ -258,7 +264,7 @@ func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: stat a_pre_exp.precompute_tonelli_shanks(a, useAddChain) sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) -func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool {.inline.} = +func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool = ## Compute the square root and ivnerse square root of ``a`` ## ## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root @@ -274,7 +280,7 @@ func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddC sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp) a = sqrt -func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool {.inline.} = +func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` ## if not, ``a`` is unmodified. ## @@ -293,7 +299,9 @@ func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): Secret # Note: we export the inner sqrt_invsqrt_IMPL # for benchmarking purposes. -func sqrt*[C](a: var Fp[C]) {.inline.} = +{.push inline.} + +func sqrt*[C](a: var Fp[C]) = ## Compute the square root of ``a`` ## ## This requires ``a`` to be a square @@ -311,7 +319,7 @@ func sqrt*[C](a: var Fp[C]) {.inline.} = else: sqrt_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain()) -func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} = +func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) = ## Compute the square root and inverse square root of ``a`` ## ## This requires ``a`` to be a square @@ -328,7 +336,7 @@ func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} = else: sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain()) -func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool {.inline.} = +func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool = ## Compute the square root and ivnerse square root of ``a`` ## ## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root @@ -345,7 +353,7 @@ func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool else: result = sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain()) -func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} = +func sqrt_if_square*[C](a: var Fp[C]): SecretBool = ## If ``a`` is a square, compute the square root of ``a`` ## if not, ``a`` is unmodified. ## @@ -359,3 +367,6 @@ func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} = result = sqrt_if_square_p3mod4(a) else: result = sqrt_if_square_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain()) + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/limbs.nim b/constantine/arithmetic/limbs.nim index 6b81111..70ab953 100644 --- a/constantine/arithmetic/limbs.nim +++ b/constantine/arithmetic/limbs.nim @@ -174,7 +174,7 @@ func isEven*(a: Limbs): SecretBool = # Bit manipulation # ------------------------------------------------------------ -func shiftRight*(a: var Limbs, k: int) {.inline.}= +func shiftRight*(a: var Limbs, k: int) = ## Shift right by k. ## ## k MUST be less than the base word size (2^32 or 2^64) diff --git a/constantine/arithmetic/limbs_double_width.nim b/constantine/arithmetic/limbs_extmul.nim similarity index 82% rename from constantine/arithmetic/limbs_double_width.nim rename to constantine/arithmetic/limbs_extmul.nim index d58a7bb..a674cb8 100644 --- a/constantine/arithmetic/limbs_double_width.nim +++ b/constantine/arithmetic/limbs_extmul.nim @@ -21,13 +21,46 @@ when UseASM_X86_64: # # ############################################################ +# Inlining note: +# - The public dispatch functions are inlined. +# This allows the compiler to check CPU features only once +# in the high-level proc and also removes an intermediate function call +# to the wrapper function +# - The ASM procs or internal fallbacks are not inlined +# to save on code size. + # No exceptions allowed {.push raises: [].} # Multiplication # ------------------------------------------------------------ -func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) = +func prod_comba[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) = + ## Extended precision multiplication + # We use Product Scanning / Comba multiplication + var t, u, v = Zero + const stopEx = min(a.len+b.len, r.len) + + staticFor i, 0, stopEx: + # Invariant for product scanning: + # if we multiply accumulate by a[k1] * b[k2] + # we have k1+k2 == i + const ib = min(b.len-1, i) + const ia = i - ib + staticFor j, 0, min(a.len - ia, ib+1): + mulAcc(t, u, v, a[ia+j], b[ib-j]) + + r[i] = v + when i < stopEx-1: + v = u + u = t + t = Zero + + if aLen+bLen < rLen: + for i in aLen+bLen ..< rLen: + r[i] = Zero + +func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) {.inline.} = ## Multi-precision multiplication ## r <- a*b ## @@ -46,28 +79,7 @@ func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: elif UseASM_X86_64: mul_asm(r, a, b) else: - # We use Product Scanning / Comba multiplication - var t, u, v = Zero - const stopEx = min(a.len+b.len, r.len) - - staticFor i, 0, stopEx: - # Invariant for product scanning: - # if we multiply accumulate by a[k1] * b[k2] - # we have k1+k2 == i - const ib = min(b.len-1, i) - const ia = i - ib - staticFor j, 0, min(a.len - ia, ib+1): - mulAcc(t, u, v, a[ia+j], b[ib-j]) - - r[i] = v - when i < stopEx-1: - v = u - u = t - t = Zero - - if aLen+bLen < rLen: - for i in aLen+bLen ..< rLen: - r[i] = Zero + prod_comba(r, a, b) func prod_high_words*[rLen, aLen, bLen]( r: var Limbs[rLen], @@ -178,7 +190,7 @@ func square_operandScan[rLen, aLen]( func square*[rLen, aLen]( r: var Limbs[rLen], - a: Limbs[aLen]) = + a: Limbs[aLen]) {.inline.} = ## Multi-precision squaring ## r <- a² ## diff --git a/constantine/arithmetic/limbs_modular.nim b/constantine/arithmetic/limbs_modular.nim index 82f6d80..99ec325 100644 --- a/constantine/arithmetic/limbs_modular.nim +++ b/constantine/arithmetic/limbs_modular.nim @@ -20,7 +20,7 @@ import # # ############################################################ -func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.}= +func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.} = ## Modular Division by 2 ## `a` will be divided in-place ## `mp1div2` is the modulus (M+1)/2 diff --git a/constantine/arithmetic/limbs_montgomery.nim b/constantine/arithmetic/limbs_montgomery.nim index cae6e09..e45964c 100644 --- a/constantine/arithmetic/limbs_montgomery.nim +++ b/constantine/arithmetic/limbs_montgomery.nim @@ -40,7 +40,13 @@ when UseASM_X86_64: # - pairing final exponentiation # are bottlenecked by Montgomery multiplications or squarings # -# Hence we use inline assembly where possible +# Inlining note: +# - The public dispatch functions are inlined. +# This allows the compiler to check CPU features only once +# in the high-level proc and also removes an intermediate function call +# to the wrapper function +# - The ASM procs or internal fallbacks are not inlined +# to save on code size. # No exceptions allowed {.push raises: [].} @@ -381,31 +387,12 @@ func montyRed_Comba[N: static int]( discard z.csub(M, SecretBool(carry) or not(z < M)) r = z -# TODO upstream, using Limbs[N] breaks semcheck -func montyRed*[N: static int]( - r: var array[N, SecretWord], - a: array[N*2, SecretWord], - M: array[N, SecretWord], - m0ninv: BaseType, canUseNoCarryMontyMul: static bool) = - ## Montgomery reduce a double-width bigint modulo M - when UseASM_X86_64 and r.len <= 6: - if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()): - montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul) - else: - montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul) - elif UseASM_X86_32 and r.len <= 6: - # TODO: Assembly faster than GCC but slower than Clang - montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul) - else: - montyRed_CIOS(r, a, M, m0ninv) - # montyRed_Comba(r, a, M, m0ninv) - # Exported API # ------------------------------------------------------------ func montyMul*( r: var Limbs, a, b, M: Limbs, - m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) = + m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} = ## Compute r <- a*b (mod M) in the Montgomery domain ## `m0ninv` = -1/M (mod SecretWord). Our words are 2^32 or 2^64 ## @@ -471,6 +458,25 @@ func montySquare*(r: var Limbs, a, M: Limbs, # # montySquare_CIOS(r, a, M, m0ninv) # TODO <--- Fix this # montyMul_FIPS(r, a, a, M, m0ninv) +# TODO upstream, using Limbs[N] breaks semcheck +func montyRed*[N: static int]( + r: var array[N, SecretWord], + a: array[N*2, SecretWord], + M: array[N, SecretWord], + m0ninv: BaseType, canUseNoCarryMontyMul: static bool) {.inline.} = + ## Montgomery reduce a double-width bigint modulo M + when UseASM_X86_64 and r.len <= 6: + if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()): + montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul) + else: + montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul) + elif UseASM_X86_32 and r.len <= 6: + # TODO: Assembly faster than GCC but slower than Clang + montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul) + else: + montyRed_CIOS(r, a, M, m0ninv) + # montyRed_Comba(r, a, M, m0ninv) + func redc*(r: var Limbs, a, one, M: Limbs, m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) = ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N) diff --git a/constantine/config/curves_declaration.nim b/constantine/config/curves_declaration.nim index 33b14e3..cf8f0a0 100644 --- a/constantine/config/curves_declaration.nim +++ b/constantine/config/curves_declaration.nim @@ -83,12 +83,11 @@ declareCurves: eq_form: ShortWeierstrass coef_a: 0 coef_b: 2 - nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p - nonresidue_cube_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a cube in 𝔽p² + nonresidue_fp: -1 # -1 is not a square in 𝔽p + nonresidue_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a square or cube in 𝔽p² embedding_degree: 12 sexticTwist: D_Twist - sexticNonResidue_fp2: (1, 1) # 1+𝑖 curve BN254_Snarks: # Zero-Knowledge proofs curve (SNARKS, STARKS, Ethereum) bitwidth: 254 @@ -103,12 +102,11 @@ declareCurves: eq_form: ShortWeierstrass coef_a: 0 coef_b: 3 - nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p - nonresidue_cube_fp2: (9, 1) # 9+𝑖 9+𝑖 is not a cube in 𝔽p² + nonresidue_fp: -1 # -1 is not a square in 𝔽p + nonresidue_fp2: (9, 1) # 9+𝑖 9+𝑖 is not a square or cube in 𝔽p² embedding_degree: 12 sexticTwist: D_Twist - sexticNonResidue_fp2: (9, 1) # 9+𝑖 curve Curve25519: # Bernstein curve bitwidth: 255 @@ -136,12 +134,11 @@ declareCurves: eq_form: ShortWeierstrass coef_a: 0 coef_b: 1 - nonresidue_quad_fp: -5 # -5 is not a square in 𝔽p - nonresidue_cube_fp2: (0, 1) # √-5 √-5 is not a cube in 𝔽p² + nonresidue_fp: -5 # -5 is not a square in 𝔽p + nonresidue_fp2: (0, 1) # √-5 √-5 is not a square or cube in 𝔽p² embedding_degree: 12 sexticTwist: D_Twist - sexticNonResidue_fp2: (0, 1) # √-5 curve BLS12_381: bitwidth: 381 @@ -157,12 +154,11 @@ declareCurves: eq_form: ShortWeierstrass coef_a: 0 coef_b: 4 - nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p - nonresidue_cube_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a cube in 𝔽p² + nonresidue_fp: -1 # -1 is not a square in 𝔽p + nonresidue_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a square or cube in 𝔽p² embedding_degree: 12 sexticTwist: M_Twist - sexticNonResidue_fp2: (1, 1) # 1+𝑖 curve BW6_761: bitwidth: 761 @@ -182,9 +178,8 @@ declareCurves: coef_b: -1 # TODO: rework the quad/cube/sextic non residue declaration - nonresidue_quad_fp: -4 # -4 is not a square in 𝔽p - nonresidue_cube_fp2: (0, 1) # -4 is not a cube in 𝔽p² + nonresidue_fp: -4 # -4 is not a square or cube in 𝔽p + nonresidue_fp2: (0, 1) # -4 is not a cube in 𝔽p² embedding_degree: 6 sexticTwist: M_Twist - sexticNonResidue_fp2: (0, 1) # -4 diff --git a/constantine/config/curves_parser.nim b/constantine/config/curves_parser.nim index bc06aee..56c1b9a 100644 --- a/constantine/config/curves_parser.nim +++ b/constantine/config/curves_parser.nim @@ -96,8 +96,8 @@ type modulus: NimNode # nnkStrLit (hex) # Towering - nonresidue_quad_fp: NimNode # nnkIntLit - nonresidue_cube_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit) + nonresidue_fp: NimNode # nnkIntLit + nonresidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit) # Curve parameters eq_form: CurveEquationForm @@ -108,7 +108,6 @@ type embedding_degree: int sexticTwist: SexticTwist - sexticNonResidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit) family: CurveFamily @@ -198,17 +197,15 @@ proc parseCurveDecls(defs: var seq[CurveParams], curves: NimNode) = params.orderBitwidth = sectionVal elif sectionId.eqIdent"cofactor": discard "TODO" - elif sectionId.eqIdent"nonresidue_quad_fp": - params.nonresidue_quad_fp = sectionVal - elif sectionId.eqIdent"nonresidue_cube_fp2": - params.nonresidue_cube_fp2 = sectionVal + elif sectionId.eqIdent"nonresidue_fp": + params.nonresidue_fp = sectionVal + elif sectionId.eqIdent"nonresidue_fp2": + params.nonresidue_fp2 = sectionVal elif sectionId.eqIdent"embedding_degree": params.embedding_degree = sectionVal.intVal.int elif sectionId.eqIdent"sexticTwist": params.sexticTwist = parseEnum[SexticTwist]($sectionVal) - elif sectionId.eqIdent"sexticNonResidue_fp2": - params.sexticNonResidue_fp2 = sectionVal else: error "Invalid section: \n", curveParams[i].toStrLit() @@ -323,12 +320,12 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode = # Towering # ----------------------------------------------- curveEllipticStmts.add newConstStmt( - exported($curve & "_nonresidue_quad_fp"), - curveDef.nonresidue_quad_fp + exported($curve & "_nonresidue_fp"), + curveDef.nonresidue_fp ) curveEllipticStmts.add newConstStmt( - exported($curve & "_nonresidue_cube_fp2"), - curveDef.nonresidue_cube_fp2 + exported($curve & "_nonresidue_fp2"), + curveDef.nonresidue_fp2 ) # Pairing @@ -341,10 +338,6 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode = exported($curve & "_sexticTwist"), newLit curveDef.sexticTwist ) - curveEllipticStmts.add newConstStmt( - exported($curve & "_sexticNonResidue_fp2"), - curveDef.sexticNonResidue_fp2 - ) # end for --------------------------------------------------- diff --git a/constantine/config/curves_prop_core.nim b/constantine/config/curves_prop_core.nim index e214f9c..759fe15 100644 --- a/constantine/config/curves_prop_core.nim +++ b/constantine/config/curves_prop_core.nim @@ -82,21 +82,27 @@ macro getCoefB*(C: static Curve): untyped = ## or a bigInt depending on the curve result = bindSym($C & "_coef_B") -macro get_QNR_Fp*(C: static Curve): untyped = - ## Returns the tower extension quadratic non-residue in 𝔽p - ## i.e. a number that is not a square in 𝔽p - result = bindSym($C & "_nonresidue_quad_fp") +macro getNonResidueFp*(C: static Curve): untyped = + ## Returns the tower extension (and twist) non-residue for 𝔽p + ## Depending on the curve it might be: + ## - not a square (quadratic non-residue to construct Fp2) + ## - not a cube (cubic non-residue to construct Fp3) + ## - neither a square or cube (sextic non-residue to construct Fp2, Fp3 or Fp6) + result = bindSym($C & "_nonresidue_fp") -macro get_CNR_Fp2*(C: static Curve): untyped = - ## Returns the tower extension cubic non-residue 𝔽p² - ## i.e. a number that is not a cube in 𝔽p² +macro getNonResidueFp2*(C: static Curve): untyped = + ## Returns the tower extension (and twist) non-residue for 𝔽p² + ## Depending on the curve it might be: + ## - not a square (quadratic non-residue to construct Fp4) + ## - not a cube (cubic non-residue to construct Fp6) + ## - neither a square or cube (sextic non-residue to construct Fp4, Fp6 or Fp12) ## ## The return value is a tuple (a, b) ## that corresponds to the number a + b𝑗 ## with 𝑗 choosen for 𝑗² - QNR_Fp == 0 ## i.e. if -1 is chosen as a quadratic non-residue 𝑗 = √-1 ## if -2 is chosen as a quadratic non-residue 𝑗 = √-2 - result = bindSym($C & "_nonresidue_cube_fp2") + result = bindSym($C & "_nonresidue_fp2") macro getEmbeddingDegree*(C: static Curve): untyped = ## Returns the prime embedding degree, @@ -108,9 +114,3 @@ macro getEmbeddingDegree*(C: static Curve): untyped = macro getSexticTwist*(C: static Curve): untyped = ## Returns if D-Twist or M-Twist result = bindSym($C & "_sexticTwist") - -macro get_SNR_Fp2*(C: static Curve): untyped = - ## Returns the sextic non-residue in 𝔽p² - ## choosen to build the twisted curve E'(𝔽p²) - ## i.e. a number µ so that x⁶ - µ is irreducible - result = bindSym($C & "_sexticNonResidue_fp2") diff --git a/constantine/elliptic/ec_shortweierstrass_affine.nim b/constantine/elliptic/ec_shortweierstrass_affine.nim index e8e4d6f..45d86f8 100644 --- a/constantine/elliptic/ec_shortweierstrass_affine.nim +++ b/constantine/elliptic/ec_shortweierstrass_affine.nim @@ -32,6 +32,8 @@ type ## over a field F x*, y*: F + SexticNonResidue* = NonResidue + func isInf*(P: ECP_ShortW_Aff): SecretBool = ## Returns true if P is an infinity point ## and false otherwise diff --git a/constantine/isogeny/frobenius.nim b/constantine/isogeny/frobenius.nim index 7e1ee42..f596097 100644 --- a/constantine/isogeny/frobenius.nim +++ b/constantine/isogeny/frobenius.nim @@ -49,48 +49,6 @@ func frobenius_map*(r: var Fp2, a: Fp2, k: static int = 1) {.inline.} = else: r = a -template mulCheckSparse(a: var Fp, b: Fp) = - when b.isOne().bool: - discard - elif b.isZero().bool: - a.setZero() - elif b.isMinusOne().bool: - a.neg() - else: - a *= b - -template mulCheckSparse(a: var Fp2, b: Fp2) = - when b.isOne().bool: - discard - elif b.isMinusOne().bool: - a.neg() - elif b.c0.isZero().bool and b.c1.isOne().bool: - var t {.noInit.}: type(a.c0) - when fromComplexExtension(b): - t.neg(a.c1) - a.c1 = a.c0 - a.c0 = t - else: - t = NonResidue * a.c1 - a.c1 = a.c0 - a.c0 = t - elif b.c0.isZero().bool and b.c1.isMinusOne().bool: - var t {.noInit.}: type(a.c0) - when fromComplexExtension(b): - t = a.c1 - a.c1.neg(a.c0) - a.c0 = t - else: - t = NonResidue * a.c1 - a.c1.neg(a.c0) - a.c0.neg(t) - elif b.c0.isZero().bool: - a.mul_sparse_by_0y(b) - elif b.c1.isZero().bool: - a.mul_sparse_by_x0(b) - else: - a *= b - # Frobenius map - on extension fields # ----------------------------------------------------------------- diff --git a/constantine/pairing/lines_common.nim b/constantine/pairing/lines_common.nim index 6b9eea8..fcac103 100644 --- a/constantine/pairing/lines_common.nim +++ b/constantine/pairing/lines_common.nim @@ -35,6 +35,22 @@ type ## with Fp2 coordinates: xyz000 x*, y*, z*: F + SexticNonResidue* = NonResidue + ## The Sextic non-residue to build + ## 𝔽p2 -> 𝔽p12 towering and the G2 sextic twist + ## or + ## 𝔽p -> 𝔽p6 towering and the G2 sextic twist + ## + ## Note: + ## while the non-residues for + ## - 𝔽p2 -> 𝔽p4 + ## - 𝔽p2 -> 𝔽p6 + ## are also sextic non-residues by construction. + ## the non-residues for + ## - 𝔽p4 -> 𝔽p12 + ## - 𝔽p6 -> 𝔽p12 + ## are not. + func toHex*(line: Line, order: static Endianness = bigEndian): string = result = static($line.typeof.genericHead() & '(') for fieldName, fieldValue in fieldPairs(line): diff --git a/constantine/pairing/lines_projective.nim b/constantine/pairing/lines_projective.nim index 31904ed..e9790c8 100644 --- a/constantine/pairing/lines_projective.nim +++ b/constantine/pairing/lines_projective.nim @@ -92,15 +92,10 @@ func line_eval_double[F]( template B: untyped = line.y template C: untyped = line.z - A = T.y # A = Y - v = T.y # v = Y - B = T.z # B = Z - C = T.x # C = X - - A *= B # A = Y.Z - C.square() # C = X² - v.square() # v = Y² - B.square() # B = Z² + A.prod(T.y, T.z) # A = Y.Z + C.square(T.x) # C = X² + v.square(T.y) # v = Y² + B.square(T.z) # B = Z² A.double() # A = 2 Y.Z A.neg() # A = -2 Y.Z @@ -148,28 +143,21 @@ func line_eval_add[F]( template B: untyped = line.y template C: untyped = line.z - A = T.x # A = X₁ - v = T.z # v = Z₁ - B = T.z # B = Z₁ - C = T.y # C = Y₁ + v.prod(T.z, Q.y) # v = Z₁Y₂ + B.prod(T.z, Q.x) # B = Z₁X₂ - v *= Q.y # v = Z₁Y₂ - B *= Q.x # B = Z₁X₂ + A.diff(T.x, B) # A = X₁-Z₁X₂ + C.diff(T.y, v) # C = Y₁-Z₁Y₂ - A -= B # A = X₁-Z₁X₂ - C -= v # C = Y₁-Z₁Y₂ + v.prod(A, Q.y) # v = (X₁-Z₁X₂) Y₂ + B.prod(C, Q.x) # B = (Y₁-Z₁Y₂) X₂ + B -= v # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂ + + C.neg() # C = -(Y₁-Z₁Y₂) - v = A # v = X₁-Z₁X₂ when F.C.getSexticTwist() == M_Twist: A *= SexticNonResidue # A = ξ (X₁ - Z₁X₂) - v *= Q.y # v = (X₁-Z₁X₂) Y₂ - B = C # B = Y₁-Z₁Y₂ - B *= Q.x # B = (Y₁-Z₁Y₂) X₂ - B -= v # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂ - - C.neg() # C = -(Y₁-Z₁Y₂) - func line_eval_fused_double[F]( line: var Line[F], T: var ECP_ShortW_Proj[F, OnTwist]) = @@ -193,13 +181,11 @@ func line_eval_fused_double[F]( when F.C.getSexticTwist() == D_Twist: snrB *= SexticNonResidue - E = C - E *= b3 + E.prod(C, b3) when F.C.getSexticTwist() == M_Twist: E *= SexticNonResidue # E = 3b'Z² = 3bξ Z² - F = E - F *= 3 # F = 3E = 9bZ² + F.prod(E, 3) # F = 3E = 9bZ² G.sum(snrB, F) G.div2() # G = (B+F)/2 H.sum(T.y, T.z) diff --git a/constantine/pairing/mul_fp12_by_lines.nim b/constantine/pairing/mul_fp12_by_lines.nim index 0a23c28..065db1a 100644 --- a/constantine/pairing/mul_fp12_by_lines.nim +++ b/constantine/pairing/mul_fp12_by_lines.nim @@ -56,7 +56,7 @@ func mul_by_line_xy0*[C: static Curve]( v0.prod(a.c0, b.x) v1.prod(a.c1, b.y) r.c0.prod(a.c2, b.y) - r.c0 *= ξ + r.c0 *= SexticNonResidue r.c0 += v0 r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated @@ -93,10 +93,9 @@ func mul_sparse_by_line_xy00z0*[C: static Curve]( f.c1 += f.c0 v3.mul_by_line_xy0(f.c1, v2) v3 -= v0 - v3 -= v1 - f.c1 = v3 + f.c1.diff(v3, v1) - v3.c0 = ξ * v1.c2 + v3.c0.prod(v1.c2, SexticNonResidue) v3.c0 += v0.c0 v3.c1.sum(v0.c1, v1.c0) v3.c2.sum(v0.c2, v1.c1) @@ -150,7 +149,7 @@ func mul_sparse_by_line_xyz000*[C: static Curve]( # r0 = ξ a2 b1 + v0 f.c0.mul_sparse_by_y0(f.c2, l.z) - f.c0 *= NonResidue + f.c0 *= SexticNonResidue f.c0 += v0 # r2 = a2 b0 + v1 @@ -201,12 +200,12 @@ func mul_sparse_by_line_xy000z*[C: static Curve]( # r0 = ξ a1 b2 + v0 f.c0.mul_sparse_by_0y(f.c1, l.z) - f.c0 *= NonResidue + f.c0 *= SexticNonResidue f.c0 += v0 # r1 = a1 b0 + ξ v2 f.c1 *= b0 - v2 *= NonResidue + v2 *= SexticNonResidue f.c1 += v2 func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} = diff --git a/constantine/pairing/pairing_bls12.nim b/constantine/pairing/pairing_bls12.nim index 8252beb..23f53ff 100644 --- a/constantine/pairing/pairing_bls12.nim +++ b/constantine/pairing/pairing_bls12.nim @@ -89,7 +89,8 @@ func millerLoopGenericBLS12*[C]( f.setOne() template u: untyped = C.pairing(ate_param) - let u3 = 3*C.pairing(ate_param) + var u3 = C.pairing(ate_param) + u3 *= 3 for i in countdown(u3.bits - 2, 1): f.square() line.line_double(T, P) diff --git a/constantine/pairing/pairing_bn.nim b/constantine/pairing/pairing_bn.nim index cb03959..e0a6eeb 100644 --- a/constantine/pairing/pairing_bn.nim +++ b/constantine/pairing/pairing_bn.nim @@ -87,7 +87,8 @@ func millerLoopGenericBN*[C]( f.setOne() template u: untyped = C.pairing(ate_param) - let u3 = 3*C.pairing(ate_param) + var u3 = C.pairing(ate_param) + u3 *= 3 for i in countdown(u3.bits - 2, 1): f.square() line.line_double(T, P) diff --git a/constantine/tower_field_extensions/cubic_extensions.nim b/constantine/tower_field_extensions/cubic_extensions.nim deleted file mode 100644 index 091fa5b..0000000 --- a/constantine/tower_field_extensions/cubic_extensions.nim +++ /dev/null @@ -1,228 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../arithmetic, - ../primitives, - ./tower_common - -# Commutative ring implementation for Cubic Extension Fields -# ------------------------------------------------------------------- -# Cubic extensions can use specific squaring procedures -# beyond Schoolbook and Karatsuba: -# - Chung-Hasan (3 different algorithms) -# - Toom-Cook-3x -# -# Chung-Hasan papers -# http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf -# https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf -# -# The papers focus on polynomial squaring, they have been adapted -# to towered extension fields with the relevant costs in -# -# - Multiplication and Squaring on Pairing-Friendly Fields -# Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006 -# https://eprint.iacr.org/2006/471 -# -# Costs in the underlying field -# M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue -# -# | Method | > Linear | Linear | -# |-------------|----------|-------------------| -# | Schoolbook | 3M + 3S | 6A + 2B | -# | Karatsuba | 6S | 13A + 2B | -# | Tom-Cook-3x | 5S | 33A + 2B | -# | CH-SQR1 | 3M + 2S | 11A + 2B | -# | CH-SQR2 | 2M + 3S | 10A + 2B | -# | CH-SQR3 | 1M + 4S | 11A + 2B + 1 Div2 | -# | CH-SQR3x | 1M + 4S | 14A + 2B | - -func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}= - ## Returns r = a² - mixin prod, square, sum - var v3{.noInit.}, v4{.noInit.}, v5{.noInit.}: typeof(r.c0) - - v4.prod(a.c0, a.c1) - v4.double() - v5.square(a.c2) - r.c1 = NonResidue * v5 - r.c1 += v4 - r.c2.diff(v4, v5) - v3.square(a.c0) - v4.diff(a.c0, a.c1) - v4 += a.c2 - v5.prod(a.c1, a.c2) - v5.double() - v4.square() - r.c0 = NonResidue * v5 - r.c0 += v3 - r.c2 += v4 - r.c2 += v5 - r.c2 -= v3 - -func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) = - ## Returns r = a² - mixin prod, square, sum - var v0{.noInit.}, v2{.noInit.}: typeof(r.c0) - - r.c1.sum(a.c0, a.c2) # r1 = a0 + a2 - v2.diff(r.c1, a.c1) # v2 = a0 - a1 + a2 - r.c1 += a.c1 # r1 = a0 + a1 + a2 - r.c1.square() # r1 = (a0 + a1 + a2)² - v2.square() # v2 = (a0 - a1 + a2)² - - r.c2.sum(r.c1, v2) # r2 = (a0 + a1 + a2)² + (a0 - a1 + a2)² - r.c2.div2() # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - - r.c0.prod(a.c1, a.c2) # r0 = a1 a2 - r.c0.double() # r0 = 2 a1 a2 - - v2.square(a.c2) # v2 = a2² - r.c1 += NonResidue * v2 # r1 = (a0 + a1 + a2)² + β a2² - r.c1 -= r.c0 # r1 = (a0 + a1 + a2)² - 2 a1 a2 + β a2² - r.c1 -= r.c2 # r1 = (a0 + a1 + a2)² - 2 a1 a2 - ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 + β a2² - - v0.square(a.c0) # v0 = a0² - r.c0 *= NonResidue # r0 = β 2 a1 a2 - r.c0 += v0 # r0 = a0² + β 2 a1 a2 - - r.c2 -= v0 # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0² - r.c2 -= v2 # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0² - a2² - -func square*(r: var CubicExt, a: CubicExt) {.inline.} = - ## Returns r = a² - square_Chung_Hasan_SQR3(r, a) - -func prod*(r: var CubicExt, a, b: CubicExt) = - ## Returns r = a * b - ## - ## r MUST not share a buffer with a - # Algorithm is Karatsuba - var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}, t{.noInit.}: typeof(r.c0) - - v0.prod(a.c0, b.c0) - v1.prod(a.c1, b.c1) - v2.prod(a.c2, b.c2) - - # r.c0 = β ((a.c1 + a.c2) * (b.c1 + b.c2) - v1 - v2) + v0 - r.c0.sum(a.c1, a.c2) - t.sum(b.c1, b.c2) - r.c0 *= t - r.c0 -= v1 - r.c0 -= v2 - r.c0 *= NonResidue - r.c0 += v0 - - # r.c1 = (a.c0 + a.c1) * (b.c0 + b.c1) - v0 - v1 + β v2 - r.c1.sum(a.c0, a.c1) - t.sum(b.c0, b.c1) - r.c1 *= t - r.c1 -= v0 - r.c1 -= v1 - r.c1 += NonResidue * v2 - - # r.c2 = (a.c0 + a.c2) * (b.c0 + b.c2) - v0 - v2 + v1 - r.c2.sum(a.c0, a.c2) - t.sum(b.c0, b.c2) - r.c2 *= t - r.c2 -= v0 - r.c2 -= v2 - r.c2 += v1 - -func inv*(r: var CubicExt, a: CubicExt) = - ## Compute the multiplicative inverse of ``a`` - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - # - # Algorithm 5.23 - # - # Arithmetic of Finite Fields - # Chapter 5 of Guide to Pairing-Based Cryptography - # Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\ - # https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields - # - # We optimize for stack usage and use 4 temporaries (+r as temporary) - # instead of 9, because 5 * 2 (𝔽p2) * Bitsize would be: - # - ~2540 bits for BN254 - # - ~3810 bits for BLS12-381 - var v1 {.noInit.}, v2 {.noInit.}, v3 {.noInit.}: typeof(r.c0) - - # A in r0 - # A <- a0² - β a1 a2 - r.c0.square(a.c0) - v1.prod(a.c1, a.c2) - v1 *= NonResidue - r.c0 -= v1 - - # B in v1 - # B <- β a2² - a0 a1 - v1.square(a.c2) - v1 *= NonResidue - v2.prod(a.c0, a.c1) - v1 -= v2 - - # C in v2 - # C <- a1² - a0 a2 - v2.square(a.c1) - v3.prod(a.c0, a.c2) - v2 -= v3 - - # F in v3 - # F <- β a1 C + a0 A + β a2 B - r.c1.prod(v1, NonResidue * a.c2) - r.c2.prod(v2, NonResidue * a.c1) - v3.prod(r.c0, a.c0) - v3 += r.c1 - v3 += r.c2 - - let t = v3 # TODO, support aliasing in all primitives - v3.inv(t) - - # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F - r.c0 *= v3 - r.c1.prod(v1, v3) - r.c2.prod(v2, v3) - -func inv*(a: var CubicExt) = - ## Compute the multiplicative inverse of ``a`` - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - let t = a - a.inv(t) - -func `*=`*(a: var CubicExt, b: CubicExt) {.inline.} = - ## In-place multiplication - # On higher extension field like 𝔽p6, - # if `prod` is called on shared in and out buffer, the result is wrong - let t = a - a.prod(t, b) - -func conj*(a: var CubicExt) {.inline.} = - ## Computes the conjugate in-place - mixin conj, conjneg - a.c0.conj() - a.c1.conjneg() - a.c2.conj() - -func conj*(r: var CubicExt, a: CubicExt) {.inline.} = - ## Computes the conjugate out-of-place - mixin conj, conjneg - r.c0.conj(a.c0) - r.c1.conjneg(a.c1) - r.c2.conj(a.c2) - -func square*(a: var CubicExt) {.inline.} = - ## In-place squaring - let t = a - a.square(t) diff --git a/constantine/tower_field_extensions/exponentiations.nim b/constantine/tower_field_extensions/exponentiations.nim index a5e18b1..8a60d5c 100644 --- a/constantine/tower_field_extensions/exponentiations.nim +++ b/constantine/tower_field_extensions/exponentiations.nim @@ -12,9 +12,7 @@ import ../config/common, ../primitives, ../io/io_bigints, - ./tower_common, - ./quadratic_extensions, - ./cubic_extensions + ./extension_fields # ############################################################ # diff --git a/constantine/tower_field_extensions/extension_fields.nim b/constantine/tower_field_extensions/extension_fields.nim new file mode 100644 index 0000000..257f46e --- /dev/null +++ b/constantine/tower_field_extensions/extension_fields.nim @@ -0,0 +1,977 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/common, + ../primitives, + ../arithmetic + +# Note: to avoid burdening the Nim compiler, we rely on generic extension +# to complain if the base field procedures don't exist + +# No exceptions allowed +{.push raises: [].} +{.push inline.} + +# ############################################################ +# # +# Extension Fields # +# # +# ############################################################ + +type + NonResidue* = object + ## Non-Residue + ## + ## Placeholder for the appropriate quadratic, cubic or sectic non-residue + + CubicExt* = concept x + ## Cubic Extension field concept + type BaseField = auto + x.c0 is BaseField + x.c1 is BaseField + x.c2 is BaseField + + QuadraticExt* = concept x + ## Quadratic Extension field concept + not(x is CubicExt) + + type BaseField = auto + x.c0 is BaseField + x.c1 is BaseField + + ExtensionField* = QuadraticExt or CubicExt + +# Initialization +# ------------------------------------------------------------------- + +func setZero*(a: var ExtensionField) = + ## Set ``a`` to 0 in the extension field + for field in fields(a): + field.setZero() + +func setOne*(a: var ExtensionField) = + ## Set ``a`` to 1 in the extension field + for fieldName, fA in fieldPairs(a): + when fieldName == "c0": + fA.setOne() + else: + fA.setZero() + +func fromBig*(a: var ExtensionField, src: BigInt) = + ## Set ``a`` to the bigint value in the extension field + for fieldName, fA in fieldPairs(a): + when fieldName == "c0": + fA.fromBig(src) + else: + fA.setZero() + +# Comparison +# ------------------------------------------------------------------- + +func `==`*(a, b: ExtensionField): SecretBool = + ## Constant-time equality check + result = CtTrue + for fA, fB in fields(a, b): + result = result and (fA == fB) + +func isZero*(a: ExtensionField): SecretBool = + ## Constant-time check if zero + result = CtTrue + for fA in fields(a): + result = result and fA.isZero() + +func isOne*(a: ExtensionField): SecretBool = + ## Constant-time check if one + result = CtTrue + for fieldName, fA in fieldPairs(a): + when fieldName == "c0": + result = result and fA.isOne() + else: + result = result and fA.isZero() + +func isMinusOne*(a: ExtensionField): SecretBool = + ## Constant-time check if -1 + result = CtTrue + for fieldName, fA in fieldPairs(a): + when fieldName == "c0": + result = result and fA.isMinusOne() + else: + result = result and fA.isZero() + +# Copies +# ------------------------------------------------------------------- + +func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = + ## Constant-time conditional copy + ## If ctl is true: b is copied into a + ## if ctl is false: b is not copied and a is unmodified + ## Time and memory accesses are the same whether a copy occurs or not + for fA, fB in fields(a, b): + ccopy(fA, fB, ctl) + +# Abelian group +# ------------------------------------------------------------------- + +func neg*(r: var ExtensionField, a: ExtensionField) = + ## Field out-of-place negation + for fR, fA in fields(r, a): + fR.neg(fA) + +func neg*(a: var ExtensionField) = + ## Field in-place negation + for fA in fields(a): + fA.neg() + +func `+=`*(a: var ExtensionField, b: ExtensionField) = + ## Addition in the extension field + for fA, fB in fields(a, b): + fA += fB + +func `-=`*(a: var ExtensionField, b: ExtensionField) = + ## Substraction in the extension field + for fA, fB in fields(a, b): + fA -= fB + +func double*(r: var ExtensionField, a: ExtensionField) = + ## Field out-of-place doubling + for fR, fA in fields(r, a): + fR.double(fA) + +func double*(a: var ExtensionField) = + ## Field in-place doubling + for fA in fields(a): + fA.double() + +func div2*(a: var ExtensionField) = + ## Field in-place division by 2 + for fA in fields(a): + fA.div2() + +func sum*(r: var QuadraticExt, a, b: QuadraticExt) = + ## Sum ``a`` and ``b`` into ``r`` + r.c0.sum(a.c0, b.c0) + r.c1.sum(a.c1, b.c1) + +func sum*(r: var CubicExt, a, b: CubicExt) = + ## Sum ``a`` and ``b`` into ``r`` + r.c0.sum(a.c0, b.c0) + r.c1.sum(a.c1, b.c1) + r.c2.sum(a.c2, b.c2) + +func diff*(r: var QuadraticExt, a, b: QuadraticExt) = + ## Diff ``a`` and ``b`` into ``r`` + r.c0.diff(a.c0, b.c0) + r.c1.diff(a.c1, b.c1) + +func diff*(r: var CubicExt, a, b: CubicExt) = + ## Diff ``a`` and ``b`` into ``r`` + r.c0.diff(a.c0, b.c0) + r.c1.diff(a.c1, b.c1) + r.c2.diff(a.c2, b.c2) + +func conj*(a: var QuadraticExt) = + ## Computes the conjugate in-place + a.c1.neg() + +func conj*(r: var QuadraticExt, a: QuadraticExt) = + ## Computes the conjugate out-of-place + r.c0 = a.c0 + r.c1.neg(a.c1) + +func conjneg*(a: var QuadraticExt) = + ## Computes the negated conjugate in-place + a.c0.neg() + +func conjneg*(r: var QuadraticExt, a: QuadraticExt) = + ## Computes the negated conjugate out-of-place + r.c0.neg(a.c0) + r.c1 = a.c1 + +func conj*(a: var CubicExt) = + ## Computes the conjugate in-place + a.c0.conj() + a.c1.conjneg() + a.c2.conj() + +func conj*(r: var CubicExt, a: CubicExt) = + ## Computes the conjugate out-of-place + r.c0.conj(a.c0) + r.c1.conjneg(a.c1) + r.c2.conj(a.c2) + +# Conditional arithmetic +# ------------------------------------------------------------------- + +func cneg*(a: var ExtensionField, ctl: SecretBool) = + ## Constant-time in-place conditional negation + ## Only negate if ctl is true + for fA in fields(a): + fA.cneg(ctl) + +func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = + ## Constant-time in-place conditional addition + for fA, fB in fields(a, b): + fA.cadd(fB, ctl) + +func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = + ## Constant-time in-place conditional substraction + for fA, fB in fields(a, b): + fA.csub(fB, ctl) + +# Multiplication by a small integer known at compile-time +# ------------------------------------------------------------------- + +func `*=`*(a: var ExtensionField, b: static int) = + ## Multiplication by a small integer known at compile-time + + const negate = b < 0 + const b = if negate: -b + else: b + when negate: + a.neg(a) + when b == 0: + a.setZero() + elif b == 1: + return + elif b == 2: + a.double() + elif b == 3: + let t1 = a + a.double() + a += t1 + elif b == 4: + a.double() + a.double() + elif b == 5: + let t1 = a + a.double() + a.double() + a += t1 + elif b == 6: + a.double() + let t2 = a + a.double() # 4 + a += t2 + elif b == 7: + let t1 = a + a.double() + let t2 = a + a.double() # 4 + a += t2 + a += t1 + elif b == 8: + a.double() + a.double() + a.double() + elif b == 9: + let t1 = a + a.double() + a.double() + a.double() # 8 + a += t1 + elif b == 10: + a.double() + let t2 = a + a.double() + a.double() # 8 + a += t2 + elif b == 11: + let t1 = a + a.double() + let t2 = a + a.double() + a.double() # 8 + a += t2 + a += t1 + elif b == 12: + a.double() + a.double() # 4 + let t4 = a + a.double() # 8 + a += t4 + else: + {.error: "Multiplication by this small int not implemented".} + +func prod*(r: var ExtensionField, a: ExtensionField, b: static int) = + ## Multiplication by a small integer known at compile-time + const negate = b < 0 + const b = if negate: -b + else: b + when negate: + r.neg(a) + else: + r = a + r *= b + +{.pop.} # inline + +# ############################################################ +# # +# Quadratic extensions # +# # +# ############################################################ + +# Commutative ring implementation for complex quadratic extension fields +# ---------------------------------------------------------------------- + +func square_complex(r: var QuadraticExt, a: QuadraticExt) = + ## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r`` + ## ``r`` is initialized/overwritten + ## + ## Requires a complex extension field + # (c0, c1)² => (c0 + c1𝑖)² + # => c0² + 2 c0 c1𝑖 + (c1𝑖)² + # => c0²-c1² + 2 c0 c1𝑖 + # => (c0²-c1², 2 c0 c1) + # or + # => ((c0-c1)(c0+c1), 2 c0 c1) + # => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1) + # + # Costs (naive implementation) + # - 1 Multiplication 𝔽p + # - 2 Squarings 𝔽p + # - 1 Doubling 𝔽p + # - 1 Substraction 𝔽p + # Stack: 4 * ModulusBitSize (4x 𝔽p element) + # + # Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space) + # + # - 2 Multiplications 𝔽p + # - 1 Addition 𝔽p + # - 1 Doubling 𝔽p + # - 1 Substraction 𝔽p + # + # To handle aliasing between r and a, we need + # r to be used only when a is unneeded + # so we can't use r fields as a temporary + mixin fromComplexExtension + static: doAssert r.fromComplexExtension() + + var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0) + v0.diff(a.c0, a.c1) # v0 = c0 - c1 [1 Sub] + v1.sum(a.c0, a.c1) # v1 = c0 + c1 [1 Dbl, 1 Sub] + r.c1.prod(a.c0, a.c1) # r.c1 = c0 c1 [1 Mul, 1 Dbl, 1 Sub] + # aliasing: a unneeded now + r.c1.double() # r.c1 = 2 c0 c1 [1 Mul, 2 Dbl, 1 Sub] + r.c0.prod(v0, v1) # r.c0 = (c0 + c1)(c0 - c1) [2 Mul, 2 Dbl, 1 Sub] + +func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) = + ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r`` + ## ``r`` is initialized/overwritten + ## + ## Requires a complex extension field + # (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖) + # => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖 + # + # In Fp, multiplication has cost O(n²) with n the number of limbs + # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction) + # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition) + # + # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words) + # where addition/substraction are significantly cheaper than multiplication + # + # So we always reframe the imaginary part using Karatsuba approach to save a multiplication + # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 ) + # + # Costs (naive implementation) + # - 4 Multiplications 𝔽p + # - 1 Addition 𝔽p + # - 1 Substraction 𝔽p + # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries) + # + # Costs (Karatsuba) + # - 3 Multiplications 𝔽p + # - 3 Substraction 𝔽p (2 are fused) + # - 2 Addition 𝔽p + # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary) + mixin fromComplexExtension + static: doAssert r.fromComplexExtension() + + # TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down) + # for seemingly no reason + + when false: # Single-width implementation - BLS12-381 + # Clang 348 cycles on i9-9980XE @3.9 GHz + var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0) + a0b0.prod(a.c0, b.c0) # [1 Mul] + a1b1.prod(a.c1, b.c1) # [2 Mul] + + r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add] + r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add] + # aliasing: a and b unneeded now + r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary + + r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub] + r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub] + r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub] + + else: # Double-width implementation with lazy reduction + # Clang 341 cycles on i9-9980XE @3.9 GHz + var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0)) + var d {.noInit.}: doubleWidth(typeof(r.c0)) + const msbSet = r.c0.typeof.canUseNoCarryMontyMul() + + a0b0.mulNoReduce(a.c0, b.c0) # 44 cycles - cumul 44 + a1b1.mulNoReduce(a.c1, b.c1) # 44 cycles - cumul 88 + when msbSet: + r.c0.sum(a.c0, a.c1) + r.c1.sum(b.c0, b.c1) + else: + r.c0.sumNoReduce(a.c0, a.c1) # 5 cycles - cumul 93 + r.c1.sumNoReduce(b.c0, b.c1) # 5 cycles - cumul 98 + # aliasing: a and b unneeded now + d.mulNoReduce(r.c0, r.c1) # 44 cycles - cumul 142 + when msbSet: + d -= a0b0 + d -= a1b1 + else: + d.diffNoReduce(d, a0b0) # 11 cycles - cumul 153 + d.diffNoReduce(d, a1b1) # 11 cycles - cumul 164 + a0b0.diff(a0b0, a1b1) # 19 cycles - cumul 183 + r.c0.reduce(a0b0) # 50 cycles - cumul 233 + r.c1.reduce(d) # 50 cycles - cumul 288 + + # Single-width [3 Mul, 2 Add, 3 Sub] + # 3*88 + 2*14 + 3*14 = 334 theoretical cycles + # 348 measured + # Double-Width + # 288 theoretical cycles + # 329 measured + # Unexplained 40 cycles diff between theo and measured + # and unexplained 30 cycles between Clang and GCC + # - Function calls? + # - push/pop stack? + +func mul_sparse_complex_by_0y( + r: var QuadraticExt, a: QuadraticExt, + sparseB: auto) = + ## Multiply `a` by `b` with sparse coordinates (0, y) + ## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖] + # + # r0 = a0 b0 - a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # with b0 = 0, hence + # + # r0 = - a1 b1 + # r1 = (a0 + a1) b1 - a1 b1 = a0 b1 + mixin fromComplexExtension + static: doAssert r.fromComplexExtension() + + when typeof(sparseB) is typeof(a): + template b(): untyped = sparseB.c1 + elif typeof(sparseB) is typeof(a.c0): + template b(): untyped = sparseB + else: + {.error: "sparseB type is " & $typeof(sparseB) & + " which does not match with either a (" & $typeof(a) & + ") or a.c0 (" & $typeof(a.c0) & ")".} + + var t{.noInit.}: typeof(a.c0) + t.prod(a.c1, b) + r.c1.prod(a.c0, b) + r.c0.neg(t) + +# Commutative ring implementation for generic quadratic extension fields +# ---------------------------------------------------------------------- + +func square_generic(r: var QuadraticExt, a: QuadraticExt) = + ## Return a² in ``r`` + ## ``r`` is initialized/overwritten + # Algorithm (with β the non-residue in the base field) + # + # (c0, c1)² => (c0 + c1 w)² + # => c0² + 2 c0 c1 w + c1²w² + # => c0² + β c1² + 2 c0 c1 w + # => (c0² + β c1², 2 c0 c1) + # We have 2 squarings and 1 multiplication in the base field + # which are significantly more costly than additions. + # For example when construction 𝔽p12 from 𝔽p6: + # - 4 limbs like BN254: multiplication is 20x slower than addition/substraction + # - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction + # + # We can save operations with one of the following expressions + # of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate + # + # Alternative 1: + # c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1 + # + # Alternative 2: + # c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1 + mixin prod + var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0) + + # v1 <- (c0 + β c1) + v1.prod(a.c1, NonResidue) + v1 += a.c0 + + # v0 <- (c0 + c1)(c0 + β c1) + v0.sum(a.c0, a.c1) + v0 *= v1 + + # v1 <- c0 c1 + v1.prod(a.c0, a.c1) + + # aliasing: a unneeded now + + # r0 = (c0 + c1)(c0 + β c1) - c0c1 + v0 -= v1 + + # r1 = 2 c0c1 + r.c1.double(v1) + + # r0 = (c0 + c1)(c0 + β c1) - c0c1 - β c0c1 + v1 *= NonResidue + r.c0.diff(v0, v1) + +func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) = + ## Returns r = a * b + # Algorithm (with β the non-residue in the base field) + # + # r0 = a0 b0 + β a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + mixin prod + var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: typeof(r.c0) + + # v2 <- (a0 + a1)(b0 + b1) + v0.sum(a.c0, a.c1) + v1.sum(b.c0, b.c1) + v2.prod(v0, v1) + + # v0 <- a0 b0 + # v1 <- a1 b1 + v0.prod(a.c0, b.c0) + v1.prod(a.c1, b.c1) + + # aliasing: a and b unneeded now + + # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 + r.c1.diff(v2, v1) + r.c1 -= v0 + + # r0 <- a0 b0 + β a1 b1 + v1 *= NonResidue + r.c0.sum(v0, v1) + +func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) = + ## Multiply `a` by `b` with sparse coordinates (x, 0) + ## On a generic quadratic extension field + # Algorithm (with β the non-residue in the base field) + # + # r0 = a0 b0 + β a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # with b1 = 0, hence + # + # r0 = a0 b0 + # r1 = (a0 + a1) b0 - a0 b0 = a1 b0 + mixin prod + template b(): untyped = sparseB + + r.c0.prod(a.c0, b.c0) + r.c1.prod(a.c1, b.c0) + +func mul_sparse_generic_by_0y( + r: var QuadraticExt, a: QuadraticExt, + sparseB: auto) = + ## Multiply `a` by `b` with sparse coordinates (0, y) + ## On a generic quadratic extension field + # Algorithm (with β the non-residue in the base field) + # + # r0 = a0 b0 + β a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # with b0 = 0, hence + # + # r0 = β a1 b1 + # r1 = (a0 + a1) b1 - a1 b1 = a0 b1 + when typeof(sparseB) is typeof(a): + template b(): untyped = sparseB.c1 + elif typeof(sparseB) is typeof(a.c0): + template b(): untyped = sparseB + else: + {.error: "sparseB type is " & $typeof(sparseB) & + " which does not match with either a (" & $typeof(a) & + ") or a.c0 (" & $typeof(a.c0) & ")".} + + var t{.noInit.}: typeof(a.c0) + + t.prod(a.c1, b) + t *= NonResidue + r.c1.prod(a.c0, b) + # aliasing: a unneeded now + r.c0 = t + +func mul_sparse_generic_by_0y( + r: var QuadraticExt, a: QuadraticExt, + sparseB: static int) = + ## Multiply `a` by `b` with sparse coordinates (0, y) + ## On a generic quadratic extension field + # Algorithm (with β the non-residue in the base field) + # + # r0 = a0 b0 + β a1 b1 + # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) + # + # with b0 = 0, hence + # + # r0 = β a1 b1 + # r1 = (a0 + a1) b1 - a1 b1 = a0 b1 + template b(): untyped = sparseB + + var t{.noInit.}: typeof(a.c0) + + t.prod(a.c1, b) + t *= NonResidue + r.c1.prod(a.c0, b) + # aliasing: a unneeded now + r.c0 = t + +func invImpl(r: var QuadraticExt, a: QuadraticExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + # + # Algorithm: + # + # 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w) + # <=> (a0 - a1 w) / (a0² - a1² w²) + # with w being our coordinate system and β the quadratic non-residue + # we have w² = β + # So the inverse is (a0 - a1 w) / (a0² - β a1²) + mixin fromComplexExtension + + # [2 Sqr, 1 Add] + var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0) + v0.square(a.c0) + v1.square(a.c1) + when r.fromComplexExtension(): + v0 += v1 + else: + v1 *= NonResidue + v0 -= v1 # v0 = a0² - β a1² (the norm / squared magnitude of a) + + # [1 Inv, 2 Sqr, 1 Add] + v1.inv(v0) # v1 = 1 / (a0² - β a1²) + + # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg] + r.c0.prod(a.c0, v1) # r0 = a0 / (a0² - β a1²) + v0.neg(v1) # v0 = -1 / (a0² - β a1²) + r.c1.prod(a.c1, v0) # r1 = -a1 / (a0² - β a1²) + +# Exported quadratic symbols +# ------------------------------------------------------------------- + +{.push inline.} + +func square*(r: var QuadraticExt, a: QuadraticExt) = + mixin fromComplexExtension + when r.fromComplexExtension(): + r.square_complex(a) + else: + r.square_generic(a) + +func prod*(r: var QuadraticExt, a, b: QuadraticExt) = + mixin fromComplexExtension + when r.fromComplexExtension(): + r.prod_complex(a, b) + else: + r.prod_generic(a, b) + +func inv*(r: var QuadraticExt, a: QuadraticExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + r.invImpl(a) + +func inv*(a: var QuadraticExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + a.invImpl(a) + +func `*=`*(a: var QuadraticExt, b: QuadraticExt) = + ## In-place multiplication + a.prod(a, b) + +func square*(a: var QuadraticExt) = + ## In-place squaring + a.square(a) + +func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) = + ## Sparse multiplication + mixin fromComplexExtension + when a.fromComplexExtension(): + r.mul_sparse_complex_by_0y(a, sparseB) + else: + r.mul_sparse_generic_by_0y(a, sparseB) + +func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: static int) = + ## Sparse multiplication + mixin fromComplexExtension + when a.fromComplexExtension(): + {.error: "Not implemented.".} + else: + r.mul_sparse_generic_by_0y(a, sparseB) + +func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: auto) = + ## Sparse in-place multiplication + a.mul_sparse_by_0y(a, sparseB) + +func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) = + ## Sparse in-place multiplication + a.mul_sparse_generic_by_x0(a, sparseB) + +{.pop.} # inline + +# ############################################################ +# # +# Cubic extensions # +# # +# ############################################################ + +# Commutative ring implementation for Cubic Extension Fields +# ------------------------------------------------------------------- +# Cubic extensions can use specific squaring procedures +# beyond Schoolbook and Karatsuba: +# - Chung-Hasan (3 different algorithms) +# - Toom-Cook-3x +# +# Chung-Hasan papers +# http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf +# https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf +# +# The papers focus on polynomial squaring, they have been adapted +# to towered extension fields with the relevant costs in +# +# - Multiplication and Squaring on Pairing-Friendly Fields +# Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006 +# https://eprint.iacr.org/2006/471 +# +# Costs in the underlying field +# M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue +# +# | Method | > Linear | Linear | +# |-------------|----------|-------------------| +# | Schoolbook | 3M + 3S | 6A + 2B | +# | Karatsuba | 6S | 13A + 2B | +# | Tom-Cook-3x | 5S | 33A + 2B | +# | CH-SQR1 | 3M + 2S | 11A + 2B | +# | CH-SQR2 | 2M + 3S | 10A + 2B | +# | CH-SQR3 | 1M + 4S | 11A + 2B + 1 Div2 | +# | CH-SQR3x | 1M + 4S | 14A + 2B | + +func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}= + ## Returns r = a² + mixin prod, square, sum + var s0{.noInit.}, m01{.noInit.}, m12{.noInit.}: typeof(r.c0) + + # precomputations that use a + m01.prod(a.c0, a.c1) + m01.double() + m12.prod(a.c1, a.c2) + m12.double() + s0.square(a.c2) + # aliasing: a₂ unused + + # r₂ = (a₀ - a₁ + a₂)² + r.c2.sum(a.c2, a.c0) + r.c2 -= a.c1 + r.c2.square() + # aliasing, a almost unneeded now + + # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₂² + r.c2 += m01 + r.c2 += m12 + r.c2 -= s0 + + # r₁ = 2a₀a₁ + β a₂² + r.c1.prod(s0, NonResidue) + r.c1 += m01 + + # r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₀² - a₂² + s0.square(a.c0) + r.c2 -= s0 + + # r₀ = a₀² + β 2a₁a₂ + r.c0.prod(m12, NonResidue) + r.c0 += s0 + +func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) = + ## Returns r = a² + mixin prod, square, sum + var s0{.noInit.}, t{.noInit.}, m12{.noInit.}: typeof(r.c0) + + # s₀ = (a₀ + a₁ + a₂)² + # t = ((a₀ + a₁ + a₂)² + (a₀ - a₁ + a₂)²) / 2 + s0.sum(a.c0, a.c2) + t.diff(s0, a.c1) + s0 += a.c1 + s0.square() + t.square() + t += s0 + t.div2() + + # m12 = 2a₁a₂ and r₁ = a₂² + # then a₁ and a₂ are unused for aliasing + m12.prod(a.c1, a.c2) + m12.double() + r.c1.square(a.c2) # r₁ = a₂² + + r.c2.diff(t, r.c1) # r₂ = t - a₂² + r.c1 *= NonResidue # r₁ = β a₂² + r.c1 += s0 # r₁ = (a₀ + a₁ + a₂)² + β a₂² + r.c1 -= m12 # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ + β a₂² + r.c1 -= t # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ - t + β a₂² + + s0.square(a.c0) + # aliasing: a₀ unused + + r.c2 -= s0 + r.c0.prod(m12, NonResidue) + r.c0 += s0 + +func prodImpl(r: var CubicExt, a, b: CubicExt) = + ## Returns r = a * b # Algorithm is Karatsuba + var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(r.c0) + var t0{.noInit.}, t1{.noInit.}, t2{.noInit.}: typeof(r.c0) + + v0.prod(a.c0, b.c0) + v1.prod(a.c1, b.c1) + v2.prod(a.c2, b.c2) + + # r₀ = β ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀ + t0.sum(a.c1, a.c2) + t1.sum(b.c1, b.c2) + t0 *= t1 + t0 -= v1 + t0 -= v2 + t0 *= NonResidue + # r₀ = t₀ + v₀ at the end to handle aliasing + + # r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂ + t1.sum(a.c0, a.c1) + t2.sum(b.c0, b.c1) + r.c1.prod(t1, t2) + r.c1 -= v0 + r.c1 -= v1 + t1.prod(v2, NonResidue) + r.c1 += t1 + + # r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁ + t1.sum(a.c0, a.c2) + t2.sum(b.c0, b.c2) + r.c2.prod(t1, t2) + r.c2 -= v0 + r.c2 -= v2 + r.c2 += v1 + + # Finish r₀ + r.c0.sum(t0, v0) + +func invImpl(r: var CubicExt, a: CubicExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + # + # Algorithm 5.23 + # + # Arithmetic of Finite Fields + # Chapter 5 of Guide to Pairing-Based Cryptography + # Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\ + # https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields + # + # We optimize for stack usage and use 4 temporaries + # instead of 9, because 4 * 2 (𝔽p2) * Bitsize would be: + # - ~2032 bits for BN254 + # - ~3048 bits for BLS12-381 + var A {.noInit.}, B {.noInit.}, C {.noInit.}: typeof(r.c0) + var t {.noInit.}: typeof(r.c0) + + # A <- a₀² - β a₁ a₂ + A.square(a.c0) + t.prod(a.c1, a.c2) + t *= NonResidue + A -= t + + # B <- β a₂² - a₀ a₁ + B.square(a.c2) + B *= NonResidue + t.prod(a.c0, a.c1) + B -= t + + # C <- a₁² - a₀ a₂ + C.square(a.c1) + t.prod(a.c0, a.c2) + C -= t + + # F in t + # F <- β a₁ C + a₀ A + β a₂ B + t.prod(a.c1, C) + r.c2.prod(a.c2, B) # aliasing: last use of a₂, destroy r₂ + t += r.c2 + t *= NonResidue + r.c0.prod(a.c0, A) # aliasing: last use of a₀, destroy r₀ + t += r.c0 + + t.inv() + + # (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F + r.c0.prod(A, t) + r.c1.prod(B, t) + r.c2.prod(C, t) + +# Exported cubic symbols +# ------------------------------------------------------------------- +{.push inline.} + +func square*(r: var CubicExt, a: CubicExt) = + ## Returns r = a² + square_Chung_Hasan_SQR3(r, a) + +func square*(a: var CubicExt) = + ## In-place squaring + a.square(a) + +func prod*(r: var CubicExt, a, b: CubicExt) = + ## In-place multiplication + r.prodImpl(a, b) + +func `*=`*(a: var CubicExt, b: CubicExt) = + ## In-place multiplication + a.prodImpl(a, b) + +func inv*(r: var CubicExt, a: CubicExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + r.invImpl(a) + +func inv*(a: var CubicExt) = + ## Compute the multiplicative inverse of ``a`` + ## + ## The inverse of 0 is 0. + ## Incidentally this avoids extra check + ## to convert Jacobian and Projective coordinates + ## to affine for elliptic curve + a.invImpl(a) + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/tower_field_extensions/quadratic_extensions.nim b/constantine/tower_field_extensions/quadratic_extensions.nim deleted file mode 100644 index e761bfd..0000000 --- a/constantine/tower_field_extensions/quadratic_extensions.nim +++ /dev/null @@ -1,365 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../arithmetic, - ../primitives, - ./tower_common - -# Commutative ring implementation for complex extension fields -# ------------------------------------------------------------------- - -func square_complex(r: var QuadraticExt, a: QuadraticExt) = - ## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r`` - ## ``r`` is initialized/overwritten - ## - ## Requires a complex extension field - # (c0, c1)² => (c0 + c1𝑖)² - # => c0² + 2 c0 c1𝑖 + (c1𝑖)² - # => c0²-c1² + 2 c0 c1𝑖 - # => (c0²-c1², 2 c0 c1) - # or - # => ((c0-c1)(c0+c1), 2 c0 c1) - # => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1) - # - # Costs (naive implementation) - # - 1 Multiplication 𝔽p - # - 2 Squarings 𝔽p - # - 1 Doubling 𝔽p - # - 1 Substraction 𝔽p - # Stack: 4 * ModulusBitSize (4x 𝔽p element) - # - # Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space) - # - # - 2 Multiplications 𝔽p - # - 1 Addition 𝔽p - # - 1 Doubling 𝔽p - # - 1 Substraction 𝔽p - # Stack: 6 * ModulusBitSize (4x 𝔽p element + 1 named temporaries + 1 in-place multiplication temporary) - # as in-place multiplications require a (shared) internal temporary - mixin fromComplexExtension - static: doAssert r.fromComplexExtension() - - var c0mc1 {.noInit.}: typeof(r.c0) - c0mc1.diff(a.c0, a.c1) # c0mc1 = c0 - c1 [1 Sub] - r.c1.double(a.c1) # result.c1 = 2 c1 [1 Dbl, 1 Sub] - r.c0.sum(c0mc1, r.c1) # result.c0 = c0 - c1 + 2 c1 [1 Add, 1 Dbl, 1 Sub] - r.c0 *= c0mc1 # result.c0 = (c0 + c1)(c0 - c1) = c0² - c1² [1 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary - r.c1 *= a.c0 # result.c1 = 2 c1 c0 [2 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary - -func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) = - ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r`` - ## ``r`` is initialized/overwritten - ## - ## Requires a complex extension field - # (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖) - # => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖 - # - # In Fp, multiplication has cost O(n²) with n the number of limbs - # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction) - # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition) - # - # Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words) - # where addition/substraction are significantly cheaper than multiplication - # - # So we always reframe the imaginary part using Karatsuba approach to save a multiplication - # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 ) - # - # Costs (naive implementation) - # - 4 Multiplications 𝔽p - # - 1 Addition 𝔽p - # - 1 Substraction 𝔽p - # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries) - # - # Costs (Karatsuba) - # - 3 Multiplications 𝔽p - # - 3 Substraction 𝔽p (2 are fused) - # - 2 Addition 𝔽p - # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary) - mixin fromComplexExtension - static: doAssert r.fromComplexExtension() - - # TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down) - # for seemingly no reason - - when false: # Single-width implementation - BLS12-381 - # Clang 348 cycles on i9-9980XE @3.9 GHz - var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0) - a0b0.prod(a.c0, b.c0) # [1 Mul] - a1b1.prod(a.c1, b.c1) # [2 Mul] - - r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add] - r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add] - r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary - - r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub] - r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub] - r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub] - - else: # Double-width implementation with lazy reduction - # Clang 341 cycles on i9-9980XE @3.9 GHz - var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0)) - var d {.noInit.}: doubleWidth(typeof(r.c0)) - const msbSet = r.c0.typeof.canUseNoCarryMontyMul() - - a0b0.mulNoReduce(a.c0, b.c0) # 44 cycles - cumul 44 - a1b1.mulNoReduce(a.c1, b.c1) # 44 cycles - cumul 88 - when msbSet: - r.c0.sum(a.c0, a.c1) - r.c1.sum(b.c0, b.c1) - else: - r.c0.sumNoReduce(a.c0, a.c1) # 5 cycles - cumul 93 - r.c1.sumNoReduce(b.c0, b.c1) # 5 cycles - cumul 98 - d.mulNoReduce(r.c0, r.c1) # 44 cycles - cumul 142 - when msbSet: - d -= a0b0 - d -= a1b1 - else: - d.diffNoReduce(d, a0b0) # 11 cycles - cumul 153 - d.diffNoReduce(d, a1b1) # 11 cycles - cumul 164 - a0b0.diff(a0b0, a1b1) # 19 cycles - cumul 183 - r.c0.reduce(a0b0) # 50 cycles - cumul 233 - r.c1.reduce(d) # 50 cycles - cumul 288 - - # Single-width [3 Mul, 2 Add, 3 Sub] - # 3*88 + 2*14 + 3*14 = 334 theoretical cycles - # 348 measured - # Double-Width - # 288 theoretical cycles - # 329 measured - # Unexplained 40 cycles diff between theo and measured - # and unexplained 30 cycles between Clang and GCC - # - Function calls? - # - push/pop stack? - -func mul_sparse_complex_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) = - ## Multiply `a` by `b` with sparse coordinates (0, y) - ## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖] - # - # r0 = a0 b0 - a1 b1 - # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) - # - # with b0 = 0, hence - # - # r0 = - a1 b1 - # r1 = (a0 + a1) b1 - a1 b1 = a0 b1 - mixin fromComplexExtension - static: doAssert r.fromComplexExtension() - - template b(): untyped = sparseB - - r.c0.prod(a.c1, b.c1) - r.c0.neg(r.c0) - r.c1.prod(a.c0, b.c1) - -# Commutative ring implementation for generic quadratic extension fields -# ------------------------------------------------------------------- - -func square_generic(r: var QuadraticExt, a: QuadraticExt) = - ## Return a² in ``r`` - ## ``r`` is initialized/overwritten - # Algorithm (with β the non-residue in the base field) - # - # (c0, c1)² => (c0 + c1 w)² - # => c0² + 2 c0 c1 w + c1²w² - # => c0² + β c1² + 2 c0 c1 w - # => (c0² + β c1², 2 c0 c1) - # We have 2 squarings and 1 multiplication in the base field - # which are significantly more costly than additions. - # For example when construction 𝔽p12 from 𝔽p6: - # - 4 limbs like BN254: multiplication is 20x slower than addition/substraction - # - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction - # - # We can save operations with one of the following expressions - # of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate - # - # Alternative 1: - # c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1 - # - # Alternative 2: - # c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1 - mixin prod - - # r0 <- (c0 + c1)(c0 + β c1) - r.c0.sum(a.c0, a.c1) - r.c1.sum(a.c0, NonResidue * a.c1) - r.c0 *= r.c1 - - # r1 <- c0 c1 - r.c1.prod(a.c0, a.c1) - - # r0 = (c0 + c1)(c0 + β c1) - β c0c1 - c0c1 - r.c0 -= NonResidue * r.c1 - r.c0 -= r.c1 - - # r1 = 2 c0c1 - r.c1.double() - -func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) = - ## Returns r = a * b - # Algorithm (with β the non-residue in the base field) - # - # r0 = a0 b0 + β a1 b1 - # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) - mixin prod - var t {.noInit.}: typeof(r.c0) - - # r1 <- (a0 + a1)(b0 + b1) - r.c0.sum(a.c0, a.c1) - t.sum(b.c0, b.c1) - r.c1.prod(r.c0, t) - - # r0 <- a0 b0 - # r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 - r.c0.prod(a.c0, b.c0) - t.prod(a.c1, b.c1) - r.c1 -= r.c0 - r.c1 -= t - - # r0 <- a0 b0 + β a1 b1 - r.c0 += NonResidue * t - -func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) = - ## Multiply `a` by `b` with sparse coordinates (x, 0) - ## On a generic quadratic extension field - # Algorithm (with β the non-residue in the base field) - # - # r0 = a0 b0 + β a1 b1 - # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) - # - # with b1 = 0, hence - # - # r0 = a0 b0 - # r1 = (a0 + a1) b0 - a0 b0 = a1 b0 - mixin prod - template b(): untyped = sparseB - - r.c0.prod(a.c0, b.c0) - r.c1.prod(a.c1, b.c0) - -func mul_sparse_generic_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) = - ## Multiply `a` by `b` with sparse coordinates (0, y) - ## On a generic quadratic extension field - # Algorithm (with β the non-residue in the base field) - # - # r0 = a0 b0 + β a1 b1 - # r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba) - # - # with b0 = 0, hence - # - # r0 = β a1 b1 - # r1 = (a0 + a1) b1 - a1 b1 = a0 b1 - template b(): untyped = sparseB - - r.c0.prod(a.c1, b.c1) - r.c0 *= NonResidue - r.c1.prod(a.c0, b.c1) - -# Exported symbols -# ------------------------------------------------------------------- - -func conj*(a: var QuadraticExt) {.inline.} = - ## Computes the conjugate in-place - a.c1.neg() - -func conj*(r: var QuadraticExt, a: QuadraticExt) {.inline.} = - ## Computes the conjugate out-of-place - r.c0 = a.c0 - r.c1.neg(a.c1) - -func conjneg*(a: var QuadraticExt) {.inline.} = - ## Computes the negated conjugate in-place - a.c0.neg() - -func conjneg*(r: var QuadraticExt, a: QuadraticExt) {.inline.} = - ## Computes the negated conjugate out-of-place - r.c0.neg(a.c0) - r.c1 = a.c1 - -func square*(r: var QuadraticExt, a: QuadraticExt) {.inline.} = - mixin fromComplexExtension - when r.fromComplexExtension(): - r.square_complex(a) - else: - r.square_generic(a) - -func prod*(r: var QuadraticExt, a, b: QuadraticExt) {.inline.} = - mixin fromComplexExtension - when r.fromComplexExtension(): - r.prod_complex(a, b) - else: - r.prod_generic(a, b) - -func inv*(r: var QuadraticExt, a: QuadraticExt) = - ## Compute the multiplicative inverse of ``a`` - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - # - # Algorithm: - # - # 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w) - # <=> (a0 - a1 w) / (a0² - a1² w²) - # with w being our coordinate system and β the quadratic non-residue - # we have w² = β - # So the inverse is (a0 - a1 w) / (a0² - β a1²) - mixin fromComplexExtension - - # [2 Sqr, 1 Add] - var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0) - v0.square(a.c0) - v1.square(a.c1) - when r.fromComplexExtension(): - v0 += v1 - else: - v0 -= NonResidue * v1 # v0 = a0² - β a1² (the norm / squared magnitude of a) - - # [1 Inv, 2 Sqr, 1 Add] - v1.inv(v0) # v1 = 1 / (a0² - β a1²) - - # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg] - r.c0.prod(a.c0, v1) # r0 = a0 / (a0² - β a1²) - v0.neg(v1) # v0 = -1 / (a0² - β a1²) - r.c1.prod(a.c1, v0) # r1 = -a1 / (a0² - β a1²) - -func inv*(a: var QuadraticExt) = - ## Compute the multiplicative inverse of ``a`` - ## - ## The inverse of 0 is 0. - ## Incidentally this avoids extra check - ## to convert Jacobian and Projective coordinates - ## to affine for elliptic curve - a.inv(a) - -func `*=`*(a: var QuadraticExt, b: QuadraticExt) {.inline.} = - ## In-place multiplication - # On higher extension field like 𝔽p12, - # if `prod` is called on shared in and out buffer, the result is wrong - let t = a - a.prod(t, b) - -func square*(a: var QuadraticExt) {.inline.} = - let t = a - a.square(t) - -func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} = - ## Sparse in-place multiplication - mixin fromComplexExtension - when a.fromComplexExtension(): - let t = a - a.mul_sparse_complex_by_0y(t, sparseB) - else: - let t = a - a.mul_sparse_generic_by_0y(t, sparseB) - -func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} = - ## Sparse in-place multiplication - let t = a - a.mul_sparse_generic_by_x0(t, sparseB) diff --git a/constantine/tower_field_extensions/tower_common.nim b/constantine/tower_field_extensions/tower_common.nim deleted file mode 100644 index 6fdb63d..0000000 --- a/constantine/tower_field_extensions/tower_common.nim +++ /dev/null @@ -1,263 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -import - ../config/common, - ../primitives, - ../arithmetic - -# Note: to avoid burdening the Nim compiler, we rely on generic extension -# to complain if the base field procedures don't exist - -# Common type definition -# ------------------------------------------------------------------- - -type - NonResidue* = object - ## Non-Residue - ## - ## Placeholder for the appropriate quadratic, cubic or sectic non-residue - - CubicExt* = concept x - ## Cubic Extension field concept - type BaseField = auto - x.c0 is BaseField - x.c1 is BaseField - x.c2 is BaseField - - QuadraticExt* = concept x - ## Quadratic Extension field concept - not(x is CubicExt) - - type BaseField = auto - x.c0 is BaseField - x.c1 is BaseField - - ExtensionField* = QuadraticExt or CubicExt - -# Initialization -# ------------------------------------------------------------------- - -func setZero*(a: var ExtensionField) = - ## Set ``a`` to 0 in the extension field - for field in fields(a): - field.setZero() - -func setOne*(a: var ExtensionField) = - ## Set ``a`` to 1 in the extension field - for fieldName, fA in fieldPairs(a): - when fieldName == "c0": - fA.setOne() - else: - fA.setZero() - -func fromBig*(a: var ExtensionField, src: BigInt) = - ## Set ``a`` to the bigint value in the extension field - for fieldName, fA in fieldPairs(a): - when fieldName == "c0": - fA.fromBig(src) - else: - fA.setZero() - -# Comparison -# ------------------------------------------------------------------- - -func `==`*(a, b: ExtensionField): SecretBool = - ## Constant-time equality check - result = CtTrue - for fA, fB in fields(a, b): - result = result and (fA == fB) - -func isZero*(a: ExtensionField): SecretBool = - ## Constant-time check if zero - result = CtTrue - for fA in fields(a): - result = result and fA.isZero() - -func isOne*(a: ExtensionField): SecretBool = - ## Constant-time check if one - result = CtTrue - for fieldName, fA in fieldPairs(a): - when fieldName == "c0": - result = result and fA.isOne() - else: - result = result and fA.isZero() - -func isMinusOne*(a: ExtensionField): SecretBool = - ## Constant-time check if -1 - result = CtTrue - for fieldName, fA in fieldPairs(a): - when fieldName == "c0": - result = result and fA.isMinusOne() - else: - result = result and fA.isZero() - -# Copies -# ------------------------------------------------------------------- - -func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = - ## Constant-time conditional copy - ## If ctl is true: b is copied into a - ## if ctl is false: b is not copied and a is unmodified - ## Time and memory accesses are the same whether a copy occurs or not - for fA, fB in fields(a, b): - ccopy(fA, fB, ctl) - -# Abelian group -# ------------------------------------------------------------------- - -func neg*(r: var ExtensionField, a: ExtensionField) = - ## Field out-of-place negation - for fR, fA in fields(r, a): - fR.neg(fA) - -func neg*(a: var ExtensionField) = - ## Field in-place negation - for fA in fields(a): - fA.neg() - -func `+=`*(a: var ExtensionField, b: ExtensionField) = - ## Addition in the extension field - for fA, fB in fields(a, b): - fA += fB - -func `-=`*(a: var ExtensionField, b: ExtensionField) = - ## Substraction in the extension field - for fA, fB in fields(a, b): - fA -= fB - -func double*(r: var ExtensionField, a: ExtensionField) = - ## Field out-of-place doubling - for fR, fA in fields(r, a): - fR.double(fA) - -func double*(a: var ExtensionField) = - ## Field in-place doubling - for fA in fields(a): - fA.double() - -func div2*(a: var ExtensionField) = - ## Field in-place division by 2 - for fA in fields(a): - fA.div2() - -func sum*(r: var QuadraticExt, a, b: QuadraticExt) = - ## Sum ``a`` and ``b`` into ``r`` - r.c0.sum(a.c0, b.c0) - r.c1.sum(a.c1, b.c1) - -func sum*(r: var CubicExt, a, b: CubicExt) = - ## Sum ``a`` and ``b`` into ``r`` - r.c0.sum(a.c0, b.c0) - r.c1.sum(a.c1, b.c1) - r.c2.sum(a.c2, b.c2) - -func diff*(r: var QuadraticExt, a, b: QuadraticExt) = - ## Diff ``a`` and ``b`` into ``r`` - r.c0.diff(a.c0, b.c0) - r.c1.diff(a.c1, b.c1) - -func diff*(r: var CubicExt, a, b: CubicExt) = - ## Diff ``a`` and ``b`` into ``r`` - r.c0.diff(a.c0, b.c0) - r.c1.diff(a.c1, b.c1) - r.c2.diff(a.c2, b.c2) - - -# Conditional arithmetic -# ------------------------------------------------------------------- - -func cneg*(a: var ExtensionField, ctl: SecretBool) = - ## Constant-time in-place conditional negation - ## Only negate if ctl is true - for fA in fields(a): - fA.cneg(ctl) - -func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = - ## Constant-time in-place conditional addition - for fA, fB in fields(a, b): - fA.cadd(fB, ctl) - -func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) = - ## Constant-time in-place conditional substraction - for fA, fB in fields(a, b): - fA.csub(fB, ctl) - -# Multiplication by a small integer known at compile-time -# ------------------------------------------------------------------- - -func `*=`*(a: var ExtensionField, b: static int) {.inline.} = - ## Multiplication by a small integer known at compile-time - - const negate = b < 0 - const b = if negate: -b - else: b - when negate: - a.neg(a) - when b == 0: - a.setZero() - elif b == 1: - return - elif b == 2: - a.double() - elif b == 3: - let t1 = a - a.double() - a += t1 - elif b == 4: - a.double() - a.double() - elif b == 5: - let t1 = a - a.double() - a.double() - a += t1 - elif b == 6: - a.double() - let t2 = a - a.double() # 4 - a += t2 - elif b == 7: - let t1 = a - a.double() - let t2 = a - a.double() # 4 - a += t2 - a += t1 - elif b == 8: - a.double() - a.double() - a.double() - elif b == 9: - let t1 = a - a.double() - a.double() - a.double() # 8 - a += t1 - elif b == 10: - a.double() - let t2 = a - a.double() - a.double() # 8 - a += t2 - elif b == 11: - let t1 = a - a.double() - let t2 = a - a.double() - a.double() # 8 - a += t2 - a += t1 - elif b == 12: - a.double() - a.double() # 4 - let t4 = a - a.double() # 8 - a += t4 - else: - {.error: "Multiplication by this small int not implemented".} diff --git a/constantine/tower_field_extensions/tower_instantiation.nim b/constantine/tower_field_extensions/tower_instantiation.nim index 8502f58..f654d04 100644 --- a/constantine/tower_field_extensions/tower_instantiation.nim +++ b/constantine/tower_field_extensions/tower_instantiation.nim @@ -12,14 +12,32 @@ import ../arithmetic, - ../config/curves, + ../config/[common, curves], ../io/io_fields, - ./tower_common, - ./quadratic_extensions, - ./cubic_extensions, + ./extension_fields, ./exponentiations -export tower_common, quadratic_extensions, cubic_extensions, exponentiations +export extension_fields, exponentiations + +# We assume that the sextic non-residues used to construct +# the elliptic curve twists +# match with the quadratic and cubic non-residues +# chosen to construct the tower of extension fields. + +# 𝔽p +# ---------------------------------------------------------------- + +func `*=`*(a: var Fp, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p by the quadratic non-residue + ## chosen to construct 𝔽p2 + static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension" + a *= Fp.C.getNonResidueFp() + +func prod*(r: var Fp, a: Fp, _: type NonResidue){.inline.} = + ## Multiply an element of 𝔽p by the quadratic non-residue + ## chosen to construct 𝔽p2 + static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension" + r.prod(a, Fp.C.getNonResidueFp()) # 𝔽p2 # ---------------------------------------------------------------- @@ -28,53 +46,58 @@ type Fp2*[C: static Curve] = object c0*, c1*: Fp[C] - β = NonResidue - # Quadratic or Cubic non-residue - - SexticNonResidue* = object - template fromComplexExtension*[F](elem: F): static bool = ## Returns true if the input is a complex extension ## i.e. the irreducible polynomial chosen is ## x² - µ with µ = -1 ## and so 𝔽p2 = 𝔽p[x] / x² - µ = 𝔽p[𝑖] - when F is Fp2 and F.C.get_QNR_Fp() == -1: + when F is Fp2 and F.C.getNonResidueFp() == -1: true else: false -func `*=`*(a: var Fp, _: typedesc[β]) {.inline.} = - ## Multiply an element of 𝔽p by the quadratic non-residue - ## chosen to construct 𝔽p2 - static: doAssert Fp.C.get_QNR_Fp() != -1, "𝔽p2 should be specialized for complex extension" - a *= Fp.C.get_QNR_Fp() +template mulCheckSparse*(a: var Fp2, b: Fp2) = + when b.isOne().bool: + discard + elif b.isMinusOne().bool: + a.neg() + elif b.c0.isZero().bool and b.c1.isOne().bool: + var t {.noInit.}: type(a.c0) + when fromComplexExtension(b): + t.neg(a.c1) + a.c1 = a.c0 + a.c0 = t + else: + t.prod(a.c1, NonResidue) + a.c1 = a.c0 + a.c0 = t + elif b.c0.isZero().bool and b.c1.isMinusOne().bool: + var t {.noInit.}: type(a.c0) + when fromComplexExtension(b): + t = a.c1 + a.c1.neg(a.c0) + a.c0 = t + else: + t.prod(a.c1, NonResidue) + a.c1.neg(a.c0) + a.c0.neg(t) + elif b.c0.isZero().bool: + a.mul_sparse_by_0y(b) + elif b.c1.isZero().bool: + a.mul_sparse_by_x0(b) + else: + a *= b -func `*`*(_: typedesc[β], a: Fp): Fp {.inline, noInit.} = - ## Multiply an element of 𝔽p by the quadratic non-residue - ## chosen to construct 𝔽p2 - result = a - result *= β - -# TODO: rework the quad/cube/sextic non residue declaration - -func `*=`*(a: var Fp, _: typedesc[SexticNonResidue]) {.inline.} = - ## Multiply an element of 𝔽p by the sextic non-residue - ## chosen to construct 𝔽p6 - a *= Fp.C.get_QNR_Fp() - -func `*`*(_: typedesc[SexticNonResidue], a: Fp): Fp {.inline, noInit.} = - ## Multiply an element of 𝔽p by the sextic non-residue - ## chosen to construct 𝔽p6 - result = a - result *= SexticNonResidue - -func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} = - ## Multiply an element of 𝔽p2 by the sextic non-residue - ## chosen to construct the sextic twist +func prod*(r: var Fp2, a: Fp2, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p2 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p4 + ## - if cubic non-residue: 𝔽p6 + ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12 # Yet another const tuple unpacking bug - const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12 - const v = Fp2.C.get_SNR_Fp2()[1] - const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2 + const u = Fp2.C.getNonResidueFp2()[0] + const v = Fp2.C.getNonResidueFp2()[1] + const Beta {.used.} = Fp2.C.getNonResidueFp() # ξ = u + v x # and x² = β # @@ -82,24 +105,49 @@ func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} = # => u c0 + β v c1 + (v c0 + u c1) x when a.fromComplexExtension() and u == 1 and v == 1: let t = a.c0 - a.c0 -= a.c1 - a.c1 += t + r.c0.diff(t, a.c1) + r.c1.sum(t, a.c1) else: - let a0 = a.c0 - let a1 = a.c1 - when a.fromComplexExtension(): - a.c0.diff(u * a0, v * a1) + # Case: + # - BN254_Snarks, QNR_Fp: -1, SNR_Fp2: 9+1𝑖 (𝑖 = √-1) + # - BLS12_377, QNR_Fp: -5, SNR_Fp2: 0+1j (j = √-5) + # - BW6_761, SNR_Fp: -4, CNR_Fp2: 0+1j (j = √-4) + when u == 0: + # BLS12_377 and BW6_761, use small addition chain + r.mul_sparse_by_0y(a, v) else: - a.c0.sum(u * a0, (Beta * v) * a1) - a.c1.sum(v * a0, u * a1) + # BN254_Snarks, u = 9 + # Full 𝔽p2 multiplication is cheaper than addition chains + # for u*c0 and u*c1 + static: + doAssert u >= 0 and uint64(u) <= uint64(high(BaseType)) + doAssert v >= 0 and uint64(v) <= uint64(high(BaseType)) + # TODO: compile-time + var NR {.noInit.}: Fp2 + NR.c0.fromUint(uint u) + NR.c1.fromUint(uint v) + r.prod(a, NR) -func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} = - ## Multiply an element of 𝔽p by the quadratic non-residue - ## chosen to construct sextic twist +func `*=`*(a: var Fp2, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p2 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p4 + ## - if cubic non-residue: 𝔽p6 + ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12 # Yet another const tuple unpacking bug - const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12 - const v = Fp2.C.get_SNR_Fp2()[1] - const Beta = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2 + a.prod(a, NonResidue) + +func `/=`*(a: var Fp2, _: type NonResidue) {.inline.} = + ## Divide an element of 𝔽p by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p4 + ## - if cubic non-residue: 𝔽p6 + ## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12 + # Yet another const tuple unpacking bug + # Yet another const tuple unpacking bug + const u = Fp2.C.getNonresidueFp2()[0] # Sextic non-residue to construct 𝔽p12 + const v = Fp2.C.getNonresidueFp2()[1] + const Beta = Fp2.C.getNonResidueFp() # Quadratic non-residue to construct 𝔽p2 # ξ = u + v x # and x² = β # @@ -118,7 +166,7 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} = a.c1 -= t a.div2() else: - let a0 = a.c0 + var a0 = a.c0 let a1 = a.c1 const u2v2 = u*u - Beta*v*v # (u² - βv²) # TODO can be precomputed (or precompute b/µ the twist coefficient) @@ -127,12 +175,18 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} = u2v2inv.fromUint(u2v2) u2v2inv.inv() - a.c0.diff(u * a0, (Beta * v) * a1) - a.c1.diff(u * a1, v * a0) + a.c0 *= u + a.c1 *= Beta * v + a.c0 -= a.c1 + + a.c1 = a1 + a.c1 *= u + a0 *= v + a.c1 -= a0 a.c0 *= u2v2inv a.c1 *= u2v2inv -# 𝔽p6 +# 𝔽p4 & 𝔽p6 # ---------------------------------------------------------------- type @@ -142,53 +196,58 @@ type Fp6*[C: static Curve] = object c0*, c1*, c2*: Fp2[C] - ξ* = NonResidue - # We call the non-residue ξ on 𝔽p4/𝔽p6 to avoid confusion - # between non-residue - # of different tower level +func prod*(r: var Fp4, a: Fp4, _: type NonResidue) = + ## Multiply an element of 𝔽p4 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p8 + ## - if cubic non-residue: 𝔽p12 + ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24 + ## + ## Assumes that it is sqrt(NonResidue_Fp2) + let t = a.c0 + r.c0.prod(a.c1, NonResidue) + r.c1 = t -func `*`*(_: typedesc[ξ], a: Fp2): Fp2 {.inline, noInit.} = - ## Multiply an element of 𝔽p2 by the quadratic and cubic non-residue - ## chosen to construct 𝔽p4/𝔽p6 - # Yet another const tuple unpacking bug - const u = Fp2.C.get_CNR_Fp2()[0] # Quadratic & Cubic non-residue to construct 𝔽p4/𝔽p6 - const v = Fp2.C.get_CNR_Fp2()[1] - const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2 - # ξ = u + v x - # and x² = β - # - # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x² - # => u c0 + β v c1 + (v c0 + u c1) x +func `*=`*(a: var Fp4, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p4 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p8 + ## - if cubic non-residue: 𝔽p12 + ## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24 + ## + ## Assumes that it is sqrt(NonResidue_Fp2) + a.prod(a, NonResidue) - # TODO: check code generated when ξ = 1 + 𝑖 - # The mul by constant are inline but - # since we don't have __restrict tag - # and we use arrays (which decay into pointer) - # the compiler might not elide the temporary - when a.fromComplexExtension(): - result.c0.diff(u * a.c0, v * a.c1) - else: - result.c0.sum(u * a.c0, (Beta * v) * a.c1) - result.c1.sum(v * a.c0, u * a.c1 ) +func prod*(r: var Fp6, a: Fp6, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p4 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p12 + ## - if cubic non-residue: 𝔽p18 + ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36 + ## + ## Assumes that it is cube_root(NonResidue_Fp2) + ## + ## For all curves γ = v with v the factor for 𝔽p6 coordinate + ## and v³ = ξ + ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² + let t = a.c2 + r.c1 = a.c0 + r.c2 = a.c1 + t.c0.prod(t, NonResidue) -func `*=`*(a: var Fp2, _: typedesc[ξ]) {.inline.} = - ## Multiply an element of 𝔽p2 by the quadratic non-residue - ## chosen to construct 𝔽p6 - # Yet another const tuple unpacking bug - const u = Fp2.C.get_CNR_Fp2()[0] # Cubic non-residue to construct 𝔽p6 - const v = Fp2.C.get_CNR_Fp2()[1] - const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2 - # ξ = u + v x - # and x² = β - # - # (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x² - # => u c0 + β v c1 + (v c0 + u c1) x - when a.fromComplexExtension() and u == 1 and v == 1: - let t = a.c0 - a.c0 -= a.c1 - a.c1 += t - else: # TODO: optim for inline - a = ξ * a +func `*=`*(a: var Fp6, _: type NonResidue) {.inline.} = + ## Multiply an element of 𝔽p4 by the non-residue + ## chosen to construct the next extension or the twist: + ## - if quadratic non-residue: 𝔽p12 + ## - if cubic non-residue: 𝔽p18 + ## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36 + ## + ## Assumes that it is cube_root(NonResidue_Fp2) + ## + ## For all curves γ = v with v the factor for 𝔽p6 coordinate + ## and v³ = ξ + ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² + a.prod(a, NonResidue) # 𝔽p12 # ---------------------------------------------------------------- @@ -198,32 +257,6 @@ type c0*, c1*, c2*: Fp4[C] # c0*, c1*: Fp6[C] - γ = NonResidue - # We call the non-residue γ (Gamma) on 𝔽p6 to avoid confusion between non-residue - # of different tower level - -func `*`*(_: typedesc[γ], a: Fp4): Fp4 {.noInit, inline.} = - ## Multiply an element of 𝔽p4 by the sextic non-residue - ## chosen to construct 𝔽p12 - result.c0 = ξ * a.c1 - result.c1 = a.c0 - -func `*=`*(a: var Fp4, _: typedesc[γ]) {.inline.} = - a = γ * a - -func `*`*(_: typedesc[γ], a: Fp6): Fp6 {.noInit, inline.} = - ## Multiply an element of 𝔽p6 by the cubic non-residue - ## chosen to construct 𝔽p12 - ## For all curves γ = v with v the factor for 𝔽p6 coordinate - ## and v³ = ξ - ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² - result.c0 = ξ * a.c2 - result.c1 = a.c0 - result.c2 = a.c1 - -func `*=`*(a: var Fp6, _: typedesc[γ]) {.inline.} = - a = γ * a - # Sparse functions # ---------------------------------------------------------------- @@ -238,13 +271,6 @@ func mul_sparse_by_y0*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) = r.c0.prod(a.c0, b) r.c1.prod(a.c1, b) -func mul_sparse_by_0y*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) = - ## Sparse multiplication of an Fp4 element - ## with coordinates (a₀, a₁) by (0, b₁) - r.c0.prod(a.c1, b) - r.c0 *= NonResidue - r.c1.prod(a.c0, b) - func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) = ## Sparse multiplication of an Fp6 element ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0) @@ -265,6 +291,6 @@ func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) = # = a1 b1 r.c0.prod(a.c2, b) - r.c0 *= ξ + r.c0 *= NonResidue r.c1.prod(a.c0, b) r.c2.prod(a.c1, b) diff --git a/docs/optimizations.md b/docs/optimizations.md index eeba714..023119b 100644 --- a/docs/optimizations.md +++ b/docs/optimizations.md @@ -75,6 +75,9 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - [ ] x86: MULX, ADCX, ADOX instructions - [ ] no-carry optimization for CIOS (Coarsely Integrated Operand Scanning) +- Addition chains + - [ ] unreduced squarings/multiplications in addition chains + - Exponentiation - [x] variable-time exponentiation - [x] fixed window optimization _(sliding windows are not constant-time)_ @@ -158,19 +161,25 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n - Line functions - [x] Homogeneous projective coordinates - [x] D-Twist + - [x] Fused line add + elliptic curve add + - [x] Fused line double + elliptic curve double - [x] M-Twist - - [x] Fused line add + elliptic curve add - - [x] Fused line double + elliptic curve double + - [x] Fused line add + elliptic curve add + - [x] Fused line double + elliptic curve double - [ ] Jacobian projective coordinates - [ ] D-Twist + - [ ] Fused line add + elliptic curve add + - [ ] Fused line double + elliptic curve double - [ ] M-Twist - - [ ] Fused line add + elliptic curve add - - [ ] Fused line double + elliptic curve double + - [ ] Fused line add + elliptic curve add + - [ ] Fused line double + elliptic curve double - [x] Sparse multiplication line * Gₜ element - - [x] 6-way sparse - - [ ] Pseudo 8-sparse - [x] D-Twist + - [x] 6-way sparse + - [ ] Pseudo 8-sparse - [x] M-Twist + - [x] 6-way sparse + - [ ] Pseudo 8-sparse - Miller Loop - [x] NAF recoding diff --git a/helpers/prng_unsafe.nim b/helpers/prng_unsafe.nim index b156d79..4845957 100644 --- a/helpers/prng_unsafe.nim +++ b/helpers/prng_unsafe.nim @@ -15,7 +15,7 @@ import ec_shortweierstrass_projective, ec_shortweierstrass_jacobian], ../constantine/io/io_bigints, - ../constantine/tower_field_extensions/tower_common + ../constantine/tower_field_extensions/extension_fields # ############################################################ # diff --git a/tests/t_finite_fields.nim b/tests/t_finite_fields.nim index 37b5362..29d9bc0 100644 --- a/tests/t_finite_fields.nim +++ b/tests/t_finite_fields.nim @@ -25,7 +25,6 @@ proc main() = y.fromUint(10'u32) z.fromUint(90'u32) - let u = x + y x += y var x_bytes: array[8, byte] @@ -34,7 +33,6 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 90'u64 == cast[uint64](x_bytes) @@ -45,7 +43,6 @@ proc main() = y.fromUint(21'u32) z.fromUint(0'u32) - let u = x + y x += y var x_bytes: array[8, byte] @@ -54,7 +51,6 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 0'u64 == cast[uint64](x_bytes) @@ -65,7 +61,6 @@ proc main() = y.fromUint(22'u32) z.fromUint(1'u32) - let u = x + y x += y var x_bytes: array[8, byte] @@ -74,7 +69,6 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 1'u64 == cast[uint64](x_bytes) @@ -86,7 +80,6 @@ proc main() = y.fromUint(10'u32) z.fromUint(70'u32) - let u = x - y x -= y var x_bytes: array[8, byte] @@ -95,7 +88,6 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 70'u64 == cast[uint64](x_bytes) @@ -106,7 +98,6 @@ proc main() = y.fromUint(80'u32) z.fromUint(0'u32) - let u = x - y x -= y var x_bytes: array[8, byte] @@ -115,7 +106,6 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 0'u64 == cast[uint64](x_bytes) @@ -126,7 +116,6 @@ proc main() = y.fromUint(81'u32) z.fromUint(100'u32) - let u = x - y x -= y var x_bytes: array[8, byte] @@ -135,19 +124,18 @@ proc main() = check: # Check equality in the Montgomery domain bool(z == x) - bool(z == u) # Check equality when converting back to natural domain 100'u64 == cast[uint64](x_bytes) test "Multiplication mod 101": block: - var x, y, z: Fp[Fake101] + var x, y, z, r: Fp[Fake101] x.fromUint(10'u32) y.fromUint(10'u32) z.fromUint(100'u32) - let r = x * y + r.prod(x, y) var r_bytes: array[8, byte] r_bytes.exportRawUint(r, cpuEndian) @@ -159,13 +147,13 @@ proc main() = 100'u64 == cast[uint64](r_bytes) block: - var x, y, z: Fp[Fake101] + var x, y, z, r: Fp[Fake101] x.fromUint(10'u32) y.fromUint(11'u32) z.fromUint(9'u32) - let r = x * y + r.prod(x, y) var r_bytes: array[8, byte] r_bytes.exportRawUint(r, cpuEndian) @@ -275,13 +263,13 @@ proc main() = test "Multiplication mod 2^61 - 1": block: - var x, y, z: Fp[Mersenne61] + var x, y, z, r: Fp[Mersenne61] x.fromUint(10'u32) y.fromUint(10'u32) z.fromUint(100'u32) - let r = x * y + r.prod(x, y) var r_bytes: array[8, byte] r_bytes.exportRawUint(r, cpuEndian) @@ -294,13 +282,13 @@ proc main() = cast[uint64](r_bytes) == 100'u64 block: - var x, y, z: Fp[Mersenne61] + var x, y, z, r: Fp[Mersenne61] x.fromUint(1'u32 shl 31) y.fromUint(1'u32 shl 31) z.fromUint(2'u32) - let r = x * y + r.prod(x, y) var r_bytes: array[8, byte] r_bytes.exportRawUint(r, cpuEndian)