Rework towering (#148)

* naive removal of out-of-place mul by non residue

* Use {.inline.} in a consistent manner across the codebase

* Handle aliasing for quadratic multiplication

* reorg optimization

* Handle aliasing for quadratic squaring

* handle aliasing in mul_sparse_complex_by_0y

* Rework multiplication by nonresidue, assume tower and twist use same non-residue

* continue rework

* continue on non-residues

* Remove "NonResidue *" calls

* handle aliasing in Chung-Hasan SQR2

* Handla aliasing in Chung-Hasan SQR3

* Use one less temporary in Chung Hasan sqr2

* handle aliasing in cubic extensions

* merge extension tower in the same file to reduce duplicate proc and allow better inlining

* handle aliasing in cubic inversion

* drop out-of-place proc from BigInt and finite fields as well

* less copies in line_projective

* remove a copy in fp12 by lines
This commit is contained in:
Mamy Ratsimbazafy 2021-02-06 16:28:38 +01:00 committed by GitHub
parent 2c5e12d5f8
commit c312210878
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 1417 additions and 1285 deletions

View File

@ -10,7 +10,7 @@ import
../config/[common, type_bigint],
../primitives,
./limbs,
./limbs_double_width,
./limbs_extmul,
./limbs_modular,
./limbs_montgomery
@ -347,7 +347,7 @@ func bit0*(a: BigInt): Ct[uint8] =
# Multiplication by small cosntants
# ------------------------------------------------------------
func `*=`*(a: var BigInt, b: static int) {.inline.} =
func `*=`*(a: var BigInt, b: static int) =
## Multiplication by a small integer known at compile-time
# Implementation:
#
@ -420,11 +420,6 @@ func `*=`*(a: var BigInt, b: static int) {.inline.} =
else:
{.error: "Multiplication by this small int not implemented".}
func `*`*(b: static int, a: BigInt): BigInt {.noinit, inline.} =
## Multiplication by a small integer known at compile-time
result = a
result *= b
# Division by constants
# ------------------------------------------------------------

View File

@ -43,6 +43,7 @@ export Fp, Fr, FF
# No exceptions allowed
{.push raises: [].}
{.push inline.}
# ############################################################
#
@ -50,14 +51,14 @@ export Fp, Fr, FF
#
# ############################################################
func fromBig*(dst: var FF, src: BigInt) {.inline.}=
func fromBig*(dst: var FF, src: BigInt) =
## Convert a BigInt to its Montgomery form
when nimvm:
dst.mres.montyResidue_precompute(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord())
else:
dst.mres.montyResidue(src, FF.fieldMod(), FF.getR2modP(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit, inline.} =
func fromBig*[C: static Curve](T: type FF[C], src: BigInt): FF[C] {.noInit.} =
## Convert a BigInt to its Montgomery form
result.fromBig(src)
@ -70,14 +71,14 @@ func toBig*(src: FF): auto {.noInit, inline.} =
# Copy
# ------------------------------------------------------------
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.inline, meter.} =
func ccopy*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
## Constant-time conditional copy
## If ctl is true: b is copied into a
## if ctl is false: b is not copied and a is unmodified
## Time and memory accesses are the same whether a copy occurs or not
ccopy(a.mres, b.mres, ctl)
func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
func cswap*(a, b: var FF, ctl: CTBool) {.meter.} =
## Swap ``a`` and ``b`` if ``ctl`` is true
##
## Constant-time:
@ -105,34 +106,34 @@ func cswap*(a, b: var FF, ctl: CTBool) {.inline, meter.} =
# In practice I'm not aware of such prime being using in elliptic curves.
# 2^127 - 1 and 2^521 - 1 are used but 127 and 521 are not multiple of 32/64
func `==`*(a, b: FF): SecretBool {.inline.} =
func `==`*(a, b: FF): SecretBool =
## Constant-time equality check
a.mres == b.mres
func isZero*(a: FF): SecretBool {.inline.} =
func isZero*(a: FF): SecretBool =
## Constant-time check if zero
a.mres.isZero()
func isOne*(a: FF): SecretBool {.inline.} =
func isOne*(a: FF): SecretBool =
## Constant-time check if one
a.mres == FF.getMontyOne()
func isMinusOne*(a: FF): SecretBool {.inline.} =
func isMinusOne*(a: FF): SecretBool =
## Constant-time check if -1 (mod p)
a.mres == FF.getMontyPrimeMinus1()
func setZero*(a: var FF) {.inline.} =
func setZero*(a: var FF) =
## Set ``a`` to zero
a.mres.setZero()
func setOne*(a: var FF) {.inline.} =
func setOne*(a: var FF) =
## Set ``a`` to one
# Note: we need 1 in Montgomery residue form
# TODO: Nim codegen is not optimal it uses a temporary
# Check if the compiler optimizes it away
a.mres = FF.getMontyOne()
func `+=`*(a: var FF, b: FF) {.inline, meter.} =
func `+=`*(a: var FF, b: FF) {.meter.} =
## In-place addition modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -141,7 +142,7 @@ func `+=`*(a: var FF, b: FF) {.inline, meter.} =
overflowed = overflowed or not(a.mres < FF.fieldMod())
discard csub(a.mres, FF.fieldMod(), overflowed)
func `-=`*(a: var FF, b: FF) {.inline, meter.} =
func `-=`*(a: var FF, b: FF) {.meter.} =
## In-place substraction modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
submod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs)
@ -149,7 +150,7 @@ func `-=`*(a: var FF, b: FF) {.inline, meter.} =
let underflowed = sub(a.mres, b.mres)
discard cadd(a.mres, FF.fieldMod(), underflowed)
func double*(a: var FF) {.inline, meter.} =
func double*(a: var FF) {.meter.} =
## Double ``a`` modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@ -158,7 +159,7 @@ func double*(a: var FF) {.inline, meter.} =
overflowed = overflowed or not(a.mres < FF.fieldMod())
discard csub(a.mres, FF.fieldMod(), overflowed)
func sum*(r: var FF, a, b: FF) {.inline, meter.} =
func sum*(r: var FF, a, b: FF) {.meter.} =
## Sum ``a`` and ``b`` into ``r`` modulo p
## r is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -168,11 +169,11 @@ func sum*(r: var FF, a, b: FF) {.inline, meter.} =
overflowed = overflowed or not(r.mres < FF.fieldMod())
discard csub(r.mres, FF.fieldMod(), overflowed)
func sumNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
func sumNoReduce*(r: var FF, a, b: FF) {.meter.} =
## Sum ``a`` and ``b`` into ``r`` without reduction
discard r.mres.sum(a.mres, b.mres)
func diff*(r: var FF, a, b: FF) {.inline, meter.} =
func diff*(r: var FF, a, b: FF) {.meter.} =
## Substract `b` from `a` and store the result into `r`.
## `r` is initialized/overwritten
## Requires r != b
@ -182,12 +183,12 @@ func diff*(r: var FF, a, b: FF) {.inline, meter.} =
var underflowed = r.mres.diff(a.mres, b.mres)
discard cadd(r.mres, FF.fieldMod(), underflowed)
func diffNoReduce*(r: var FF, a, b: FF) {.inline, meter.} =
func diffNoReduce*(r: var FF, a, b: FF) {.meter.} =
## Substract `b` from `a` and store the result into `r`
## without reduction
discard r.mres.diff(a.mres, b.mres)
func double*(r: var FF, a: FF) {.inline, meter.} =
func double*(r: var FF, a: FF) {.meter.} =
## Double ``a`` into ``r``
## `r` is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -197,16 +198,16 @@ func double*(r: var FF, a: FF) {.inline, meter.} =
overflowed = overflowed or not(r.mres < FF.fieldMod())
discard csub(r.mres, FF.fieldMod(), overflowed)
func prod*(r: var FF, a, b: FF) {.inline, meter.} =
func prod*(r: var FF, a, b: FF) {.meter.} =
## Store the product of ``a`` by ``b`` modulo p into ``r``
## ``r`` is initialized / overwritten
r.mres.montyMul(a.mres, b.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontyMul())
func square*(r: var FF, a: FF) {.inline, meter.} =
func square*(r: var FF, a: FF) {.meter.} =
## Squaring modulo p
r.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
func neg*(r: var FF, a: FF) {.inline, meter.} =
func neg*(r: var FF, a: FF) {.meter.} =
## Negate modulo p
when UseASM_X86_64:
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
@ -221,11 +222,11 @@ func neg*(r: var FF, a: FF) {.inline, meter.} =
t.mres.czero(isZero)
r = t
func neg*(a: var FF) {.inline, meter.} =
func neg*(a: var FF) {.meter.} =
## Negate modulo p
a.neg(a)
func div2*(a: var FF) {.inline, meter.} =
func div2*(a: var FF) {.meter.} =
## Modular division by 2
a.mres.div2_modular(FF.getPrimePlus1div2())
@ -269,7 +270,7 @@ func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
#
# Internally those procedures will allocate extra scratchspace on the stack
func pow*(a: var FF, exponent: BigInt) {.inline.} =
func pow*(a: var FF, exponent: BigInt) =
## Exponentiation modulo p
## ``a``: a field element to be exponentiated
## ``exponent``: a big integer
@ -282,7 +283,7 @@ func pow*(a: var FF, exponent: BigInt) {.inline.} =
FF.canUseNoCarryMontySquare()
)
func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
func pow*(a: var FF, exponent: openarray[byte]) =
## Exponentiation modulo p
## ``a``: a field element to be exponentiated
## ``exponent``: a big integer in canonical big endian representation
@ -295,7 +296,7 @@ func pow*(a: var FF, exponent: openarray[byte]) {.inline.} =
FF.canUseNoCarryMontySquare()
)
func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
func powUnsafeExponent*(a: var FF, exponent: BigInt) =
## Exponentiation modulo p
## ``a``: a field element to be exponentiated
## ``exponent``: a big integer
@ -315,7 +316,7 @@ func powUnsafeExponent*(a: var FF, exponent: BigInt) {.inline.} =
FF.canUseNoCarryMontySquare()
)
func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) =
## Exponentiation modulo p
## ``a``: a field element to be exponentiated
## ``exponent``: a big integer a big integer in canonical big endian representation
@ -342,47 +343,27 @@ func powUnsafeExponent*(a: var FF, exponent: openarray[byte]) {.inline.} =
# ############################################################
#
# This implements extra primitives for ergonomics.
# The in-place ones should be preferred as they avoid copies on assignment
# Two kinds:
# - Those that return a field element
# - Those that internally allocate a temporary field element
func `+`*(a, b: FF): FF {.noInit, inline, meter.} =
## Addition modulo p
result.sum(a, b)
func `-`*(a, b: FF): FF {.noInit, inline, meter.} =
## Substraction modulo p
result.diff(a, b)
func `*`*(a, b: FF): FF {.noInit, inline, meter.} =
## Multiplication modulo p
##
## It is recommended to assign with {.noInit.}
## as FF elements are usually large and this
## routine will zero init internally the result.
result.prod(a, b)
func `*=`*(a: var FF, b: FF) {.inline, meter.} =
func `*=`*(a: var FF, b: FF) {.meter.} =
## Multiplication modulo p
a.prod(a, b)
func square*(a: var FF) {.inline, meter.} =
func square*(a: var FF) {.meter.} =
## Squaring modulo p
a.mres.montySquare(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.canUseNoCarryMontySquare())
func square_repeated*(r: var FF, num: int) {.inline, meter.} =
func square_repeated*(r: var FF, num: int) {.meter.} =
## Repeated squarings
for _ in 0 ..< num:
r.square()
func square_repeated*(r: var FF, a: FF, num: int) {.inline, meter.} =
func square_repeated*(r: var FF, a: FF, num: int) {.meter.} =
## Repeated squarings
r.square(a)
for _ in 1 ..< num:
r.square()
func `*=`*(a: var FF, b: static int) {.inline.} =
func `*=`*(a: var FF, b: static int) =
## Multiplication by a small integer known at compile-time
# Implementation:
# We don't want to go convert the integer to the Montgomery domain (O(n²))
@ -464,7 +445,27 @@ func `*=`*(a: var FF, b: static int) {.inline.} =
else:
{.error: "Multiplication by this small int not implemented".}
func `*`*(b: static int, a: FF): FF {.noinit, inline.} =
func prod*(r: var FF, a: FF, b: static int) =
## Multiplication by a small integer known at compile-time
result = a
result *= b
const negate = b < 0
const b = if negate: -b
else: b
when negate:
r.neg(a)
else:
r = a
r *= b
template mulCheckSparse*(a: var Fp, b: Fp) =
## Multiplication with optimization for sparse inputs
when b.isOne().bool:
discard
elif b.isZero().bool:
a.setZero()
elif b.isMinusOne().bool:
a.neg()
else:
a *= b
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -12,7 +12,7 @@ import
./bigints,
./finite_fields,
./limbs,
./limbs_double_width,
./limbs_extmul,
./limbs_montgomery
when UseASM_X86_64:
@ -28,18 +28,22 @@ template doubleWidth*(T: typedesc[Fp]): typedesc =
## Return the double-width type matching with Fp
FpDbl[T.C]
func `==`*(a, b: FpDbl): SecretBool {.inline.} =
# No exceptions allowed
{.push raises: [].}
{.push inline.}
func `==`*(a, b: FpDbl): SecretBool =
a.limbs2x == b.limbs2x
func mulNoReduce*(r: var FpDbl, a, b: Fp) {.inline.} =
func mulNoReduce*(r: var FpDbl, a, b: Fp) =
## Store the product of ``a`` by ``b`` into ``r``
r.limbs2x.prod(a.mres.limbs, b.mres.limbs)
func squareNoReduce*(r: var FpDbl, a: Fp) {.inline.} =
func squareNoReduce*(r: var FpDbl, a: Fp) =
## Store the square of ``a`` into ``r``
r.limbs2x.square(a.mres.limbs)
func reduce*(r: var Fp, a: FpDbl) {.inline.} =
func reduce*(r: var Fp, a: FpDbl) =
## Reduce a double-width field element into r
const N = r.mres.limbs.len
montyRed(
@ -54,7 +58,7 @@ func diffNoReduce*(r: var FpDbl, a, b: FpDbl) =
## Double-width substraction without reduction
discard r.limbs2x.diff(a.limbs2x, b.limbs2x)
func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
func diff*(r: var FpDbl, a, b: FpDbl) =
## Double-width modular substraction
when UseASM_X86_64:
sub2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
@ -69,6 +73,9 @@ func diff*(r: var FpDbl, a, b: FpDbl) {.inline.}=
addC(carry, sum, r.limbs2x[i+N], M.limbs[i], carry)
underflowed.ccopy(r.limbs2x[i+N], sum)
func `-=`*(a: var FpDbl, b: FpDbl) {.inline.}=
func `-=`*(a: var FpDbl, b: FpDbl) =
## Double-width modular substraction
a.diff(a, b)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -19,13 +19,17 @@ export zoo_inversions
#
# ############################################################
func inv_euclid*(r: var Fp, a: Fp) {.inline.} =
# No exceptions allowed
{.push raises: [].}
{.push inline.}
func inv_euclid*(r: var Fp, a: Fp) =
## Inversion modulo p via
## Niels Moller constant-time version of
## Stein's GCD derived from extended binary Euclid algorithm
r.mres.steinsGCD(a.mres, Fp.getR2modP(), Fp.C.Mod, Fp.getPrimePlus1div2())
func inv*(r: var Fp, a: Fp) {.inline.} =
func inv*(r: var Fp, a: Fp) =
## Inversion modulo p
##
## The inverse of 0 is 0.
@ -41,7 +45,7 @@ func inv*(r: var Fp, a: Fp) {.inline.} =
else:
r.inv_euclid(a)
func inv*(a: var Fp) {.inline.} =
func inv*(a: var Fp) =
## Inversion modulo p
##
## The inverse of 0 is 0.
@ -52,3 +56,6 @@ func inv*(a: var Fp) {.inline.} =
a.inv_addchain(a)
else:
a.inv_euclid(a)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -18,10 +18,14 @@ import
#
# ############################################################
# No exceptions allowed
{.push raises: [].}
{.push inline.}
# Legendre symbol / Euler's Criterion / Kronecker's symbol
# ------------------------------------------------------------
func isSquare*(a: Fp): SecretBool {.inline.} =
func isSquare*(a: Fp): SecretBool =
## Returns true if ``a`` is a square (quadratic residue) in 𝔽p
##
## Assumes that the prime modulus ``p`` is public.
@ -50,7 +54,7 @@ func hasP3mod4_primeModulus(C: static Curve): static bool =
## Returns true iff p ≡ 3 (mod 4)
(BaseType(C.Mod.limbs[0]) and 3) == 3
func sqrt_p3mod4(a: var Fp) {.inline.} =
func sqrt_p3mod4(a: var Fp) =
## Compute the square root of ``a``
##
## This requires ``a`` to be a square
@ -64,7 +68,7 @@ func sqrt_p3mod4(a: var Fp) {.inline.} =
static: doAssert BaseType(Fp.C.Mod.limbs[0]) mod 4 == 3
a.powUnsafeExponent(Fp.getPrimePlus1div4_BE())
func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) =
## If ``a`` is a square, compute the square root of ``a`` in sqrt
## and the inverse square root of a in invsqrt
##
@ -85,7 +89,7 @@ func sqrt_invsqrt_p3mod4(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
# √a ≡ a * 1/√a ≡ a^((p+1)/4) (mod p)
sqrt.prod(invsqrt, a)
func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} =
func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool =
## If ``a`` is a square, compute the square root of ``a`` in sqrt
## and the inverse square root of a in invsqrt
##
@ -97,7 +101,7 @@ func sqrt_invsqrt_if_square_p3mod4(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.i
test.square(sqrt)
result = test == a
func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} =
func sqrt_if_square_p3mod4*(a: var Fp): SecretBool =
## If ``a`` is a square, compute the square root of ``a``
## if not, ``a`` is unmodified.
##
@ -115,7 +119,7 @@ func sqrt_if_square_p3mod4*(a: var Fp): SecretBool {.inline.} =
# Specialized routines for addchain-based square roots
# ------------------------------------------------------------
func sqrt_addchain(a: var Fp) {.inline.} =
func sqrt_addchain(a: var Fp) =
## Compute the square root of ``a``
##
## This requires ``a`` to be a square
@ -128,13 +132,13 @@ func sqrt_addchain(a: var Fp) {.inline.} =
invsqrt.invsqrt_addchain(a)
a *= invsqrt
func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) {.inline.} =
func sqrt_invsqrt_addchain(sqrt, invsqrt: var Fp, a: Fp) =
## If ``a`` is a square, compute the square root of ``a`` in sqrt
## and the inverse square root of a in invsqrt
invsqrt.invsqrt_addchain(a)
sqrt.prod(invsqrt, a)
func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {.inline.} =
func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool =
## If ``a`` is a square, compute the square root of ``a`` in sqrt
## and the inverse square root of a in invsqrt
##
@ -144,7 +148,7 @@ func sqrt_invsqrt_if_square_addchain(sqrt, invsqrt: var Fp, a: Fp): SecretBool {
test.square(sqrt)
result = test == a
func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} =
func sqrt_if_square_addchain*(a: var Fp): SecretBool =
## If ``a`` is a square, compute the square root of ``a``
## if not, ``a`` is unmodified.
##
@ -155,6 +159,8 @@ func sqrt_if_square_addchain*(a: var Fp): SecretBool {.inline.} =
result = sqrt_invsqrt_if_square_addchain(sqrt, invsqrt, a)
a.ccopy(sqrt, result)
{.pop.} # inline
# Tonelli Shanks for any prime
# ------------------------------------------------------------
@ -228,7 +234,7 @@ func sqrt_invsqrt_tonelli_shanks_pre(
# ----------------------------------------------
func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} =
func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) =
## Compute the square root of ``a``
##
## This requires ``a`` to be a square
@ -244,7 +250,7 @@ func sqrt_tonelli_shanks(a: var Fp, useAddChain: static bool) {.inline.} =
sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
a = sqrt
func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) {.inline.} =
func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool) =
## Compute the square root and inverse square root of ``a``
##
## This requires ``a`` to be a square
@ -258,7 +264,7 @@ func sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: stat
a_pre_exp.precompute_tonelli_shanks(a, useAddChain)
sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool {.inline.} =
func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddChain: static bool): SecretBool =
## Compute the square root and ivnerse square root of ``a``
##
## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root
@ -274,7 +280,7 @@ func sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt: var Fp, a: Fp, useAddC
sqrt_invsqrt_tonelli_shanks_pre(sqrt, invsqrt, a, a_pre_exp)
a = sqrt
func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool {.inline.} =
func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): SecretBool =
## If ``a`` is a square, compute the square root of ``a``
## if not, ``a`` is unmodified.
##
@ -293,7 +299,9 @@ func sqrt_if_square_tonelli_shanks*(a: var Fp, useAddChain: static bool): Secret
# Note: we export the inner sqrt_invsqrt_IMPL
# for benchmarking purposes.
func sqrt*[C](a: var Fp[C]) {.inline.} =
{.push inline.}
func sqrt*[C](a: var Fp[C]) =
## Compute the square root of ``a``
##
## This requires ``a`` to be a square
@ -311,7 +319,7 @@ func sqrt*[C](a: var Fp[C]) {.inline.} =
else:
sqrt_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain())
func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} =
func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) =
## Compute the square root and inverse square root of ``a``
##
## This requires ``a`` to be a square
@ -328,7 +336,7 @@ func sqrt_invsqrt*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]) {.inline.} =
else:
sqrt_invsqrt_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain())
func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool {.inline.} =
func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool =
## Compute the square root and ivnerse square root of ``a``
##
## This returns true if ``a`` is square and sqrt/invsqrt contains the square root/inverse square root
@ -345,7 +353,7 @@ func sqrt_invsqrt_if_square*[C](sqrt, invsqrt: var Fp[C], a: Fp[C]): SecretBool
else:
result = sqrt_invsqrt_if_square_tonelli_shanks(sqrt, invsqrt, a, useAddChain = C.hasTonelliShanksAddchain())
func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} =
func sqrt_if_square*[C](a: var Fp[C]): SecretBool =
## If ``a`` is a square, compute the square root of ``a``
## if not, ``a`` is unmodified.
##
@ -359,3 +367,6 @@ func sqrt_if_square*[C](a: var Fp[C]): SecretBool {.inline.} =
result = sqrt_if_square_p3mod4(a)
else:
result = sqrt_if_square_tonelli_shanks(a, useAddChain = C.hasTonelliShanksAddchain())
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -174,7 +174,7 @@ func isEven*(a: Limbs): SecretBool =
# Bit manipulation
# ------------------------------------------------------------
func shiftRight*(a: var Limbs, k: int) {.inline.}=
func shiftRight*(a: var Limbs, k: int) =
## Shift right by k.
##
## k MUST be less than the base word size (2^32 or 2^64)

View File

@ -21,13 +21,46 @@ when UseASM_X86_64:
#
# ############################################################
# Inlining note:
# - The public dispatch functions are inlined.
# This allows the compiler to check CPU features only once
# in the high-level proc and also removes an intermediate function call
# to the wrapper function
# - The ASM procs or internal fallbacks are not inlined
# to save on code size.
# No exceptions allowed
{.push raises: [].}
# Multiplication
# ------------------------------------------------------------
func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
func prod_comba[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) =
## Extended precision multiplication
# We use Product Scanning / Comba multiplication
var t, u, v = Zero
const stopEx = min(a.len+b.len, r.len)
staticFor i, 0, stopEx:
# Invariant for product scanning:
# if we multiply accumulate by a[k1] * b[k2]
# we have k1+k2 == i
const ib = min(b.len-1, i)
const ia = i - ib
staticFor j, 0, min(a.len - ia, ib+1):
mulAcc(t, u, v, a[ia+j], b[ib-j])
r[i] = v
when i < stopEx-1:
v = u
u = t
t = Zero
if aLen+bLen < rLen:
for i in aLen+bLen ..< rLen:
r[i] = Zero
func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b: Limbs[bLen]) {.inline.} =
## Multi-precision multiplication
## r <- a*b
##
@ -46,28 +79,7 @@ func prod*[rLen, aLen, bLen: static int](r: var Limbs[rLen], a: Limbs[aLen], b:
elif UseASM_X86_64:
mul_asm(r, a, b)
else:
# We use Product Scanning / Comba multiplication
var t, u, v = Zero
const stopEx = min(a.len+b.len, r.len)
staticFor i, 0, stopEx:
# Invariant for product scanning:
# if we multiply accumulate by a[k1] * b[k2]
# we have k1+k2 == i
const ib = min(b.len-1, i)
const ia = i - ib
staticFor j, 0, min(a.len - ia, ib+1):
mulAcc(t, u, v, a[ia+j], b[ib-j])
r[i] = v
when i < stopEx-1:
v = u
u = t
t = Zero
if aLen+bLen < rLen:
for i in aLen+bLen ..< rLen:
r[i] = Zero
prod_comba(r, a, b)
func prod_high_words*[rLen, aLen, bLen](
r: var Limbs[rLen],
@ -178,7 +190,7 @@ func square_operandScan[rLen, aLen](
func square*[rLen, aLen](
r: var Limbs[rLen],
a: Limbs[aLen]) =
a: Limbs[aLen]) {.inline.} =
## Multi-precision squaring
## r <- a²
##

View File

@ -20,7 +20,7 @@ import
#
# ############################################################
func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.}=
func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.} =
## Modular Division by 2
## `a` will be divided in-place
## `mp1div2` is the modulus (M+1)/2

View File

@ -40,7 +40,13 @@ when UseASM_X86_64:
# - pairing final exponentiation
# are bottlenecked by Montgomery multiplications or squarings
#
# Hence we use inline assembly where possible
# Inlining note:
# - The public dispatch functions are inlined.
# This allows the compiler to check CPU features only once
# in the high-level proc and also removes an intermediate function call
# to the wrapper function
# - The ASM procs or internal fallbacks are not inlined
# to save on code size.
# No exceptions allowed
{.push raises: [].}
@ -381,31 +387,12 @@ func montyRed_Comba[N: static int](
discard z.csub(M, SecretBool(carry) or not(z < M))
r = z
# TODO upstream, using Limbs[N] breaks semcheck
func montyRed*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType, canUseNoCarryMontyMul: static bool) =
## Montgomery reduce a double-width bigint modulo M
when UseASM_X86_64 and r.len <= 6:
if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()):
montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul)
else:
montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
elif UseASM_X86_32 and r.len <= 6:
# TODO: Assembly faster than GCC but slower than Clang
montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
else:
montyRed_CIOS(r, a, M, m0ninv)
# montyRed_Comba(r, a, M, m0ninv)
# Exported API
# ------------------------------------------------------------
func montyMul*(
r: var Limbs, a, b, M: Limbs,
m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
## Compute r <- a*b (mod M) in the Montgomery domain
## `m0ninv` = -1/M (mod SecretWord). Our words are 2^32 or 2^64
##
@ -471,6 +458,25 @@ func montySquare*(r: var Limbs, a, M: Limbs,
# # montySquare_CIOS(r, a, M, m0ninv) # TODO <--- Fix this
# montyMul_FIPS(r, a, a, M, m0ninv)
# TODO upstream, using Limbs[N] breaks semcheck
func montyRed*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType, canUseNoCarryMontyMul: static bool) {.inline.} =
## Montgomery reduce a double-width bigint modulo M
when UseASM_X86_64 and r.len <= 6:
if ({.noSideEffect.}: hasBmi2()) and ({.noSideEffect.}: hasAdx()):
montRed_asm_adx_bmi2(r, a, M, m0ninv, canUseNoCarryMontyMul)
else:
montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
elif UseASM_X86_32 and r.len <= 6:
# TODO: Assembly faster than GCC but slower than Clang
montRed_asm(r, a, M, m0ninv, canUseNoCarryMontyMul)
else:
montyRed_CIOS(r, a, M, m0ninv)
# montyRed_Comba(r, a, M, m0ninv)
func redc*(r: var Limbs, a, one, M: Limbs,
m0ninv: static BaseType, canUseNoCarryMontyMul: static bool) =
## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)

View File

@ -83,12 +83,11 @@ declareCurves:
eq_form: ShortWeierstrass
coef_a: 0
coef_b: 2
nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p
nonresidue_cube_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a cube in 𝔽
nonresidue_fp: -1 # -1 is not a square in 𝔽p
nonresidue_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a square or cube in 𝔽
embedding_degree: 12
sexticTwist: D_Twist
sexticNonResidue_fp2: (1, 1) # 1+𝑖
curve BN254_Snarks: # Zero-Knowledge proofs curve (SNARKS, STARKS, Ethereum)
bitwidth: 254
@ -103,12 +102,11 @@ declareCurves:
eq_form: ShortWeierstrass
coef_a: 0
coef_b: 3
nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p
nonresidue_cube_fp2: (9, 1) # 9+𝑖 9+𝑖 is not a cube in 𝔽
nonresidue_fp: -1 # -1 is not a square in 𝔽p
nonresidue_fp2: (9, 1) # 9+𝑖 9+𝑖 is not a square or cube in 𝔽
embedding_degree: 12
sexticTwist: D_Twist
sexticNonResidue_fp2: (9, 1) # 9+𝑖
curve Curve25519: # Bernstein curve
bitwidth: 255
@ -136,12 +134,11 @@ declareCurves:
eq_form: ShortWeierstrass
coef_a: 0
coef_b: 1
nonresidue_quad_fp: -5 # -5 is not a square in 𝔽p
nonresidue_cube_fp2: (0, 1) # √-5 √-5 is not a cube in 𝔽
nonresidue_fp: -5 # -5 is not a square in 𝔽p
nonresidue_fp2: (0, 1) # √-5 √-5 is not a square or cube in 𝔽
embedding_degree: 12
sexticTwist: D_Twist
sexticNonResidue_fp2: (0, 1) # √-5
curve BLS12_381:
bitwidth: 381
@ -157,12 +154,11 @@ declareCurves:
eq_form: ShortWeierstrass
coef_a: 0
coef_b: 4
nonresidue_quad_fp: -1 # -1 is not a square in 𝔽p
nonresidue_cube_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a cube in 𝔽
nonresidue_fp: -1 # -1 is not a square in 𝔽p
nonresidue_fp2: (1, 1) # 1+𝑖 1+𝑖 is not a square or cube in 𝔽
embedding_degree: 12
sexticTwist: M_Twist
sexticNonResidue_fp2: (1, 1) # 1+𝑖
curve BW6_761:
bitwidth: 761
@ -182,9 +178,8 @@ declareCurves:
coef_b: -1
# TODO: rework the quad/cube/sextic non residue declaration
nonresidue_quad_fp: -4 # -4 is not a square in 𝔽p
nonresidue_cube_fp2: (0, 1) # -4 is not a cube in 𝔽
nonresidue_fp: -4 # -4 is not a square or cube in 𝔽p
nonresidue_fp2: (0, 1) # -4 is not a cube in 𝔽
embedding_degree: 6
sexticTwist: M_Twist
sexticNonResidue_fp2: (0, 1) # -4

View File

@ -96,8 +96,8 @@ type
modulus: NimNode # nnkStrLit (hex)
# Towering
nonresidue_quad_fp: NimNode # nnkIntLit
nonresidue_cube_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
nonresidue_fp: NimNode # nnkIntLit
nonresidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
# Curve parameters
eq_form: CurveEquationForm
@ -108,7 +108,6 @@ type
embedding_degree: int
sexticTwist: SexticTwist
sexticNonResidue_fp2: NimNode # nnkPar(nnkIntLit, nnkIntLit)
family: CurveFamily
@ -198,17 +197,15 @@ proc parseCurveDecls(defs: var seq[CurveParams], curves: NimNode) =
params.orderBitwidth = sectionVal
elif sectionId.eqIdent"cofactor":
discard "TODO"
elif sectionId.eqIdent"nonresidue_quad_fp":
params.nonresidue_quad_fp = sectionVal
elif sectionId.eqIdent"nonresidue_cube_fp2":
params.nonresidue_cube_fp2 = sectionVal
elif sectionId.eqIdent"nonresidue_fp":
params.nonresidue_fp = sectionVal
elif sectionId.eqIdent"nonresidue_fp2":
params.nonresidue_fp2 = sectionVal
elif sectionId.eqIdent"embedding_degree":
params.embedding_degree = sectionVal.intVal.int
elif sectionId.eqIdent"sexticTwist":
params.sexticTwist = parseEnum[SexticTwist]($sectionVal)
elif sectionId.eqIdent"sexticNonResidue_fp2":
params.sexticNonResidue_fp2 = sectionVal
else:
error "Invalid section: \n", curveParams[i].toStrLit()
@ -323,12 +320,12 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode =
# Towering
# -----------------------------------------------
curveEllipticStmts.add newConstStmt(
exported($curve & "_nonresidue_quad_fp"),
curveDef.nonresidue_quad_fp
exported($curve & "_nonresidue_fp"),
curveDef.nonresidue_fp
)
curveEllipticStmts.add newConstStmt(
exported($curve & "_nonresidue_cube_fp2"),
curveDef.nonresidue_cube_fp2
exported($curve & "_nonresidue_fp2"),
curveDef.nonresidue_fp2
)
# Pairing
@ -341,10 +338,6 @@ proc genMainConstants(defs: var seq[CurveParams]): NimNode =
exported($curve & "_sexticTwist"),
newLit curveDef.sexticTwist
)
curveEllipticStmts.add newConstStmt(
exported($curve & "_sexticNonResidue_fp2"),
curveDef.sexticNonResidue_fp2
)
# end for ---------------------------------------------------

View File

@ -82,21 +82,27 @@ macro getCoefB*(C: static Curve): untyped =
## or a bigInt depending on the curve
result = bindSym($C & "_coef_B")
macro get_QNR_Fp*(C: static Curve): untyped =
## Returns the tower extension quadratic non-residue in 𝔽p
## i.e. a number that is not a square in 𝔽p
result = bindSym($C & "_nonresidue_quad_fp")
macro getNonResidueFp*(C: static Curve): untyped =
## Returns the tower extension (and twist) non-residue for 𝔽p
## Depending on the curve it might be:
## - not a square (quadratic non-residue to construct Fp2)
## - not a cube (cubic non-residue to construct Fp3)
## - neither a square or cube (sextic non-residue to construct Fp2, Fp3 or Fp6)
result = bindSym($C & "_nonresidue_fp")
macro get_CNR_Fp2*(C: static Curve): untyped =
## Returns the tower extension cubic non-residue 𝔽
## i.e. a number that is not a cube in 𝔽
macro getNonResidueFp2*(C: static Curve): untyped =
## Returns the tower extension (and twist) non-residue for 𝔽
## Depending on the curve it might be:
## - not a square (quadratic non-residue to construct Fp4)
## - not a cube (cubic non-residue to construct Fp6)
## - neither a square or cube (sextic non-residue to construct Fp4, Fp6 or Fp12)
##
## The return value is a tuple (a, b)
## that corresponds to the number a + b𝑗
## with 𝑗 choosen for 𝑗² - QNR_Fp == 0
## i.e. if -1 is chosen as a quadratic non-residue 𝑗 = √-1
## if -2 is chosen as a quadratic non-residue 𝑗 = √-2
result = bindSym($C & "_nonresidue_cube_fp2")
result = bindSym($C & "_nonresidue_fp2")
macro getEmbeddingDegree*(C: static Curve): untyped =
## Returns the prime embedding degree,
@ -108,9 +114,3 @@ macro getEmbeddingDegree*(C: static Curve): untyped =
macro getSexticTwist*(C: static Curve): untyped =
## Returns if D-Twist or M-Twist
result = bindSym($C & "_sexticTwist")
macro get_SNR_Fp2*(C: static Curve): untyped =
## Returns the sextic non-residue in 𝔽
## choosen to build the twisted curve E'(𝔽p²)
## i.e. a number µ so that x⁶ - µ is irreducible
result = bindSym($C & "_sexticNonResidue_fp2")

View File

@ -32,6 +32,8 @@ type
## over a field F
x*, y*: F
SexticNonResidue* = NonResidue
func isInf*(P: ECP_ShortW_Aff): SecretBool =
## Returns true if P is an infinity point
## and false otherwise

View File

@ -49,48 +49,6 @@ func frobenius_map*(r: var Fp2, a: Fp2, k: static int = 1) {.inline.} =
else:
r = a
template mulCheckSparse(a: var Fp, b: Fp) =
when b.isOne().bool:
discard
elif b.isZero().bool:
a.setZero()
elif b.isMinusOne().bool:
a.neg()
else:
a *= b
template mulCheckSparse(a: var Fp2, b: Fp2) =
when b.isOne().bool:
discard
elif b.isMinusOne().bool:
a.neg()
elif b.c0.isZero().bool and b.c1.isOne().bool:
var t {.noInit.}: type(a.c0)
when fromComplexExtension(b):
t.neg(a.c1)
a.c1 = a.c0
a.c0 = t
else:
t = NonResidue * a.c1
a.c1 = a.c0
a.c0 = t
elif b.c0.isZero().bool and b.c1.isMinusOne().bool:
var t {.noInit.}: type(a.c0)
when fromComplexExtension(b):
t = a.c1
a.c1.neg(a.c0)
a.c0 = t
else:
t = NonResidue * a.c1
a.c1.neg(a.c0)
a.c0.neg(t)
elif b.c0.isZero().bool:
a.mul_sparse_by_0y(b)
elif b.c1.isZero().bool:
a.mul_sparse_by_x0(b)
else:
a *= b
# Frobenius map - on extension fields
# -----------------------------------------------------------------

View File

@ -35,6 +35,22 @@ type
## with Fp2 coordinates: xyz000
x*, y*, z*: F
SexticNonResidue* = NonResidue
## The Sextic non-residue to build
## 𝔽p2 -> 𝔽p12 towering and the G2 sextic twist
## or
## 𝔽p -> 𝔽p6 towering and the G2 sextic twist
##
## Note:
## while the non-residues for
## - 𝔽p2 -> 𝔽p4
## - 𝔽p2 -> 𝔽p6
## are also sextic non-residues by construction.
## the non-residues for
## - 𝔽p4 -> 𝔽p12
## - 𝔽p6 -> 𝔽p12
## are not.
func toHex*(line: Line, order: static Endianness = bigEndian): string =
result = static($line.typeof.genericHead() & '(')
for fieldName, fieldValue in fieldPairs(line):

View File

@ -92,15 +92,10 @@ func line_eval_double[F](
template B: untyped = line.y
template C: untyped = line.z
A = T.y # A = Y
v = T.y # v = Y
B = T.z # B = Z
C = T.x # C = X
A *= B # A = Y.Z
C.square() # C = X²
v.square() # v = Y²
B.square() # B = Z²
A.prod(T.y, T.z) # A = Y.Z
C.square(T.x) # C = X²
v.square(T.y) # v = Y²
B.square(T.z) # B = Z²
A.double() # A = 2 Y.Z
A.neg() # A = -2 Y.Z
@ -148,28 +143,21 @@ func line_eval_add[F](
template B: untyped = line.y
template C: untyped = line.z
A = T.x # A = X₁
v = T.z # v = Z₁
B = T.z # B = Z₁
C = T.y # C = Y₁
v.prod(T.z, Q.y) # v = Z₁Y₂
B.prod(T.z, Q.x) # B = Z₁X₂
v *= Q.y # v = Z₁Y
B *= Q.x # B = Z₁X
A.diff(T.x, B) # A = X₁-Z₁X₂
C.diff(T.y, v) # C = Y₁-Z₁Y
A -= B # A = X₁-Z₁X₂
C -= v # C = Y₁-Z₁Y₂
v.prod(A, Q.y) # v = (X₁-Z₁X₂) Y₂
B.prod(C, Q.x) # B = (Y₁-Z₁Y₂) X₂
B -= v # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂
C.neg() # C = -(Y₁-Z₁Y₂)
v = A # v = X₁-Z₁X₂
when F.C.getSexticTwist() == M_Twist:
A *= SexticNonResidue # A = ξ (X₁ - Z₁X₂)
v *= Q.y # v = (X₁-Z₁X₂) Y₂
B = C # B = Y₁-Z₁Y₂
B *= Q.x # B = (Y₁-Z₁Y₂) X₂
B -= v # B = (Y₁-Z₁Y₂) X₂ - (X₁-Z₁X₂) Y₂
C.neg() # C = -(Y₁-Z₁Y₂)
func line_eval_fused_double[F](
line: var Line[F],
T: var ECP_ShortW_Proj[F, OnTwist]) =
@ -193,13 +181,11 @@ func line_eval_fused_double[F](
when F.C.getSexticTwist() == D_Twist:
snrB *= SexticNonResidue
E = C
E *= b3
E.prod(C, b3)
when F.C.getSexticTwist() == M_Twist:
E *= SexticNonResidue # E = 3b'Z² = 3bξ Z²
F = E
F *= 3 # F = 3E = 9bZ²
F.prod(E, 3) # F = 3E = 9bZ²
G.sum(snrB, F)
G.div2() # G = (B+F)/2
H.sum(T.y, T.z)

View File

@ -56,7 +56,7 @@ func mul_by_line_xy0*[C: static Curve](
v0.prod(a.c0, b.x)
v1.prod(a.c1, b.y)
r.c0.prod(a.c2, b.y)
r.c0 *= ξ
r.c0 *= SexticNonResidue
r.c0 += v0
r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
@ -93,10 +93,9 @@ func mul_sparse_by_line_xy00z0*[C: static Curve](
f.c1 += f.c0
v3.mul_by_line_xy0(f.c1, v2)
v3 -= v0
v3 -= v1
f.c1 = v3
f.c1.diff(v3, v1)
v3.c0 = ξ * v1.c2
v3.c0.prod(v1.c2, SexticNonResidue)
v3.c0 += v0.c0
v3.c1.sum(v0.c1, v1.c0)
v3.c2.sum(v0.c2, v1.c1)
@ -150,7 +149,7 @@ func mul_sparse_by_line_xyz000*[C: static Curve](
# r0 = ξ a2 b1 + v0
f.c0.mul_sparse_by_y0(f.c2, l.z)
f.c0 *= NonResidue
f.c0 *= SexticNonResidue
f.c0 += v0
# r2 = a2 b0 + v1
@ -201,12 +200,12 @@ func mul_sparse_by_line_xy000z*[C: static Curve](
# r0 = ξ a1 b2 + v0
f.c0.mul_sparse_by_0y(f.c1, l.z)
f.c0 *= NonResidue
f.c0 *= SexticNonResidue
f.c0 += v0
# r1 = a1 b0 + ξ v2
f.c1 *= b0
v2 *= NonResidue
v2 *= SexticNonResidue
f.c1 += v2
func mul*[C](f: var Fp12[C], line: Line[Fp2[C]]) {.inline.} =

View File

@ -89,7 +89,8 @@ func millerLoopGenericBLS12*[C](
f.setOne()
template u: untyped = C.pairing(ate_param)
let u3 = 3*C.pairing(ate_param)
var u3 = C.pairing(ate_param)
u3 *= 3
for i in countdown(u3.bits - 2, 1):
f.square()
line.line_double(T, P)

View File

@ -87,7 +87,8 @@ func millerLoopGenericBN*[C](
f.setOne()
template u: untyped = C.pairing(ate_param)
let u3 = 3*C.pairing(ate_param)
var u3 = C.pairing(ate_param)
u3 *= 3
for i in countdown(u3.bits - 2, 1):
f.square()
line.line_double(T, P)

View File

@ -1,228 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../arithmetic,
../primitives,
./tower_common
# Commutative ring implementation for Cubic Extension Fields
# -------------------------------------------------------------------
# Cubic extensions can use specific squaring procedures
# beyond Schoolbook and Karatsuba:
# - Chung-Hasan (3 different algorithms)
# - Toom-Cook-3x
#
# Chung-Hasan papers
# http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf
# https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf
#
# The papers focus on polynomial squaring, they have been adapted
# to towered extension fields with the relevant costs in
#
# - Multiplication and Squaring on Pairing-Friendly Fields
# Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006
# https://eprint.iacr.org/2006/471
#
# Costs in the underlying field
# M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue
#
# | Method | > Linear | Linear |
# |-------------|----------|-------------------|
# | Schoolbook | 3M + 3S | 6A + 2B |
# | Karatsuba | 6S | 13A + 2B |
# | Tom-Cook-3x | 5S | 33A + 2B |
# | CH-SQR1 | 3M + 2S | 11A + 2B |
# | CH-SQR2 | 2M + 3S | 10A + 2B |
# | CH-SQR3 | 1M + 4S | 11A + 2B + 1 Div2 |
# | CH-SQR3x | 1M + 4S | 14A + 2B |
func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}=
## Returns r = a²
mixin prod, square, sum
var v3{.noInit.}, v4{.noInit.}, v5{.noInit.}: typeof(r.c0)
v4.prod(a.c0, a.c1)
v4.double()
v5.square(a.c2)
r.c1 = NonResidue * v5
r.c1 += v4
r.c2.diff(v4, v5)
v3.square(a.c0)
v4.diff(a.c0, a.c1)
v4 += a.c2
v5.prod(a.c1, a.c2)
v5.double()
v4.square()
r.c0 = NonResidue * v5
r.c0 += v3
r.c2 += v4
r.c2 += v5
r.c2 -= v3
func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
## Returns r = a²
mixin prod, square, sum
var v0{.noInit.}, v2{.noInit.}: typeof(r.c0)
r.c1.sum(a.c0, a.c2) # r1 = a0 + a2
v2.diff(r.c1, a.c1) # v2 = a0 - a1 + a2
r.c1 += a.c1 # r1 = a0 + a1 + a2
r.c1.square() # r1 = (a0 + a1 + a2)²
v2.square() # v2 = (a0 - a1 + a2)²
r.c2.sum(r.c1, v2) # r2 = (a0 + a1 + a2)² + (a0 - a1 + a2)²
r.c2.div2() # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2
r.c0.prod(a.c1, a.c2) # r0 = a1 a2
r.c0.double() # r0 = 2 a1 a2
v2.square(a.c2) # v2 = a2²
r.c1 += NonResidue * v2 # r1 = (a0 + a1 + a2)² + β a2²
r.c1 -= r.c0 # r1 = (a0 + a1 + a2)² - 2 a1 a2 + β a2²
r.c1 -= r.c2 # r1 = (a0 + a1 + a2)² - 2 a1 a2 - ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 + β a2²
v0.square(a.c0) # v0 = a0²
r.c0 *= NonResidue # r0 = β 2 a1 a2
r.c0 += v0 # r0 = a0² + β 2 a1 a2
r.c2 -= v0 # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0²
r.c2 -= v2 # r2 = ((a0 + a1 + a2)² + (a0 - a1 + a2)²)/2 - a0² - a2²
func square*(r: var CubicExt, a: CubicExt) {.inline.} =
## Returns r = a²
square_Chung_Hasan_SQR3(r, a)
func prod*(r: var CubicExt, a, b: CubicExt) =
## Returns r = a * b
##
## r MUST not share a buffer with a
# Algorithm is Karatsuba
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}, t{.noInit.}: typeof(r.c0)
v0.prod(a.c0, b.c0)
v1.prod(a.c1, b.c1)
v2.prod(a.c2, b.c2)
# r.c0 = β ((a.c1 + a.c2) * (b.c1 + b.c2) - v1 - v2) + v0
r.c0.sum(a.c1, a.c2)
t.sum(b.c1, b.c2)
r.c0 *= t
r.c0 -= v1
r.c0 -= v2
r.c0 *= NonResidue
r.c0 += v0
# r.c1 = (a.c0 + a.c1) * (b.c0 + b.c1) - v0 - v1 + β v2
r.c1.sum(a.c0, a.c1)
t.sum(b.c0, b.c1)
r.c1 *= t
r.c1 -= v0
r.c1 -= v1
r.c1 += NonResidue * v2
# r.c2 = (a.c0 + a.c2) * (b.c0 + b.c2) - v0 - v2 + v1
r.c2.sum(a.c0, a.c2)
t.sum(b.c0, b.c2)
r.c2 *= t
r.c2 -= v0
r.c2 -= v2
r.c2 += v1
func inv*(r: var CubicExt, a: CubicExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
#
# Algorithm 5.23
#
# Arithmetic of Finite Fields
# Chapter 5 of Guide to Pairing-Based Cryptography
# Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\
# https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields
#
# We optimize for stack usage and use 4 temporaries (+r as temporary)
# instead of 9, because 5 * 2 (𝔽p2) * Bitsize would be:
# - ~2540 bits for BN254
# - ~3810 bits for BLS12-381
var v1 {.noInit.}, v2 {.noInit.}, v3 {.noInit.}: typeof(r.c0)
# A in r0
# A <- a0² - β a1 a2
r.c0.square(a.c0)
v1.prod(a.c1, a.c2)
v1 *= NonResidue
r.c0 -= v1
# B in v1
# B <- β a2² - a0 a1
v1.square(a.c2)
v1 *= NonResidue
v2.prod(a.c0, a.c1)
v1 -= v2
# C in v2
# C <- a1² - a0 a2
v2.square(a.c1)
v3.prod(a.c0, a.c2)
v2 -= v3
# F in v3
# F <- β a1 C + a0 A + β a2 B
r.c1.prod(v1, NonResidue * a.c2)
r.c2.prod(v2, NonResidue * a.c1)
v3.prod(r.c0, a.c0)
v3 += r.c1
v3 += r.c2
let t = v3 # TODO, support aliasing in all primitives
v3.inv(t)
# (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
r.c0 *= v3
r.c1.prod(v1, v3)
r.c2.prod(v2, v3)
func inv*(a: var CubicExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
let t = a
a.inv(t)
func `*=`*(a: var CubicExt, b: CubicExt) {.inline.} =
## In-place multiplication
# On higher extension field like 𝔽p6,
# if `prod` is called on shared in and out buffer, the result is wrong
let t = a
a.prod(t, b)
func conj*(a: var CubicExt) {.inline.} =
## Computes the conjugate in-place
mixin conj, conjneg
a.c0.conj()
a.c1.conjneg()
a.c2.conj()
func conj*(r: var CubicExt, a: CubicExt) {.inline.} =
## Computes the conjugate out-of-place
mixin conj, conjneg
r.c0.conj(a.c0)
r.c1.conjneg(a.c1)
r.c2.conj(a.c2)
func square*(a: var CubicExt) {.inline.} =
## In-place squaring
let t = a
a.square(t)

View File

@ -12,9 +12,7 @@ import
../config/common,
../primitives,
../io/io_bigints,
./tower_common,
./quadratic_extensions,
./cubic_extensions
./extension_fields
# ############################################################
#

View File

@ -0,0 +1,977 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/common,
../primitives,
../arithmetic
# Note: to avoid burdening the Nim compiler, we rely on generic extension
# to complain if the base field procedures don't exist
# No exceptions allowed
{.push raises: [].}
{.push inline.}
# ############################################################
# #
# Extension Fields #
# #
# ############################################################
type
NonResidue* = object
## Non-Residue
##
## Placeholder for the appropriate quadratic, cubic or sectic non-residue
CubicExt* = concept x
## Cubic Extension field concept
type BaseField = auto
x.c0 is BaseField
x.c1 is BaseField
x.c2 is BaseField
QuadraticExt* = concept x
## Quadratic Extension field concept
not(x is CubicExt)
type BaseField = auto
x.c0 is BaseField
x.c1 is BaseField
ExtensionField* = QuadraticExt or CubicExt
# Initialization
# -------------------------------------------------------------------
func setZero*(a: var ExtensionField) =
## Set ``a`` to 0 in the extension field
for field in fields(a):
field.setZero()
func setOne*(a: var ExtensionField) =
## Set ``a`` to 1 in the extension field
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
fA.setOne()
else:
fA.setZero()
func fromBig*(a: var ExtensionField, src: BigInt) =
## Set ``a`` to the bigint value in the extension field
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
fA.fromBig(src)
else:
fA.setZero()
# Comparison
# -------------------------------------------------------------------
func `==`*(a, b: ExtensionField): SecretBool =
## Constant-time equality check
result = CtTrue
for fA, fB in fields(a, b):
result = result and (fA == fB)
func isZero*(a: ExtensionField): SecretBool =
## Constant-time check if zero
result = CtTrue
for fA in fields(a):
result = result and fA.isZero()
func isOne*(a: ExtensionField): SecretBool =
## Constant-time check if one
result = CtTrue
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
result = result and fA.isOne()
else:
result = result and fA.isZero()
func isMinusOne*(a: ExtensionField): SecretBool =
## Constant-time check if -1
result = CtTrue
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
result = result and fA.isMinusOne()
else:
result = result and fA.isZero()
# Copies
# -------------------------------------------------------------------
func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time conditional copy
## If ctl is true: b is copied into a
## if ctl is false: b is not copied and a is unmodified
## Time and memory accesses are the same whether a copy occurs or not
for fA, fB in fields(a, b):
ccopy(fA, fB, ctl)
# Abelian group
# -------------------------------------------------------------------
func neg*(r: var ExtensionField, a: ExtensionField) =
## Field out-of-place negation
for fR, fA in fields(r, a):
fR.neg(fA)
func neg*(a: var ExtensionField) =
## Field in-place negation
for fA in fields(a):
fA.neg()
func `+=`*(a: var ExtensionField, b: ExtensionField) =
## Addition in the extension field
for fA, fB in fields(a, b):
fA += fB
func `-=`*(a: var ExtensionField, b: ExtensionField) =
## Substraction in the extension field
for fA, fB in fields(a, b):
fA -= fB
func double*(r: var ExtensionField, a: ExtensionField) =
## Field out-of-place doubling
for fR, fA in fields(r, a):
fR.double(fA)
func double*(a: var ExtensionField) =
## Field in-place doubling
for fA in fields(a):
fA.double()
func div2*(a: var ExtensionField) =
## Field in-place division by 2
for fA in fields(a):
fA.div2()
func sum*(r: var QuadraticExt, a, b: QuadraticExt) =
## Sum ``a`` and ``b`` into ``r``
r.c0.sum(a.c0, b.c0)
r.c1.sum(a.c1, b.c1)
func sum*(r: var CubicExt, a, b: CubicExt) =
## Sum ``a`` and ``b`` into ``r``
r.c0.sum(a.c0, b.c0)
r.c1.sum(a.c1, b.c1)
r.c2.sum(a.c2, b.c2)
func diff*(r: var QuadraticExt, a, b: QuadraticExt) =
## Diff ``a`` and ``b`` into ``r``
r.c0.diff(a.c0, b.c0)
r.c1.diff(a.c1, b.c1)
func diff*(r: var CubicExt, a, b: CubicExt) =
## Diff ``a`` and ``b`` into ``r``
r.c0.diff(a.c0, b.c0)
r.c1.diff(a.c1, b.c1)
r.c2.diff(a.c2, b.c2)
func conj*(a: var QuadraticExt) =
## Computes the conjugate in-place
a.c1.neg()
func conj*(r: var QuadraticExt, a: QuadraticExt) =
## Computes the conjugate out-of-place
r.c0 = a.c0
r.c1.neg(a.c1)
func conjneg*(a: var QuadraticExt) =
## Computes the negated conjugate in-place
a.c0.neg()
func conjneg*(r: var QuadraticExt, a: QuadraticExt) =
## Computes the negated conjugate out-of-place
r.c0.neg(a.c0)
r.c1 = a.c1
func conj*(a: var CubicExt) =
## Computes the conjugate in-place
a.c0.conj()
a.c1.conjneg()
a.c2.conj()
func conj*(r: var CubicExt, a: CubicExt) =
## Computes the conjugate out-of-place
r.c0.conj(a.c0)
r.c1.conjneg(a.c1)
r.c2.conj(a.c2)
# Conditional arithmetic
# -------------------------------------------------------------------
func cneg*(a: var ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional negation
## Only negate if ctl is true
for fA in fields(a):
fA.cneg(ctl)
func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional addition
for fA, fB in fields(a, b):
fA.cadd(fB, ctl)
func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional substraction
for fA, fB in fields(a, b):
fA.csub(fB, ctl)
# Multiplication by a small integer known at compile-time
# -------------------------------------------------------------------
func `*=`*(a: var ExtensionField, b: static int) =
## Multiplication by a small integer known at compile-time
const negate = b < 0
const b = if negate: -b
else: b
when negate:
a.neg(a)
when b == 0:
a.setZero()
elif b == 1:
return
elif b == 2:
a.double()
elif b == 3:
let t1 = a
a.double()
a += t1
elif b == 4:
a.double()
a.double()
elif b == 5:
let t1 = a
a.double()
a.double()
a += t1
elif b == 6:
a.double()
let t2 = a
a.double() # 4
a += t2
elif b == 7:
let t1 = a
a.double()
let t2 = a
a.double() # 4
a += t2
a += t1
elif b == 8:
a.double()
a.double()
a.double()
elif b == 9:
let t1 = a
a.double()
a.double()
a.double() # 8
a += t1
elif b == 10:
a.double()
let t2 = a
a.double()
a.double() # 8
a += t2
elif b == 11:
let t1 = a
a.double()
let t2 = a
a.double()
a.double() # 8
a += t2
a += t1
elif b == 12:
a.double()
a.double() # 4
let t4 = a
a.double() # 8
a += t4
else:
{.error: "Multiplication by this small int not implemented".}
func prod*(r: var ExtensionField, a: ExtensionField, b: static int) =
## Multiplication by a small integer known at compile-time
const negate = b < 0
const b = if negate: -b
else: b
when negate:
r.neg(a)
else:
r = a
r *= b
{.pop.} # inline
# ############################################################
# #
# Quadratic extensions #
# #
# ############################################################
# Commutative ring implementation for complex quadratic extension fields
# ----------------------------------------------------------------------
func square_complex(r: var QuadraticExt, a: QuadraticExt) =
## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r``
## ``r`` is initialized/overwritten
##
## Requires a complex extension field
# (c0, c1)² => (c0 + c1𝑖
# => c0² + 2 c0 c1𝑖 + (c1𝑖
# => c0²-c1² + 2 c0 c1𝑖
# => (c0²-c1², 2 c0 c1)
# or
# => ((c0-c1)(c0+c1), 2 c0 c1)
# => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1)
#
# Costs (naive implementation)
# - 1 Multiplication 𝔽p
# - 2 Squarings 𝔽p
# - 1 Doubling 𝔽p
# - 1 Substraction 𝔽p
# Stack: 4 * ModulusBitSize (4x 𝔽p element)
#
# Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space)
#
# - 2 Multiplications 𝔽p
# - 1 Addition 𝔽p
# - 1 Doubling 𝔽p
# - 1 Substraction 𝔽p
#
# To handle aliasing between r and a, we need
# r to be used only when a is unneeded
# so we can't use r fields as a temporary
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
v0.diff(a.c0, a.c1) # v0 = c0 - c1 [1 Sub]
v1.sum(a.c0, a.c1) # v1 = c0 + c1 [1 Dbl, 1 Sub]
r.c1.prod(a.c0, a.c1) # r.c1 = c0 c1 [1 Mul, 1 Dbl, 1 Sub]
# aliasing: a unneeded now
r.c1.double() # r.c1 = 2 c0 c1 [1 Mul, 2 Dbl, 1 Sub]
r.c0.prod(v0, v1) # r.c0 = (c0 + c1)(c0 - c1) [2 Mul, 2 Dbl, 1 Sub]
func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) =
## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
## ``r`` is initialized/overwritten
##
## Requires a complex extension field
# (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖)
# => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖
#
# In Fp, multiplication has cost O(n²) with n the number of limbs
# while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
# and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
#
# Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
# where addition/substraction are significantly cheaper than multiplication
#
# So we always reframe the imaginary part using Karatsuba approach to save a multiplication
# (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
#
# Costs (naive implementation)
# - 4 Multiplications 𝔽p
# - 1 Addition 𝔽p
# - 1 Substraction 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries)
#
# Costs (Karatsuba)
# - 3 Multiplications 𝔽p
# - 3 Substraction 𝔽p (2 are fused)
# - 2 Addition 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
# TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down)
# for seemingly no reason
when false: # Single-width implementation - BLS12-381
# Clang 348 cycles on i9-9980XE @3.9 GHz
var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
a0b0.prod(a.c0, b.c0) # [1 Mul]
a1b1.prod(a.c1, b.c1) # [2 Mul]
r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add]
r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add]
# aliasing: a and b unneeded now
r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary
r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub]
r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub]
r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
else: # Double-width implementation with lazy reduction
# Clang 341 cycles on i9-9980XE @3.9 GHz
var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0))
var d {.noInit.}: doubleWidth(typeof(r.c0))
const msbSet = r.c0.typeof.canUseNoCarryMontyMul()
a0b0.mulNoReduce(a.c0, b.c0) # 44 cycles - cumul 44
a1b1.mulNoReduce(a.c1, b.c1) # 44 cycles - cumul 88
when msbSet:
r.c0.sum(a.c0, a.c1)
r.c1.sum(b.c0, b.c1)
else:
r.c0.sumNoReduce(a.c0, a.c1) # 5 cycles - cumul 93
r.c1.sumNoReduce(b.c0, b.c1) # 5 cycles - cumul 98
# aliasing: a and b unneeded now
d.mulNoReduce(r.c0, r.c1) # 44 cycles - cumul 142
when msbSet:
d -= a0b0
d -= a1b1
else:
d.diffNoReduce(d, a0b0) # 11 cycles - cumul 153
d.diffNoReduce(d, a1b1) # 11 cycles - cumul 164
a0b0.diff(a0b0, a1b1) # 19 cycles - cumul 183
r.c0.reduce(a0b0) # 50 cycles - cumul 233
r.c1.reduce(d) # 50 cycles - cumul 288
# Single-width [3 Mul, 2 Add, 3 Sub]
# 3*88 + 2*14 + 3*14 = 334 theoretical cycles
# 348 measured
# Double-Width
# 288 theoretical cycles
# 329 measured
# Unexplained 40 cycles diff between theo and measured
# and unexplained 30 cycles between Clang and GCC
# - Function calls?
# - push/pop stack?
func mul_sparse_complex_by_0y(
r: var QuadraticExt, a: QuadraticExt,
sparseB: auto) =
## Multiply `a` by `b` with sparse coordinates (0, y)
## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖]
#
# r0 = a0 b0 - a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b0 = 0, hence
#
# r0 = - a1 b1
# r1 = (a0 + a1) b1 - a1 b1 = a0 b1
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
when typeof(sparseB) is typeof(a):
template b(): untyped = sparseB.c1
elif typeof(sparseB) is typeof(a.c0):
template b(): untyped = sparseB
else:
{.error: "sparseB type is " & $typeof(sparseB) &
" which does not match with either a (" & $typeof(a) &
") or a.c0 (" & $typeof(a.c0) & ")".}
var t{.noInit.}: typeof(a.c0)
t.prod(a.c1, b)
r.c1.prod(a.c0, b)
r.c0.neg(t)
# Commutative ring implementation for generic quadratic extension fields
# ----------------------------------------------------------------------
func square_generic(r: var QuadraticExt, a: QuadraticExt) =
## Return a² in ``r``
## ``r`` is initialized/overwritten
# Algorithm (with β the non-residue in the base field)
#
# (c0, c1)² => (c0 + c1 w)²
# => c0² + 2 c0 c1 w + c1²w²
# => c0² + β c1² + 2 c0 c1 w
# => (c0² + β c1², 2 c0 c1)
# We have 2 squarings and 1 multiplication in the base field
# which are significantly more costly than additions.
# For example when construction 𝔽p12 from 𝔽p6:
# - 4 limbs like BN254: multiplication is 20x slower than addition/substraction
# - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction
#
# We can save operations with one of the following expressions
# of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate
#
# Alternative 1:
# c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1
#
# Alternative 2:
# c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
mixin prod
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
# v1 <- (c0 + β c1)
v1.prod(a.c1, NonResidue)
v1 += a.c0
# v0 <- (c0 + c1)(c0 + β c1)
v0.sum(a.c0, a.c1)
v0 *= v1
# v1 <- c0 c1
v1.prod(a.c0, a.c1)
# aliasing: a unneeded now
# r0 = (c0 + c1)(c0 + β c1) - c0c1
v0 -= v1
# r1 = 2 c0c1
r.c1.double(v1)
# r0 = (c0 + c1)(c0 + β c1) - c0c1 - β c0c1
v1 *= NonResidue
r.c0.diff(v0, v1)
func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
## Returns r = a * b
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
mixin prod
var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: typeof(r.c0)
# v2 <- (a0 + a1)(b0 + b1)
v0.sum(a.c0, a.c1)
v1.sum(b.c0, b.c1)
v2.prod(v0, v1)
# v0 <- a0 b0
# v1 <- a1 b1
v0.prod(a.c0, b.c0)
v1.prod(a.c1, b.c1)
# aliasing: a and b unneeded now
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
r.c1.diff(v2, v1)
r.c1 -= v0
# r0 <- a0 b0 + β a1 b1
v1 *= NonResidue
r.c0.sum(v0, v1)
func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) =
## Multiply `a` by `b` with sparse coordinates (x, 0)
## On a generic quadratic extension field
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b1 = 0, hence
#
# r0 = a0 b0
# r1 = (a0 + a1) b0 - a0 b0 = a1 b0
mixin prod
template b(): untyped = sparseB
r.c0.prod(a.c0, b.c0)
r.c1.prod(a.c1, b.c0)
func mul_sparse_generic_by_0y(
r: var QuadraticExt, a: QuadraticExt,
sparseB: auto) =
## Multiply `a` by `b` with sparse coordinates (0, y)
## On a generic quadratic extension field
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b0 = 0, hence
#
# r0 = β a1 b1
# r1 = (a0 + a1) b1 - a1 b1 = a0 b1
when typeof(sparseB) is typeof(a):
template b(): untyped = sparseB.c1
elif typeof(sparseB) is typeof(a.c0):
template b(): untyped = sparseB
else:
{.error: "sparseB type is " & $typeof(sparseB) &
" which does not match with either a (" & $typeof(a) &
") or a.c0 (" & $typeof(a.c0) & ")".}
var t{.noInit.}: typeof(a.c0)
t.prod(a.c1, b)
t *= NonResidue
r.c1.prod(a.c0, b)
# aliasing: a unneeded now
r.c0 = t
func mul_sparse_generic_by_0y(
r: var QuadraticExt, a: QuadraticExt,
sparseB: static int) =
## Multiply `a` by `b` with sparse coordinates (0, y)
## On a generic quadratic extension field
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b0 = 0, hence
#
# r0 = β a1 b1
# r1 = (a0 + a1) b1 - a1 b1 = a0 b1
template b(): untyped = sparseB
var t{.noInit.}: typeof(a.c0)
t.prod(a.c1, b)
t *= NonResidue
r.c1.prod(a.c0, b)
# aliasing: a unneeded now
r.c0 = t
func invImpl(r: var QuadraticExt, a: QuadraticExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
#
# Algorithm:
#
# 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w)
# <=> (a0 - a1 w) / (a0² - a1² w²)
# with w being our coordinate system and β the quadratic non-residue
# we have w² = β
# So the inverse is (a0 - a1 w) / (a0² - β a1²)
mixin fromComplexExtension
# [2 Sqr, 1 Add]
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
v0.square(a.c0)
v1.square(a.c1)
when r.fromComplexExtension():
v0 += v1
else:
v1 *= NonResidue
v0 -= v1 # v0 = a0² - β a1² (the norm / squared magnitude of a)
# [1 Inv, 2 Sqr, 1 Add]
v1.inv(v0) # v1 = 1 / (a0² - β a1²)
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
r.c0.prod(a.c0, v1) # r0 = a0 / (a0² - β a1²)
v0.neg(v1) # v0 = -1 / (a0² - β a1²)
r.c1.prod(a.c1, v0) # r1 = -a1 / (a0² - β a1²)
# Exported quadratic symbols
# -------------------------------------------------------------------
{.push inline.}
func square*(r: var QuadraticExt, a: QuadraticExt) =
mixin fromComplexExtension
when r.fromComplexExtension():
r.square_complex(a)
else:
r.square_generic(a)
func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
mixin fromComplexExtension
when r.fromComplexExtension():
r.prod_complex(a, b)
else:
r.prod_generic(a, b)
func inv*(r: var QuadraticExt, a: QuadraticExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
r.invImpl(a)
func inv*(a: var QuadraticExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.invImpl(a)
func `*=`*(a: var QuadraticExt, b: QuadraticExt) =
## In-place multiplication
a.prod(a, b)
func square*(a: var QuadraticExt) =
## In-place squaring
a.square(a)
func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: auto) =
## Sparse multiplication
mixin fromComplexExtension
when a.fromComplexExtension():
r.mul_sparse_complex_by_0y(a, sparseB)
else:
r.mul_sparse_generic_by_0y(a, sparseB)
func mul_sparse_by_0y*(r: var QuadraticExt, a: QuadraticExt, sparseB: static int) =
## Sparse multiplication
mixin fromComplexExtension
when a.fromComplexExtension():
{.error: "Not implemented.".}
else:
r.mul_sparse_generic_by_0y(a, sparseB)
func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: auto) =
## Sparse in-place multiplication
a.mul_sparse_by_0y(a, sparseB)
func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) =
## Sparse in-place multiplication
a.mul_sparse_generic_by_x0(a, sparseB)
{.pop.} # inline
# ############################################################
# #
# Cubic extensions #
# #
# ############################################################
# Commutative ring implementation for Cubic Extension Fields
# -------------------------------------------------------------------
# Cubic extensions can use specific squaring procedures
# beyond Schoolbook and Karatsuba:
# - Chung-Hasan (3 different algorithms)
# - Toom-Cook-3x
#
# Chung-Hasan papers
# http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf
# https://www.lirmm.fr/arith18/papers/Chung-Squaring.pdf
#
# The papers focus on polynomial squaring, they have been adapted
# to towered extension fields with the relevant costs in
#
# - Multiplication and Squaring on Pairing-Friendly Fields
# Augusto Jun Devegili and Colm Ó hÉigeartaigh and Michael Scott and Ricardo Dahab, 2006
# https://eprint.iacr.org/2006/471
#
# Costs in the underlying field
# M: Mul, S: Square, A: Add/Sub, B: Mul by non-residue
#
# | Method | > Linear | Linear |
# |-------------|----------|-------------------|
# | Schoolbook | 3M + 3S | 6A + 2B |
# | Karatsuba | 6S | 13A + 2B |
# | Tom-Cook-3x | 5S | 33A + 2B |
# | CH-SQR1 | 3M + 2S | 11A + 2B |
# | CH-SQR2 | 2M + 3S | 10A + 2B |
# | CH-SQR3 | 1M + 4S | 11A + 2B + 1 Div2 |
# | CH-SQR3x | 1M + 4S | 14A + 2B |
func square_Chung_Hasan_SQR2(r: var CubicExt, a: CubicExt) {.used.}=
## Returns r = a²
mixin prod, square, sum
var s0{.noInit.}, m01{.noInit.}, m12{.noInit.}: typeof(r.c0)
# precomputations that use a
m01.prod(a.c0, a.c1)
m01.double()
m12.prod(a.c1, a.c2)
m12.double()
s0.square(a.c2)
# aliasing: a₂ unused
# r₂ = (a₀ - a₁ + a₂)²
r.c2.sum(a.c2, a.c0)
r.c2 -= a.c1
r.c2.square()
# aliasing, a almost unneeded now
# r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₂²
r.c2 += m01
r.c2 += m12
r.c2 -= s0
# r₁ = 2a₀a₁ + β a₂²
r.c1.prod(s0, NonResidue)
r.c1 += m01
# r₂ = (a₀ - a₁ + a₂)² + 2a₀a₁ + 2a₁a₂ - a₀² - a₂²
s0.square(a.c0)
r.c2 -= s0
# r₀ = a₀² + β 2a₁a₂
r.c0.prod(m12, NonResidue)
r.c0 += s0
func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
## Returns r = a²
mixin prod, square, sum
var s0{.noInit.}, t{.noInit.}, m12{.noInit.}: typeof(r.c0)
# s₀ = (a₀ + a₁ + a₂)²
# t = ((a₀ + a₁ + a₂)² + (a₀ - a₁ + a₂)²) / 2
s0.sum(a.c0, a.c2)
t.diff(s0, a.c1)
s0 += a.c1
s0.square()
t.square()
t += s0
t.div2()
# m12 = 2a₁a₂ and r₁ = a₂²
# then a₁ and a₂ are unused for aliasing
m12.prod(a.c1, a.c2)
m12.double()
r.c1.square(a.c2) # r₁ = a₂²
r.c2.diff(t, r.c1) # r₂ = t - a₂²
r.c1 *= NonResidue # r₁ = β a₂²
r.c1 += s0 # r₁ = (a₀ + a₁ + a₂)² + β a₂²
r.c1 -= m12 # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ + β a₂²
r.c1 -= t # r₁ = (a₀ + a₁ + a₂)² - 2a₁a₂ - t + β a₂²
s0.square(a.c0)
# aliasing: a₀ unused
r.c2 -= s0
r.c0.prod(m12, NonResidue)
r.c0 += s0
func prodImpl(r: var CubicExt, a, b: CubicExt) =
## Returns r = a * b # Algorithm is Karatsuba
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(r.c0)
var t0{.noInit.}, t1{.noInit.}, t2{.noInit.}: typeof(r.c0)
v0.prod(a.c0, b.c0)
v1.prod(a.c1, b.c1)
v2.prod(a.c2, b.c2)
# r₀ = β ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
t0.sum(a.c1, a.c2)
t1.sum(b.c1, b.c2)
t0 *= t1
t0 -= v1
t0 -= v2
t0 *= NonResidue
# r₀ = t₀ + v₀ at the end to handle aliasing
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
t1.sum(a.c0, a.c1)
t2.sum(b.c0, b.c1)
r.c1.prod(t1, t2)
r.c1 -= v0
r.c1 -= v1
t1.prod(v2, NonResidue)
r.c1 += t1
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
t1.sum(a.c0, a.c2)
t2.sum(b.c0, b.c2)
r.c2.prod(t1, t2)
r.c2 -= v0
r.c2 -= v2
r.c2 += v1
# Finish r₀
r.c0.sum(t0, v0)
func invImpl(r: var CubicExt, a: CubicExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
#
# Algorithm 5.23
#
# Arithmetic of Finite Fields
# Chapter 5 of Guide to Pairing-Based Cryptography
# Jean Luc Beuchat, Luis J. Dominguez Perez, Sylvain Duquesne, Nadia El Mrabet, Laura Fuentes-Castañeda, Francisco Rodríguez-Henríquez, 2017\
# https://www.researchgate.net/publication/319538235_Arithmetic_of_Finite_Fields
#
# We optimize for stack usage and use 4 temporaries
# instead of 9, because 4 * 2 (𝔽p2) * Bitsize would be:
# - ~2032 bits for BN254
# - ~3048 bits for BLS12-381
var A {.noInit.}, B {.noInit.}, C {.noInit.}: typeof(r.c0)
var t {.noInit.}: typeof(r.c0)
# A <- a₀² - β a₁ a₂
A.square(a.c0)
t.prod(a.c1, a.c2)
t *= NonResidue
A -= t
# B <- β a₂² - a₀ a₁
B.square(a.c2)
B *= NonResidue
t.prod(a.c0, a.c1)
B -= t
# C <- a₁² - a₀ a₂
C.square(a.c1)
t.prod(a.c0, a.c2)
C -= t
# F in t
# F <- β a₁ C + a₀ A + β a₂ B
t.prod(a.c1, C)
r.c2.prod(a.c2, B) # aliasing: last use of a₂, destroy r₂
t += r.c2
t *= NonResidue
r.c0.prod(a.c0, A) # aliasing: last use of a₀, destroy r₀
t += r.c0
t.inv()
# (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
r.c0.prod(A, t)
r.c1.prod(B, t)
r.c2.prod(C, t)
# Exported cubic symbols
# -------------------------------------------------------------------
{.push inline.}
func square*(r: var CubicExt, a: CubicExt) =
## Returns r = a²
square_Chung_Hasan_SQR3(r, a)
func square*(a: var CubicExt) =
## In-place squaring
a.square(a)
func prod*(r: var CubicExt, a, b: CubicExt) =
## In-place multiplication
r.prodImpl(a, b)
func `*=`*(a: var CubicExt, b: CubicExt) =
## In-place multiplication
a.prodImpl(a, b)
func inv*(r: var CubicExt, a: CubicExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
r.invImpl(a)
func inv*(a: var CubicExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.invImpl(a)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -1,365 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../arithmetic,
../primitives,
./tower_common
# Commutative ring implementation for complex extension fields
# -------------------------------------------------------------------
func square_complex(r: var QuadraticExt, a: QuadraticExt) =
## Return a² in 𝔽p2 = 𝔽p[𝑖] in ``r``
## ``r`` is initialized/overwritten
##
## Requires a complex extension field
# (c0, c1)² => (c0 + c1𝑖
# => c0² + 2 c0 c1𝑖 + (c1𝑖
# => c0²-c1² + 2 c0 c1𝑖
# => (c0²-c1², 2 c0 c1)
# or
# => ((c0-c1)(c0+c1), 2 c0 c1)
# => ((c0-c1)(c0-c1 + 2 c1), c0 * 2 c1)
#
# Costs (naive implementation)
# - 1 Multiplication 𝔽p
# - 2 Squarings 𝔽p
# - 1 Doubling 𝔽p
# - 1 Substraction 𝔽p
# Stack: 4 * ModulusBitSize (4x 𝔽p element)
#
# Or (with 1 less Mul/Squaring at the cost of 1 addition and extra 2 𝔽p stack space)
#
# - 2 Multiplications 𝔽p
# - 1 Addition 𝔽p
# - 1 Doubling 𝔽p
# - 1 Substraction 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 1 named temporaries + 1 in-place multiplication temporary)
# as in-place multiplications require a (shared) internal temporary
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
var c0mc1 {.noInit.}: typeof(r.c0)
c0mc1.diff(a.c0, a.c1) # c0mc1 = c0 - c1 [1 Sub]
r.c1.double(a.c1) # result.c1 = 2 c1 [1 Dbl, 1 Sub]
r.c0.sum(c0mc1, r.c1) # result.c0 = c0 - c1 + 2 c1 [1 Add, 1 Dbl, 1 Sub]
r.c0 *= c0mc1 # result.c0 = (c0 + c1)(c0 - c1) = c0² - c1² [1 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary
r.c1 *= a.c0 # result.c1 = 2 c1 c0 [2 Mul, 1 Add, 1 Dbl, 1 Sub] - 𝔽p temporary
func prod_complex(r: var QuadraticExt, a, b: QuadraticExt) =
## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
## ``r`` is initialized/overwritten
##
## Requires a complex extension field
# (a0, a1) (b0, b1) => (a0 + a1𝑖) (b0 + b1𝑖)
# => (a0 b0 - a1 b1) + (a0 b1 + a1 b0) 𝑖
#
# In Fp, multiplication has cost O(n²) with n the number of limbs
# while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
# and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
#
# Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
# where addition/substraction are significantly cheaper than multiplication
#
# So we always reframe the imaginary part using Karatsuba approach to save a multiplication
# (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
#
# Costs (naive implementation)
# - 4 Multiplications 𝔽p
# - 1 Addition 𝔽p
# - 1 Substraction 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries)
#
# Costs (Karatsuba)
# - 3 Multiplications 𝔽p
# - 3 Substraction 𝔽p (2 are fused)
# - 2 Addition 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
# TODO: GCC is adding an unexplainable 30 cycles tax to this function (~10% slow down)
# for seemingly no reason
when false: # Single-width implementation - BLS12-381
# Clang 348 cycles on i9-9980XE @3.9 GHz
var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
a0b0.prod(a.c0, b.c0) # [1 Mul]
a1b1.prod(a.c1, b.c1) # [2 Mul]
r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add]
r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add]
r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary
r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub]
r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub]
r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
else: # Double-width implementation with lazy reduction
# Clang 341 cycles on i9-9980XE @3.9 GHz
var a0b0 {.noInit.}, a1b1 {.noInit.}: doubleWidth(typeof(r.c0))
var d {.noInit.}: doubleWidth(typeof(r.c0))
const msbSet = r.c0.typeof.canUseNoCarryMontyMul()
a0b0.mulNoReduce(a.c0, b.c0) # 44 cycles - cumul 44
a1b1.mulNoReduce(a.c1, b.c1) # 44 cycles - cumul 88
when msbSet:
r.c0.sum(a.c0, a.c1)
r.c1.sum(b.c0, b.c1)
else:
r.c0.sumNoReduce(a.c0, a.c1) # 5 cycles - cumul 93
r.c1.sumNoReduce(b.c0, b.c1) # 5 cycles - cumul 98
d.mulNoReduce(r.c0, r.c1) # 44 cycles - cumul 142
when msbSet:
d -= a0b0
d -= a1b1
else:
d.diffNoReduce(d, a0b0) # 11 cycles - cumul 153
d.diffNoReduce(d, a1b1) # 11 cycles - cumul 164
a0b0.diff(a0b0, a1b1) # 19 cycles - cumul 183
r.c0.reduce(a0b0) # 50 cycles - cumul 233
r.c1.reduce(d) # 50 cycles - cumul 288
# Single-width [3 Mul, 2 Add, 3 Sub]
# 3*88 + 2*14 + 3*14 = 334 theoretical cycles
# 348 measured
# Double-Width
# 288 theoretical cycles
# 329 measured
# Unexplained 40 cycles diff between theo and measured
# and unexplained 30 cycles between Clang and GCC
# - Function calls?
# - push/pop stack?
func mul_sparse_complex_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) =
## Multiply `a` by `b` with sparse coordinates (0, y)
## On a complex quadratic extension field 𝔽p2 = 𝔽p[𝑖]
#
# r0 = a0 b0 - a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b0 = 0, hence
#
# r0 = - a1 b1
# r1 = (a0 + a1) b1 - a1 b1 = a0 b1
mixin fromComplexExtension
static: doAssert r.fromComplexExtension()
template b(): untyped = sparseB
r.c0.prod(a.c1, b.c1)
r.c0.neg(r.c0)
r.c1.prod(a.c0, b.c1)
# Commutative ring implementation for generic quadratic extension fields
# -------------------------------------------------------------------
func square_generic(r: var QuadraticExt, a: QuadraticExt) =
## Return a² in ``r``
## ``r`` is initialized/overwritten
# Algorithm (with β the non-residue in the base field)
#
# (c0, c1)² => (c0 + c1 w)²
# => c0² + 2 c0 c1 w + c1²w²
# => c0² + β c1² + 2 c0 c1 w
# => (c0² + β c1², 2 c0 c1)
# We have 2 squarings and 1 multiplication in the base field
# which are significantly more costly than additions.
# For example when construction 𝔽p12 from 𝔽p6:
# - 4 limbs like BN254: multiplication is 20x slower than addition/substraction
# - 6 limbs like BLS12-381: multiplication is 28x slower than addition/substraction
#
# We can save operations with one of the following expressions
# of c0² + β c1² and noticing that c0c1 is already computed for the "y" coordinate
#
# Alternative 1:
# c0² + β c1² <=> (c0 - c1)(c0 - β c1) + β c0c1 + c0c1
#
# Alternative 2:
# c0² + β c1² <=> (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
mixin prod
# r0 <- (c0 + c1)(c0 + β c1)
r.c0.sum(a.c0, a.c1)
r.c1.sum(a.c0, NonResidue * a.c1)
r.c0 *= r.c1
# r1 <- c0 c1
r.c1.prod(a.c0, a.c1)
# r0 = (c0 + c1)(c0 + β c1) - β c0c1 - c0c1
r.c0 -= NonResidue * r.c1
r.c0 -= r.c1
# r1 = 2 c0c1
r.c1.double()
func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
## Returns r = a * b
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
mixin prod
var t {.noInit.}: typeof(r.c0)
# r1 <- (a0 + a1)(b0 + b1)
r.c0.sum(a.c0, a.c1)
t.sum(b.c0, b.c1)
r.c1.prod(r.c0, t)
# r0 <- a0 b0
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
r.c0.prod(a.c0, b.c0)
t.prod(a.c1, b.c1)
r.c1 -= r.c0
r.c1 -= t
# r0 <- a0 b0 + β a1 b1
r.c0 += NonResidue * t
func mul_sparse_generic_by_x0(r: var QuadraticExt, a, sparseB: QuadraticExt) =
## Multiply `a` by `b` with sparse coordinates (x, 0)
## On a generic quadratic extension field
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b1 = 0, hence
#
# r0 = a0 b0
# r1 = (a0 + a1) b0 - a0 b0 = a1 b0
mixin prod
template b(): untyped = sparseB
r.c0.prod(a.c0, b.c0)
r.c1.prod(a.c1, b.c0)
func mul_sparse_generic_by_0y(r: var QuadraticExt, a, sparseB: QuadraticExt) =
## Multiply `a` by `b` with sparse coordinates (0, y)
## On a generic quadratic extension field
# Algorithm (with β the non-residue in the base field)
#
# r0 = a0 b0 + β a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# with b0 = 0, hence
#
# r0 = β a1 b1
# r1 = (a0 + a1) b1 - a1 b1 = a0 b1
template b(): untyped = sparseB
r.c0.prod(a.c1, b.c1)
r.c0 *= NonResidue
r.c1.prod(a.c0, b.c1)
# Exported symbols
# -------------------------------------------------------------------
func conj*(a: var QuadraticExt) {.inline.} =
## Computes the conjugate in-place
a.c1.neg()
func conj*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
## Computes the conjugate out-of-place
r.c0 = a.c0
r.c1.neg(a.c1)
func conjneg*(a: var QuadraticExt) {.inline.} =
## Computes the negated conjugate in-place
a.c0.neg()
func conjneg*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
## Computes the negated conjugate out-of-place
r.c0.neg(a.c0)
r.c1 = a.c1
func square*(r: var QuadraticExt, a: QuadraticExt) {.inline.} =
mixin fromComplexExtension
when r.fromComplexExtension():
r.square_complex(a)
else:
r.square_generic(a)
func prod*(r: var QuadraticExt, a, b: QuadraticExt) {.inline.} =
mixin fromComplexExtension
when r.fromComplexExtension():
r.prod_complex(a, b)
else:
r.prod_generic(a, b)
func inv*(r: var QuadraticExt, a: QuadraticExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
#
# Algorithm:
#
# 1 / (a0 + a1 w) <=> (a0 - a1 w) / (a0 + a1 w)(a0 - a1 w)
# <=> (a0 - a1 w) / (a0² - a1² w²)
# with w being our coordinate system and β the quadratic non-residue
# we have w² = β
# So the inverse is (a0 - a1 w) / (a0² - β a1²)
mixin fromComplexExtension
# [2 Sqr, 1 Add]
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
v0.square(a.c0)
v1.square(a.c1)
when r.fromComplexExtension():
v0 += v1
else:
v0 -= NonResidue * v1 # v0 = a0² - β a1² (the norm / squared magnitude of a)
# [1 Inv, 2 Sqr, 1 Add]
v1.inv(v0) # v1 = 1 / (a0² - β a1²)
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
r.c0.prod(a.c0, v1) # r0 = a0 / (a0² - β a1²)
v0.neg(v1) # v0 = -1 / (a0² - β a1²)
r.c1.prod(a.c1, v0) # r1 = -a1 / (a0² - β a1²)
func inv*(a: var QuadraticExt) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.inv(a)
func `*=`*(a: var QuadraticExt, b: QuadraticExt) {.inline.} =
## In-place multiplication
# On higher extension field like 𝔽p12,
# if `prod` is called on shared in and out buffer, the result is wrong
let t = a
a.prod(t, b)
func square*(a: var QuadraticExt) {.inline.} =
let t = a
a.square(t)
func mul_sparse_by_0y*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} =
## Sparse in-place multiplication
mixin fromComplexExtension
when a.fromComplexExtension():
let t = a
a.mul_sparse_complex_by_0y(t, sparseB)
else:
let t = a
a.mul_sparse_generic_by_0y(t, sparseB)
func mul_sparse_by_x0*(a: var QuadraticExt, sparseB: QuadraticExt) {.inline.} =
## Sparse in-place multiplication
let t = a
a.mul_sparse_generic_by_x0(t, sparseB)

View File

@ -1,263 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/common,
../primitives,
../arithmetic
# Note: to avoid burdening the Nim compiler, we rely on generic extension
# to complain if the base field procedures don't exist
# Common type definition
# -------------------------------------------------------------------
type
NonResidue* = object
## Non-Residue
##
## Placeholder for the appropriate quadratic, cubic or sectic non-residue
CubicExt* = concept x
## Cubic Extension field concept
type BaseField = auto
x.c0 is BaseField
x.c1 is BaseField
x.c2 is BaseField
QuadraticExt* = concept x
## Quadratic Extension field concept
not(x is CubicExt)
type BaseField = auto
x.c0 is BaseField
x.c1 is BaseField
ExtensionField* = QuadraticExt or CubicExt
# Initialization
# -------------------------------------------------------------------
func setZero*(a: var ExtensionField) =
## Set ``a`` to 0 in the extension field
for field in fields(a):
field.setZero()
func setOne*(a: var ExtensionField) =
## Set ``a`` to 1 in the extension field
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
fA.setOne()
else:
fA.setZero()
func fromBig*(a: var ExtensionField, src: BigInt) =
## Set ``a`` to the bigint value in the extension field
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
fA.fromBig(src)
else:
fA.setZero()
# Comparison
# -------------------------------------------------------------------
func `==`*(a, b: ExtensionField): SecretBool =
## Constant-time equality check
result = CtTrue
for fA, fB in fields(a, b):
result = result and (fA == fB)
func isZero*(a: ExtensionField): SecretBool =
## Constant-time check if zero
result = CtTrue
for fA in fields(a):
result = result and fA.isZero()
func isOne*(a: ExtensionField): SecretBool =
## Constant-time check if one
result = CtTrue
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
result = result and fA.isOne()
else:
result = result and fA.isZero()
func isMinusOne*(a: ExtensionField): SecretBool =
## Constant-time check if -1
result = CtTrue
for fieldName, fA in fieldPairs(a):
when fieldName == "c0":
result = result and fA.isMinusOne()
else:
result = result and fA.isZero()
# Copies
# -------------------------------------------------------------------
func ccopy*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time conditional copy
## If ctl is true: b is copied into a
## if ctl is false: b is not copied and a is unmodified
## Time and memory accesses are the same whether a copy occurs or not
for fA, fB in fields(a, b):
ccopy(fA, fB, ctl)
# Abelian group
# -------------------------------------------------------------------
func neg*(r: var ExtensionField, a: ExtensionField) =
## Field out-of-place negation
for fR, fA in fields(r, a):
fR.neg(fA)
func neg*(a: var ExtensionField) =
## Field in-place negation
for fA in fields(a):
fA.neg()
func `+=`*(a: var ExtensionField, b: ExtensionField) =
## Addition in the extension field
for fA, fB in fields(a, b):
fA += fB
func `-=`*(a: var ExtensionField, b: ExtensionField) =
## Substraction in the extension field
for fA, fB in fields(a, b):
fA -= fB
func double*(r: var ExtensionField, a: ExtensionField) =
## Field out-of-place doubling
for fR, fA in fields(r, a):
fR.double(fA)
func double*(a: var ExtensionField) =
## Field in-place doubling
for fA in fields(a):
fA.double()
func div2*(a: var ExtensionField) =
## Field in-place division by 2
for fA in fields(a):
fA.div2()
func sum*(r: var QuadraticExt, a, b: QuadraticExt) =
## Sum ``a`` and ``b`` into ``r``
r.c0.sum(a.c0, b.c0)
r.c1.sum(a.c1, b.c1)
func sum*(r: var CubicExt, a, b: CubicExt) =
## Sum ``a`` and ``b`` into ``r``
r.c0.sum(a.c0, b.c0)
r.c1.sum(a.c1, b.c1)
r.c2.sum(a.c2, b.c2)
func diff*(r: var QuadraticExt, a, b: QuadraticExt) =
## Diff ``a`` and ``b`` into ``r``
r.c0.diff(a.c0, b.c0)
r.c1.diff(a.c1, b.c1)
func diff*(r: var CubicExt, a, b: CubicExt) =
## Diff ``a`` and ``b`` into ``r``
r.c0.diff(a.c0, b.c0)
r.c1.diff(a.c1, b.c1)
r.c2.diff(a.c2, b.c2)
# Conditional arithmetic
# -------------------------------------------------------------------
func cneg*(a: var ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional negation
## Only negate if ctl is true
for fA in fields(a):
fA.cneg(ctl)
func cadd*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional addition
for fA, fB in fields(a, b):
fA.cadd(fB, ctl)
func csub*(a: var ExtensionField, b: ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional substraction
for fA, fB in fields(a, b):
fA.csub(fB, ctl)
# Multiplication by a small integer known at compile-time
# -------------------------------------------------------------------
func `*=`*(a: var ExtensionField, b: static int) {.inline.} =
## Multiplication by a small integer known at compile-time
const negate = b < 0
const b = if negate: -b
else: b
when negate:
a.neg(a)
when b == 0:
a.setZero()
elif b == 1:
return
elif b == 2:
a.double()
elif b == 3:
let t1 = a
a.double()
a += t1
elif b == 4:
a.double()
a.double()
elif b == 5:
let t1 = a
a.double()
a.double()
a += t1
elif b == 6:
a.double()
let t2 = a
a.double() # 4
a += t2
elif b == 7:
let t1 = a
a.double()
let t2 = a
a.double() # 4
a += t2
a += t1
elif b == 8:
a.double()
a.double()
a.double()
elif b == 9:
let t1 = a
a.double()
a.double()
a.double() # 8
a += t1
elif b == 10:
a.double()
let t2 = a
a.double()
a.double() # 8
a += t2
elif b == 11:
let t1 = a
a.double()
let t2 = a
a.double()
a.double() # 8
a += t2
a += t1
elif b == 12:
a.double()
a.double() # 4
let t4 = a
a.double() # 8
a += t4
else:
{.error: "Multiplication by this small int not implemented".}

View File

@ -12,14 +12,32 @@
import
../arithmetic,
../config/curves,
../config/[common, curves],
../io/io_fields,
./tower_common,
./quadratic_extensions,
./cubic_extensions,
./extension_fields,
./exponentiations
export tower_common, quadratic_extensions, cubic_extensions, exponentiations
export extension_fields, exponentiations
# We assume that the sextic non-residues used to construct
# the elliptic curve twists
# match with the quadratic and cubic non-residues
# chosen to construct the tower of extension fields.
# 𝔽p
# ----------------------------------------------------------------
func `*=`*(a: var Fp, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p by the quadratic non-residue
## chosen to construct 𝔽p2
static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
a *= Fp.C.getNonResidueFp()
func prod*(r: var Fp, a: Fp, _: type NonResidue){.inline.} =
## Multiply an element of 𝔽p by the quadratic non-residue
## chosen to construct 𝔽p2
static: doAssert Fp.C.getNonResidueFp() != -1, "𝔽p2 should be specialized for complex extension"
r.prod(a, Fp.C.getNonResidueFp())
# 𝔽p2
# ----------------------------------------------------------------
@ -28,53 +46,58 @@ type
Fp2*[C: static Curve] = object
c0*, c1*: Fp[C]
β = NonResidue
# Quadratic or Cubic non-residue
SexticNonResidue* = object
template fromComplexExtension*[F](elem: F): static bool =
## Returns true if the input is a complex extension
## i.e. the irreducible polynomial chosen is
## x² - µ with µ = -1
## and so 𝔽p2 = 𝔽p[x] / x² - µ = 𝔽p[𝑖]
when F is Fp2 and F.C.get_QNR_Fp() == -1:
when F is Fp2 and F.C.getNonResidueFp() == -1:
true
else:
false
func `*=`*(a: var Fp, _: typedesc[β]) {.inline.} =
## Multiply an element of 𝔽p by the quadratic non-residue
## chosen to construct 𝔽p2
static: doAssert Fp.C.get_QNR_Fp() != -1, "𝔽p2 should be specialized for complex extension"
a *= Fp.C.get_QNR_Fp()
template mulCheckSparse*(a: var Fp2, b: Fp2) =
when b.isOne().bool:
discard
elif b.isMinusOne().bool:
a.neg()
elif b.c0.isZero().bool and b.c1.isOne().bool:
var t {.noInit.}: type(a.c0)
when fromComplexExtension(b):
t.neg(a.c1)
a.c1 = a.c0
a.c0 = t
else:
t.prod(a.c1, NonResidue)
a.c1 = a.c0
a.c0 = t
elif b.c0.isZero().bool and b.c1.isMinusOne().bool:
var t {.noInit.}: type(a.c0)
when fromComplexExtension(b):
t = a.c1
a.c1.neg(a.c0)
a.c0 = t
else:
t.prod(a.c1, NonResidue)
a.c1.neg(a.c0)
a.c0.neg(t)
elif b.c0.isZero().bool:
a.mul_sparse_by_0y(b)
elif b.c1.isZero().bool:
a.mul_sparse_by_x0(b)
else:
a *= b
func `*`*(_: typedesc[β], a: Fp): Fp {.inline, noInit.} =
## Multiply an element of 𝔽p by the quadratic non-residue
## chosen to construct 𝔽p2
result = a
result *= β
# TODO: rework the quad/cube/sextic non residue declaration
func `*=`*(a: var Fp, _: typedesc[SexticNonResidue]) {.inline.} =
## Multiply an element of 𝔽p by the sextic non-residue
## chosen to construct 𝔽p6
a *= Fp.C.get_QNR_Fp()
func `*`*(_: typedesc[SexticNonResidue], a: Fp): Fp {.inline, noInit.} =
## Multiply an element of 𝔽p by the sextic non-residue
## chosen to construct 𝔽p6
result = a
result *= SexticNonResidue
func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
## Multiply an element of 𝔽p2 by the sextic non-residue
## chosen to construct the sextic twist
func prod*(r: var Fp2, a: Fp2, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p2 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p4
## - if cubic non-residue: 𝔽p6
## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
# Yet another const tuple unpacking bug
const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12
const v = Fp2.C.get_SNR_Fp2()[1]
const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
const u = Fp2.C.getNonResidueFp2()[0]
const v = Fp2.C.getNonResidueFp2()[1]
const Beta {.used.} = Fp2.C.getNonResidueFp()
# ξ = u + v x
# and x² = β
#
@ -82,24 +105,49 @@ func `*=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
# => u c0 + β v c1 + (v c0 + u c1) x
when a.fromComplexExtension() and u == 1 and v == 1:
let t = a.c0
a.c0 -= a.c1
a.c1 += t
r.c0.diff(t, a.c1)
r.c1.sum(t, a.c1)
else:
let a0 = a.c0
let a1 = a.c1
when a.fromComplexExtension():
a.c0.diff(u * a0, v * a1)
# Case:
# - BN254_Snarks, QNR_Fp: -1, SNR_Fp2: 9+1𝑖 (𝑖 = √-1)
# - BLS12_377, QNR_Fp: -5, SNR_Fp2: 0+1j (j = √-5)
# - BW6_761, SNR_Fp: -4, CNR_Fp2: 0+1j (j = √-4)
when u == 0:
# BLS12_377 and BW6_761, use small addition chain
r.mul_sparse_by_0y(a, v)
else:
a.c0.sum(u * a0, (Beta * v) * a1)
a.c1.sum(v * a0, u * a1)
# BN254_Snarks, u = 9
# Full 𝔽p2 multiplication is cheaper than addition chains
# for u*c0 and u*c1
static:
doAssert u >= 0 and uint64(u) <= uint64(high(BaseType))
doAssert v >= 0 and uint64(v) <= uint64(high(BaseType))
# TODO: compile-time
var NR {.noInit.}: Fp2
NR.c0.fromUint(uint u)
NR.c1.fromUint(uint v)
r.prod(a, NR)
func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
## Multiply an element of 𝔽p by the quadratic non-residue
## chosen to construct sextic twist
func `*=`*(a: var Fp2, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p2 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p4
## - if cubic non-residue: 𝔽p6
## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
# Yet another const tuple unpacking bug
const u = Fp2.C.get_SNR_Fp2()[0] # Sextic non-residue to construct 𝔽p12
const v = Fp2.C.get_SNR_Fp2()[1]
const Beta = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
a.prod(a, NonResidue)
func `/=`*(a: var Fp2, _: type NonResidue) {.inline.} =
## Divide an element of 𝔽p by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p4
## - if cubic non-residue: 𝔽p6
## - if sextic non-residue: 𝔽p4, 𝔽p6 or 𝔽p12
# Yet another const tuple unpacking bug
# Yet another const tuple unpacking bug
const u = Fp2.C.getNonresidueFp2()[0] # Sextic non-residue to construct 𝔽p12
const v = Fp2.C.getNonresidueFp2()[1]
const Beta = Fp2.C.getNonResidueFp() # Quadratic non-residue to construct 𝔽p2
# ξ = u + v x
# and x² = β
#
@ -118,7 +166,7 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
a.c1 -= t
a.div2()
else:
let a0 = a.c0
var a0 = a.c0
let a1 = a.c1
const u2v2 = u*u - Beta*v*v # (u² - βv²)
# TODO can be precomputed (or precompute b/µ the twist coefficient)
@ -127,12 +175,18 @@ func `/=`*(a: var Fp2, _: typedesc[SexticNonResidue]) {.inline.} =
u2v2inv.fromUint(u2v2)
u2v2inv.inv()
a.c0.diff(u * a0, (Beta * v) * a1)
a.c1.diff(u * a1, v * a0)
a.c0 *= u
a.c1 *= Beta * v
a.c0 -= a.c1
a.c1 = a1
a.c1 *= u
a0 *= v
a.c1 -= a0
a.c0 *= u2v2inv
a.c1 *= u2v2inv
# 𝔽p6
# 𝔽p4 & 𝔽p6
# ----------------------------------------------------------------
type
@ -142,53 +196,58 @@ type
Fp6*[C: static Curve] = object
c0*, c1*, c2*: Fp2[C]
ξ* = NonResidue
# We call the non-residue ξ on 𝔽p4/𝔽p6 to avoid confusion
# between non-residue
# of different tower level
func prod*(r: var Fp4, a: Fp4, _: type NonResidue) =
## Multiply an element of 𝔽p4 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p8
## - if cubic non-residue: 𝔽p12
## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
##
## Assumes that it is sqrt(NonResidue_Fp2)
let t = a.c0
r.c0.prod(a.c1, NonResidue)
r.c1 = t
func `*`*(_: typedesc[ξ], a: Fp2): Fp2 {.inline, noInit.} =
## Multiply an element of 𝔽p2 by the quadratic and cubic non-residue
## chosen to construct 𝔽p4/𝔽p6
# Yet another const tuple unpacking bug
const u = Fp2.C.get_CNR_Fp2()[0] # Quadratic & Cubic non-residue to construct 𝔽p4/𝔽p6
const v = Fp2.C.get_CNR_Fp2()[1]
const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
# ξ = u + v x
# and x² = β
#
# (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
# => u c0 + β v c1 + (v c0 + u c1) x
func `*=`*(a: var Fp4, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p4 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p8
## - if cubic non-residue: 𝔽p12
## - if sextic non-residue: 𝔽p8, 𝔽p12 or 𝔽p24
##
## Assumes that it is sqrt(NonResidue_Fp2)
a.prod(a, NonResidue)
# TODO: check code generated when ξ = 1 + 𝑖
# The mul by constant are inline but
# since we don't have __restrict tag
# and we use arrays (which decay into pointer)
# the compiler might not elide the temporary
when a.fromComplexExtension():
result.c0.diff(u * a.c0, v * a.c1)
else:
result.c0.sum(u * a.c0, (Beta * v) * a.c1)
result.c1.sum(v * a.c0, u * a.c1 )
func prod*(r: var Fp6, a: Fp6, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p4 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p12
## - if cubic non-residue: 𝔽p18
## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
##
## Assumes that it is cube_root(NonResidue_Fp2)
##
## For all curves γ = v with v the factor for 𝔽p6 coordinate
## and v³ = ξ
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
let t = a.c2
r.c1 = a.c0
r.c2 = a.c1
t.c0.prod(t, NonResidue)
func `*=`*(a: var Fp2, _: typedesc[ξ]) {.inline.} =
## Multiply an element of 𝔽p2 by the quadratic non-residue
## chosen to construct 𝔽p6
# Yet another const tuple unpacking bug
const u = Fp2.C.get_CNR_Fp2()[0] # Cubic non-residue to construct 𝔽p6
const v = Fp2.C.get_CNR_Fp2()[1]
const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
# ξ = u + v x
# and x² = β
#
# (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
# => u c0 + β v c1 + (v c0 + u c1) x
when a.fromComplexExtension() and u == 1 and v == 1:
let t = a.c0
a.c0 -= a.c1
a.c1 += t
else: # TODO: optim for inline
a = ξ * a
func `*=`*(a: var Fp6, _: type NonResidue) {.inline.} =
## Multiply an element of 𝔽p4 by the non-residue
## chosen to construct the next extension or the twist:
## - if quadratic non-residue: 𝔽p12
## - if cubic non-residue: 𝔽p18
## - if sextic non-residue: 𝔽p12, 𝔽p18 or 𝔽p36
##
## Assumes that it is cube_root(NonResidue_Fp2)
##
## For all curves γ = v with v the factor for 𝔽p6 coordinate
## and v³ = ξ
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
a.prod(a, NonResidue)
# 𝔽p12
# ----------------------------------------------------------------
@ -198,32 +257,6 @@ type
c0*, c1*, c2*: Fp4[C]
# c0*, c1*: Fp6[C]
γ = NonResidue
# We call the non-residue γ (Gamma) on 𝔽p6 to avoid confusion between non-residue
# of different tower level
func `*`*(_: typedesc[γ], a: Fp4): Fp4 {.noInit, inline.} =
## Multiply an element of 𝔽p4 by the sextic non-residue
## chosen to construct 𝔽p12
result.c0 = ξ * a.c1
result.c1 = a.c0
func `*=`*(a: var Fp4, _: typedesc[γ]) {.inline.} =
a = γ * a
func `*`*(_: typedesc[γ], a: Fp6): Fp6 {.noInit, inline.} =
## Multiply an element of 𝔽p6 by the cubic non-residue
## chosen to construct 𝔽p12
## For all curves γ = v with v the factor for 𝔽p6 coordinate
## and v³ = ξ
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
result.c0 = ξ * a.c2
result.c1 = a.c0
result.c2 = a.c1
func `*=`*(a: var Fp6, _: typedesc[γ]) {.inline.} =
a = γ * a
# Sparse functions
# ----------------------------------------------------------------
@ -238,13 +271,6 @@ func mul_sparse_by_y0*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) =
r.c0.prod(a.c0, b)
r.c1.prod(a.c1, b)
func mul_sparse_by_0y*[C: static Curve](r: var Fp4[C], a: Fp4[C], b: Fp2[C]) =
## Sparse multiplication of an Fp4 element
## with coordinates (a₀, a₁) by (0, b₁)
r.c0.prod(a.c1, b)
r.c0 *= NonResidue
r.c1.prod(a.c0, b)
func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) =
## Sparse multiplication of an Fp6 element
## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
@ -265,6 +291,6 @@ func mul_sparse_by_0y0*[C: static Curve](r: var Fp6[C], a: Fp6[C], b: Fp2[C]) =
# = a1 b1
r.c0.prod(a.c2, b)
r.c0 *= ξ
r.c0 *= NonResidue
r.c1.prod(a.c0, b)
r.c2.prod(a.c1, b)

View File

@ -75,6 +75,9 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- [ ] x86: MULX, ADCX, ADOX instructions
- [ ] no-carry optimization for CIOS (Coarsely Integrated Operand Scanning)
- Addition chains
- [ ] unreduced squarings/multiplications in addition chains
- Exponentiation
- [x] variable-time exponentiation
- [x] fixed window optimization _(sliding windows are not constant-time)_
@ -158,19 +161,25 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- Line functions
- [x] Homogeneous projective coordinates
- [x] D-Twist
- [x] Fused line add + elliptic curve add
- [x] Fused line double + elliptic curve double
- [x] M-Twist
- [x] Fused line add + elliptic curve add
- [x] Fused line double + elliptic curve double
- [x] Fused line add + elliptic curve add
- [x] Fused line double + elliptic curve double
- [ ] Jacobian projective coordinates
- [ ] D-Twist
- [ ] Fused line add + elliptic curve add
- [ ] Fused line double + elliptic curve double
- [ ] M-Twist
- [ ] Fused line add + elliptic curve add
- [ ] Fused line double + elliptic curve double
- [ ] Fused line add + elliptic curve add
- [ ] Fused line double + elliptic curve double
- [x] Sparse multiplication line * Gₜ element
- [x] 6-way sparse
- [ ] Pseudo 8-sparse
- [x] D-Twist
- [x] 6-way sparse
- [ ] Pseudo 8-sparse
- [x] M-Twist
- [x] 6-way sparse
- [ ] Pseudo 8-sparse
- Miller Loop
- [x] NAF recoding

View File

@ -15,7 +15,7 @@ import
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
../constantine/io/io_bigints,
../constantine/tower_field_extensions/tower_common
../constantine/tower_field_extensions/extension_fields
# ############################################################
#

View File

@ -25,7 +25,6 @@ proc main() =
y.fromUint(10'u32)
z.fromUint(90'u32)
let u = x + y
x += y
var x_bytes: array[8, byte]
@ -34,7 +33,6 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
90'u64 == cast[uint64](x_bytes)
@ -45,7 +43,6 @@ proc main() =
y.fromUint(21'u32)
z.fromUint(0'u32)
let u = x + y
x += y
var x_bytes: array[8, byte]
@ -54,7 +51,6 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
0'u64 == cast[uint64](x_bytes)
@ -65,7 +61,6 @@ proc main() =
y.fromUint(22'u32)
z.fromUint(1'u32)
let u = x + y
x += y
var x_bytes: array[8, byte]
@ -74,7 +69,6 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
1'u64 == cast[uint64](x_bytes)
@ -86,7 +80,6 @@ proc main() =
y.fromUint(10'u32)
z.fromUint(70'u32)
let u = x - y
x -= y
var x_bytes: array[8, byte]
@ -95,7 +88,6 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
70'u64 == cast[uint64](x_bytes)
@ -106,7 +98,6 @@ proc main() =
y.fromUint(80'u32)
z.fromUint(0'u32)
let u = x - y
x -= y
var x_bytes: array[8, byte]
@ -115,7 +106,6 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
0'u64 == cast[uint64](x_bytes)
@ -126,7 +116,6 @@ proc main() =
y.fromUint(81'u32)
z.fromUint(100'u32)
let u = x - y
x -= y
var x_bytes: array[8, byte]
@ -135,19 +124,18 @@ proc main() =
check:
# Check equality in the Montgomery domain
bool(z == x)
bool(z == u)
# Check equality when converting back to natural domain
100'u64 == cast[uint64](x_bytes)
test "Multiplication mod 101":
block:
var x, y, z: Fp[Fake101]
var x, y, z, r: Fp[Fake101]
x.fromUint(10'u32)
y.fromUint(10'u32)
z.fromUint(100'u32)
let r = x * y
r.prod(x, y)
var r_bytes: array[8, byte]
r_bytes.exportRawUint(r, cpuEndian)
@ -159,13 +147,13 @@ proc main() =
100'u64 == cast[uint64](r_bytes)
block:
var x, y, z: Fp[Fake101]
var x, y, z, r: Fp[Fake101]
x.fromUint(10'u32)
y.fromUint(11'u32)
z.fromUint(9'u32)
let r = x * y
r.prod(x, y)
var r_bytes: array[8, byte]
r_bytes.exportRawUint(r, cpuEndian)
@ -275,13 +263,13 @@ proc main() =
test "Multiplication mod 2^61 - 1":
block:
var x, y, z: Fp[Mersenne61]
var x, y, z, r: Fp[Mersenne61]
x.fromUint(10'u32)
y.fromUint(10'u32)
z.fromUint(100'u32)
let r = x * y
r.prod(x, y)
var r_bytes: array[8, byte]
r_bytes.exportRawUint(r, cpuEndian)
@ -294,13 +282,13 @@ proc main() =
cast[uint64](r_bytes) == 100'u64
block:
var x, y, z: Fp[Mersenne61]
var x, y, z, r: Fp[Mersenne61]
x.fromUint(1'u32 shl 31)
y.fromUint(1'u32 shl 31)
z.fromUint(2'u32)
let r = x * y
r.prod(x, y)
var r_bytes: array[8, byte]
r_bytes.exportRawUint(r, cpuEndian)