Fast modular inversion (#172)

* split modular inversion in its own file

* Stash fast GCD inversion https://eprint.iacr.org/2020/972.pdf

* Stash Pornin's bingcd -> issue with inner modular reduction

* Implement Bernstein-Yang inversion

* Avoid Nim checks on signed integers (32-bit runtime issue)

* cleanup: remove old inversion impls

* cleanup: static moduli, move div2

* small comments (skip ci)

* comment cleanup (skip ci)

* fix total iterations on 32-bit

* Add batch conversion to affine coordinates using simultaneous inversion trick

* fix conditional setZero and batchAffine conversion

* cleanup unneeded branches following affine conversion unification

* Fix batchAffine with zero inputs and add fuzz failure to test suite
This commit is contained in:
Mamy Ratsimbazafy 2022-02-10 14:05:07 +01:00 committed by GitHub
parent c02e6bdf84
commit 53c4db7ead
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
65 changed files with 1655 additions and 1679 deletions

View File

@ -68,10 +68,7 @@ proc mixedAddBench*(T: typedesc, iters: int) =
let P = rng.random_unsafe(T)
let Q = rng.random_unsafe(T)
var Qaff: ECP_ShortW_Aff[T.F, T.G]
when Q is ECP_ShortW_Prj:
Qaff.affineFromProjective(Q)
else:
Qaff.affineFromJacobian(Q)
Qaff.affine(Q)
bench("EC Mixed Addition " & G1_or_G2, T, iters):
r.madd(P, Qaff)
@ -87,14 +84,14 @@ proc affFromProjBench*(T: typedesc, iters: int) =
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
let P = rng.random_unsafe(T)
bench("EC Projective to Affine " & G1_or_G2, T, iters):
r.affineFromProjective(P)
r.affine(P)
proc affFromJacBench*(T: typedesc, iters: int) =
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
var r {.noInit.}: ECP_ShortW_Aff[T.F, T.G]
let P = rng.random_unsafe(T)
bench("EC Jacobian to Affine " & G1_or_G2, T, iters):
r.affineFromJacobian(P)
r.affine(P)
proc scalarMulGenericBench*(T: typedesc, window: static int, iters: int) =
const bits = T.F.C.getCurveOrderBitwidth()

View File

@ -95,30 +95,9 @@ proc invBench*(T: typedesc, iters: int) =
var r: T
let x = rng.random_unsafe(T)
preventOptimAway(r)
bench("Inversion (constant-time default impl)", T, iters):
bench("Inversion (constant-time)", T, iters):
r.inv(x)
proc invEuclidBench*(T: typedesc, iters: int) =
var r: T
let x = rng.random_unsafe(T)
preventOptimAway(r)
bench("Inversion (constant-time Euclid)", T, iters):
r.inv_euclid(x)
proc invPowFermatBench*(T: typedesc, iters: int) =
let x = rng.random_unsafe(T)
const exponent = T.getInvModExponent()
bench("Inversion (exponentiation p-2, Little Fermat)", T, iters):
var r = x
r.powUnsafeExponent(exponent)
proc invAddChainBench*(T: typedesc, iters: int) =
var r: T
let x = rng.random_unsafe(T)
preventOptimAway(r)
bench("Inversion (addition chain)", T, iters):
r.inv_addchain(x)
proc sqrtBench*(T: typedesc, iters: int) =
let x = rng.random_unsafe(T)

View File

@ -11,7 +11,7 @@ import
../constantine/config/[curves, common],
../constantine/arithmetic,
../constantine/io/io_bigints,
../constantine/curves/[zoo_inversions, zoo_square_roots],
../constantine/curves/zoo_square_roots,
# Helpers
../helpers/static_for,
./bench_fields_template
@ -50,8 +50,7 @@ proc main() =
mulBench(Fp[curve], Iters)
sqrBench(Fp[curve], Iters)
smallSeparator()
invEuclidBench(Fp[curve], ExponentIters)
invPowFermatBench(Fp[curve], ExponentIters)
invBench(Fp[curve], ExponentIters)
sqrtBench(Fp[curve], ExponentIters)
sqrtRatioBench(Fp[curve], ExponentIters)
# Exponentiation by a "secret" of size ~the curve order

View File

@ -64,7 +64,7 @@ proc bench_BLS12_381_proj_aff_conversion(iters: int) =
)
bench("Proj->Affine conversion (for pairing)", BLS12_381, iters):
Paff.affineFromProjective(P)
Paff.affine(P)
const Iters = 1000

View File

@ -51,9 +51,9 @@ func clearCofactor[F; G: static Subgroup](
ec: var ECP_ShortW_Aff[F, G]) =
# For now we don't have any affine operation defined
var t {.noInit.}: ECP_ShortW_Prj[F, G]
t.projectiveFromAffine(ec)
t.fromAffine(ec)
t.clearCofactor()
ec.affineFromProjective(t)
ec.affine(t)
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
result = rng.random_unsafe(EC)

View File

@ -86,9 +86,9 @@ func clearCofactorReference[F; G: static Subgroup](
ec: var ECP_ShortW_Aff[F, G]) =
# For now we don't have any affine operation defined
var t {.noInit.}: ECP_ShortW_Prj[F, G]
t.projectiveFromAffine(ec)
t.fromAffine(ec)
t.clearCofactorReference()
ec.affineFromProjective(t)
ec.affine(t)
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
result = rng.random_unsafe(EC)
@ -136,10 +136,7 @@ proc mixedAddBench*(T: typedesc, iters: int) =
let P = rng.random_unsafe(T)
let Q = rng.random_unsafe(T)
var Qaff: ECP_ShortW_Aff[T.F, T.G]
when Q is ECP_ShortW_Prj:
Qaff.affineFromProjective(Q)
else:
Qaff.affineFromJacobian(Q)
Qaff.affine(Q)
bench("EC Mixed Addition " & G1_or_G2, T, iters):
r.madd(P, Qaff)

View File

@ -28,6 +28,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# Big ints
# ----------------------------------------------------------
("tests/t_io_bigints.nim", false),
("tests/t_io_unsaturated.nim", false),
("tests/t_bigints.nim", false),
("tests/t_bigints_multimod.nim", false),
("tests/t_bigints_mod_vs_gmp.nim", true),
@ -64,6 +65,11 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
("tests/t_fp4_frobenius.nim", false),
("tests/t_fp6_frobenius.nim", false),
("tests/t_fp12_frobenius.nim", false),
# Elliptic curve arithmetic
# ----------------------------------------------------------
("tests/t_ec_conversion.nim", false),
# Elliptic curve arithmetic G1
# ----------------------------------------------------------
# ("tests/t_ec_shortw_prj_g1_add_double.nim", false),

View File

@ -10,7 +10,6 @@ import
arithmetic/[bigints, bigints_montgomery],
arithmetic/[
finite_fields,
finite_fields_inversion,
finite_fields_square_root,
finite_fields_double_precision
]
@ -19,6 +18,5 @@ export
bigints,
bigints_montgomery,
finite_fields,
finite_fields_inversion,
finite_fields_square_root,
finite_fields_double_precision

View File

@ -147,7 +147,7 @@ proc partialRedx(
ctx.comment " Reduction"
ctx.comment " m = t[0] * m0ninv mod 2^w"
ctx.mov rdx, t[0]
ctx.mulx S, rdx, m0ninv, rdx # (S, RDX) <- m0ninv * RDX
ctx.imul rdx, m0ninv
# Clear carry flags - TODO: necessary?
ctx.`xor` S, S

View File

@ -11,8 +11,8 @@ import
../primitives,
./limbs,
./limbs_extmul,
./limbs_modular,
./limbs_montgomery
./limbs_invmod,
./limbs_division
export BigInt
@ -79,9 +79,9 @@ func setUint*(a: var BigInt, n: SomeUnsignedInt) =
## Set a BigInt to a machine-sized integer ``n``
a.limbs.setUint(n)
func czero*(a: var BigInt, ctl: SecretBool) =
func csetZero*(a: var BigInt, ctl: SecretBool) =
## Set ``a`` to 0 if ``ctl`` is true
a.limbs.czero(ctl)
a.limbs.csetZero(ctl)
# Copy
# ------------------------------------------------------------
@ -450,35 +450,36 @@ func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBi
# pass a pointer+length to a fixed session of the BSS.
reduce(r.limbs, a.limbs, aBits, M.limbs, mBits)
func div2_modular*[bits](a: var BigInt[bits], mp1div2: BigInt[bits]) =
## Compute a <- a/2 (mod M)
## `mp1div2` is the modulus (M+1)/2
##
## Normally if `a` is odd we add the modulus before dividing by 2
## but this may overflow and we might lose a bit before shifting.
## Instead we shift first and then add half the modulus rounded up
##
## Assuming M is odd, `mp1div2` can be precomputed without
## overflowing the "Limbs" by dividing by 2 first
## and add 1
## Otherwise `mp1div2` should be M/2
a.limbs.div2_modular(mp1div2.limbs)
func steinsGCD*[bits](r: var BigInt[bits], a, F, M, mp1div2: BigInt[bits]) =
## Compute F multiplied the modular inverse of ``a`` modulo M
## r ≡ F . a^-1 (mod M)
func invmod*[bits](
r: var BigInt[bits],
a, F, M: BigInt[bits]) =
## Compute the modular inverse of ``a`` modulo M
## r ≡ F.a⁻¹ (mod M)
##
## M MUST be odd, M does not need to be prime.
## ``a`` MUST be less than M.
r.limbs.steinsGCD(a.limbs, F.limbs, M.limbs, bits, mp1div2.limbs)
r.limbs.invmod(a.limbs, F.limbs, M.limbs, bits)
func invmod*[bits](r: var BigInt[bits], a, M, mp1div2: BigInt[bits]) =
func invmod*[bits](
r: var BigInt[bits],
a: BigInt[bits],
F, M: static BigInt[bits]) =
## Compute the modular inverse of ``a`` modulo M
## r ≡ F.a⁻¹ (mod M)
##
## with F and M known at compile-time
##
## M MUST be odd, M does not need to be prime.
## ``a`` MUST be less than M.
r.limbs.invmod(a.limbs, F.limbs, M.limbs, bits)
func invmod*[bits](r: var BigInt[bits], a, M: BigInt[bits]) =
## Compute the modular inverse of ``a`` modulo M
##
## The modulus ``M`` MUST be odd
var one {.noInit.}: BigInt[bits]
one.setOne()
r.steinsGCD(a, one, M, mp1div2)
r.invmod(a, one, M)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -11,7 +11,6 @@ import
../primitives,
../io/io_bigints,
./limbs,
./limbs_modular,
./limbs_montgomery,
./bigints

View File

@ -231,25 +231,29 @@ func neg*(r: var FF, a: FF) {.meter.} =
var t {.noInit.}: FF
let isZero = a.isZero()
discard t.mres.diff(FF.fieldMod(), a.mres)
t.mres.czero(isZero)
t.mres.csetZero(isZero)
r = t
func neg*(a: var FF) {.meter.} =
## Negate modulo p
a.neg(a)
func div2*(a: var FF) {.meter.} =
## Modular division by 2
a.mres.div2_modular(FF.getPrimePlus1div2())
# ############################################################
#
# Field arithmetic conditional
#
# ############################################################
func csetZero*(a: var FF, ctl: SecretBool) =
## Set ``a`` to 0 if ``ctl`` is true
a.mres.csetZero(ctl)
func csetOne*(a: var FF, ctl: SecretBool) =
## Set ``a`` to 1 if ``ctl`` is true
a.mres.ccopy(FF.getMontyOne(), ctl)
func cneg*(r: var FF, a: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional negation
## Constant-time out-of-place conditional negation
## The negation is only performed if ctl is "true"
r.neg(a)
r.ccopy(a, not ctl)
@ -261,19 +265,68 @@ func cneg*(a: var FF, ctl: SecretBool) {.meter.} =
a.cneg(t, ctl)
func cadd*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional addition
## Constant-time out-place conditional addition
## The addition is only performed if ctl is "true"
var t = a
t += b
a.ccopy(t, ctl)
func csub*(a: var FF, b: FF, ctl: SecretBool) {.meter.} =
## Constant-time in-place conditional substraction
## Constant-time out-place conditional substraction
## The substraction is only performed if ctl is "true"
var t = a
t -= b
a.ccopy(t, ctl)
# ############################################################
#
# Field arithmetic division and inversion
#
# ############################################################
func div2*(a: var FF) {.meter.} =
## Modular division by 2
## `a` will be divided in-place
#
# Normally if `a` is odd we add the modulus before dividing by 2
# but this may overflow and we might lose a bit before shifting.
# Instead we shift first and then add half the modulus rounded up
#
# Assuming M is odd, `mp1div2` can be precomputed without
# overflowing the "Limbs" by dividing by 2 first
# and add 1
# Otherwise `mp1div2` should be M/2
# if a.isOdd:
# a += M
# a = a shr 1
let wasOdd = a.mres.isOdd()
a.mres.shiftRight(1)
let carry {.used.} = a.mres.cadd(FF.getPrimePlus1div2(), wasOdd)
# a < M -> a/2 <= M/2:
# a/2 + M/2 < M if a is odd
# a/2 < M if a is even
debug: doAssert not carry.bool
func inv*(r: var FF, a: FF) =
## Inversion modulo p
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
r.mres.invmod(a.mres, FF.getR2modP(), FF.fieldMod())
func inv*(a: var FF) =
## Inversion modulo p
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.inv(a)
# ############################################################
#
# Field arithmetic exponentiation

View File

@ -162,7 +162,7 @@ func neg2xMod*(r: var FpDbl, a: FpDbl) =
subB(borrow, t.limbs2x[i], M.limbs[i-N], a.limbs2x[i], borrow)
# Zero the result if input was zero
t.limbs2x.czero(isZero)
t.limbs2x.csetZero(isZero)
r = t
func prod2xImpl(

View File

@ -1,64 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/[curves, type_ff],
./bigints,
../curves/zoo_inversions
export zoo_inversions
# ############################################################
#
# Finite field inversion
#
# ############################################################
# No exceptions allowed
{.push raises: [].}
{.push inline.}
func inv_euclid*(r: var FF, a: FF) =
## Inversion modulo p via
## Niels Moller constant-time version of
## Stein's GCD derived from extended binary Euclid algorithm
r.mres.steinsGCD(a.mres, FF.getR2modP(), FF.fieldMod(), FF.getPrimePlus1div2())
func inv*(r: var FF, a: FF) =
## Inversion modulo p
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
# For now we don't activate the addition chains
# Performance is slower than Euclid-based inversion on newer CPUs
#
# - Montgomery multiplication/squaring can skip the final substraction
# - For generalized Mersenne Prime curves, modular reduction can be made extremely cheap.
# - For BW6-761 the addition chain is over 2x slower than Euclid-based inversion
# due to multiplication being so costly with 12 limbs (grows quadratically)
# while Euclid costs grows linearly.
when false and
FF is Fp and FF.C.hasInversionAddchain() and
FF.C notin {BW6_761}:
r.inv_addchain(a)
else:
r.inv_euclid(a)
func inv*(a: var FF) =
## Inversion modulo p
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.inv(a)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -85,12 +85,11 @@ func setUint*(a: var Limbs, n: SomeUnsignedInt) =
when a.len > 2:
zeroMem(a[2].addr, (a.len - 2) * sizeof(SecretWord))
func czero*(a: var Limbs, ctl: SecretBool) =
func csetZero*(a: var Limbs, ctl: SecretBool) =
## Set ``a`` to 0 if ``ctl`` is true
# Only used for FF neg in pure Nim fallback
# so no need for assembly
let mask = -(SecretWord(ctl) xor One)
for i in 0 ..< a.len:
ctl.ccopy(a[i], Zero)
a[i] = a[i] and mask
# Copy
# ------------------------------------------------------------
@ -330,6 +329,26 @@ func cneg*(a: var Limbs, ctl: CTBool) =
carry = SecretWord(t < carry) # Carry on
a[i] = t
func cneg*(r: var Limbs, a: Limbs, ctl: CTBool) =
## Conditional negation.
## Negate if ``ctl`` is true
# Algorithm:
# In two-complement representation
# -x <=> not(x) + 1 <=> x xor 0xFF... + 1
# and
# x <=> x xor 0x00...<=> x xor 0x00... + 0
#
# So we need to xor all words and then add 1
# The "+1" might carry
# So we fuse the 2 steps
let mask = -SecretWord(ctl) # Obtain a 0xFF... or 0x00... mask
var carry = SecretWord(ctl)
for i in 0 ..< a.len:
let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively
carry = SecretWord(t < carry) # Carry on
r[i] = t
{.pop.} # inline
# Division

View File

@ -8,144 +8,14 @@
import
../config/common,
../primitives,
./limbs
../primitives
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Modular division by 2
#
# ############################################################
func div2_modular*(a: var Limbs, mp1div2: Limbs) {.inline.} =
## Modular Division by 2
## `a` will be divided in-place
## `mp1div2` is the modulus (M+1)/2
##
## Normally if `a` is odd we add the modulus before dividing by 2
## but this may overflow and we might lose a bit before shifting.
## Instead we shift first and then add half the modulus rounded up
##
## Assuming M is odd, `mp1div2` can be precomputed without
## overflowing the "Limbs" by dividing by 2 first
## and add 1
## Otherwise `mp1div2` should be M/2
# if a.isOdd:
# a += M
# a = a shr 1
let wasOdd = a.isOdd()
a.shiftRight(1)
let carry {.used.} = a.cadd(mp1div2, wasOdd)
debug: doAssert not carry.bool
# ############################################################
#
# Modular inversion
#
# ############################################################
# Generic (odd-only modulus)
# ------------------------------------------------------------
# Algorithm by Niels Möller,
# a modified version of Stein's Algorithm (binary Extended Euclid GCD)
#
# Algorithm 5 in
# Fast Software Polynomial Multiplication on ARM Processors Using the NEON Engine
# Danilo Camara, Conrado P. L. Gouvea, Julio Lopez, and Ricardo Dahab
# https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf
#
# Input: integer x, odd integer n, x < n
# Output: x1 (mod n)
# 1: function ModInv(x, n)
# 2: (a, b, u, v) ← (x, n, 1, 1)
# 3: ← ⌊log2 n⌋ + 1 ⮚ number of bits in n
# 4: for i ← 0 to 2 1 do
# 5: odd ← a & 1
# 6: if odd and a ≥ b then
# 7: a ← a b
# 8: else if odd and a < b then
# 9: (a, b, u, v) ← (b a, a, v, u)
# 10: a ← a >> 1
# 11: if odd then u ← u v
# 12: if u < 0 then u ← u + n
# 13: if u & 1 then u ← u + n
# 14: u ← u >> 1
# 15: return v
#
# Warning ⚠️: v should be 0 at initialization
#
# We modify it to return F . a^-1
# So that we can pass an adjustment factor F
# And directly compute modular division or Montgomery inversion
func steinsGCD*(v: var Limbs, a: Limbs, F, M: Limbs, bits: int, mp1div2: Limbs) =
## Compute F multiplied the modular inverse of ``a`` modulo M
## r ≡ F . a^-1 (mod M)
##
## M MUST be odd, M does not need to be prime.
## ``a`` MUST be less than M.
##
## No information about ``a`` in particular its actual length in bits is leaked.
##
## This takes (M+1)/2 (mp1div2) as a precomputed parameter as a slight optimization
## in stack size and speed.
##
## The inverse of 0 is 0.
# Ideally we need registers for a, b, u, v
# but:
# - Even with ~256-bit primes, that's 4 limbs = 4*4 => 16 registers
# - x86_64 only has 16 general purposes registers
# - Registers are needed for the loop counter and comparison results
# - CMOV is reg <- RM so can move registers/memory into registers
# but cannot move into memory.
# so we choose to keep "v" from the algorithm in memory as `r`
# TODO: the inlining of primitives like `csub` is bad for codesize
# but there is a 80% slowdown without it.
var a = a
var b = M
var u = F
v.setZero()
for i in 0 ..< 2 * bits:
debug: doAssert bool(b.isOdd)
let isOddA = a.isOdd()
# if isOddA: a -= b
let aLessThanB = isOddA and (SecretBool) a.csub(b, isOddA)
# if a < b and the sub was processed
# in that case, b <- a = a - b + b
discard b.cadd(a, aLessThanB)
# and a <- -new_a = (b-a)
a.cneg(aLessThanB)
debug: doAssert not bool(a.isOdd)
a.shiftRight(1)
# Swap u and v is a < b
u.cswap(v, aLessThanB)
# if isOddA: u -= v (mod M)
let neg = isOddA and (SecretBool) u.csub(v, isOddA)
discard u.cadd(M, neg)
# u = u/2 (mod M)
u.div2_modular(mp1div2)
debug:
doAssert bool a.isZero()
# GCD exist (always true if a and M are relatively prime)
doAssert bool b.isOne() or
# or not (on prime fields iff input was zero) and no GCD fallback output is zero
v.isZero()
# ############################################################
#
# Modular Reduction
# Division and Modular Reduction
#
# ############################################################
#

View File

@ -0,0 +1,453 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/common,
../primitives,
./limbs, ./limbs_unsaturated
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Modular inversion (mod 2ᵏ)
#
# ############################################################
func invModBitwidth(a: BaseType): BaseType =
# Modular inverse algorithm:
# Explanation p11 "Dumas iterations" based on Newton-Raphson:
# - Cetin Kaya Koc (2017), https://eprint.iacr.org/2017/411
# - Jean-Guillaume Dumas (2012), https://arxiv.org/pdf/1209.6626v2.pdf
# - Colin Plumb (1994), http://groups.google.com/groups?selm=1994Apr6.093116.27805%40mnemosyne.cs.du.edu
# Other sources:
# - https://crypto.stackexchange.com/questions/47493/how-to-determine-the-multiplicative-inverse-modulo-64-or-other-power-of-two
# - https://mumble.net/~campbell/2015/01/21/inverse-mod-power-of-two
# - http://marc-b-reynolds.github.io/math/2017/09/18/ModInverse.html
# We are in a special case
# where m = 2^WordBitWidth.
# For a and m to be coprimes, a must be odd.
#
# We have the following relation
# ax ≡ 1 (mod 2ᵏ) <=> ax(2 - ax) ≡ 1 (mod 2²ᵏ)
# which grows in O(log(log(a)))
debug: doAssert (a and 1) == 1, "a must be odd"
const k = log2_vartime(a.sizeof() * 8)
result = a # Start from an inverse of M0 modulo 2, M0 is odd and it's own inverse
for _ in 0 ..< k: # at each iteration we get the inverse mod(2^2k)
result *= 2 - a * result # x' = x(2 - ax)
func invMod2powK(M0: BaseType, k: static BaseType): BaseType =
## Compute 1/M mod 2ᵏ
## from M[0]
# Algorithm: Cetin Kaya Koc (2017) p11, https://eprint.iacr.org/2017/411
# Once you have a modular inverse (mod 2ˢ) you can reduce
# (mod 2ᵏ) to have the modular inverse (mod 2ᵏ)
static: doAssert k <= WordBitWidth
const maskMod = (1 shl k)-1
M0.invModBitwidth() and maskMod
# ###############################################################
#
# Modular inversion
#
# ###############################################################
# Algorithm by Bernstein-Yang
# ------------------------------------------------------------
#
# - Original Bernstein-Yang paper, https://eprint.iacr.org/2019/266
# - Executable spec and description by Dettman-Wuille, https://github.com/bitcoin-core/secp256k1/blob/85b00a1/doc/safegcd_implementation.md
# - Formal bound verification by Wuille, https://github.com/sipa/safegcd-bounds
# - Formal verification by Hvass-Aranha-Spitters, https://eprint.iacr.org/2021/549
#
# We implement the half-delta divstep variant
type TransitionMatrix = object
## Bernstein-Yang Jumpdivstep transition matrix
## [u v]
## t = [q r]
## It it is scaled by 2ᵏ
u, v, q, r: SignedSecretWord
debug:
# Debugging helpers
func checkDeterminant(t: TransitionMatrix, u, v, q, r: SignedSecretWord, k, numIters: int): bool =
# The determinant of t must be a power of two. This guarantees that multiplication with t
# does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
# will be divided out again).
# Each divstep's individual matrix has determinant 2⁻¹,
# the aggregate of numIters of them will have determinant 2ⁿᵘᵐᴵᵗᵉʳˢ. Multiplying with the initial
# 2ᵏ*identity (which has determinant 2²ᵏ) means the result has determinant 2²ᵏ⁻ⁿᵘᵐᴵᵗᵉʳˢ.
let
u = SecretWord u
v = SecretWord v
q = SecretWord q
r = SecretWord r
var a, b: array[2, SecretWord]
var e: array[2, SecretWord]
smul(a[1], a[0], u, r)
smul(b[1], b[0], v, q)
var borrow: Borrow
subB(borrow, a[0], a[0], b[0], Borrow(0))
subB(borrow, a[1], a[1], b[1], borrow)
let d = 2*k - numIters
b[0] = Zero; b[1] = Zero
b[d div WordBitwidth] = One shl (d mod WordBitwidth)
return bool(a == b)
func canonicalize(
a: var LimbsUnsaturated,
signMask: SignedSecretWord,
M: LimbsUnsaturated
) =
## Compute a = sign*a (mod M)
##
## with a in range (-2*M, M)
## result in range [0, M)
const
UnsatBitWidth = WordBitWidth - a.Excess
Max = SignedSecretWord(MaxWord shr a.Excess)
# Operate in registers
var z = a
# Add M if `z` is negative
# -> range (-M, M)
z.cadd(M, z.isNegMask())
# Negate if sign is negative
# -> range (-M, M)
z.cneg(signMask)
# Normalize words to range (-2^UnsatBitwidth, 2^UnsatBitwidth)
for i in 0 ..< z.words.len-1:
z[i+1] = z[i+1] + z[i].ashr(UnsatBitWidth)
z[i] = z[i] and Max
# Add M if `z` is negative
# -> range (0, M)
z.cadd(M, z.isNegMask())
# Normalize words to range (-2^UnsatBitwidth, 2^UnsatBitwidth)
for i in 0 ..< z.words.len-1:
z[i+1] = z[i+1] + z[i].ashr(UnsatBitWidth)
z[i] = z[i] and Max
a = z
proc partitionDivsteps(bits, wordBitWidth: int): tuple[totalIters, numChunks, chunkSize, cutoff: int] =
# Given the field modulus number of bits
# and the effective word size
# Returns:
# - the total number of iterations that guarantees GCD convergence
# - the number of chunks of divsteps to compute
# - the base number of divsteps per chunk
# - a cutoff chunk,
# before this chunk ID, the number of divsteps is "base number + 1"
# afterward it's "base number"
let totalIters =
if bits == 256:
# https://github.com/sipa/safegcd-bounds/tree/master/coq
# For 256-bit inputs, 590 divsteps are sufficient with hddivstep variant (half-delta divstep)
# for gcd(f, g) with 0 <= g <= f <= Modulus (inversion g == 1)
# The generic formula reports 591
590
else:
# https://github.com/sipa/safegcd-bounds/blob/master/genproofhd.md
# For any input, for gcd(f, g) with 0 <= g <= f <= Modulus with hddivstep variant (half-delta divstep)
# (inversion g == 1)
(45907*bits + 26313) div 19929
let numChunks = (totalIters + wordBitWidth-1) div wordBitWidth
let chunkSize = totalIters div numChunks
let cutoff = totalIters mod numChunks
return (totalIters, numChunks, chunkSize, cutoff)
func batchedDivsteps(
t: var TransitionMatrix,
theta: SignedSecretWord,
f0, g0: SignedSecretWord,
numIters: int,
k: static int
): SignedSecretWord =
## Bernstein-Yang half-delta (theta) batch of divsteps
##
## Output:
## - return theta for the next batch of divsteps
## - mutate t, the transition matrix to apply `numIters` divsteps at once
## t is scaled by 2ᵏ
##
## Input:
## - f0, bottom limb of f
## - g0, bottom limb of g
## - numIters, number of iterations requested in this batch of divsteps
## - k, the maximum batch size, transition matrix is scaled by 2ᵏ
var
u = SignedSecretWord(1 shl k)
v = SignedSecretWord(0)
q = SignedSecretWord(0)
r = SignedSecretWord(1 shl k)
f = f0
g = g0
theta = theta
for i in k-numIters ..< k:
debug:
func reportLoop() =
debugEcho " iterations: [", k-numIters, ", ", k, ")", " (", numIters, " iterations in total)"
debugEcho " i: ", i, ", theta: ", int(theta)
# debugEcho " f: 0b", BiggestInt(f).toBin(64), ", g: 0b", BiggestInt(g).toBin(64), " | f: ", int(f), ", g: ", int(g)
# debugEcho " u: 0b", BiggestInt(u).toBin(64), ", v: 0b", BiggestInt(v).toBin(64), " | u: ", int(u), ", v: ", int(v)
# debugEcho " q: 0b", BiggestInt(q).toBin(64), ", r: 0b", BiggestInt(r).toBin(64), " | q: ", int(q), ", r: ", int(r)
doAssert (BaseType(f) and 1) == 1, (reportLoop(); "f must be odd)")
doAssert bool(not(uint(u or v or q or r) and (if i == 0: high(uint) else: high(uint) shr (i - 1)))), (reportLoop(); "Min trailing zeros count decreases at each iteration")
doAssert bool(u.ashr(k-i)*f0 + v.ashr(k-i)*g0 == f.lshl(i)), (reportLoop(); "Applying the transition matrix to (f₀, g₀) returns current (f, g)")
doAssert bool(q.ashr(k-i)*f0 + r.ashr(k-i)*g0 == g.lshl(i)), (reportLoop(); "Applying the transition matrix to (f₀, g₀) returns current (f, g)")
# Conditional masks for (theta < 0) and g odd
let c1 = theta.isNegMask()
let c2 = g.isOddMask()
# x, y, z, conditional complement of f, u, v
let x = f xor c1
let y = u xor c1
let z = v xor c1
# conditional substraction from g, q, r
g.csub(x, c2)
q.csub(y, c2)
r.csub(z, c2)
# c3 = (theta >= 0) and g odd
let c3 = c2 and not c1
# theta = -theta or theta+1
theta = (theta xor c3) + SignedSecretWord(1)
# Conditional rollback substraction
f.cadd(g, c3)
u.cadd(q, c3)
v.cadd(r, c3)
# Shifts
g = g.lshr(1)
q = q.ashr(1)
r = r.ashr(1)
t.u = u
t.v = v
t.q = q
t.r = r
debug:
doAssert bool(u*f0 + v*g0 == f.lshl(k)), "Applying the final matrix to (f₀, g₀) gives the final (f, g)"
doAssert bool(q*f0 + r*g0 == g.lshl(k)), "Applying the final matrix to (f₀, g₀) gives the final (f, g)"
doAssert checkDeterminant(t, u, v, q, r, k, numIters)
return theta
func matVecMul_shr_k_mod_M[N, E: static int](
t: TransitionMatrix,
d, e: var LimbsUnsaturated[N, E],
k: static int,
M: LimbsUnsaturated[N, E],
invMod2powK: SecretWord
) =
## Compute
##
## [u v] [d]
## [q r]/2ᵏ.[e] mod M
##
## d, e will be in range (-2*modulus,modulus)
## and output limbs in (-2ᵏ, 2ᵏ)
static: doAssert k == WordBitWidth - E
const Max = SignedSecretWord(MaxWord shr E)
let
u = t.u
v = t.v
q = t.q
r = t.r
let sign_d = d.isNegMask()
let sign_e = e.isNegMask()
# Double-signed-word carries
var cd, ce: DSWord
# First iteration of [u v] [d]
# [q r].[e]
cd.slincombAccNoCarry(u, d[0], v, e[0])
ce.slincombAccNoCarry(q, d[0], r, e[0])
# Compute me and md, multiples of M
# such as the bottom k bits if d and e are 0
# This allows fusing division by 2ᵏ
# i.e. (mx * M) mod 2ᵏ = x mod 2ᵏ
var md, me = SignedSecretWord(0)
md.cadd(u, sign_d)
md.cadd(v, sign_e)
me.cadd(q, sign_d)
me.cadd(r, sign_e)
md = md - (SignedSecretWord(invMod2powK * SecretWord(cd.lo) + SecretWord(md)) and Max)
me = me - (SignedSecretWord(invMod2powK * SecretWord(ce.lo) + SecretWord(me)) and Max)
# First iteration of [u v] [d] [md]
# [q r].[e] + [me].M[0]
# k bottom bits are 0
cd.smulAccNoCarry(md, M[0])
ce.smulAccNoCarry(me, M[0])
cd.ashr(k)
ce.ashr(k)
for i in 1 ..< N:
cd.slincombAccNoCarry(u, d[i], v, e[i])
ce.slincombAccNoCarry(q, d[i], r, e[i])
cd.smulAccNoCarry(md, M[i])
ce.smulAccNoCarry(me, M[i])
d[i-1] = cd.lo and Max
e[i-1] = ce.lo and Max
cd.ashr(k)
ce.ashr(k)
d[N-1] = cd.lo
e[N-1] = ce.lo
func matVecMul_shr_k[N, E: static int](
t: TransitionMatrix,
f, g: var LimbsUnsaturated[N, E],
k: static int
) =
## Compute
##
## [u v] [f]
## [q r].[g] / 2ᵏ
static: doAssert k == WordBitWidth - E
const Max = SignedSecretWord(MaxWord shr E)
let
u = t.u
v = t.v
q = t.q
r = t.r
# Double-signed-word carries
var cf, cg: DSWord
# First iteration of [u v] [f]
# [q r].[g]
cf.slincombAccNoCarry(u, f[0], v, g[0])
cg.slincombAccNoCarry(q, f[0], r, g[0])
# bottom k bits are zero by construction
debug:
doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo)
doAssert BaseType(cg.lo and Max) == 0, "bottom k bits should be 0, cg.lo: " & $BaseType(cg.lo)
cf.ashr(k)
cg.ashr(k)
for i in 1 ..< N:
cf.slincombAccNoCarry(u, f[i], v, g[i])
cg.slincombAccNoCarry(q, f[i], r, g[i])
f[i-1] = cf.lo and Max
g[i-1] = cg.lo and Max
cf.ashr(k)
cg.ashr(k)
f[N-1] = cf.lo
g[N-1] = cg.lo
func invmodImpl[N, E](
a: var LimbsUnsaturated[N, E],
F, M: LimbsUnsaturated[N, E],
invMod2powK: SecretWord,
k, bits: static int) =
## Modular inversion using Bernstein-Yang algorithm
## r ≡ F.a⁻¹ (mod M)
# theta = delta-1/2, delta starts at 1/2 for the half-delta variant
var theta = SignedSecretWord(0)
var d{.noInit.}, e{.noInit.}: LimbsUnsaturated[N, E]
var f{.noInit.}, g{.noInit.}: LimbsUnsaturated[N, E]
d.setZero()
e = F
# g < f for partitioning / iteration count formula
f = M
g = a
const partition = partitionDivsteps(bits, k)
for i in 0 ..< partition.numChunks:
var t{.noInit.}: TransitionMatrix
let numIters = partition.chunkSize + int(i < partition.cutoff)
# Compute transition matrix and next theta
theta = t.batchedDivsteps(theta, f[0], g[0], numIters, k)
# Apply the transition matrix
# [u v] [d]
# [q r]/2ᵏ.[e] mod M
t.matVecMul_shr_k_mod_M(d, e, k, M, invMod2powK)
# [u v] [f]
# [q r]/2ᵏ.[g]
t.matVecMul_shr_k(f, g, k)
d.canonicalize(signMask = f.isNegMask(), M)
a = d
func invmod*(
r: var Limbs, a: Limbs,
F, M: Limbs, bits: static int) =
## Compute the scaled modular inverse of ``a`` modulo M
## r ≡ F.a⁻¹ (mod M)
##
## M MUST be odd, M does not need to be prime.
## ``a`` MUST be less than M.
const Excess = 2
const k = WordBitwidth - Excess
const NumUnsatWords = (bits + k - 1) div k
# Convert values to unsaturated repr
var m2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess]
var factor {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess]
m2.fromPackedRepr(M)
factor.fromPackedRepr(F)
let m0invK = SecretWord invMod2powK(BaseType M[0], k)
var a2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess]
a2.fromPackedRepr(a)
a2.invmodImpl(factor, m2, m0invK, k, bits)
r.fromUnsatRepr(a2)
func invmod*(
r: var Limbs, a: Limbs,
F, M: static Limbs, bits: static int) =
## Compute the scaled modular inverse of ``a`` modulo M
## r ≡ F.a⁻¹ (mod M)
##
## with F and M known at compile-time
##
## M MUST be odd, M does not need to be prime.
## ``a`` MUST be less than M.
const Excess = 2
const k = WordBitwidth - Excess
const NumUnsatWords = (bits + k - 1) div k
# Convert values to unsaturated repr
const m2 = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(M)
const factor = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(F)
const m0invK = SecretWord invMod2powK(BaseType M[0], k)
var a2 {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess]
a2.fromPackedRepr(a)
a2.invmodImpl(factor, m2, m0invK, k, bits)
r.fromUnsatRepr(a2)

View File

@ -0,0 +1,385 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/common,
../primitives
type
SignedSecretWord* = distinct SecretWord
LimbsUnsaturated*[N, Excess: static int] = object
## An array of signed secret words
## with each word having their top Excess bits unused between function calls
## This allows efficient handling of carries and signs without intrinsics or assembly.
#
# Comparison with packed representation:
#
# Packed representation
# - pro: uses less words (important for multiplication which is O(n²) with n the number of words)
# - pro: less "mental overhead" to keep track (clear/shift) excess bits
# - con: addition-with-carry require compiler intrinsics or inline assembly.
# Compiler codegen may be incredibly bad (GCC).
# - con: addition-with-carry cannot use instruction-level-parallelism or SIMD vectorization
# due to the dependency chain.
# - con: on x86 addition-with-carry dependency chain has high latency
#
# Unsaturated representation:
# - pro: portable, addition-with-carry can be implemented without compiler support or inline assembly
# - con: "mental overhead" to keep track of used free bits in algorithms, shifts, multiplication, division, ...
# - con: substraction-with-borrow internally requires signed integers
# or immediate canonicalization
# or adding a multiple of the modulus to avoid underflows
# - con: multiplication requires immediate canonicalization
# - con: require arithmetic right-shift if using signed integers
# - con: more memory usage
# - con: may be slower due to using more words
# - con: multiple representation of integers
# - pro: can do many additions before having to canonicalize
#
# Constantine used to have an unsaturated representation by default
# before refactor PR: https://github.com/mratsim/constantine/pull/17
# Unsaturated representation has been advocated (in particular for generalized mersenne primes)
# by:
# - https://bearssl.org/bigint.html
# - https://cryptojedi.org/peter/data/pairing-20131122.pdf
# - https://milagro.apache.org/docs/amcl-overview/#representation
# - https://eprint.iacr.org/2017/437
#
# In practice:
# - Addition is not a bottleneck in cryptography, multiplication is.
# Naive asymptotic analysis, makes it 4x~6x more costly
# on 256-bit (4-limbs) inputs to 384-bit (6-limbs) inputs
# - CPUs have significantly reduce carry latencies.
# https://www.agner.org/optimize/instruction_tables.pdf
# - Nehalem (Intel 2008) used to have 0.33 ADD (reg, reg), 1 ADD (reg, mem) and 2 ADC reciprcal throughput (ADD 6x faster than ADC)
# - Ice Lake (Intel 2019) is 0.25 ADD (reg, reg), 0.5 ADD (reg, mem) and 1 ADC reciprocal throughput
# - Zen 3 (AMD 2020) is 0.25 ADD (reg, reg/mem) and 1 ADC reciprocal throughput
# - Introducing inline assembly is easier than dealing with signed integers
# - Inline assembly is required to ensure constant-time properties
# as compilers get smarters
# - Canonicalization has an overhead, it just delays (and potentially batches) paying for the dependencing chain.
words*: array[N, SignedSecretWord]
# ############################################################
#
# Accessors
#
# ############################################################
template `[]`*(a: LimbsUnsaturated, idx: int): SignedSecretWord =
a.words[idx]
template `[]=`*(a: LimbsUnsaturated, idx: int, val: SignedSecretWord) =
a.words[idx] = val
# ############################################################
#
# Conversion
#
# ############################################################
# {.push checks:off.} # Avoid IndexDefect and Int overflows
func fromPackedRepr*[LU, E, LP: static int](
dst: var LimbsUnsaturated[LU, E],
src: Limbs[LP]) =
## Converts from an packed representation to an unsaturated representation
const UnsatBitWidth = WordBitWidth-E
const Max = MaxWord shr E
static:
# Destination and Source size are consistent
doAssert (LU-1) * UnsatBitWidth <= WordBitwidth * LP, block:
"\n (LU-1) * UnsatBitWidth: " & $(LU-1) & " * " & $UnsatBitWidth & " = " & $((LU-1) * UnsatBitWidth) &
"\n WordBitwidth * LP: " & $WordBitwidth & " * " & $LP & " = " & $(WordBitwidth * LP)
var
srcIdx, dstIdx = 0
hi, lo = Zero
accLen = 0
while srcIdx < src.len:
# Form a 2-word buffer (hi, lo)
let w = if src_idx < src.len: src[srcIdx]
else: Zero
inc srcIdx
if accLen == 0:
lo = w and Max
hi = w shr UnsatBitWidth
else:
lo = (lo or (w shl accLen)) and Max
hi = w shr (UnsatBitWidth - accLen)
accLen += WordBitWidth
while accLen >= UnsatBitWidth:
let s = min(accLen, UnsatBitWidth)
dst[dstIdx] = SignedSecretWord lo
dstIdx += 1
accLen -= s
lo = ((lo shr s) or (hi shl (UnsatBitWidth - s))) and Max
hi = hi shr s
if dstIdx < dst.words.len:
dst[dstIdx] = SignedSecretWord lo
for i in dst_idx + 1 ..< dst.words.len:
dst[i] = SignedSecretWord Zero
func fromPackedRepr*(T: type LimbsUnsaturated, src: Limbs): T =
## Converts from an packed representation to an unsaturated representation
result.fromPackedRepr(src)
func fromUnsatRepr*[LU, E, LP: static int](
dst: var Limbs[LP],
src: LimbsUnsaturated[LU, E]) =
## Converts from an packed representation to an unsaturated representation
const UnsatBitWidth = WordBitWidth-E
static:
# Destination and Source size are consistent
doAssert (LU-1) * UnsatBitWidth <= WordBitwidth * LP, block:
"\n (LU-1) * UnsatBitWidth: " & $(LU-1) & " * " & $UnsatBitWidth & " = " & $((LU-1) * UnsatBitWidth) &
"\n WordBitwidth * LP: " & $WordBitwidth & " * " & $LP & " = " & $(WordBitwidth * LP)
var
srcIdx, dstIdx = 0
acc = Zero
accLen = 0
for src_idx in 0 ..< src.words.len:
let nextWord = SecretWord src[srcIdx]
# buffer reads
acc = acc or (nextWord shl accLen)
accLen += UnsatBitWidth
# if full, dump
if accLen >= WordBitWidth:
dst[dstIdx] = acc
inc dstIdx
accLen -= WordBitWidth
acc = nextWord shr (UnsatBitWidth - accLen)
if dst_idx < dst.len:
dst[dst_idx] = acc
for i in dst_idx + 1 ..< dst.len:
dst[i] = Zero
# {.pop.}
# ############################################################
#
# Initialization
#
# ############################################################
func setZero*(a: var LimbsUnsaturated) =
## Set ``a`` to 0
for i in 0 ..< a.words.len:
a[i] = SignedSecretWord(0)
func setOne*(a: var LimbsUnsaturated) =
## Set ``a`` to 1
a[0] = SignedSecretWord(1)
for i in 1 ..< a.words.len:
a[i] = SignedSecretWord(0)
# ############################################################
#
# Arithmetic
#
# ############################################################
# Workaround bug
func `xor`*(x,y: SecretWord): SecretWord {.inline.} =
# For some reason the template defined in constant_time.nim isn't found
SecretWord(x.BaseType xor y.BaseType)
when sizeof(int) == 8 and not defined(Constantine32):
type
SignedBaseType* = int64
else:
type
SignedBaseType* = int32
template fmap(x: SignedSecretWord, op: untyped, y: SignedSecretWord): SignedSecretWord =
## Unwrap x and y from their distinct type
## Apply op, and rewrap them
SignedSecretWord(op(SecretWord(x), SecretWord(y)))
template fmapAsgn(x: SignedSecretWord, op: untyped, y: SignedSecretWord) =
## Unwrap x and y from their distinct type
## Apply assignment op, and rewrap them
op(SecretWord(x), SecretWord(y))
template `and`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `and`, y)
template `or`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `or`, y)
template `xor`*(x, y: SignedSecretWord): SignedSecretWord = SignedSecretWord(BaseType(x) xor BaseType(y))
template `not`*(x: SignedSecretWord): SignedSecretWord = SignedSecretWord(not SecretWord(x))
template `+`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `+`, y)
template `+=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `+=`, y)
template `-`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `-`, y)
template `-=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `-=`, y)
template `-`*(x: SignedSecretWord): SignedSecretWord =
# We don't use Nim signed integers to avoid range checks
SignedSecretWord(-SecretWord(x))
template `*`*(x, y: SignedSecretWord): SignedSecretWord =
# Warning ⚠️ : We assume that mul hardware multiplication is constant time
# but this is not always true, especially on ARMv7 and ARMv9
fmap(x, `*`, y)
# shifts
template ashr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Arithmetic right shift
# We need to cast to Nim ints without Nim checks
SignedSecretWord(cast[SignedBaseType](x).ashr(y))
template lshr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Logical right shift
SignedSecretWord(SecretWord(x) shr y)
template lshl*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Logical left shift
SignedSecretWord(SecretWord(x) shl y)
# ############################################################
#
# Hardened Boolean primitives
#
# ############################################################
template `==`*(x, y: SignedSecretWord): SecretBool =
SecretWord(x) == SecretWord(y)
# ############################################################
#
# Conditional arithmetic
#
# ############################################################
# SignedSecretWord
# ----------------
func isNegMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is negative
## and 0 otherwise
a.ashr(WordBitWidth-1)
func isOddMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is odd
## and 0 otherwise
-(a and SignedSecretWord(1))
func cneg*(
a: SignedSecretWord,
mask: SignedSecretWord): SignedSecretWord {.inline.} =
## Conditionally negate `a`
## mask must be 0 (0x00000...0000) (no negation)
## or -1 (0xFFFF...FFFF) (negation)
(a xor mask) - mask
func cadd*(
a: var SignedSecretWord,
b: SignedSecretWord,
mask: SignedSecretWord) {.inline.} =
## Conditionally add `b` to `a`
## mask must be 0 (0x00000...0000) (no addition)
## or -1 (0xFFFF...FFFF) (addition)
a = a + (b and mask)
func csub*(
a: var SignedSecretWord,
b: SignedSecretWord,
mask: SignedSecretWord) {.inline.} =
## Conditionally substract `b` from `a`
## mask must be 0 (0x00000...0000) (no substraction)
## or -1 (0xFFFF...FFFF) (substraction)
a = a - (b and mask)
# UnsaturatedLimbs
# ----------------
func isNegMask*(a: LimbsUnsaturated): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is negative
## and 0 otherwise
a[a.words.len-1].ashr(WordBitWidth - a.Excess + 1)
func cneg*(
a: var LimbsUnsaturated,
mask: SignedSecretWord) {.inline.} =
## Conditionally negate `a`
## mask must be 0 (0x00000...0000) (no negation)
## or -1 (0xFFFF...FFFF) (negation)
##
## Carry propagation is deferred
for i in 0 ..< a.words.len:
a[i] = a[i].cneg(mask)
func cadd*(
a: var LimbsUnsaturated,
b: LimbsUnsaturated,
mask: SignedSecretWord) {.inline.} =
## Conditionally add `b` to `a`
## mask must be 0 (0x00000...0000) (no addition)
## or -1 (0xFFFF...FFFF) (addition)
##
## Carry propagation is deferred
for i in 0 ..< a.words.len:
a[i].cadd(b[i], mask)
# ############################################################
#
# Double-Width signed arithmetic
#
# ############################################################
type DSWord* = object
lo*, hi*: SignedSecretWord
func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
## Signed accumulated multiplication
## (_, hi, lo) += a*b
## This assumes no overflowing
var UV: array[2, SecretWord]
var carry: Carry
smul(UV[1], UV[0], SecretWord a, SecretWord b)
addC(carry, UV[0], UV[0], SecretWord r.lo, Carry(0))
addC(carry, UV[1], UV[1], SecretWord r.hi, carry)
r.lo = SignedSecretWord UV[0]
r.hi = SignedSecretWord UV[1]
func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
## Accumulated linear combination
## (_, hi, lo) += a*u + b*v
## This assumes no overflowing
var carry: Carry
var x1, x0, y1, y0: SecretWord
smul(x1, x0, SecretWord a, SecretWord u)
addC(carry, x0, x0, SecretWord r.lo, Carry(0))
addC(carry, x1, x1, SecretWord r.hi, carry)
smul(y1, y0, SecretWord b, SecretWord v)
addC(carry, x0, x0, y0, Carry(0))
addC(carry, x1, x1, y1, carry)
r.lo = SignedSecretWord x0
r.hi = SignedSecretWord x1
func ashr*(
r: var DSWord,
k: SomeInteger) {.inline.} =
## Arithmetic right-shift of a double-word
## This does not normalize the excess bits
r.lo = r.lo.lshr(k) or r.hi.lshl(WordBitWidth - k)
r.hi = r.hi.ashr(k)

View File

@ -1,204 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BLS12-377
#
# ############################################################
func inv_addchain*(r: var Fp[BLS12_377], a: Fp[BLS12_377]) =
let a = a # ensure a.inv_addchain(a) is OK
var
x10 {.noInit.}: Fp[BLS12_377]
x11 {.noInit.}: Fp[BLS12_377]
x100 {.noInit.}: Fp[BLS12_377]
x101 {.noInit.}: Fp[BLS12_377]
x111 {.noInit.}: Fp[BLS12_377]
x1001 {.noInit.}: Fp[BLS12_377]
x1011 {.noInit.}: Fp[BLS12_377]
x1111 {.noInit.}: Fp[BLS12_377]
x10001 {.noInit.}: Fp[BLS12_377]
x10011 {.noInit.}: Fp[BLS12_377]
x10111 {.noInit.}: Fp[BLS12_377]
x11011 {.noInit.}: Fp[BLS12_377]
x11101 {.noInit.}: Fp[BLS12_377]
x11111 {.noInit.}: Fp[BLS12_377]
x110100 {.noInit.}: Fp[BLS12_377]
x11010000 {.noInit.}: Fp[BLS12_377]
x11010111 {.noInit.}: Fp[BLS12_377]
x10 .square(a)
x11 .prod(a, x10)
x100 .prod(a, x11)
x101 .prod(a, x100)
x111 .prod(x10, x101)
x1001 .prod(x10, x111)
x1011 .prod(x10, x1001)
x1111 .prod(x100, x1011)
x10001 .prod(x10, x1111)
x10011 .prod(x10, x10001)
x10111 .prod(x100, x10011)
x11011 .prod(x100, x10111)
x11101 .prod(x10, x11011)
x11111 .prod(x10, x11101)
x110100 .prod(x10111, x11101)
x11010000 .square_repeated(x110100, 2)
x11010111 .prod(x111, x11010000)
# 18 operations
# TODO: we can accumulate in a partially reduced
# doubled-size `r` to avoid the final substractions.
# and only reduce at the end.
# This requires the number of op to be less than log2(p) == 381
# 18 + 18 = 36 operations
r.square_repeated(x11010111, 8)
r *= x11101
r.square_repeated(7)
r *= x10001
r.square()
# 36 + 14 = 50 operations
r *= a
r.square_repeated(9)
r *= x10111
r.square_repeated(2)
r *= x11
# 50 + 21 = 71 operations
r.square_repeated(6)
r *= x101
r.square_repeated(4)
r *= a
r.square_repeated(9)
# 71 + 13 = 84 operations
r *= x11101
r.square_repeated(5)
r *= x1011
r.square_repeated(5)
r *= x11
# 84 + 21 = 105 operations
r.square_repeated(8)
r *= x11101
r.square()
r *= a
r.square_repeated(10)
# 105 + 20 = 125 operations
r *= x10111
r.square_repeated(12)
r *= x11011
r.square_repeated(5)
r *= x101
# 125 + 22 = 147 operations
r.square_repeated(7)
r *= x101
r.square_repeated(6)
r *= x1001
r.square_repeated(7)
# 147 + 11 = 158 operations
r *= x11101
r.square_repeated(5)
r *= x10001
r.square_repeated(3)
r *= x101
# 158 + 23 = 181 operations
r.square_repeated(8)
r *= x10001
r.square_repeated(6)
r *= x11011
r.square_repeated(7)
# 181 + 19 = 200 operations
r *= x11111
r.square_repeated(4)
r *= x11
r.square_repeated(12)
r *= x1111
# 200 + 19 = 219 operations
r.square_repeated(4)
r *= x101
r.square_repeated(8)
r *= x10011
r.square_repeated(5)
# 219 + 13 = 232 operations
r *= x10001
r.square_repeated(3)
r *= x111
r.square_repeated(7)
r *= x1111
# 232 + 22 = 254 operations
r.square_repeated(5)
r *= x1111
r.square_repeated(7)
r *= x11011
r.square_repeated(8)
# 254 + 13 = 269 operations
r *= x10001
r.square_repeated(6)
r *= x11111
r.square_repeated(6)
r *= x11101
# 269 + 35 = 304 operations
r.square_repeated(9)
r *= x1001
r.square_repeated(5)
r *= x1001
r.square_repeated(19)
# 304 + 17 = 321 operations
r *= x10111
r.square_repeated(8)
r *= x1011
r.square_repeated(6)
r *= x10111
# 321 + 16 = 337 operations
r.square_repeated(4)
r *= x101
r.square_repeated(4)
r *= a
r.square_repeated(6)
# 337 + 29 = 376 operations
r *= x11
r.square_repeated(29)
r *= a
r.square_repeated(7)
r *= x101
# 376 + 16 = 392 operations
r.square_repeated(9)
r *= x10001
r.square_repeated(6)
# 392 + 8*6 = 440 operations
for _ in 0 ..< 8:
r *= x11111
r.square_repeated(5)
r *= x11111
r.square()
r *= a
# Total 443 operations

View File

@ -1,220 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BLS12-381
#
# ############################################################
func inv_addchain*(r: var Fp[BLS12_381], a: Fp[BLS12_381]) =
var
x10 {.noinit.}: Fp[BLS12_381]
x100 {.noinit.}: Fp[BLS12_381]
x1000 {.noinit.}: Fp[BLS12_381]
x1001 {.noinit.}: Fp[BLS12_381]
x1011 {.noinit.}: Fp[BLS12_381]
x1101 {.noinit.}: Fp[BLS12_381]
x10001 {.noinit.}: Fp[BLS12_381]
x10100 {.noinit.}: Fp[BLS12_381]
x11001 {.noinit.}: Fp[BLS12_381]
x11010 {.noinit.}: Fp[BLS12_381]
x110100 {.noinit.}: Fp[BLS12_381]
x110110 {.noinit.}: Fp[BLS12_381]
x110111 {.noinit.}: Fp[BLS12_381]
x1001101 {.noinit.}: Fp[BLS12_381]
x1001111 {.noinit.}: Fp[BLS12_381]
x1010101 {.noinit.}: Fp[BLS12_381]
x1011101 {.noinit.}: Fp[BLS12_381]
x1100111 {.noinit.}: Fp[BLS12_381]
x1101001 {.noinit.}: Fp[BLS12_381]
x1110111 {.noinit.}: Fp[BLS12_381]
x1111011 {.noinit.}: Fp[BLS12_381]
x10001001 {.noinit.}: Fp[BLS12_381]
x10010101 {.noinit.}: Fp[BLS12_381]
x10010111 {.noinit.}: Fp[BLS12_381]
x10101001 {.noinit.}: Fp[BLS12_381]
x10110001 {.noinit.}: Fp[BLS12_381]
x10111111 {.noinit.}: Fp[BLS12_381]
x11000011 {.noinit.}: Fp[BLS12_381]
x11010000 {.noinit.}: Fp[BLS12_381]
x11010111 {.noinit.}: Fp[BLS12_381]
x11100001 {.noinit.}: Fp[BLS12_381]
x11100101 {.noinit.}: Fp[BLS12_381]
x11101011 {.noinit.}: Fp[BLS12_381]
x11110101 {.noinit.}: Fp[BLS12_381]
x11111111 {.noinit.}: Fp[BLS12_381]
x10 .square(a)
x100 .square(x10)
x1000 .square(x100)
x1001 .prod(a, x1000)
x1011 .prod(x10, x1001)
x1101 .prod(x10, x1011)
x10001 .prod(x100, x1101)
x10100 .prod(x1001, x1011)
x11001 .prod(x1000, x10001)
x11010 .prod(a, x11001)
x110100 .square(x11010)
x110110 .prod(x10, x110100)
x110111 .prod(a, x110110)
x1001101 .prod(x11001, x110100)
x1001111 .prod(x10, x1001101)
x1010101 .prod(x1000, x1001101)
x1011101 .prod(x1000, x1010101)
x1100111 .prod(x11010, x1001101)
x1101001 .prod(x10, x1100111)
x1110111 .prod(x11010, x1011101)
x1111011 .prod(x100, x1110111)
x10001001 .prod(x110100, x1010101)
x10010101 .prod(x11010, x1111011)
x10010111 .prod(x10, x10010101)
x10101001 .prod(x10100, x10010101)
x10110001 .prod(x1000, x10101001)
x10111111 .prod(x110110, x10001001)
x11000011 .prod(x100, x10111111)
x11010000 .prod(x1101, x11000011)
x11010111 .prod(x10100, x11000011)
x11100001 .prod(x10001, x11010000)
x11100101 .prod(x100, x11100001)
x11101011 .prod(x10100, x11010111)
x11110101 .prod(x10100, x11100001)
x11111111 .prod(x10100, x11101011)
# 35 operations
# TODO: we can accumulate in a partially reduced
# doubled-size `r` to avoid the final substractions.
# and only reduce at the end.
# This requires the number of op to be less than log2(p) == 381
# 35 + 22 = 57 operations
r.prod(x10111111, x11100001)
r.square_repeated(8)
r *= x10001
r.square_repeated(11)
r *= x11110101
# 57 + 28 = 85 operations
r.square_repeated(11)
r *= x11100101
r.square_repeated(8)
r *= x11111111
r.square_repeated(7)
# 85 + 22 = 107 operations
r *= x1001101
r.square_repeated(9)
r *= x1101001
r.square_repeated(10)
r *= x10110001
# 107+24 = 131 operations
r.square_repeated(7)
r *= x1011101
r.square_repeated(9)
r *= x1111011
r.square_repeated(6)
# 131+23 = 154 operations
r *= x11001
r.square_repeated(11)
r *= x1101001
r.square_repeated(9)
r *= x11101011
# 154+28 = 182 operations
r.square_repeated(10)
r *= x11010111
r.square_repeated(6)
r *= x11001
r.square_repeated(10)
# 182+23 = 205 operations
r *= x1110111
r.square_repeated(9)
r *= x10010111
r.square_repeated(11)
r *= x1001111
# 205+30 = 235 operations
r.square_repeated(10)
r *= x11100001
r.square_repeated(9)
r *= x10001001
r.square_repeated(9)
# 235+21 = 256 operations
r *= x10111111
r.square_repeated(8)
r *= x1100111
r.square_repeated(10)
r *= x11000011
# 256+28 = 284 operations
r.square_repeated(9)
r *= x10010101
r.square_repeated(12)
r *= x1111011
r.square_repeated(5)
# 284 + 21 = 305 operations
r *= x1011
r.square_repeated(11)
r *= x1111011
r.square_repeated(7)
r *= x1001
# 305+32 = 337 operations
r.square_repeated(13)
r *= x11110101
r.square_repeated(9)
r *= x10111111
r.square_repeated(8)
# 337+22 = 359 operations
r *= x11111111
r.square_repeated(8)
r *= x11101011
r.square_repeated(11)
r *= x10101001
# 359+24 = 383 operations
r.square_repeated(8)
r *= x11111111
r.square_repeated(8)
r *= x11111111
r.square_repeated(6)
# 383+22 = 405 operations
r *= x110111
r.square_repeated(10)
r *= x11111111
r.square_repeated(9)
r *= x11111111
# 405+26 = 431 operations
r.square_repeated(8)
r *= x11111111
r.square_repeated(8)
r *= x11111111
r.square_repeated(8)
# 431+19 = 450 operations
r *= x11111111
r.square_repeated(7)
r *= x1010101
r.square_repeated(9)
r *= x10101001
# Total 450 operations:
# - 74 multiplications
# - 376 squarings

View File

@ -1,98 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BN254-Nogami
#
# ############################################################
func inv_addchain*(r: var Fp[BN254_Nogami], a: Fp[BN254_Nogami]) =
var
x100 {.noInit.}: Fp[BN254_Nogami]
x1000 {.noInit.}: Fp[BN254_Nogami]
x1100 {.noInit.}: Fp[BN254_Nogami]
x1101 {.noInit.}: Fp[BN254_Nogami]
x10001 {.noInit.}: Fp[BN254_Nogami]
x100010 {.noInit.}: Fp[BN254_Nogami]
x1000100 {.noInit.}: Fp[BN254_Nogami]
x1010101 {.noInit.}: Fp[BN254_Nogami]
x100 .square_repeated(a, 2)
x1000 .square(x100)
x1100 .prod(x100, x1000)
x1101 .prod(a, x1100)
x10001 .prod(x100, x1101)
x100010 .square(x10001)
x1000100 .square(x100010)
x1010101 .prod(x10001, x1000100)
# 9 operations
var
r13 {.noInit.}: Fp[BN254_Nogami]
r17 {.noInit.}: Fp[BN254_Nogami]
r18 {.noInit.}: Fp[BN254_Nogami]
r23 {.noInit.}: Fp[BN254_Nogami]
r26 {.noInit.}: Fp[BN254_Nogami]
r27 {.noInit.}: Fp[BN254_Nogami]
r28 {.noInit.}: Fp[BN254_Nogami]
r36 {.noInit.}: Fp[BN254_Nogami]
r38 {.noInit.}: Fp[BN254_Nogami]
r39 {.noInit.}: Fp[BN254_Nogami]
r40 {.noInit.}: Fp[BN254_Nogami]
r13.square_repeated(x1010101, 2)
r13 *= x100010
r13 *= x1101
r17.square(r13)
r17 *= r13
r17.square_repeated(2)
r18.prod(r13, r17)
r23.square_repeated(r18, 3)
r23 *= r18
r23 *= r17
r26.square_repeated(r23, 2)
r26 *= r23
r27.prod(r23, r26)
r28.prod(r26, r27)
r36.square(r28)
r36 *= r28
r36.square_repeated(2)
r36 *= r28
r36.square_repeated(3)
r38.prod(r28, r36)
r38 *= r27
r39.square(r38)
r40.prod(r38, r39)
r.prod(r39, r40)
r.square_repeated(3)
r *= r40
r.square_repeated(55)
r *= r38
r.square_repeated(55)
r *= r28
r.square_repeated(56)
r *= r18
r.square_repeated(56)
r *= x10001
# Total 271 operations

View File

@ -1,158 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BN254-Snarks
#
# ############################################################
func inv_addchain*(r: var Fp[BN254_Snarks], a: Fp[BN254_Snarks]) =
var
x10 {.noInit.}: Fp[BN254_Snarks]
x11 {.noInit.}: Fp[BN254_Snarks]
x101 {.noInit.}: Fp[BN254_Snarks]
x110 {.noInit.}: Fp[BN254_Snarks]
x1000 {.noInit.}: Fp[BN254_Snarks]
x1101 {.noInit.}: Fp[BN254_Snarks]
x10010 {.noInit.}: Fp[BN254_Snarks]
x10011 {.noInit.}: Fp[BN254_Snarks]
x10100 {.noInit.}: Fp[BN254_Snarks]
x10111 {.noInit.}: Fp[BN254_Snarks]
x11100 {.noInit.}: Fp[BN254_Snarks]
x100000 {.noInit.}: Fp[BN254_Snarks]
x100011 {.noInit.}: Fp[BN254_Snarks]
x101011 {.noInit.}: Fp[BN254_Snarks]
x101111 {.noInit.}: Fp[BN254_Snarks]
x1000001 {.noInit.}: Fp[BN254_Snarks]
x1010011 {.noInit.}: Fp[BN254_Snarks]
x1011011 {.noInit.}: Fp[BN254_Snarks]
x1100001 {.noInit.}: Fp[BN254_Snarks]
x1110101 {.noInit.}: Fp[BN254_Snarks]
x10010001 {.noInit.}: Fp[BN254_Snarks]
x10010101 {.noInit.}: Fp[BN254_Snarks]
x10110101 {.noInit.}: Fp[BN254_Snarks]
x10111011 {.noInit.}: Fp[BN254_Snarks]
x11000001 {.noInit.}: Fp[BN254_Snarks]
x11000011 {.noInit.}: Fp[BN254_Snarks]
x11010011 {.noInit.}: Fp[BN254_Snarks]
x11100001 {.noInit.}: Fp[BN254_Snarks]
x11100011 {.noInit.}: Fp[BN254_Snarks]
x11100111 {.noInit.}: Fp[BN254_Snarks]
x10 .square(a)
x11 .prod(x10, a)
x101 .prod(x10, x11)
x110 .prod(x101, a)
x1000 .prod(x10, x110)
x1101 .prod(x101, x1000)
x10010 .prod(x101, x1101)
x10011 .prod(x10010, a)
x10100 .prod(x10011, a)
x10111 .prod(x11, x10100)
x11100 .prod(x101, x10111)
x100000 .prod(x1101, x10011)
x100011 .prod(x11, x100000)
x101011 .prod(x1000, x100011)
x101111 .prod(x10011, x11100)
x1000001 .prod(x10010, x101111)
x1010011 .prod(x10010, x1000001)
x1011011 .prod(x1000, x1010011)
x1100001 .prod(x110, x1011011)
x1110101 .prod(x10100, x1100001)
x10010001 .prod(x11100, x1110101)
x10010101 .prod(x100000, x1110101)
x10110101 .prod(x100000, x10010101)
x10111011 .prod(x110, x10110101)
x11000001 .prod(x110, x10111011)
x11000011 .prod(x10, x11000001)
x11010011 .prod(x10010, x11000001)
x11100001 .prod(x100000, x11000001)
x11100011 .prod(x10, x11100001)
x11100111 .prod(x110, x11100001) # 30 operations
# 30 + 27 = 57 operations
r.square(x11000001)
r.square_repeated(7)
r *= x10010001
r.square_repeated(10)
r *= x11100111
r.square_repeated(7)
# 57 + 19 = 76 operations
r *= x10111
r.square_repeated(9)
r *= x10011
r.square_repeated(7)
r *= x1101
# 76 + 33 = 109 operations
r.square_repeated(14)
r *= x1010011
r.square_repeated(9)
r *= x11100001
r.square_repeated(8)
# 109 + 18 = 127 operations
r *= x1000001
r.square_repeated(10)
r *= x1011011
r.square_repeated(5)
r *= x1101
# 127 + 34 = 161 operations
r.square_repeated(8)
r *= x11
r.square_repeated(12)
r *= x101011
r.square_repeated(12)
# 161 + 25 = 186 operations
r *= x10111011
r.square_repeated(8)
r *= x101111
r.square_repeated(14)
r *= x10110101
# 186 + 28 = 214
r.square_repeated(9)
r *= x10010001
r.square_repeated(5)
r *= x1101
r.square_repeated(12)
# 214 + 22 = 236
r *= x11100011
r.square_repeated(8)
r *= x10010101
r.square_repeated(11)
r *= x11010011
# 236 + 32 = 268
r.square_repeated(7)
r *= x1100001
r.square_repeated(11)
r *= x100011
r.square_repeated(12)
# 268 + 20 = 288
r *= x1011011
r.square_repeated(9)
r *= x11000011
r.square_repeated(8)
r *= x11100111
# 288 + 15 = 303
r.square_repeated(7)
r *= x1110101
r.square_repeated(6)
r *= x101

View File

@ -1,376 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BW6-761
#
# ############################################################
func inv_addchain*(r: var Fp[BW6_761], a: Fp[BW6_761]) =
let a = a # ensure a.inv_addchain(a) is OK
var
x10 {.noInit.}: Fp[BW6_761]
x11 {.noInit.}: Fp[BW6_761]
x101 {.noInit.}: Fp[BW6_761]
x111 {.noInit.}: Fp[BW6_761]
x1001 {.noInit.}: Fp[BW6_761]
x1011 {.noInit.}: Fp[BW6_761]
x1101 {.noInit.}: Fp[BW6_761]
x1111 {.noInit.}: Fp[BW6_761]
x10001 {.noInit.}: Fp[BW6_761]
x10010 {.noInit.}: Fp[BW6_761]
x10011 {.noInit.}: Fp[BW6_761]
x10111 {.noInit.}: Fp[BW6_761]
x11001 {.noInit.}: Fp[BW6_761]
x11011 {.noInit.}: Fp[BW6_761]
x11101 {.noInit.}: Fp[BW6_761]
x11111 {.noInit.}: Fp[BW6_761]
x100001 {.noInit.}: Fp[BW6_761]
x100011 {.noInit.}: Fp[BW6_761]
x100101 {.noInit.}: Fp[BW6_761]
x100111 {.noInit.}: Fp[BW6_761]
x101001 {.noInit.}: Fp[BW6_761]
x101011 {.noInit.}: Fp[BW6_761]
x101101 {.noInit.}: Fp[BW6_761]
x101111 {.noInit.}: Fp[BW6_761]
x110001 {.noInit.}: Fp[BW6_761]
x110011 {.noInit.}: Fp[BW6_761]
x110101 {.noInit.}: Fp[BW6_761]
x110111 {.noInit.}: Fp[BW6_761]
x111001 {.noInit.}: Fp[BW6_761]
x111011 {.noInit.}: Fp[BW6_761]
x111101 {.noInit.}: Fp[BW6_761]
x1111010 {.noInit.}: Fp[BW6_761]
x1111111 {.noInit.}: Fp[BW6_761]
x11111110 {.noInit.}: Fp[BW6_761]
x11111111 {.noInit.}: Fp[BW6_761]
x10 .square(a)
x11 .prod(a, x10)
x101 .prod(x10, x11)
x111 .prod(x10, x101)
x1001 .prod(x10, x111)
x1011 .prod(x10, x1001)
x1101 .prod(x10, x1011)
x1111 .prod(x10, x1101)
x10001 .prod(x10, x1111)
x10010 .prod(a, x10001)
x10011 .prod(a, x10010)
x10111 .prod(x101, x10010)
x11001 .prod(x10, x10111)
x11011 .prod(x10, x11001)
x11101 .prod(x10, x11011)
x11111 .prod(x10, x11101)
x100001 .prod(x10, x11111)
x100011 .prod(x10, x100001)
x100101 .prod(x10, x100011)
x100111 .prod(x10, x100101)
x101001 .prod(x10, x100111)
x101011 .prod(x10, x101001)
x101101 .prod(x10, x101011)
x101111 .prod(x10, x101101)
x110001 .prod(x10, x101111)
x110011 .prod(x10, x110001)
x110101 .prod(x10, x110011)
x110111 .prod(x10, x110101)
x111001 .prod(x10, x110111)
x111011 .prod(x10, x111001)
x111101 .prod(x10, x111011)
x1111010 .square(x111101)
x1111111 .prod(x101, x1111010)
x11111110 .square(x1111111)
x11111111 .prod(a, x11111110)
# 35 operations
# TODO: we can accumulate in a partially reduced
# doubled-size `r` to avoid the final substractions.
# and only reduce at the end.
# This requires the number of op to be less than log2(p) == 381
# 35 + 8 = 43 operations
r.prod(x100001, x11111111)
r.square_repeated(3)
r *= x10111
r.square_repeated(2)
r *= a
# 43 + 22 = 65 operations
r.square_repeated(9)
r *= x1001
r.square_repeated(7)
r *= x11111
r.square_repeated(4)
# 65 + 17 = 82 operations
r *= x111
r.square_repeated(9)
r *= x1111
r.square_repeated(5)
r *= x111
# 82 + 29 = 111 operations
r.square_repeated(11)
r *= x101011
r.square_repeated(7)
r *= x100011
r.square_repeated(9)
# 111 + 28 = 139 operations
r *= x11111
r.square_repeated(8)
r *= x100101
r.square_repeated(17)
r *= x100111
# 139 + 22 = 161 operations
r.square_repeated(4)
r *= x1101
r.square_repeated(9)
r *= x11111111
r.square_repeated(7)
# 161 + 15 = 176 operations
r *= x11111
r.square_repeated(6)
r *= x10111
r.square_repeated(6)
r *= x1001
# 176 + 22 = 198 operations
r.square_repeated(4)
r *= x11
r.square_repeated(6)
r *= x11
r.square_repeated(10)
# 198 + 16 = 214 operations
r *= x110101
r.square_repeated(2)
r *= a
r.square_repeated(11)
r *= x11101
# 214 + 28 = 238 operations
r.square_repeated(6)
r *= x101
r.square_repeated(7)
r *= x1101
r.square_repeated(9)
# 238 + 21 = 259 operations
r *= x100001
r.square_repeated(7)
r *= x100101
r.square_repeated(11)
r *= x100111
# 259 + 28 = 287 operations
r.square_repeated(7)
r *= x101111
r.square_repeated(6)
r *= x11111
r.square_repeated(13)
# 287 + 25 = 302 operations
r *= x100001
r.square_repeated(6)
r *= x111011
r.square_repeated(6)
r *= x111001
# 302 + 27 = 329 operations
r.square_repeated(10)
r *= x10111
r.square_repeated(11)
r *= x111101
r.square_repeated(4)
# 329 + 17 = 346 operations
r *= x1101
r.square_repeated(8)
r *= x110001
r.square_repeated(6)
r *= x110001
# 346 + 20 = 366 operations
r.square_repeated(5)
r *= x11001
r.square_repeated(3)
r *= x11
r.square_repeated(10)
# 366 + 16 = 382 operations
r *= x100111
r.square_repeated(5)
r *= x1001
r.square_repeated(8)
r *= x11001
# 382 + 25 = 407 operations
r.square_repeated(10)
r *= x1111
r.square_repeated(7)
r *= x11101
r.square_repeated(6)
# 407 + 20 = 427 operations
r *= x11101
r.square_repeated(9)
r *= x11111111
r.square_repeated(8)
r *= x100101
# 427 + 27 = 454 operations
r.square_repeated(6)
r *= x101101
r.square_repeated(10)
r *= x100011
r.square_repeated(9)
# 454 + 20 = 474 operations
r *= x1001
r.square_repeated(8)
r *= x1101
r.square_repeated(9)
r *= x100111
# 474 + 25 = 499 operations
r.square_repeated(8)
r *= x100011
r.square_repeated(6)
r *= x101101
r.square_repeated(9)
# 499 + 16 = 515 operations
r *= x100101
r.square_repeated(4)
r *= x1111
r.square_repeated(9)
r *= x1111111
# 515 + 25 = 540 operations
r.square_repeated(6)
r *= x11001
r.square_repeated(8)
r *= x111
r.square_repeated(9)
# 540 + 15 = 555 operations
r *= x111011
r.square_repeated(5)
r *= x10011
r.square_repeated(7)
r *= x100111
# 555 + 22 = 577 operations
r.square_repeated(5)
r *= x10111
r.square_repeated(9)
r *= x111001
r.square_repeated(6)
# 577 + 14 = 591 operations
r *= x111101
r.square_repeated(9)
r *= x11111111
r.square_repeated(2)
r *= x11
# 591 + 21 = 612 operations
r.square_repeated(7)
r *= x10111
r.square_repeated(6)
r *= x10011
r.square_repeated(6)
# 612 + 18 = 630 operations
r *= x101
r.square_repeated(9)
r *= x10001
r.square_repeated(6)
r *= x11011
# 630 + 27 = 657 operations
r.square_repeated(10)
r *= x100101
r.square_repeated(7)
r *= x110011
r.square_repeated(8)
# 657 + 13 = 670 operations
r *= x111101
r.square_repeated(7)
r *= x100011
r.square_repeated(3)
r *= x111
# 670 + 26 = 696 operations
r.square_repeated(10)
r *= x1011
r.square_repeated(11)
r *= x110011
r.square_repeated(3)
# 696 + 17 = 713 operations
r *= x111
r.square_repeated(9)
r *= x101011
r.square_repeated(5)
r *= x10111
# 713 + 21 = 734 operations
r.square_repeated(7)
r *= x101011
r.square_repeated(2)
r *= x11
r.square_repeated(10)
# 734 + 19 = 753 operations
r *= x101001
r.square_repeated(10)
r *= x110111
r.square_repeated(6)
r *= x111001
# 753 + 23 = 776 operations
r.square_repeated(6)
r *= x101001
r.square_repeated(9)
r *= x100111
r.square_repeated(6)
# 776 + 12 = 788 operations
r *= x110011
r.square_repeated(7)
r *= x100001
r.square_repeated(2)
r *= x11
# 788 + 39 = 827 operations
r.square_repeated(21)
r *= a
r.square_repeated(11)
r *= x101111
r.square_repeated(5)
# 827 + 55 = 882 operations
r *= x1001
r.square_repeated(7)
r *= x11101
r.square_repeated(45)
r *= x10001
# 882 + 4 = 886 operations
r.square_repeated(3)
r *= a

View File

@ -1,60 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for BLS12-381
#
# ############################################################
func inv_addchain*(r: var Fp[Curve25519], a: Fp[Curve25519]) =
var
x10 {.noinit.}: Fp[Curve25519]
x1001 {.noinit.}: Fp[Curve25519]
x1011 {.noinit.}: Fp[Curve25519]
x10 .square(a) # 2
x1001 .square_repeated(x10, 2) # 8
x1001 *= a # 9
x1011 .prod(x10, x1001) # 11
# 5 operations
# TODO: we can accumulate in a partially reduced
# doubled-size `r` to avoid the final substractions.
# and only reduce at the end.
# This requires the number of op to be less than log2(p) == 255
template t: untyped = x10
t.square(x1011) # 22
r.prod(t, x1001) # 31 = 2⁵-1
template u: untyped = x1001
t.square_repeated(r, 5)
r *= t # 2¹⁰-1
t.square_repeated(r, 10)
t *= r # 2²⁰-1
u.square_repeated(t, 20)
t *= u # 2⁴⁰-1
t.square_repeated(10)
t *= r # 2⁵⁰-1
r.square_repeated(t, 50)
r *= t # 2¹⁰⁰-1
u.square_repeated(r, 100)
r *= u # 2²⁰⁰-1
r.square_repeated(50)
r *= t # 2²⁵⁰-1
r.square_repeated(5)
r *= x1011 # 2²⁵⁵-21 (note: 11 = 2⁵-21)

View File

@ -1,96 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../arithmetic/finite_fields
# ############################################################
#
# Specialized inversion for Secp256k1
#
# ############################################################
func inv_addchain*(r: var Fp[Secp256k1], a: Fp[Secp256k1]) {.used.}=
## We invert via Little Fermat's theorem
## a^(-1) ≡ a^(p-2) (mod p)
## with p = "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F"
## We take advantage of the prime special form to hardcode
## the sequence of squarings and multiplications for the modular exponentiation
##
## See libsecp256k1
##
## The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
## { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
## [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
var
x2{.noInit.}: Fp[Secp256k1]
x3{.noInit.}: Fp[Secp256k1]
x6{.noInit.}: Fp[Secp256k1]
x9{.noInit.}: Fp[Secp256k1]
x11{.noInit.}: Fp[Secp256k1]
x22{.noInit.}: Fp[Secp256k1]
x44{.noInit.}: Fp[Secp256k1]
x88{.noInit.}: Fp[Secp256k1]
x176{.noInit.}: Fp[Secp256k1]
x220{.noInit.}: Fp[Secp256k1]
x223{.noInit.}: Fp[Secp256k1]
x2.square(a)
x2 *= a
x3.square(x2)
x3 *= a
x6 = x3
x6.square_repeated(3)
x6 *= x3
x9 = x6
x9.square_repeated(3)
x9 *= x3
x11 = x9
x11.square_repeated(2)
x11 *= x2
x22 = x11
x22.square_repeated(11)
x22 *= x11
x44 = x22
x44.square_repeated(22)
x44 *= x22
x88 = x44
x88.square_repeated(44)
x88 *= x44
x176 = x88
x88.square_repeated(88)
x176 *= x88
x220 = x176
x220.square_repeated(44)
x220 *= x44
x223 = x220
x223.square_repeated(3)
x223 *= x3
# The final result is then assembled using a sliding window over the blocks
r = x223
r.square_repeated(23)
r *= x22
r.square_repeated(5)
r *= a
r.square_repeated(3)
r *= x2
r.square_repeated(2)
r *= a

View File

@ -9,11 +9,11 @@
import
std/macros,
../config/curves,
./bls12_377_precomputed_params,
./bls12_381_precomputed_params,
./bn254_nogami_precomputed_params,
./bn254_snarks_precomputed_params,
./bw6_761_precomputed_params
./bls12_377_g2_params,
./bls12_381_g2_params,
./bn254_nogami_g2_params,
./bn254_snarks_g2_params,
./bw6_761_g2_params
{.experimental: "dynamicBindSym".}

View File

@ -1,35 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
./bls12_377_inversion,
./bls12_381_inversion,
./bn254_nogami_inversion,
./bn254_snarks_inversion,
./bw6_761_inversion,
./secp256k1_inversion,
./curve25519_inversion
export
bls12_377_inversion,
bls12_381_inversion,
bn254_nogami_inversion,
bn254_snarks_inversion,
bw6_761_inversion,
secp256k1_inversion,
curve25519_inversion
func hasInversionAddchain*(C: static Curve): static bool =
## Is an inversion addition chain implemented for the curve.
## Note: the addition chain might be slower than Euclid-based inversion.
when C in {BN254_Nogami, BN254_Snarks, BLS12_377, BLS12_381, BW6_761, Curve25519, Secp256k1}:
true
else:
false

View File

@ -218,10 +218,10 @@ func nDimMultiScalarRecoding[M, L: static int](
k[j].div2()
k[j] += SecretWord (bji and b[0][i])
func buildLookupTable[M: static int, EC](
func buildLookupTable[M: static int, EC, ECaff](
P: EC,
endomorphisms: array[M-1, EC],
lut: var array[1 shl (M-1), EC],
lut: var array[1 shl (M-1), ECaff],
) =
## Build the lookup table from the base point P
## and the curve endomorphism
@ -251,11 +251,17 @@ func buildLookupTable[M: static int, EC](
#
# This scheme ensures 1 addition per table entry instead of a number
# of addition dependent on `u` Hamming Weight
lut[0] = P
# Step 1. Create the lookup-table in alternative coordinates
var tab {.noInit.}: array[1 shl (M-1), EC]
tab[0] = P
for u in 1'u32 ..< 1 shl (M-1):
# The recoding allows usage of 2^(n-1) table instead of the usual 2^n with NAF
let msb = u.log2_vartime() # No undefined, u != 0
lut[u].sum(lut[u.clearBit(msb)], endomorphisms[msb])
tab[u].sum(tab[u.clearBit(msb)], endomorphisms[msb])
# Step 2. Convert to affine coordinates to benefit from mixed-addition
lut.batchAffine(tab)
func tableIndex(glv: GLV_SAC, bit: int): SecretWord =
## Compose the secret table index from
@ -286,12 +292,14 @@ func scalarMulEndo*[scalBits; EC](
## Requires:
## - Cofactor to be cleared
## - 0 <= scalar < curve order
mixin affine
type ECaff = affine(EC)
const C = P.F.C # curve
static: doAssert scalBits <= C.getCurveOrderBitwidth(), "Do not use endomorphism to multiply beyond the curve order"
when P.F is Fp:
const M = 2
# 1. Compute endomorphisms
var endomorphisms {.noInit.}: array[M-1, typeof(P)]
var endomorphisms {.noInit.}: array[M-1, EC]
when P.G == G1:
endomorphisms[0] = P
endomorphisms[0].x *= C.getCubicRootOfUnity_mod_p()
@ -301,7 +309,7 @@ func scalarMulEndo*[scalBits; EC](
elif P.F is Fp2:
const M = 4
# 1. Compute endomorphisms
var endomorphisms {.noInit.}: array[M-1, typeof(P)]
var endomorphisms {.noInit.}: array[M-1, EC]
endomorphisms[0].frobenius_psi(P)
endomorphisms[1].frobenius_psi(P, 2)
endomorphisms[2].frobenius_psi(P, 3)
@ -331,10 +339,8 @@ func scalarMulEndo*[scalBits; EC](
endomorphisms[i-1].cneg(negatePoints[i])
# 4. Precompute lookup table
var lut {.noInit.}: array[1 shl (M-1), typeof(P)]
var lut {.noInit.}: array[1 shl (M-1), ECaff]
buildLookupTable(P, endomorphisms, lut)
# TODO: Montgomery simultaneous inversion (or other simultaneous inversion techniques)
# so that we use mixed addition formulas in the main loop
# 5. Recode the miniscalars
# we need the base miniscalar (that encodes the sign)
@ -346,12 +352,13 @@ func scalarMulEndo*[scalBits; EC](
recoded.nDimMultiScalarRecoding(miniScalars)
# 6. Proceed to GLV accelerated scalar multiplication
var Q {.noInit.}: typeof(P)
Q.secretLookup(lut, recoded.tableIndex(L-1))
var Q {.noInit.}: EC
var tmp {.noInit.}: ECaff
tmp.secretLookup(lut, recoded.tableIndex(L-1))
Q.fromAffine(tmp)
for i in countdown(L-2, 0):
Q.double()
var tmp {.noInit.}: typeof(Q)
tmp.secretLookup(lut, recoded.tableIndex(i))
tmp.cneg(SecretBool recoded[0][i])
Q += tmp
@ -392,34 +399,40 @@ func scalarMulEndo*[scalBits; EC](
# - 0t10 -> 0b10 is 2
# - 0t11 -> 0b11 is 3
func buildLookupTable_m2w2[EC](
func buildLookupTable_m2w2[EC, Ecaff](
P0: EC,
P1: EC,
lut: var array[8, EC],
lut: var array[8, Ecaff],
) =
## Build a lookup table for GLV with 2-dimensional decomposition
## and window of size 2
# Step 1. Create the lookup-table in alternative coordinates
var tab {.noInit.}: array[8, EC]
# with [k0, k1] the mini-scalars with digits of size 2-bit
#
# 4 = 0b100 - encodes [0b01, 0b00] ≡ P0
lut[4] = P0
tab[4] = P0
# 5 = 0b101 - encodes [0b01, 0b01] ≡ P0 - P1
lut[5].diff(lut[4], P1)
tab[5].diff(tab[4], P1)
# 7 = 0b111 - encodes [0b01, 0b11] ≡ P0 + P1
lut[7].sum(lut[4], P1)
tab[7].sum(tab[4], P1)
# 6 = 0b110 - encodes [0b01, 0b10] ≡ P0 + 2P1
lut[6].sum(lut[7], P1)
tab[6].sum(tab[7], P1)
# 0 = 0b000 - encodes [0b00, 0b00] ≡ 3P0
lut[0].double(lut[4])
lut[0] += lut[4]
tab[0].double(tab[4])
tab[0] += tab[4]
# 1 = 0b001 - encodes [0b00, 0b01] ≡ 3P0 + P1
lut[1].sum(lut[0], P1)
tab[1].sum(tab[0], P1)
# 2 = 0b010 - encodes [0b00, 0b10] ≡ 3P0 + 2P1
lut[2].sum(lut[1], P1)
tab[2].sum(tab[1], P1)
# 3 = 0b011 - encodes [0b00, 0b11] ≡ 3P0 + 3P1
lut[3].sum(lut[2], P1)
tab[3].sum(tab[2], P1)
# Step 2. Convert to affine coordinates to benefit from mixed-addition
lut.batchAffine(tab)
func w2Get(recoding: Recoded,
digitIdx: int): uint8 {.inline.}=
@ -477,6 +490,8 @@ func scalarMulGLV_m2w2*[scalBits; EC](
## Requires:
## - Cofactor to be cleared
## - 0 <= scalar < curve order
mixin affine
type ECaff = affine(EC)
const C = P0.F.C # curve
static: doAssert: scalBits == C.getCurveOrderBitwidth()
@ -485,7 +500,7 @@ func scalarMulGLV_m2w2*[scalBits; EC](
var P1 = P0
P1.x *= C.getCubicRootOfUnity_mod_p()
else:
var P1 {.noInit.}: typeof(P0)
var P1 {.noInit.}: EC
P1.frobenius_psi(P0, 2)
# 2. Decompose scalar into mini-scalars
@ -503,10 +518,8 @@ func scalarMulGLV_m2w2*[scalBits; EC](
P1.cneg(negatePoints[1])
# 4. Precompute lookup table
var lut {.noInit.}: array[8, EC]
var lut {.noInit.}: array[8, ECaff]
buildLookupTable_m2w2(P0, P1, lut)
# TODO: Montgomery simultaneous inversion (or other simultaneous inversion techniques)
# so that we use mixed addition formulas in the main loop
# 5. Recode the miniscalars
# we need the base miniscalar (that encodes the sign)
@ -518,15 +531,16 @@ func scalarMulGLV_m2w2*[scalBits; EC](
recoded.nDimMultiScalarRecoding(miniScalars)
# 6. Proceed to GLV accelerated scalar multiplication
var Q {.noInit.}: typeof(P0)
var Q {.noInit.}: EC
var tmp {.noInit.}: ECaff
var isNeg: SecretBool
Q.secretLookup(lut, recoded.w2TableIndex((L div 2) - 1, isNeg))
tmp.secretLookup(lut, recoded.w2TableIndex((L div 2) - 1, isNeg))
Q.fromAffine(tmp)
for i in countdown((L div 2) - 2, 0):
Q.double()
Q.double()
var tmp {.noInit.}: typeof(Q)
tmp.secretLookup(lut, recoded.w2TableIndex(i, isNeg))
tmp.cneg(isNeg)
Q += tmp

View File

@ -12,7 +12,7 @@ import
../arithmetic,
../towers,
../io/[io_fields, io_towers],
../curves/zoo_precomputed_params
../curves/zoo_g2_params
# ############################################################
#
@ -45,6 +45,14 @@ func isInf*(P: ECP_ShortW_Aff): SecretBool =
## and false otherwise
result = P.x.isZero() and P.y.isZero()
func ccopy*(P: var ECP_ShortW_Aff, Q: ECP_ShortW_Aff, ctl: SecretBool) {.inline.} =
## Constant-time conditional copy
## If ctl is true: Q is copied into P
## if ctl is false: Q is not copied and P is unmodified
## Time and memory accesses are the same whether a copy occurs or not
for fP, fQ in fields(P, Q):
ccopy(fP, fQ, ctl)
func curve_eq_rhs*[F](y2: var F, x: F, G: static Subgroup) =
## Compute the curve equation right-hand-side from field element `x`
## i.e. `y²` in `y² = x³ + a x + b`

View File

@ -34,6 +34,10 @@ type ECP_ShortW_Jac*[F; G: static Subgroup] = object
## Note that jacobian coordinates are not unique
x*, y*, z*: F
template affine*[F, G](_: type ECP_ShortW_Jac[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func `==`*(P, Q: ECP_ShortW_Jac): SecretBool =
## Constant-time equality check
## This is a costly operation
@ -379,6 +383,8 @@ func madd*[F; G: static Subgroup](
## with p in Jacobian coordinates and Q in affine coordinates
##
## R = P + Q
##
## TODO: ⚠️ cannot handle P == Q
# "madd-2007-bl" mixed addition formula - https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
# with conditional copies to handle infinity points
# Assumptions: Z2=1.
@ -516,6 +522,14 @@ func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} =
## In-place point addition
P.sum(P, Q)
func `+=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
## In-place mixed point addition
##
## TODO: ⚠️ cannot handle P == Q
var t{.noInit.}: typeof(P)
t.madd(P, Q)
P = t
func double*(P: var ECP_ShortW_Jac) {.inline.} =
## In-place point doubling
P.double(P)
@ -528,7 +542,7 @@ func diff*(r: var ECP_ShortW_Jac,
nQ.neg(Q)
r.sum(P, nQ)
func affineFromJacobian*[F; G](
func affine*[F; G](
aff: var ECP_ShortW_Aff[F, G],
jac: ECP_ShortW_Jac[F, G]) =
var invZ {.noInit.}, invZ2{.noInit.}: F
@ -536,16 +550,76 @@ func affineFromJacobian*[F; G](
invZ2.square(invZ)
aff.x.prod(jac.x, invZ2)
invZ *= invZ2
aff.y.prod(jac.y, invZ)
aff.y *= invZ2
func jacobianFromAffine*[F; G](
func fromAffine*[F; G](
jac: var ECP_ShortW_Jac[F, G],
aff: ECP_ShortW_Aff[F, G]) {.inline.} =
jac.x = aff.x
jac.y = aff.y
jac.z.setOne()
func batchAffine*[N: static int, F, G](
affs: var array[N, ECP_ShortW_Aff[F, G]],
jacs: array[N, ECP_ShortW_Jac[F, G]]) =
# Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1
# Peter L. Montgomery
# https://www.ams.org/journals/mcom/1987-48-177/S0025-5718-1987-0866113-7/S0025-5718-1987-0866113-7.pdf
# - Modern Computer Arithmetic
# Section 2.5.1 Several inversions at once
# Richard P. Brent and Paul Zimmermann
# https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf
# To avoid temporaries, we store partial accumulations
# in affs[i].x and whether z == 0 in affs[i].y
var zeroes: array[N, SecretBool]
affs[0].x = jacs[0].z
zeroes[0] = affs[0].x.isZero()
affs[0].x.csetOne(zeroes[0])
for i in 1 ..< N:
# Skip zero z-coordinates (infinity points)
var z = jacs[i].z
zeroes[i] = z.isZero()
z.csetOne(zeroes[i])
affs[i].x.prod(affs[i-1].x, z)
var accInv {.noInit.}: F
accInv.inv(affs[N-1].x)
for i in countdown(N-1, 1):
# Skip zero z-coordinates (infinity points)
var z = affs[i].x
# Extract 1/Pᵢ
var invi {.noInit.}: F
invi.prod(accInv, affs[i-1].x)
invi.csetZero(zeroes[i])
# Now convert Pᵢ to affine
var invi2 {.noinit.}: F
invi2.square(invi)
affs[i].x.prod(jacs[i].x, invi2)
invi *= invi2
affs[i].y.prod(jacs[i].y, invi)
# next iteration
invi = jacs[i].z
invi.csetOne(zeroes[i])
accInv *= invi
block: # tail
var invi2 {.noinit.}: F
accInv.csetZero(zeroes[0])
invi2.square(accInv)
affs[0].x.prod(jacs[0].x, invi2)
accInv *= invi2
affs[0].y.prod(jacs[0].y, accInv)
# ############################################################
# #
# Deriving efficient and complete Jacobian formulae #

View File

@ -34,6 +34,10 @@ type ECP_ShortW_Prj*[F; G: static Subgroup] = object
## Note that projective coordinates are not unique
x*, y*, z*: F
template affine*[F, G](_: type ECP_ShortW_Prj[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func `==`*(P, Q: ECP_ShortW_Prj): SecretBool =
## Constant-time equality check
## This is a costly operation
@ -393,7 +397,6 @@ func `+=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Prj) {.inline.} =
func `+=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Aff) {.inline.} =
## In-place mixed point addition
# used in line_addition
P.madd(P, Q)
func double*(P: var ECP_ShortW_Prj) {.inline.} =
@ -409,7 +412,7 @@ func diff*(r: var ECP_ShortW_Prj,
nQ.neg(Q)
r.sum(P, nQ)
func affineFromProjective*[F, G](
func affine*[F, G](
aff: var ECP_ShortW_Aff[F, G],
proj: ECP_ShortW_Prj[F, G]) =
var invZ {.noInit.}: F
@ -418,9 +421,63 @@ func affineFromProjective*[F, G](
aff.x.prod(proj.x, invZ)
aff.y.prod(proj.y, invZ)
func projectiveFromAffine*[F, G](
func fromAffine*[F, G](
proj: var ECP_ShortW_Prj[F, G],
aff: ECP_ShortW_Aff[F, G]) {.inline.} =
proj.x = aff.x
proj.y = aff.y
proj.z.setOne()
func batchAffine*[N: static int, F, G](
affs: var array[N, ECP_ShortW_Aff[F, G]],
projs: array[N, ECP_ShortW_Prj[F, G]]) =
# Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1
# Peter L. Montgomery
# https://www.ams.org/journals/mcom/1987-48-177/S0025-5718-1987-0866113-7/S0025-5718-1987-0866113-7.pdf
# - Modern Computer Arithmetic
# Section 2.5.1 Several inversions at once
# Richard P. Brent and Paul Zimmermann
# https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf
# To avoid temporaries, we store partial accumulations
# in affs[i].x
var zeroes: array[N, SecretBool]
affs[0].x = projs[0].z
zeroes[0] = affs[0].x.isZero()
affs[0].x.csetOne(zeroes[0])
for i in 1 ..< N:
# Skip zero z-coordinates (infinity points)
var z = projs[i].z
zeroes[i] = z.isZero()
z.csetOne(zeroes[i])
affs[i].x.prod(affs[i-1].x, z)
var accInv {.noInit.}: F
accInv.inv(affs[N-1].x)
for i in countdown(N-1, 1):
# Skip zero z-coordinates (infinity points)
var z = affs[i].x
# Extract 1/Pᵢ
var invi {.noInit.}: F
invi.prod(accInv, affs[i-1].x)
invi.csetZero(zeroes[i])
# Now convert Pᵢ to affine
affs[i].x.prod(projs[i].x, invi)
affs[i].y.prod(projs[i].y, invi)
# next iteration
invi = projs[i].z
invi.csetOne(zeroes[i])
accInv *= invi
block: # tail
accInv.csetZero(zeroes[0])
affs[0].x.prod(projs[0].x, accInv)
affs[0].y.prod(projs[0].y, accInv)

View File

@ -289,7 +289,7 @@ func diff*(r: var ECP_TwEdwards_Prj,
nQ.neg(Q)
r.sum(P, nQ)
func affineFromProjective*[F](
func affine*[F](
aff: var ECP_TwEdwards_Prj[F],
proj: ECP_TwEdwards_Prj[F]) =
var invZ {.noInit.}: F
@ -298,7 +298,7 @@ func affineFromProjective*[F](
aff.x.prod(proj.x, invZ)
aff.y.prod(proj.y, invZ)
func projectiveFromAffine*[F](
func projective*[F](
proj: var ECP_TwEdwards_Prj[F],
aff: ECP_TwEdwards_Prj[F]) {.inline.} =
proj.x = aff.x

View File

@ -10,6 +10,7 @@ import
./io_bigints, ./io_fields, ./io_towers,
../arithmetic,
../towers,
../primitives,
../elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
@ -26,7 +27,12 @@ import
#
# ############################################################
func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC): string =
proc spaces(num: int): string =
result = newString(num)
for i in 0 ..< num:
result[i] = ' '
func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC, indent: static int = 0): string =
## Stringify an elliptic curve point to Hex
## Note. Leading zeros are not removed.
## Result is prefixed with 0x
@ -39,29 +45,31 @@ func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC): stri
## This proc output may change format in the future
var aff {.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
when EC is ECP_ShortW_Prj:
aff.affineFromProjective(P)
elif EC is ECP_ShortW_Jac:
aff.affineFromJacobian(P)
when EC isnot ECP_ShortW_Aff:
aff.affine(P)
else:
aff = P
result = "ECP[" & $aff.F & "](\n x: "
const sp = spaces(indent)
result = sp & $EC & "(\n" & sp & " x: "
result.appendHex(aff.x, bigEndian)
result &= ",\n y: "
result &= ",\n" & sp & " y: "
result.appendHex(aff.y, bigEndian)
result &= "\n)"
result &= "\n" & sp & ")"
func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x, y: string): bool {.raises: [ValueError].}=
## Convert hex strings to a G1 curve point
## Returns `false`
## if there is no point with coordinates (`x`, `y`) on the curve
## Returns true if point exist or if input is the point at infinity (all 0)
## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve
## In that case, `dst` content is undefined.
static: doAssert dst.F is Fp, "dst must be on G1, an elliptic curve over 𝔽p"
dst.x.fromHex(x)
dst.y.fromHex(y)
dst.z.setOne()
return bool(isOnCurve(dst.x, dst.y, dst.G))
let isInf = dst.x.isZero() and dst.y.isZero()
dst.z.csetZero(isInf)
return bool(isOnCurve(dst.x, dst.y, dst.G) or isInf)
func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x0, x1, y0, y1: string): bool {.raises: [ValueError].}=
## Convert hex strings to a G2 curve point
@ -72,24 +80,34 @@ func fromHex*(dst: var (ECP_ShortW_Prj or ECP_ShortW_Jac), x0, x1, y0, y1: strin
dst.x.fromHex(x0, x1)
dst.y.fromHex(y0, y1)
dst.z.setOne()
return bool(isOnCurve(dst.x, dst.y, dst.G))
let isInf = dst.x.isZero() and dst.y.isZero()
dst.z.csetZero(isInf)
return bool(isOnCurve(dst.x, dst.y, dst.G) or isInf)
func fromHex*(dst: var ECP_ShortW_Aff, x, y: string): bool {.raises: [ValueError].}=
## Convert hex strings to a G1 curve point
## Returns `false`
## if there is no point with coordinates (`x`, `y`) on the curve
## Returns true if point exist or if input is the point at infinity (all 0)
## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve
## In that case, `dst` content is undefined.
static: doAssert dst.F is Fp, "dst must be on G1, an elliptic curve over 𝔽p"
dst.x.fromHex(x)
dst.y.fromHex(y)
return bool(isOnCurve(dst.x, dst.y, dst.G))
return bool(isOnCurve(dst.x, dst.y, dst.G) or dst.isInf())
func fromHex*(dst: var ECP_ShortW_Aff, x0, x1, y0, y1: string): bool {.raises: [ValueError].}=
## Convert hex strings to a G2 curve point
## Returns `false`
## if there is no point with coordinates (`x`, `y`) on the curve
## Returns true if point exist or if input is the point at infinity (all 0)
## Returns `false` if there is no point with coordinates (`x`, `y`) on the curve
## In that case, `dst` content is undefined.
static: doAssert dst.F is Fp2, "dst must be on G2, an elliptic curve over 𝔽p2"
dst.x.fromHex(x0, x1)
dst.y.fromHex(y0, y1)
return bool(isOnCurve(dst.x, dst.y, dst.G))
return bool(isOnCurve(dst.x, dst.y, dst.G) or dst.isInf())
func fromHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](
_: type EC, x, y: string): EC {.raises: [ValueError].} =
doAssert result.fromHex(x, y)
func fromHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](
_: type EC, x0, x1, y0, y1: string): EC {.raises: [ValueError].} =
doAssert result.fromHex(x0, x1, y0, y1)

View File

@ -141,7 +141,7 @@ func miller_init_double_then_add*[FT, F1, F2](
# First step: 0b10, T <- Q, f = 1 (mod p¹²), f *= line
# ----------------------------------------------------
T.projectiveFromAffine(Q)
T.fromAffine(Q)
# f.square() -> square(1)
line.line_double(T, P)
@ -292,7 +292,7 @@ func miller_init_double_then_add*[N: static int, FT, F1, F2](
# First step: T <- Q, f = 1 (mod p¹²), f *= line
# ----------------------------------------------
for i in 0 ..< N:
Ts[i].projectiveFromAffine(Qs[i])
Ts[i].fromAffine(Qs[i])
line0.line_double(Ts[0], Ps[0])
when N >= 2:

View File

@ -63,7 +63,7 @@ func millerLoopGenericBLS12*[C](
line {.noInit.}: Line[Fp2[C]]
nQ{.noInit.}: typeof(Q)
T.projectiveFromAffine(Q)
T.fromAffine(Q)
nQ.neg(Q)
basicMillerLoop(

View File

@ -61,7 +61,7 @@ func millerLoopGenericBN*[C](
line {.noInit.}: Line[Fp2[C]]
nQ{.noInit.}: typeof(Q)
T.projectiveFromAffine(Q)
T.fromAffine(Q)
nQ.neg(Q)
basicMillerLoop(

View File

@ -45,7 +45,7 @@ func millerLoopBW6_761_naive[C](
line {.noInit.}: Line[Fp[C]]
nQ{.noInit.}: typeof(Q)
T.projectiveFromAffine(Q)
T.fromAffine(Q)
nQ.neg(Q)
basicMillerLoop(
@ -55,7 +55,7 @@ func millerLoopBW6_761_naive[C](
)
var f2 {.noInit.}: typeof(f)
T.projectiveFromAffine(Q)
T.fromAffine(Q)
basicMillerLoop(
f2, T, line,
@ -93,7 +93,7 @@ func millerLoopBW6_761_opt_to_debug[C](
T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2]
line {.noInit.}: Line[Fp[C]]
T.projectiveFromAffine(Q)
T.fromAffine(Q)
f.setOne()
template u: untyped = pairing(C, ate_param_1_opt)
@ -114,7 +114,7 @@ func millerLoopBW6_761_opt_to_debug[C](
mu = f
minvu.inv(f)
Qu.affineFromProjective(T)
Qu.affine(T)
nQu.neg(Qu)
# Drop the vertical line
@ -125,7 +125,7 @@ func millerLoopBW6_761_opt_to_debug[C](
# 2nd part: f_{u²-u-1,Q}(P)
# ------------------------------
# We restart from `f` and `T`
T.projectiveFromAffine(Qu)
T.fromAffine(Qu)
template u: untyped = pairing(C, ate_param_2_opt)
var u3 = pairing(C, ate_param_2_opt)

View File

@ -115,4 +115,4 @@ func isPowerOf2_vartime*(n: SomeUnsignedInt): bool {.inline.} =
func nextPowerOf2_vartime*(n: uint64): uint64 {.inline.} =
## Returns x if x is a power of 2
## or the next biggest power of 2
1'u64 shl (log2_vartime(n-1) + 1)
1'u64 shl (log2_vartime(n-1) + 1)

View File

@ -124,7 +124,7 @@ template `-`*[T: Ct](x: T): T =
#
# ############################################################
template isMsbSet[T: Ct](x: T): CTBool[T] =
template isMsbSet*[T: Ct](x: T): CTBool[T] =
## Returns the most significant bit of an integer
const msb_pos = T.sizeof * 8 - 1
(CTBool[T])(x shr msb_pos)
@ -163,6 +163,16 @@ template `<=`*[T: Ct](x, y: T): CTBool[T] =
template `xor`*[T: Ct](x, y: CTBool[T]): CTBool[T] =
CTBool[T](noteq(T(x), T(y)))
# ############################################################
#
# Conditionals
#
# ############################################################
template cneg*[T: Ct](x: T, ctl: CTBool[T]): T =
# Conditional negate if ctl is true
(x xor -T(ctl)) + T(ctl)
# ############################################################
#
# Workaround system.nim `!=` template

View File

@ -39,3 +39,4 @@ const X86* = defined(amd64) or defined(i386)
when sizeof(int) == 8 and GCC_Compatible:
type
uint128*{.importc: "unsigned __int128".} = object
int128*{.importc: "__int128".} = object

View File

@ -75,6 +75,19 @@ func muladd2*(hi, lo: var Ct[uint32], a, b, c1, c2: Ct[uint32]) {.inline.}=
lo = (Ct[uint32])(dblPrec)
hi = (Ct[uint32])(dblPrec shr 32)
func smul*(hi, lo: var Ct[uint32], a, b: Ct[uint32]) {.inline.} =
## Signed extended precision multiplication
## (hi, lo) <- a*b
##
## Inputs are intentionally unsigned
## as we use their unchecked raw representation for cryptography
##
## This is constant-time on most hardware
## See: https://www.bearssl.org/ctmul.html
let dblPrec = int64(cast[int32](a)) * int64(cast[int32](b)) # Sign-extension via cast+conversion to wider int
lo = cast[Ct[uint32]](dblPrec)
hi = cast[Ct[uint32]](dblPrec shr 32)
# ############################################################
#
# 64-bit words
@ -83,15 +96,15 @@ func muladd2*(hi, lo: var Ct[uint32], a, b, c1, c2: Ct[uint32]) {.inline.}=
when sizeof(int) == 8:
when defined(vcc):
from ./extended_precision_x86_64_msvc import unsafeDiv2n1n, mul, muladd1, muladd2
from ./extended_precision_x86_64_msvc import unsafeDiv2n1n, mul, muladd1, muladd2, smul
elif GCCCompatible:
# TODO: constant-time div2n1n
when X86:
from ./extended_precision_x86_64_gcc import unsafeDiv2n1n
from ./extended_precision_64bit_uint128 import mul, muladd1, muladd2
from ./extended_precision_64bit_uint128 import mul, muladd1, muladd2, smul
else:
from ./extended_precision_64bit_uint128 import unsafeDiv2n1n, mul, muladd1, muladd2
export unsafeDiv2n1n, mul, muladd1, muladd2
from ./extended_precision_64bit_uint128 import unsafeDiv2n1n, mul, muladd1, muladd2, smul
export unsafeDiv2n1n, mul, muladd1, muladd2, smul
# ############################################################
#

View File

@ -97,3 +97,25 @@ func muladd2*(hi, lo: var Ct[uint64], a, b, c1, c2: Ct[uint64]) {.inline.}=
else:
{.emit:["*",hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
{.emit:["*",lo, " = (NU64)", dblPrec,";"].}
func smul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} =
## Extended precision multiplication
## (hi, lo) <- a*b
##
## Inputs are intentionally unsigned
## as we use their unchecked raw representation for cryptography
##
## This is constant-time on most hardware
## See: https://www.bearssl.org/ctmul.html
block:
var dblPrec {.noInit.}: int128
# We need to cast to int64 then sign-extended to int128
{.emit:[dblPrec, " = (__int128)", cast[int64](a)," * (__int128)", cast[int64](b),";"].}
# Don't forget to dereference the var param in C mode
when defined(cpp):
{.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
{.emit:[lo, " = (NU64)", dblPrec,";"].}
else:
{.emit:["*",hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
{.emit:["*",lo, " = (NU64)", dblPrec,";"].}

View File

@ -29,6 +29,7 @@ func udiv128(highDividend, lowDividend, divisor: Ct[uint64], remainder: var Ct[u
## - if n_hi > d result is undefined
func umul128(a, b: Ct[uint64], hi: var Ct[uint64]): Ct[uint64] {.importc:"_umul128", header:"<intrin.h>", nodecl.}
## Unsigned extended precision multiplication
## (hi, lo) <-- a * b
## Return value is the low word
@ -84,3 +85,22 @@ func muladd2*(hi, lo: var Ct[uint64], a, b, c1, c2: Ct[uint64]) {.inline.}=
# Carry chain 2
addC(carry2, lo, lo, c2, Carry(0))
addC(carry2, hi, hi, 0, carry2)
func smul128(a, b: Ct[uint64], hi: var Ct[uint64]): Ct[uint64] {.importc:"_mul128", header:"<intrin.h>", nodecl.}
## Signed extended precision multiplication
## (hi, lo) <-- a * b
## Return value is the low word
##
## Inputs are intentionally unsigned
## as we use their unchecked raw representation for cryptography
func smul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} =
## Extended precision multiplication
## (hi, lo) <- a*b
##
## Inputs are intentionally unsigned
## as we use their unchecked raw representation for cryptography
##
## This is constant-time on most hardware
## See: https://www.bearssl.org/ctmul.html
lo = smul128(a, b, hi)

View File

@ -133,7 +133,7 @@ func eth_evm_ecadd*(
R.sum(P, Q)
var aff{.noInit.}: ECP_ShortW_Aff[Fp[BN254_Snarks], G1]
aff.affineFromProjective(R)
aff.affine(R)
r.toOpenArray(0, 31).exportRawUint(
aff.x, bigEndian
@ -207,7 +207,7 @@ func eth_evm_ecmul*(
P.scalarMul(s)
var aff{.noInit.}: ECP_ShortW_Aff[Fp[BN254_Snarks], G1]
aff.affineFromProjective(P)
aff.affine(P)
r.toOpenArray(0, 31).exportRawUint(
aff.x, bigEndian
@ -221,7 +221,7 @@ func subgroupCheck(P: ECP_ShortW_Aff[Fp2[BN254_Snarks], G2]): bool =
## that point may not be in the correct cyclic subgroup.
## If we are on the subgroup of order r then [r]P = 0
var Q{.noInit.}: ECP_ShortW_Prj[Fp2[BN254_Snarks], G2]
Q.projectiveFromAffine(P)
Q.fromAffine(P)
return bool(Q.isInSubgroup())
func fromRawCoords(

View File

@ -219,6 +219,17 @@ func conj*(r: var CubicExt, a: CubicExt) =
# Conditional arithmetic
# -------------------------------------------------------------------
func csetZero*(a: var ExtensionField, ctl: SecretBool) =
## Set ``a`` to 0 if ``ctl`` is true
staticFor i, 0, a.coords.len:
a.coords[i].csetZero(ctl)
func csetOne*(a: var ExtensionField, ctl: SecretBool) =
## Set ``a`` to 1 if ``ctl`` is true
a.coords[0].csetOne(ctl)
staticFor i, 1, a.coords.len:
a.coords[i].csetZero(ctl)
func cneg*(a: var ExtensionField, ctl: SecretBool) =
## Constant-time in-place conditional negation
## Only negate if ctl is true

View File

@ -28,16 +28,16 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- Squaring
- [x] Dedicated squaring functions
- [x] int128
- [ ] loop unrolling
- [x] loop unrolling
- [x] x86: Full Assembly implementation
- [ ] x86: MULX, ADCX, ADOX instructions
- [x] x86: MULX, ADCX, ADOX instructions
## Finite Fields & Modular Arithmetic
- Representation
- [x] Montgomery Representation
- [ ] Barret Reduction
- [ ] Unsaturated Representation
- [x] Unsaturated Representation
- [ ] Mersenne Prime (2ᵏ - 1),
- [ ] Generalized Mersenne Prime (NIST Prime P256: 2^256 - 2^224 + 2^192 + 2^96 - 1)
- [ ] Pseudo-Mersenne Prime (2^m - k for example Curve25519: 2^255 - 19)
@ -91,9 +91,10 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- Inversion (constant-time baseline, Little-Fermat inversion via a^(p-2))
- [x] Constant-time binary GCD algorithm by Möller, algorithm 5 in https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf
- [x] Addition-chain for a^(p-2)
- [ ] Constant-time binary GCD algorithm by Bernstein-Yang, https://eprint.iacr.org/2019/266
- [x] Constant-time binary GCD algorithm by Bernstein-Yang, https://eprint.iacr.org/2019/266
- [ ] Constant-time binary GCD algorithm by Pornin, https://eprint.iacr.org/2020/972
- [ ] Simultaneous inversion
- [x] Constant-time binary GCD algorithm by BY with half-delta optimization by libsecp256k1, formally verified, https://eprint.iacr.org/2021/549
- [x] Simultaneous inversion
- Square Root (constant-time)
- [x] baseline sqrt via Little-Fermat for `p ≡ 3 (mod 4)`
@ -193,7 +194,7 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- [ ] BLS curves: Ghamman, Fouotsa
- [x] BLS curves: Hayashida, Hayasaka, Teruya
- [ ] Multi-pairing
- [x] Multi-pairing
- [ ] Line accumulation
- [ ] Parallel Multi-Pairing
@ -204,7 +205,7 @@ The optimizations can be of algebraic, algorithmic or "implementation details" n
- [ ] BLS G2: Scott et al
- [ ] BLS G2: Fuentes-Castañeda
- [x] BLS G2: Budroni et al, endomorphism accelerated
- [ ] BN G2
- [x] BN G2: Fuentes-Castañeda
- [ ] BW6-761 G1
- [ ] BW6-761 G2

View File

@ -222,7 +222,7 @@ when isMainModule:
proc roundtrip() =
let fftDesc = FFTDescriptor[G1].init(maxScale = 4)
var data = newSeq[G1](fftDesc.maxWidth)
data[0].projectiveFromAffine(Generator1)
data[0].fromAffine(Generator1)
for i in 1 ..< fftDesc.maxWidth:
data[i].madd(data[i-1], Generator1)
@ -273,7 +273,7 @@ when isMainModule:
let desc = FFTDescriptor[G1].init(uint8 scale)
var data = newSeq[G1](desc.maxWidth)
data[0].projectiveFromAffine(Generator1)
data[0].fromAffine(Generator1)
for i in 1 ..< desc.maxWidth:
data[i].madd(data[i-1], Generator1)

View File

@ -64,7 +64,7 @@ proc checkProofSingle(
): bool =
## Check a proof for a Kate commitment for an evaluation f(x) = y
var xG2, g2: G2
g2.projectiveFromAffine(Generator2)
g2.fromAffine(Generator2)
xG2 = g2
xG2.scalarMul(x.toBig())
@ -72,7 +72,7 @@ proc checkProofSingle(
s_minus_x.diff(kzg.secretG2[1], xG2)
var yG1: G1
yG1.projectiveFromAffine(Generator1)
yG1.fromAffine(Generator1)
yG1.scalarMul(y.toBig())
var commitment_minus_y: G1

View File

@ -47,10 +47,10 @@ func pair_verify*(
var P1a, P2a: G1aff
var Q1a, Q2a: G2aff
P1a.affineFromProjective(P1)
Q1a.affineFromProjective(Q1)
P2a.affineFromProjective(P2)
Q2a.affineFromProjective(Q2)
P1a.affine(P1)
Q1a.affine(Q1)
P2a.affine(P2)
Q2a.affine(Q2)
# To verify if e(P1, Q1) == e(P2, Q2)
# we can do e(P1, Q1) / e(P2, Q2) == 1

View File

@ -555,14 +555,10 @@ proc mainModularInverse() =
let a = BigInt[16].fromUint(42'u16)
let M = BigInt[16].fromUint(2017'u16)
var mp1div2 = M
discard mp1div2.add(One)
mp1div2.shiftRight(1)
let expected = BigInt[16].fromUint(1969'u16)
var r = canary(BigInt[16])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -570,14 +566,10 @@ proc mainModularInverse() =
let a = BigInt[381].fromUint(42'u16)
let M = BigInt[381].fromUint(2017'u16)
var mp1div2 = M
discard mp1div2.add(One)
mp1div2.shiftRight(1)
let expected = BigInt[381].fromUint(1969'u16)
var r = canary(BigInt[381])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -586,14 +578,10 @@ proc mainModularInverse() =
let a = BigInt[16].fromUint(271'u16)
let M = BigInt[16].fromUint(383'u16)
var mp1div2 = M
discard mp1div2.add(One)
mp1div2.shiftRight(1)
let expected = BigInt[16].fromUint(106'u16)
var r = canary(BigInt[16])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -601,14 +589,10 @@ proc mainModularInverse() =
let a = BigInt[381].fromUint(271'u16)
let M = BigInt[381].fromUint(383'u16)
var mp1div2 = M
discard mp1div2.add(One)
mp1div2.shiftRight(1)
let expected = BigInt[381].fromUint(106'u16)
var r = canary(BigInt[381])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -616,14 +600,10 @@ proc mainModularInverse() =
let a = BigInt[381].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab")
var mp1div2 = M
discard mp1div2.add(One)
mp1div2.shiftRight(1)
let expected = BigInt[381].fromHex("0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8")
var r = canary(BigInt[381])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -632,14 +612,10 @@ proc mainModularInverse() =
let a = BigInt[16].fromUint(0'u16)
let M = BigInt[16].fromUint(2017'u16)
var mp1div2 = M
mp1div2.shiftRight(1)
discard mp1div2.add(One)
let expected = BigInt[16].fromUint(0'u16)
var r = canary(BigInt[16])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)
@ -647,14 +623,10 @@ proc mainModularInverse() =
let a = BigInt[381].fromUint(0'u16)
let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab")
var mp1div2 = M
mp1div2.shiftRight(1)
discard mp1div2.add(One)
let expected = BigInt[381].fromUint(0'u16)
var r = canary(BigInt[381])
r.invmod(a, M, mp1div2)
r.invmod(a, M)
check: bool(r == expected)

63
tests/t_ec_conversion.nim Normal file
View File

@ -0,0 +1,63 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/config/[type_ff, curves],
../constantine/elliptic/[ec_shortweierstrass_jacobian, ec_shortweierstrass_projective],
../constantine/towers,
# Test utilities
./t_ec_template
const
Iters = 8
run_EC_conversion_failures(
moduleName = "test_ec_conversion_fuzzing_failures"
)
run_EC_affine_conversion(
ec = ECP_ShortW_Jac[Fp[BN254_Snarks], G1],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_jacobian_g1_" & $BN254_Snarks
)
run_EC_affine_conversion(
ec = ECP_ShortW_Prj[Fp[BN254_Snarks], G1],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_projective_g1_" & $BN254_Snarks
)
run_EC_affine_conversion(
ec = ECP_ShortW_Jac[Fp2[BN254_Snarks], G2],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_jacobian_g2_" & $BN254_Snarks
)
run_EC_affine_conversion(
ec = ECP_ShortW_Prj[Fp2[BN254_Snarks], G2],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_projective_g2_" & $BN254_Snarks
)
run_EC_affine_conversion(
ec = ECP_ShortW_Jac[Fp[BLS12_381], G1],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_jacobian_g1_" & $BLS12_381
)
run_EC_affine_conversion(
ec = ECP_ShortW_Prj[Fp[BLS12_381], G1],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_projective_g1_" & $BLS12_381
)
run_EC_affine_conversion(
ec = ECP_ShortW_Jac[Fp2[BLS12_381], G2],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_jacobian_g2_" & $BLS12_381
)
run_EC_affine_conversion(
ec = ECP_ShortW_Prj[Fp2[BLS12_381], G2],
Iters = Iters,
moduleName = "test_ec_conversion_shortw_affine_projective_g2_" & $BLS12_381
)

View File

@ -203,12 +203,8 @@ proc run_scalar_mul_test_vs_sage*(
reference {.noInit.}: EC
endo {.noInit.}: EC
when EC is ECP_ShortW_Prj:
P.projectiveFromAffine(vec.vectors[i].P)
Q.projectiveFromAffine(vec.vectors[i].Q)
else:
P.jacobianFromAffine(vec.vectors[i].P)
Q.jacobianFromAffine(vec.vectors[i].Q)
P.fromAffine(vec.vectors[i].P)
Q.fromAffine(vec.vectors[i].Q)
impl = P
reference = P
endo = P

View File

@ -441,10 +441,7 @@ proc run_EC_mixed_add_impl*(
let a = rng.random_point(EC, randZ, gen)
let b = rng.random_point(EC, randZ, gen)
var bAff: ECP_ShortW_Aff[EC.F, EC.G]
when b is ECP_ShortW_Prj:
bAff.affineFromProjective(b)
else:
bAff.affineFromJacobian(b)
bAff.affine(b)
var r_generic, r_mixed: EC
@ -535,4 +532,169 @@ proc run_EC_subgroups_cofactors_impl*(
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", G1_or_G2, " subgroup and ",
offSubgroup, " points on curve but not in subgroup (before cofactor clearing)"
offSubgroup, " points on curve but not in subgroup (before cofactor clearing)"
proc run_EC_affine_conversion*(
ec: typedesc,
Iters: static int,
moduleName: string
) =
# Random seed for reproducibility
var rng: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitwidth & "-bit mode]":
test "EC " & G1_or_G2 & " batchAffine is consistent with single affine conversion":
proc test(EC: typedesc, gen: RandomGen) =
const batchSize = 10
for _ in 0 ..< Iters:
var Ps: array[batchSize, EC]
for i in 0 ..< batchSize:
Ps[i] = rng.random_point(EC, randZ = true, gen)
var Qs, Rs: array[batchSize, affine(EC)]
for i in 0 ..< batchSize:
Qs[i].affine(Ps[i])
Rs.batchAffine(Ps)
for i in countdown(batchSize-1, 0):
doAssert bool(Qs[i] == Rs[i]), block:
var s: string
s &= "Mismatch on iteration " & $i
s &= "\nFailing batch for " & $EC & " (" & $WordBitwidth & "-bit)"
s &= "\n ["
for i in 0 ..< batchSize:
s &= "\n" & Ps[i].toHex(indent = 4)
if i != batchSize-1: s &= ","
s &= "\n ]"
s &= "\nFailing inversions for " & $EC & " (" & $WordBitwidth & "-bit)"
s &= "\n ["
for i in 0 ..< batchSize:
s &= "\n" & Rs[i].toHex(indent = 4)
if i != batchSize-1: s &= ","
s &= "\n ]"
s &= "\nExpected inversions for " & $EC & " (" & $WordBitwidth & "-bit)"
s &= "\n ["
for i in 0 ..< batchSize:
s &= "\n" & Qs[i].toHex(indent = 4)
if i != batchSize-1: s &= ","
s &= "\n ]"
s
test(ec, gen = Uniform)
test(ec, gen = HighHammingWeight)
test(ec, gen = Long01Sequence)
proc run_EC_conversion_failures*(
moduleName: string
) =
echo "\n------------------------------------------------------\n"
echo moduleName
suite moduleName & " - [" & $WordBitwidth & "-bit mode]":
test "EC batchAffine fuzzing failures ":
proc test_bn254_snarks_g1(ECP: type) =
type ECP_Aff = ECP_ShortW_Aff[Fp[BN254_Snarks], G1]
let Ps = [
ECP.fromHex(
x = "0x0e0a76c19a07e01fe56f246f7878652c0b39eb28f5c60b3dd43e438dc50e0d9d",
y = "0x04e6da44bc7f802fab3df34ce45d86857327663bc24ff574da48ee2b01a4932e"
),
ECP.fromHex(
x = "0x2036a21a3d9cc09d8f5f7491fe7e4f44cffd2addf01c6ae587bee7d24f060571",
y = "0x2b5f1cc6f1cdb4a6dbaf3c88b9c02ccf984aecbba4830d5aeb33f940cb632d8a"
),
ECP.fromHex(
x = "0x2fd314a75c6b1f82d70f2edc7b7bf6e7397bc04bc6aaa0584b9e5bbb7689082a",
y = "0x111b3b4a697e7a990400eb39f09a9bb559748cea6699535bd114ffb3dcc0b4d1"
),
ECP.fromHex(
x = "0x0000000000000000000000000000000000000000000000000000000000000000",
y = "0x0000000000000000000000000000000000000000000000000000000000000000"
),
ECP.fromHex(
x = "0x0e0a77c199ffdf2f686ea36f7879462c0a74eb28f5e70b3dd31d438dc58f0d9d",
y = "0x0b3938a732020d98793510be6aa312651a5f5369ebbbe41d7fda8fd914b7f264"
),
ECP.fromHex(
x = "0x0000000007ffffffffffffff80000000000007ffe000000000ffffffffffffff",
y = "0x1d9db0f30e3395ee33a70674a31e2854de0665292dd545c10fb3da579d7df916"
),
ECP.fromHex(
x = "0x000000000000000fffffffffe0000000000007ffffffffffffffffffffffffff",
y = "0x2a5c6df4d24efa9ffcf4003e35801dc202d820b59d67ecc65d57cfdf53b4bbc6"
),
ECP.fromHex(
x = "0x00000000000000000003ffffffc00000000000000c000000000000003ffffffe",
y = "0x09f811f84207472ccd6ca00bb1ec3e6132a1c9206adc9ed768871f0005f0d358"
),
ECP.fromHex(
x = "0x0e0979b99d07df30656ea36f7879462c097beb28f5c8083dd25d448dc58f0ca4",
y = "0x1799b22d8780c917ab1c4e15da718c243babc1c51225b5f8298aa570b5029796"
),
ECP.fromHex(
x = "0x0e0a76c29a07e02f666ea36e806a462c0a78eb25f5c70b3dd35c4b8dc58f0d9c",
y = "0x0529cb1ad2552c7979a900ff59551d5dc1f8680c3a4f20d3b9cdcf68b69ec61c"
)
]
let Qs = [
ECP_Aff.fromHex(
x = "0x0e0a76c19a07e01fe56f246f7878652c0b39eb28f5c60b3dd43e438dc50e0d9d",
y = "0x04e6da44bc7f802fab3df34ce45d86857327663bc24ff574da48ee2b01a4932e"
),
ECP_Aff.fromHex(
x = "0x2036a21a3d9cc09d8f5f7491fe7e4f44cffd2addf01c6ae587bee7d24f060571",
y = "0x2b5f1cc6f1cdb4a6dbaf3c88b9c02ccf984aecbba4830d5aeb33f940cb632d8a"
),
ECP_Aff.fromHex(
x = "0x2fd314a75c6b1f82d70f2edc7b7bf6e7397bc04bc6aaa0584b9e5bbb7689082a",
y = "0x111b3b4a697e7a990400eb39f09a9bb559748cea6699535bd114ffb3dcc0b4d1"
),
ECP_Aff.fromHex(
x = "0x0000000000000000000000000000000000000000000000000000000000000000",
y = "0x0000000000000000000000000000000000000000000000000000000000000000"
),
ECP_Aff.fromHex(
x = "0x0e0a77c199ffdf2f686ea36f7879462c0a74eb28f5e70b3dd31d438dc58f0d9d",
y = "0x0b3938a732020d98793510be6aa312651a5f5369ebbbe41d7fda8fd914b7f264"
),
ECP_Aff.fromHex(
x = "0x0000000007ffffffffffffff80000000000007ffe000000000ffffffffffffff",
y = "0x1d9db0f30e3395ee33a70674a31e2854de0665292dd545c10fb3da579d7df916"
),
ECP_Aff.fromHex(
x = "0x000000000000000fffffffffe0000000000007ffffffffffffffffffffffffff",
y = "0x2a5c6df4d24efa9ffcf4003e35801dc202d820b59d67ecc65d57cfdf53b4bbc6"
),
ECP_Aff.fromHex(
x = "0x00000000000000000003ffffffc00000000000000c000000000000003ffffffe",
y = "0x09f811f84207472ccd6ca00bb1ec3e6132a1c9206adc9ed768871f0005f0d358"
),
ECP_Aff.fromHex(
x = "0x0e0979b99d07df30656ea36f7879462c097beb28f5c8083dd25d448dc58f0ca4",
y = "0x1799b22d8780c917ab1c4e15da718c243babc1c51225b5f8298aa570b5029796"
),
ECP_Aff.fromHex(
x = "0x0e0a76c29a07e02f666ea36e806a462c0a78eb25f5c70b3dd35c4b8dc58f0d9c",
y = "0x0529cb1ad2552c7979a900ff59551d5dc1f8680c3a4f20d3b9cdcf68b69ec61c"
)
]
var Rs: array[10, ECP_Aff]
Rs.batchAffine(Ps)
for i in 0 ..< 10:
doAssert bool(Qs[i] == Rs[i])
test_bn254_snarks_g1(ECP_ShortW_Prj[Fp[BN254_Snarks], G1])
test_bn254_snarks_g1(ECP_ShortW_Jac[Fp[BN254_Snarks], G1])

View File

@ -132,7 +132,7 @@ proc run_hash_to_curve_test(
)
var P_ref: EC
P_ref.projectiveFromAffine(vec.vectors[i].P)
P_ref.fromAffine(vec.vectors[i].P)
doAssert: bool(P == P_ref)

View File

@ -0,0 +1,89 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import std/[unittest,times],
../constantine/config/[common, curves],
../constantine/arithmetic,
../constantine/arithmetic/limbs_unsaturated,
../constantine/io/io_bigints,
../helpers/prng_unsafe
# Random seed for reproducibility
var rng: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "\n------------------------------------------------------\n"
echo "test_io_unsaturated xoshiro512** seed: ", seed
type
RandomGen = enum
Uniform
HighHammingWeight
Long01Sequence
func random_bigint*(rng: var RngState, curve: static Curve, gen: static RandomGen): auto =
when gen == Uniform:
rng.random_unsafe(matchingBigInt(curve))
elif gen == HighHammingWeight:
rng.random_highHammingWeight(matchingBigInt(curve))
else:
rng.random_long01Seq(matchingBigInt(curve))
# debug
import std/strutils
proc testRoundtrip(curve: static Curve, gen: static RandomGen) =
const bits = curve.getCurveBitwidth()
const Excess = 2
const UnsatBitwidth = WordBitWidth - Excess
const N = (bits + UnsatBitwidth-1) div UnsatBitwidth
let a = rng.random_bigint(curve, gen)
var u: LimbsUnsaturated[N, Excess]
var b: typeof(a)
u.fromPackedRepr(a.limbs)
b.limbs.fromUnsatRepr(u)
doAssert bool(a == b), block:
"\n a: " & a.toHex() &
"\n b: " & b.toHex()
proc main() =
suite "Packed <-> Unsaturated limbs roundtrips" & " [" & $WordBitwidth & "-bit mode]":
const Iters = 10000
test "BN254_Snarks":
for _ in 0 ..< Iters:
testRoundtrip(BN254_Snarks, Uniform)
for _ in 0 ..< Iters:
testRoundtrip(BN254_Snarks, HighHammingWeight)
for _ in 0 ..< Iters:
testRoundtrip(BN254_Snarks, Long01Sequence)
test "Curve25519":
for _ in 0 ..< Iters:
testRoundtrip(Curve25519, Uniform)
for _ in 0 ..< Iters:
testRoundtrip(Curve25519, HighHammingWeight)
for _ in 0 ..< Iters:
testRoundtrip(Curve25519, Long01Sequence)
test "secp256k1":
for _ in 0 ..< Iters:
testRoundtrip(Secp256k1, Uniform)
for _ in 0 ..< Iters:
testRoundtrip(Secp256k1, HighHammingWeight)
for _ in 0 ..< Iters:
testRoundtrip(Secp256k1, Long01Sequence)
test "BLS12-381":
for _ in 0 ..< Iters:
testRoundtrip(BLS12_381, Uniform)
for _ in 0 ..< Iters:
testRoundtrip(BLS12_381, HighHammingWeight)
for _ in 0 ..< Iters:
testRoundtrip(BLS12_381, Long01Sequence)
main()

View File

@ -100,7 +100,7 @@ suite "Pairing - Line Functions on BLS12-377" & " [" & $WordBitwidth & "-bit mod
TQ.sum(T, Q)
var Qaff{.noInit.}: ECP_ShortW_Aff[Fp2[C], G2]
Qaff.affineFromProjective(Q)
Qaff.affine(Q)
l.line_add(T, Qaff, P)
doAssert: bool(T == TQ)

View File

@ -100,7 +100,7 @@ suite "Pairing - Line Functions on BLS12-381" & " [" & $WordBitwidth & "-bit mod
TQ.sum(T, Q)
var Qaff{.noInit.}: ECP_ShortW_Aff[Fp2[C], G2]
Qaff.affineFromProjective(Q)
Qaff.affine(Q)
l.line_add(T, Qaff, P)
doAssert: bool(T == TQ)

View File

@ -43,9 +43,9 @@ func clearCofactor[F; G: static Subgroup](
ec: var ECP_ShortW_Aff[F, G]) =
# For now we don't have any affine operation defined
var t {.noInit.}: ECP_ShortW_Prj[F, G]
t.projectiveFromAffine(ec)
t.fromAffine(ec)
t.clearCofactor()
ec.affineFromProjective(t)
ec.affine(t)
func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen): EC {.noInit.} =
if gen == Uniform:
@ -82,10 +82,10 @@ template runPairingTests*(Iters: static int, C: static Curve, G1, G2, GT: typede
var Pa {.noInit.}, Pa2 {.noInit.}: affineType(P)
var Qa {.noInit.}, Qa2 {.noInit.}: affineType(Q)
Pa.affineFromProjective(P)
Pa2.affineFromProjective(P2)
Qa.affineFromProjective(Q)
Qa2.affineFromProjective(Q2)
Pa.affine(P)
Pa2.affine(P2)
Qa.affine(Q)
Qa2.affine(Q2)
r.pairing_fn(Pa, Qa)
r.square()

View File

@ -85,9 +85,9 @@ func publicKeyG1(
seckey: Fr[BLS12_381]
) =
var t: ECP_ShortW_Prj[Fp[BLS12_381], G1]
t.projectiveFromAffine(BLS12_381_G1_generator)
t.fromAffine(BLS12_381_G1_generator)
t.scalarMul(seckey.toBig())
pubkey.affineFromprojective(t)
pubkey.affine(t)
doAssert not bool pubkey.isInf()
func signG2[T: byte|char](
@ -105,7 +105,7 @@ func signG2[T: byte|char](
domainSepTag = DomainSepTag
)
t.scalarMul(secretKey.toBig())
signature.affineFromprojective(t)
signature.affine(t)
doAssert not bool signature.isInf()
func verifyG2[T: byte|char](
@ -125,7 +125,7 @@ func verifyG2[T: byte|char](
message = message,
domainSepTag = DomainSepTag
)
Q.affineFromprojective(Qprj)
Q.affine(Qprj)
var e0{.noInit.}, e1{.noInit.}: Fp12[BLS12_381]
e0.pairing_bls12(pubkey, Q)
@ -154,7 +154,7 @@ func verifyG2_multi[T: byte|char](
var G1s: array[2, ECP_ShortW_Aff[Fp[BLS12_381], G1]]
G1s[0] = pubkey
G2s[0].affineFromprojective(Qprj)
G2s[0].affine(Qprj)
G1s[1].neg(BLS12_381_G1_generator)
G2s[1] = signature