mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-02 13:13:07 +00:00
* consistent naming for dbl-width * Isolate double-width Fp2 mul * Implement double-width complex multiplication * Lay out Fp4 double-width mul * Off by p in square Fp4 as well :/ * less copies and stack space in addition chains * Address https://github.com/mratsim/constantine/issues/154 partly * Fix #154, faster Fp4 square: less non-residue, no Mul, only square (bit more ops total) * Fix typo * better assembly scheduling for add/sub * Double-width -> Double-precision * Unred -> Unr * double-precision modular addition * Replace canUseNoCarryMontyMul and canUseNoCarryMontySquare by getSpareBits * Complete the double-precision implementation * Use double-precision path for Fp4 squaring and mul * remove mixin annotations * Lazy reduction in Fp4 prod * Fix assembly for sum2xMod * Assembly for double-precision negation * reduce white spaces in pairing benchmarks * ADX implies BMI2
244 lines
6.9 KiB
Nim
244 lines
6.9 KiB
Nim
# Constantine
|
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
|
# Licensed and distributed under either of
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
import
|
|
../config/[common, curves, type_ff],
|
|
../primitives,
|
|
./bigints,
|
|
./finite_fields,
|
|
./limbs,
|
|
./limbs_extmul,
|
|
./limbs_montgomery
|
|
|
|
when UseASM_X86_64:
|
|
import assembly/limbs_asm_modular_dbl_prec_x86
|
|
|
|
type FpDbl*[C: static Curve] = object
  ## Double-precision element of Fp, kept only partially reduced.
  ##
  ## Values are allowed anywhere in the [0, 2ⁿp) range,
  ## with n = w*WordBitSize
  ## and w the number of machine words needed to represent p.
  ## For example a 381-bit p takes 6 limbs of 64 bits (384 bits in total),
  ## so the matching FpDbl would be 768 bits wide.
  # The limbs are stored directly, at twice the limb count,
  # rather than going through a BigInt indirection.
  limbs2x*: matchingLimbs2x(C)
|
|
|
|
template doublePrec*(T: type Fp): type =
  ## Map a field type ``Fp`` to its double-precision counterpart:
  ## the ``FpDbl`` instantiated on the same curve ``T.C``.
  FpDbl[T.C]
|
|
|
|
# No exceptions allowed
|
|
{.push raises: [].}
|
|
{.push inline.}
|
|
|
|
func `==`*(a, b: FpDbl): SecretBool =
  ## Equality of two double-precision elements,
  ## decided by comparing their ``limbs2x`` storage.
  ## The outcome is returned as a ``SecretBool``.
  result = a.limbs2x == b.limbs2x
|
|
|
|
func isZero*(a: FpDbl): SecretBool =
  ## Report, as a ``SecretBool``, whether the limbs of ``a`` are zero.
  ## The check itself is delegated to ``isZero`` on ``limbs2x``.
  result = isZero(a.limbs2x)
|
|
|
|
func setZero*(a: var FpDbl) =
  ## Reset ``a`` in place by calling ``setZero`` on its ``limbs2x`` storage.
  setZero(a.limbs2x)
|
|
|
|
func prod2x*(r: var FpDbl, a, b: Fp) =
  ## Double-precision multiplication: r <- a * b, without reduction
  ##
  ## The full-width product of the limbs of ``a`` and ``b``
  ## is written to ``r``.
  ##
  ## Range contract:
  ## - inputs in [0, p) give an output in [0, p²)
  ## - the output may reach up to the [0, 2ⁿp) range
  ##   provided spare bits are available in the Fp representation
  prod(r.limbs2x, a.mres.limbs, b.mres.limbs)
|
|
|
|
func square2x*(r: var FpDbl, a: Fp) =
  ## Double-precision squaring: r <- a², without reduction
  ##
  ## The full-width square of the limbs of ``a`` is written to ``r``.
  ##
  ## Range contract:
  ## - an input in [0, p) gives an output in [0, p²)
  ## - the output may reach up to the [0, 2ⁿp) range
  ##   provided spare bits are available in the Fp representation
  square(r.limbs2x, a.mres.limbs)
|
|
|
|
func redc2x*(r: var Fp, a: FpDbl) =
  ## Reduce the double-precision element ``a`` into the field element ``r``,
  ## going from the [0, 2ⁿp) range to the [0, p) range.
  ##
  ## The reduction is delegated to ``montyRedc2x``, which receives
  ## the curve modulus limbs together with the per-field values
  ## returned by ``getNegInvModWord`` and ``getSpareBits``.
  montyRedc2x(
    r.mres.limbs,
    a.limbs2x,
    Fp.C.Mod.limbs,
    Fp.getNegInvModWord(),
    Fp.getSpareBits()
  )
|
|
|
|
func diff2xUnr*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision subtraction r <- a - b, without any reduction
  ##
  ## The borrow out of the limb-wise subtraction is dropped.
  ## If the result is negative, fully reduced additions/subtractions
  ## are needed afterwards to get back into the [0, 2ⁿp) range.
  discard diff(r.limbs2x, a.limbs2x, b.limbs2x)
|
|
|
|
func diff2xMod*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision modular subtraction r <- a - b
  ##
  ## The output is conditionally corrected by 2ⁿp
  ## so that it stays in the [0, 2ⁿp) range.
  when UseASM_X86_64:
    submod2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const halfLen = r.limbs2x.len div 2
    const prime = FpDbl.C.Mod

    # Plain limb-wise subtraction, the final borrow is kept as a secret flag
    let wentNegative = SecretBool diff(r.limbs2x, a.limbs2x, b.limbs2x)

    # On underflow, add 2ⁿp back.
    # Only the upper half of r is touched: p is added limb by limb
    # starting at limb halfLen, the lower half is left as is.
    var carry = Carry(0)
    var corrected: SecretWord
    staticFor i, 0, halfLen:
      addC(carry, corrected, r.limbs2x[i+halfLen], prime.limbs[i], carry)
      wentNegative.ccopy(r.limbs2x[i+halfLen], corrected)
|
|
|
|
func sum2xUnr*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision addition r <- a + b, without any reduction
  ##
  ## The carry out of the limb-wise addition is dropped.
  ## If the result is bigger than 2ⁿp, fully reduced additions/subtractions
  ## are needed afterwards to get back into the [0, 2ⁿp) range.
  discard sum(r.limbs2x, a.limbs2x, b.limbs2x)
|
|
|
|
func sum2xMod*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision modular addition r <- a + b
  ##
  ## 2ⁿp is conditionally subtracted from the output
  ## so that it stays in the [0, 2ⁿp) range.
  when UseASM_X86_64:
    addmod2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const halfLen = r.limbs2x.len div 2
    const prime = FpDbl.C.Mod

    # Raw addition, remembering whether it carried out of the top limb
    var needsReduction = SecretBool sum(r.limbs2x, a.limbs2x, b.limbs2x)

    # Tentative subtraction of 2ⁿp to test r >= 2ⁿp.
    # Only the upper half of r takes part: p is subtracted limb by limb
    # starting at limb halfLen.
    var borrow = Borrow(0)
    var reduced {.noInit.}: Limbs[halfLen]
    staticFor i, 0, halfLen:
      subB(borrow, reduced[i], r.limbs2x[i+halfLen], prime.limbs[i], borrow)

    # A subtraction that did not borrow means r was at least 2ⁿp
    needsReduction = needsReduction or not(SecretBool borrow)

    # Keep the reduced upper half only when a reduction is required
    staticFor i, 0, halfLen:
      needsReduction.ccopy(r.limbs2x[i+halfLen], reduced[i])
|
|
|
|
func neg2xMod*(r: var FpDbl, a: FpDbl) =
  ## Double-precision modular negation
  ## r <- -a modulo 2ⁿp
  when UseASM_X86_64:
    negmod2x_asm(r.limbs2x, a.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const total = r.limbs2x.len
    const halfLen = total div 2
    const prime = FpDbl.C.Mod

    # For a = 0 the result must be 0 and not the modulus:
    # comparison operators assume a unique modular representation.
    let inputIsZero = a.isZero()

    # The subtraction goes through a temporary
    # so that r and a may be the same variable.
    var negated {.noInit.}: FpDbl
    var borrow = Borrow(0)

    # Lower half: 2ⁿp only has zero limbs there
    staticFor i, 0, halfLen:
      subB(borrow, negated.limbs2x[i], Zero, a.limbs2x[i], borrow)
    # Upper half: 2ⁿp holds the limbs of p (shifted)
    staticFor i, halfLen, total:
      subB(
        borrow, negated.limbs2x[i],
        prime.limbs[i-halfLen], a.limbs2x[i], borrow)

    # Force the result to zero when the input was zero
    negated.limbs2x.czero(inputIsZero)
    r = negated
|
|
|
|
func prod2xImpl(
       r {.noAlias.}: var FpDbl,
       a {.noAlias.}: FpDbl, b: static int) =
  ## Multiplication by a small integer known at compile-time
  ## r <- b * a, built from an addition chain of ``sum2xMod``
  ## (plus one ``diff2xMod`` for b = 7).
  ##
  ## Requires ``r`` and ``a`` not to alias, and ``b`` to be non-negative.
  ## Only 0 <= b <= 12 is implemented, any other value is rejected
  ## at compile-time.
  ##
  ## Only the branch matching ``b`` is selected by ``when``.
  ## The trailing comment on a line is the multiple of ``a``
  ## held in ``r`` after that line.
  static: doAssert b >= 0

  when b == 0:
    r.setZero()
  elif b == 1:
    r = a
  elif b == 2:
    r.sum2xMod(a, a)
  elif b == 3:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(a, r)  # 3
  elif b == 4:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
  elif b == 5:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, a)  # 5
  elif b == 6:
    # The previous chain referenced an undefined temporary.
    # 2 -> 3 -> 6 needs no temporary at all.
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, a)  # 3
    r.sum2xMod(r, r)  # 6
  elif b == 7:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, r)  # 8
    r.diff2xMod(r, a) # 7
  elif b == 8:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, r)  # 8
  elif b == 9:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, r)  # 8
    r.sum2xMod(r, a)  # 9
  elif b == 10:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, a)  # 5
    r.sum2xMod(r, r)  # 10
  elif b == 11:
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, r)  # 4
    r.sum2xMod(r, a)  # 5
    r.sum2xMod(r, r)  # 10
    r.sum2xMod(r, a)  # 11
  elif b == 12:
    # The previous chain saved ``a`` instead of 4a as its temporary
    # and therefore produced 8a + a = 9a.
    # 2 -> 3 -> 6 -> 12 needs no temporary at all.
    r.sum2xMod(a, a)  # 2
    r.sum2xMod(r, a)  # 3
    r.sum2xMod(r, r)  # 6
    r.sum2xMod(r, r)  # 12
  else:
    {.error: "Multiplication by this small int not implemented".}
|
|
|
|
func prod2x*(r: var FpDbl, a: FpDbl, b: static int) =
  ## Multiplication by a small integer known at compile-time
  ## r <- b * a
  ##
  ## A negative ``b`` is handled by negating ``a`` modulo 2ⁿp first
  ## and multiplying by -b. ``r`` and ``a`` may alias: the operand
  ## handed to ``prod2xImpl`` is always a separate local.
  const isNegative = b < 0
  const magnitude = (if isNegative: -b else: b)

  when isNegative:
    var operand {.noInit.}: typeof(r)
    operand.neg2xMod(a)
  else:
    # Copy, as prod2xImpl requires its input not to alias its output
    let operand = a

  prod2xImpl(r, operand, magnitude)
|
|
|
|
{.pop.} # inline
|
|
{.pop.} # raises no exceptions
|