constantine/constantine/arithmetic/finite_fields_double_precision.nim
Mamy Ratsimbazafy 5806cc4638
Double-Precision towering (#155)
* consistent naming for dbl-width

* Isolate double-width Fp2 mul

* Implement double-width complex multiplication

* Lay out Fp4 double-width mul

* Off by p in square Fp4 as well :/

* less copies and stack space in addition chains

* Address https://github.com/mratsim/constantine/issues/154 partly

* Fix #154, faster Fp4 square: less non-residue, no Mul, only square (bit more ops total)

* Fix typo

* better assembly scheduling for add/sub

* Double-width -> Double-precision

* Unred -> Unr

* double-precision modular addition

* Replace canUseNoCarryMontyMul and canUseNoCarryMontySquare by getSpareBits

* Complete the double-precision implementation

* Use double-precision path for Fp4 squaring and mul

* remove mixin annotations

* Lazy reduction in Fp4 prod

* Fix assembly for sum2xMod

* Assembly for double-precision negation

* reduce white spaces in pairing benchmarks

* ADX implies BMI2
2021-02-09 22:57:45 +01:00

244 lines
6.9 KiB
Nim

# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/[common, curves, type_ff],
../primitives,
./bigints,
./finite_fields,
./limbs,
./limbs_extmul,
./limbs_montgomery
when UseASM_X86_64:
import assembly/limbs_asm_modular_dbl_prec_x86
type
  FpDbl*[C: static Curve] = object
    ## Double-precision Fp element.
    ##
    ## An FpDbl is a partially-reduced double-precision element of Fp.
    ## The allowed range is [0, 2ⁿp)
    ## with n = w*WordBitSize
    ## and w the number of words necessary to represent p on the machine.
    ## Concretely, a 381-bit p needs 6 limbs of 64 bits (384 bits in total),
    ## so the matching FpDbl would be 768 bits.
    # Storage is directly twice the number of limbs,
    # without going through a BigInt indirection.
    limbs2x*: matchingLimbs2x(C)
template doublePrec*(T: type Fp): type =
  ## Map a field type ``Fp`` to the double-precision type
  ## ``FpDbl`` instantiated for the same curve ``T.C``
  FpDbl[T.C]
# No exceptions allowed
{.push raises: [].}
{.push inline.}
func `==`*(a, b: FpDbl): SecretBool =
  ## Equality of two double-precision elements,
  ## decided by comparing their ``limbs2x`` storage
  result = (a.limbs2x == b.limbs2x)
func isZero*(a: FpDbl): SecretBool =
  ## Tell whether the ``limbs2x`` storage of ``a`` is zero
  result = isZero(a.limbs2x)
func setZero*(a: var FpDbl) =
  ## Reset ``a`` to zero by zeroing its ``limbs2x`` storage
  setZero(a.limbs2x)
func prod2x*(r: var FpDbl, a, b: Fp) =
  ## Double-precision multiplication
  ## ``r`` receives the unreduced product of ``a`` by ``b``
  ##
  ## For a and b in [0, p)
  ## the output is in [0, p²)
  ##
  ## The output can go up to the [0, 2ⁿp) range
  ## provided spare bits are available in the Fp representation
  prod(r.limbs2x, a.mres.limbs, b.mres.limbs)
func square2x*(r: var FpDbl, a: Fp) =
  ## Double-precision squaring
  ## ``r`` receives the unreduced square of ``a``
  ##
  ## For a in [0, p)
  ## the output is in [0, p²)
  ##
  ## The output can go up to the [0, 2ⁿp) range
  ## provided spare bits are available in the Fp representation
  square(r.limbs2x, a.mres.limbs)
func redc2x*(r: var Fp, a: FpDbl) =
  ## Reduce a double-precision field element into r
  ## from [0, 2ⁿp) range to [0, p) range
  ##
  ## The work is delegated to ``montyRedc2x``, which is given
  ## the limbs of the curve modulus together with the
  ## ``getNegInvModWord`` and ``getSpareBits`` values of the field.
  montyRedc2x(
    r.mres.limbs,
    a.limbs2x,
    Fp.C.Mod.limbs,
    Fp.getNegInvModWord(),
    Fp.getSpareBits()
  )
func diff2xUnr*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision subtraction without reduction
  ##
  ## The final borrow is discarded: if the result is negative,
  ## fully reduced additions/subtractions are necessary afterwards
  ## to guarantee the [0, 2ⁿp) range
  discard diff(r.limbs2x, a.limbs2x, b.limbs2x)
func diff2xMod*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision modular subtraction
  ## When the subtraction underflows, 2ⁿp is added back
  ## so that the output stays in the [0, 2ⁿp) range
  when UseASM_X86_64:
    submod2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const
      Modulus = FpDbl.C.Mod
      Half = r.limbs2x.len div 2

    # Plain subtraction over all limbs, remembering the final borrow
    let wentNegative = SecretBool(diff(r.limbs2x, a.limbs2x, b.limbs2x))

    # The low half of 2ⁿp is all zeros, so the correction only touches
    # the high half: add p there and keep the sum only on underflow
    var
      carry = Carry(0)
      fixed: SecretWord
    staticFor k, 0, Half:
      addC(carry, fixed, r.limbs2x[k+Half], Modulus.limbs[k], carry)
      ccopy(wentNegative, r.limbs2x[k+Half], fixed)
func sum2xUnr*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision addition without reduction
  ##
  ## The final carry is discarded: if the result is bigger than 2ⁿp,
  ## fully reduced additions/subtractions are necessary afterwards
  ## to guarantee the [0, 2ⁿp) range
  discard sum(r.limbs2x, a.limbs2x, b.limbs2x)
func sum2xMod*(r: var FpDbl, a, b: FpDbl) =
  ## Double-precision modular addition
  ## When the sum reaches 2ⁿp or more, 2ⁿp is subtracted
  ## so that the output stays in the [0, 2ⁿp) range
  when UseASM_X86_64:
    addmod2x_asm(r.limbs2x, a.limbs2x, b.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const
      Modulus = FpDbl.C.Mod
      Half = r.limbs2x.len div 2

    # Plain addition over all limbs, remembering the final carry
    let carriedOut = SecretBool(sum(r.limbs2x, a.limbs2x, b.limbs2x))

    # Tentatively subtract p from the high half
    # (the low half of 2ⁿp is all zeros, so it is left alone)
    var
      borrow = Borrow(0)
      reducedHi {.noInit.}: Limbs[Half]
    staticFor k, 0, Half:
      subB(borrow, reducedHi[k], r.limbs2x[k+Half], Modulus.limbs[k], borrow)

    # Reduce when the addition carried out,
    # or when no borrow occurred, i.e. r was at least 2ⁿp
    let mustReduce = carriedOut or not(SecretBool borrow)

    # Conditionally keep the reduced high half
    staticFor k, 0, Half:
      ccopy(mustReduce, r.limbs2x[k+Half], reducedHi[k])
func neg2xMod*(r: var FpDbl, a: FpDbl) =
  ## Double-precision modular negation
  ## ``r`` receives the negation of ``a`` modulo 2ⁿp,
  ## with a zero input giving a zero output
  when UseASM_X86_64:
    negmod2x_asm(r.limbs2x, a.limbs2x, FpDbl.C.Mod.limbs)
  else:
    const
      Modulus = FpDbl.C.Mod
      Half = r.limbs2x.len div 2

    # The result is built in a temporary so that the case
    # where r and a are the same location is handled
    var negated {.noInit.}: FpDbl

    # For a = 0 we need r = 0 and not r = 2ⁿp, since the comparison
    # operator assumes a unique modular representation
    let inputIsZero = a.isZero()

    var borrow = Borrow(0)
    # Low half: 2ⁿp is all zeros there
    staticFor k, 0, Half:
      subB(borrow, negated.limbs2x[k], Zero, a.limbs2x[k], borrow)
    # High half: 2ⁿp holds the (shifted) limbs of p there
    staticFor k, Half, r.limbs2x.len:
      subB(borrow, negated.limbs2x[k], Modulus.limbs[k-Half], a.limbs2x[k], borrow)

    # Force the result to zero when the input was zero
    czero(negated.limbs2x, inputIsZero)
    r = negated
func prod2xImpl(
       r {.noAlias.}: var FpDbl,
       a {.noAlias.}: FpDbl, b: static int) =
  ## Multiplication by a small integer known at compile-time
  ## Requires no aliasing between ``r`` and ``a``, and ``b`` non-negative
  ##
  ## ``r`` receives ``b`` times ``a``, built from a fixed chain of
  ## ``sum2xMod`` (and one ``diff2xMod``) steps selected at compile-time.
  ## Supported multipliers are 0 to 12; any other value is a
  ## compile-time error.
  ##
  ## The trailing comments give the multiple of ``a`` held in ``r``
  ## after each step.
  static: doAssert b >= 0

  when b == 0:
    r.setZero()
  elif b == 1:
    r = a
  elif b == 2:
    r.sum2xMod(a, a)
  elif b == 3:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(a, r)   # 3
  elif b == 4:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
  elif b == 5:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, a)   # 5
  elif b == 6:
    # Chain 2 -> 3 -> 6 needs no temporary.
    # (The previous chain referenced an undeclared identifier.)
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, a)   # 3
    r.sum2xMod(r, r)   # 6
  elif b == 7:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, r)   # 8
    r.diff2xMod(r, a)  # 7
  elif b == 8:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, r)   # 8
  elif b == 9:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, r)   # 8
    r.sum2xMod(r, a)   # 9
  elif b == 10:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, a)   # 5
    r.sum2xMod(r, r)   # 10
  elif b == 11:
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, r)   # 4
    r.sum2xMod(r, a)   # 5
    r.sum2xMod(r, r)   # 10
    r.sum2xMod(r, a)   # 11
  elif b == 12:
    # Chain 2 -> 3 -> 6 -> 12 needs no temporary.
    # (The previous chain added 1a to 8a and so produced 9a.)
    r.sum2xMod(a, a)   # 2
    r.sum2xMod(r, a)   # 3
    r.sum2xMod(r, r)   # 6
    r.sum2xMod(r, r)   # 12
  else:
    {.error: "Multiplication by this small int not implemented".}
func prod2x*(r: var FpDbl, a: FpDbl, b: static int) =
  ## Multiplication by a small integer known at compile-time
  ##
  ## A negative ``b`` is handled by first negating ``a`` modulo 2ⁿp
  ## and then multiplying by ``-b``.
  ## The operand is always taken from a local copy, so ``prod2xImpl``
  ## never sees ``r`` and its input sharing storage.
  const isNegative = b < 0
  const magnitude = if isNegative: -b else: b

  # `when` opens no new scope: `operand` stays visible below
  when isNegative:
    var operand {.noInit.}: typeof(r)
    operand.neg2xMod(a)
  else:
    let operand = a

  prod2xImpl(r, operand, magnitude)
{.pop.} # inline
{.pop.} # raises no exceptions