1st experiment at accelerating montgomery multiplication (665 lines of specialized duplicated ASM code for some reason, monomorphization is probably better than that)

This commit is contained in:
Mamy André-Ratsimbazafy 2020-02-28 22:46:20 +01:00
parent 155aa21767
commit 05bce529b4
No known key found for this signature in database
GPG Key ID: 7B88AD1FE79492E1
3 changed files with 84 additions and 12 deletions

View File

@ -0,0 +1,66 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
# ############################################################
#
# Benchmark of field addition and multiplication
#
# ############################################################
# Note: the remarks below describe modular exponentiation,
# which this file does not benchmark.
# 2 implementations are available
# - 1 is constant time
# - 1 exposes the exponent bits to:
# timing attack,
# memory access analysis,
# power analysis (i.e. oscilloscopes on embedded)
# It is suitable for public exponents for example
# to compute modular inversion via the Fermat method
import
../constantine/config/[common, curves],
../constantine/arithmetic/[bigints_checked, finite_fields],
../constantine/io/[io_bigints, io_fields],
random, std/monotimes, times, strformat
# Number of timed loop iterations executed by each benchmark below.
const Iters = 1_000_000
# Seed the global random number generator with a fixed value.
# NOTE(review): no random draw is visible in this file's benchmarks - confirm
# whether the seeding is still needed.
randomize(1234)
proc addBench() =
  ## Times `Iters` in-place additions `x += y` on `Fp[BLS12_381]` elements
  ## and prints the total elapsed time (ms) and the average time per
  ## addition (ns), both measured with the monotonic clock.
  var x, y: Fp[BLS12_381]
  # BN254 field modulus
  x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
  # BLS12-381 prime - 2
  y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
  let start = getMonotime()
  for _ in 0 ..< Iters:
    # `x` accumulates, so each addition consumes the previous result.
    x += y
  let stop = getMonotime()
  echo &"Time for {Iters} conditional additions (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
  echo &"Time for 1 conditional addition ==> {inNanoseconds((stop-start) div Iters)} ns"

addBench()
proc mulBench() =
  ## Runs `prod` on two fixed `Fp[BLS12_381]` operands `Iters` times and
  ## reports the total elapsed time (ms) and the average per call (ns).
  var
    product: Fp[BLS12_381]
    lhs: Fp[BLS12_381]
    rhs: Fp[BLS12_381]
  # Left operand: BN254 field modulus
  fromHex(lhs, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
  # Right operand: BLS12-381 prime - 2
  fromHex(rhs, "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
  let start = getMonotime()
  for _ in 1 .. Iters:
    # Operands never change; only the destination is overwritten.
    prod(product, lhs, rhs)
  let stop = getMonotime()
  echo &"Time for {Iters} multiplications (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
  echo &"Time for 1 multiplication ==> {inNanoseconds((stop-start) div Iters)} ns"

mulBench()

View File

@ -6,6 +6,10 @@ This folder contains the implementation of
## References
- Analyzing and Comparing Montgomery Multiplication Algorithms
Cetin Kaya Koc and Tolga Acar and Burton S. Kaliski Jr.
http://pdfs.semanticscholar.org/5e39/41ff482ec3ee41dc53c3298f0be085c69483.pdf
- Montgomery Arithmetic from a Software Perspective\
Joppe W. Bos and Peter L. Montgomery, 2017\
https://eprint.iacr.org/2017/1057

View File

@ -551,26 +551,28 @@ func montyMul*(
r.setBitLength(bitSizeof(M))
setZero(r)
var r_hi = Zero # represents the high word that is used in intermediate computation before reduction mod M
var partials: array[14, DoubleWord]
for i in 0 ..< nLen:
let zi = (r[0] + wordMul(a[i], b[0])).wordMul(negInvModWord)
var carry = Zero
let zi = (Word(partials[0]) + wordMul(a[i], b[0])).wordMul(negInvModWord)
for j in 0 ..< nLen:
let z = DoubleWord(r[j]) + unsafeExtPrecMul(a[i], b[j]) +
unsafeExtPrecMul(zi, M[j]) + DoubleWord(carry)
carry = Word(z shr WordBitSize)
if j != 0: # "division" by a physical word 2^32 or 2^64
r[j-1] = Word(z).mask()
partials[j] += unsafeExtPrecMul(a[i], b[j]) + unsafeExtPrecMul(zi, M[j])
r_hi += carry
r[^1] = r_hi.mask()
r_hi = r_hi shr WordBitSize
var carry = partials[0] shr WordBitSize
for j in 1 .. nlen: # we need 1 extra temporary at nlen
partials[j] += carry
carry = partials[j] shr WordBitSize
partials[j-1] = partials[j] and DoubleWord(MaxWord)
partials[nlen] = partials[nlen] shr WordBitSize
for i in 0 ..< nLen:
r[i] = Word(partials[i])
# If the extra word is not zero or if r-M does not borrow (i.e. r >= M)
# Then subtract M
discard r.csub(M, r_hi.isNonZero() or not r.csub(M, CtFalse))
discard r.csub(M, CTBool[Word](partials[nLen].isNonZero()) or not r.csub(M, CtFalse))
func redc*(r: BigIntViewMut, a: BigIntViewAny, one, N: BigIntViewConst, negInvModWord: Word) {.inline.} =
## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N)