Implement fused multiply add modular multiplication for single limb "bigint". TODO fallback from assembly.
This commit is contained in:
parent
408bc9b6f3
commit
057ce0cbf9
|
@ -16,6 +16,8 @@
|
||||||
import
|
import
|
||||||
./word_types, ./bigints
|
./word_types, ./bigints
|
||||||
|
|
||||||
|
from ./private/word_types_internal import unsafe_div2n1n
|
||||||
|
|
||||||
type
|
type
|
||||||
Fp*[P: static BigInt] = object
|
Fp*[P: static BigInt] = object
|
||||||
## P is a prime number
|
## P is a prime number
|
||||||
|
@ -64,63 +66,18 @@ func `+`*(a, b: Fp): Fp =
|
||||||
ctl = ctl or not sub(result, Fp.P, False)
|
ctl = ctl or not sub(result, Fp.P, False)
|
||||||
sub(result, Fp.P, ctl)
|
sub(result, Fp.P, ctl)
|
||||||
|
|
||||||
# ############################################################
|
template scaleadd_impl(a: var Fp, c: Limb) =
|
||||||
#
|
## Scale-accumulate
|
||||||
# Montgomery domain primitives
|
##
|
||||||
#
|
## With a word W = 2^LimbBitSize and a field Fp
|
||||||
# ############################################################
|
## Does a <- a * W + c (mod p)
|
||||||
|
|
||||||
from bitops import fastLog2
|
when Fp.P.bits <= LimbBitSize:
|
||||||
# This will only be used at compile-time
|
# If the prime fits in a single limb
|
||||||
# so no constant-time worries (it is constant-time if using the De Bruijn multiplication)
|
var q: Limb
|
||||||
|
|
||||||
func montyMagic*(M: static BigInt): static Limb =
|
# (hi, lo) = a * 2^63 + c
|
||||||
## Returns the Montgomery domain magic number for the input modulus:
|
let hi = a[0] shr 1 # 64 - 63 = 1
|
||||||
## -1/M[0] mod LimbSize
|
let lo = a[0] shl LimbBitSize or c # Assumes most-significant bit in c is not set
|
||||||
## M[0] is the least significant limb of M
|
unsafe_div2n1n(q, a[0], hi, lo, Fp.P.limbs[0]) # (hi, lo) mod P
|
||||||
## M must be odd and greater than 2.
|
return
|
||||||
|
|
||||||
# Test vectors: https://www.researchgate.net/publication/4107322_Montgomery_modular_multiplication_architecture_for_public_key_cryptosystems
|
|
||||||
# on p354
|
|
||||||
# Reference C impl: http://www.hackersdelight.org/hdcodetxt/mont64.c.txt
|
|
||||||
|
|
||||||
# ######################################################################
|
|
||||||
# Implementation of modular multiplication inverse
|
|
||||||
# Assuming 2 positive integers a and m the modulo
|
|
||||||
#
|
|
||||||
# We are looking for z that solves `az ≡ 1 mod m`
|
|
||||||
#
|
|
||||||
# References:
|
|
||||||
# - Knuth, The Art of Computer Programming, Vol2 p342
|
|
||||||
# - Menezes, Handbook of Applied Cryptography (HAC), p610
|
|
||||||
# http://cacr.uwaterloo.ca/hac/about/chap14.pdf
|
|
||||||
|
|
||||||
# Starting from the extended GCD formula (Bezout identity),
|
|
||||||
# `ax + by = gcd(x,y)` with input x,y and outputs a, b, gcd
|
|
||||||
# We assume a and m are coprimes, i.e. gcd is 1, otherwise no inverse
|
|
||||||
# `ax + my = 1` <=> `ax + my ≡ 1 mod m` <=> `ax ≡ 1 mod m`
|
|
||||||
|
|
||||||
# For Montgomery magic number, we are in a special case
|
|
||||||
# where a = M and m = 2^LimbSize.
|
|
||||||
# For a and m to be coprimes, a must be odd.
|
|
||||||
|
|
||||||
# M being a power of 2 greatly simplifies computation:
|
|
||||||
# - https://crypto.stackexchange.com/questions/47493/how-to-determine-the-multiplicative-inverse-modulo-64-or-other-power-of-two
|
|
||||||
# - http://groups.google.com/groups?selm=1994Apr6.093116.27805%40mnemosyne.cs.du.edu
|
|
||||||
# - https://mumble.net/~campbell/2015/01/21/inverse-mod-power-of-two
|
|
||||||
# - https://eprint.iacr.org/2017/411
|
|
||||||
|
|
||||||
# We have the following relation
|
|
||||||
# ax ≡ 1 (mod 2^k) <=> ax(2 - ax) ≡ 1 (mod 2^(2k))
|
|
||||||
#
|
|
||||||
# To get -1/M0 mod LimbSize
|
|
||||||
# we can either negate the resulting x of `ax(2 - ax) ≡ 1 (mod 2^(2k))`
|
|
||||||
# or do ax(2 + ax) ≡ 1 (mod 2^(2k))
|
|
||||||
|
|
||||||
const
|
|
||||||
M0 = M.limbs[0]
|
|
||||||
k = fastLog2(LimbBitSize)
|
|
||||||
|
|
||||||
result = M0 # Start from an inverse of M0 modulo 2, M0 is odd and it's own inverse
|
|
||||||
for _ in static(0 ..< k):
|
|
||||||
result *= 2 + M * result # x' = x(2 + ax) (`+` to avoid negating at the end)
|
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
# Constantine
|
||||||
|
# Copyright (c) 2018 Status Research & Development GmbH
|
||||||
|
# Licensed and distributed under either of
|
||||||
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||||
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||||
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Montgomery domain primitives
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
import
|
||||||
|
./word_types, ./bigints, ./field_fp
|
||||||
|
|
||||||
|
from bitops import fastLog2
|
||||||
|
# This will only be used at compile-time
|
||||||
|
# so no constant-time worries (it is constant-time if using the De Bruijn multiplication)
|
||||||
|
|
||||||
|
func montyMagic*(M: static BigInt): static Limb =
|
||||||
|
## Returns the Montgomery domain magic number for the input modulus:
|
||||||
|
## -1/M[0] mod LimbSize
|
||||||
|
## M[0] is the least significant limb of M
|
||||||
|
## M must be odd and greater than 2.
|
||||||
|
|
||||||
|
# Test vectors: https://www.researchgate.net/publication/4107322_Montgomery_modular_multiplication_architecture_for_public_key_cryptosystems
|
||||||
|
# on p354
|
||||||
|
# Reference C impl: http://www.hackersdelight.org/hdcodetxt/mont64.c.txt
|
||||||
|
|
||||||
|
# ######################################################################
|
||||||
|
# Implementation of modular multiplication inverse
|
||||||
|
# Assuming 2 positive integers a and m the modulo
|
||||||
|
#
|
||||||
|
# We are looking for z that solves `az ≡ 1 mod m`
|
||||||
|
#
|
||||||
|
# References:
|
||||||
|
# - Knuth, The Art of Computer Programming, Vol2 p342
|
||||||
|
# - Menezes, Handbook of Applied Cryptography (HAC), p610
|
||||||
|
# http://cacr.uwaterloo.ca/hac/about/chap14.pdf
|
||||||
|
|
||||||
|
# Starting from the extended GCD formula (Bezout identity),
|
||||||
|
# `ax + by = gcd(x,y)` with input x,y and outputs a, b, gcd
|
||||||
|
# We assume a and m are coprimes, i.e. gcd is 1, otherwise no inverse
|
||||||
|
# `ax + my = 1` <=> `ax + my ≡ 1 mod m` <=> `ax ≡ 1 mod m`
|
||||||
|
|
||||||
|
# For Montgomery magic number, we are in a special case
|
||||||
|
# where a = M and m = 2^LimbSize.
|
||||||
|
# For a and m to be coprimes, a must be odd.
|
||||||
|
|
||||||
|
# M being a power of 2 greatly simplifies computation:
|
||||||
|
# - https://crypto.stackexchange.com/questions/47493/how-to-determine-the-multiplicative-inverse-modulo-64-or-other-power-of-two
|
||||||
|
# - http://groups.google.com/groups?selm=1994Apr6.093116.27805%40mnemosyne.cs.du.edu
|
||||||
|
# - https://mumble.net/~campbell/2015/01/21/inverse-mod-power-of-two
|
||||||
|
# - https://eprint.iacr.org/2017/411
|
||||||
|
|
||||||
|
# We have the following relation
|
||||||
|
# ax ≡ 1 (mod 2^k) <=> ax(2 - ax) ≡ 1 (mod 2^(2k))
|
||||||
|
#
|
||||||
|
# To get -1/M0 mod LimbSize
|
||||||
|
# we can either negate the resulting x of `ax(2 - ax) ≡ 1 (mod 2^(2k))`
|
||||||
|
# or do ax(2 + ax) ≡ 1 (mod 2^(2k))
|
||||||
|
|
||||||
|
const
|
||||||
|
M0 = M.limbs[0]
|
||||||
|
k = fastLog2(LimbBitSize)
|
||||||
|
|
||||||
|
result = M0 # Start from an inverse of M0 modulo 2, M0 is odd and it's own inverse
|
||||||
|
for _ in static(0 ..< k):
|
||||||
|
result *= 2 + M * result # x' = x(2 + ax) (`+` to avoid negating at the end)
|
||||||
|
|
||||||
|
# func toMonty*[P: static BigInt](a: Fp[P], montyMagic: Limb): Montgomery[P] =
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
# Constantine
|
||||||
|
# Copyright (c) 2018 Status Research & Development GmbH
|
||||||
|
# Licensed and distributed under either of
|
||||||
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||||
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||||
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Unsafe constant-time primitives with specific restrictions
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
import ../word_types
|
||||||
|
|
||||||
|
func asm_x86_64_div2n1n(q, r: var uint64, n_hi, n_lo, d: uint64) {.inline.}=
|
||||||
|
## Division uint128 by uint64
|
||||||
|
## Warning ⚠️ :
|
||||||
|
## - if n_hi == d, quotient does not fit in an uint64
|
||||||
|
## - if n_hi > d result is undefined
|
||||||
|
|
||||||
|
# TODO !!! - Replace by constant-time, portable, non-assembly version
|
||||||
|
|
||||||
|
# DIV r/m64
|
||||||
|
# Divide RDX:RAX (n_hi:n_lo) by r/m64
|
||||||
|
#
|
||||||
|
# Inputs
|
||||||
|
# - numerator high word in RDX,
|
||||||
|
# - numerator low word in RAX,
|
||||||
|
# - divisor as rm parameter (register or memory at the compiler discretion)
|
||||||
|
# Result
|
||||||
|
# - Quotient in RAX
|
||||||
|
# - Remainder in RDX
|
||||||
|
asm """
|
||||||
|
divq %[divisor] // We name the register/memory divisor
|
||||||
|
: "=a" (`*q`), "=d" (`*r`) // Don't forget to dereference the var hidden pointer
|
||||||
|
: "d" (`n_hi`), "a" (`n_lo`), [divisor] "rm" (`d`)
|
||||||
|
: // no register clobbered besides explicitly used RAX and RDX
|
||||||
|
"""
|
||||||
|
|
||||||
|
func unsafe_div2n1n*(q, r: var Ct[uint64], n_hi, n_lo, d: Ct[uint64]) {.inline.}=
|
||||||
|
## Division uint128 by uint64
|
||||||
|
## Warning ⚠️ :
|
||||||
|
## - if n_hi == d, quotient does not fit in an uint64
|
||||||
|
## - if n_hi > d result is undefined
|
||||||
|
##
|
||||||
|
## TODO, at the moment only x86_64 architecture are supported
|
||||||
|
## as we use assembly.
|
||||||
|
## Also we assume that the native integer division
|
||||||
|
## provided by the PU is constant-time
|
||||||
|
|
||||||
|
# Note, using C/Nim default `div` is inefficient
|
||||||
|
# and complicated to make constant-time
|
||||||
|
# See at the bottom.
|
||||||
|
#
|
||||||
|
# Furthermore compilers try to substitute division
|
||||||
|
# with a fast path that may have branches. It might also
|
||||||
|
# be the same at the hardware level.
|
||||||
|
|
||||||
|
type T = uint64
|
||||||
|
|
||||||
|
when not defined(amd64):
|
||||||
|
{.error: "At the moment only x86_64 architecture is supported".}
|
||||||
|
else:
|
||||||
|
asm_x86_64_div2n1n(T(q), T(r), T(n_hi), T(n_lo), T(d))
|
||||||
|
|
||||||
|
when isMainModule:
|
||||||
|
|
||||||
|
var q, r: uint64
|
||||||
|
|
||||||
|
# (1 shl 64) div 3
|
||||||
|
let n_hi = 1'u64
|
||||||
|
let n_lo = 0'u64
|
||||||
|
let d = 3'u64
|
||||||
|
|
||||||
|
asm_x86_64_div2n1n(q, r, n_hi, n_lo, d)
|
||||||
|
|
||||||
|
doAssert q == 6148914691236517205'u64
|
||||||
|
doAssert r == 1
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Non-constant-time portable div2n1n
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
# implementation from Stint: https://github.com/status-im/nim-stint/blob/edb1ade37309390cc641cee07ab62e5459d9ca44/stint/private/uint_div.nim#L131
|
||||||
|
|
||||||
|
# func div2n1n[T: SomeunsignedInt](q, r: var T, n_hi, n_lo, d: T) =
|
||||||
|
#
|
||||||
|
# # assert countLeadingZeroBits(d) == 0, "Divisor was not normalized"
|
||||||
|
#
|
||||||
|
# const
|
||||||
|
# size = bitsof(q)
|
||||||
|
# halfSize = size div 2
|
||||||
|
# halfMask = (1.T shl halfSize) - 1.T
|
||||||
|
#
|
||||||
|
# template halfQR(n_hi, n_lo, d, d_hi, d_lo: T): tuple[q,r: T] =
|
||||||
|
#
|
||||||
|
# var (q, r) = divmod(n_hi, d_hi)
|
||||||
|
# let m = q * d_lo
|
||||||
|
# var r = (r shl halfSize) or n_lo
|
||||||
|
#
|
||||||
|
# # Fix the reminder, we're at most 2 iterations off
|
||||||
|
# if r < m:
|
||||||
|
# dec q
|
||||||
|
# r += d
|
||||||
|
# if r >= d and r < m:
|
||||||
|
# dec q
|
||||||
|
# r += d
|
||||||
|
# r -= m
|
||||||
|
# (q, r)
|
||||||
|
#
|
||||||
|
# let
|
||||||
|
# d_hi = d shr halfSize
|
||||||
|
# d_lo = d and halfMask
|
||||||
|
# n_lohi = nlo shr halfSize
|
||||||
|
# n_lolo = nlo and halfMask
|
||||||
|
#
|
||||||
|
# # First half of the quotient
|
||||||
|
# let (q1, r1) = halfQR(n_hi, n_lohi, d, d_hi, d_lo)
|
||||||
|
#
|
||||||
|
# # Second half
|
||||||
|
# let (q2, r2) = halfQR(r1, n_lolo, d, d_hi, d_lo)
|
||||||
|
#
|
||||||
|
# q = (q1 shl halfSize) or q2
|
||||||
|
# r = r2
|
Loading…
Reference in New Issue