mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-03 13:43:10 +00:00
298 lines
11 KiB
Nim
298 lines
11 KiB
Nim
# Constantine
|
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
|
# Licensed and distributed under either of
|
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
|
|
|
import
|
|
# Standard library
|
|
std/macros,
|
|
# Internal
|
|
../../config/common,
|
|
../../primitives
|
|
|
|
# ############################################################
|
|
#
|
|
# Assembly implementation of finite fields
|
|
#
|
|
# ############################################################
|
|
|
|
# Note: We can refer to at most 30 registers in inline assembly
|
|
# and "InputOutput" registers count double
|
|
# They are nice to let the compiler deals with mov
|
|
# but too constraining so we move things ourselves.
|
|
|
|
static: doAssert UseASM_X86_64
|
|
|
|
{.localPassC:"-fomit-frame-pointer".} # Needed so that the compiler finds enough registers
|
|
|
|
# Field addition
|
|
# ------------------------------------------------------------
|
|
|
|
macro addmod_gen[N: static int](a: var Limbs[N], b, M: Limbs[N]): untyped =
|
|
## Generate an optimized modular addition kernel
|
|
# Register pressure note:
|
|
# We could generate a kernel per modulus M by hardocing it as immediate
|
|
# however this requires
|
|
# - duplicating the kernel and also
|
|
# - 64-bit immediate encoding is quite large
|
|
|
|
result = newStmtList()
|
|
|
|
var ctx = init(Assembler_x86, BaseType)
|
|
let
|
|
arrA = init(OperandArray, nimSymbol = a, N, PointerInReg, InputOutput)
|
|
# We reuse the reg used for B for overflow detection
|
|
arrB = init(OperandArray, nimSymbol = b, N, PointerInReg, InputOutput)
|
|
# We could force M as immediate by specializing per moduli
|
|
arrM = init(OperandArray, nimSymbol = M, N, PointerInReg, Input)
|
|
# If N is too big, we need to spill registers. TODO.
|
|
arrT = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
|
|
arrTsub = init(OperandArray, nimSymbol = ident"tsub", N, ElemsInReg, Output_EarlyClobber)
|
|
|
|
# Addition
|
|
for i in 0 ..< N:
|
|
ctx.mov arrT[i], arrA[i]
|
|
if i == 0:
|
|
ctx.add arrT[0], arrB[0]
|
|
else:
|
|
ctx.adc arrT[i], arrB[i]
|
|
# Interleaved copy in a second buffer as well
|
|
ctx.mov arrTsub[i], arrT[i]
|
|
|
|
# Mask: overflowed contains 0xFFFF or 0x0000
|
|
# TODO: unnecessary if MSB never set, i.e. "canUseNoCarryMontyMul"
|
|
let overflowed = arrB.reuseRegister()
|
|
ctx.sbb overflowed, overflowed
|
|
|
|
# Now substract the modulus
|
|
for i in 0 ..< N:
|
|
if i == 0:
|
|
ctx.sub arrTsub[0], arrM[0]
|
|
else:
|
|
ctx.sbb arrTsub[i], arrM[i]
|
|
|
|
# If it overflows here, it means that it was
|
|
# smaller than the modulus and we don't need arrTsub
|
|
ctx.sbb overflowed, 0
|
|
|
|
# Conditional Mov and
|
|
# and store result
|
|
for i in 0 ..< N:
|
|
ctx.cmovnc arrT[i], arrTsub[i]
|
|
ctx.mov arrA[i], arrT[i]
|
|
|
|
let t = arrT.nimSymbol
|
|
let tsub = arrTsub.nimSymbol
|
|
result.add quote do:
|
|
var `t`{.noinit.}, `tsub` {.noInit.}: typeof(`a`)
|
|
result.add ctx.generate
|
|
|
|
func addmod_asm*(a: var Limbs, b, M: Limbs) =
|
|
## Constant-time modular addition
|
|
addmod_gen(a, b, M)
|
|
|
|
# Field substraction
|
|
# ------------------------------------------------------------
|
|
|
|
macro submod_gen[N: static int](a: var Limbs[N], b, M: Limbs[N]): untyped =
|
|
## Generate an optimized modular addition kernel
|
|
# Register pressure note:
|
|
# We could generate a kernel per modulus M by hardocing it as immediate
|
|
# however this requires
|
|
# - duplicating the kernel and also
|
|
# - 64-bit immediate encoding is quite large
|
|
|
|
result = newStmtList()
|
|
|
|
var ctx = init(Assembler_x86, BaseType)
|
|
let
|
|
arrA = init(OperandArray, nimSymbol = a, N, PointerInReg, InputOutput)
|
|
# We reuse the reg used for B for overflow detection
|
|
arrB = init(OperandArray, nimSymbol = b, N, PointerInReg, InputOutput)
|
|
# We could force M as immediate by specializing per moduli
|
|
arrM = init(OperandArray, nimSymbol = M, N, PointerInReg, Input)
|
|
# If N is too big, we need to spill registers. TODO.
|
|
arrT = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
|
|
arrTadd = init(OperandArray, nimSymbol = ident"tadd", N, ElemsInReg, Output_EarlyClobber)
|
|
|
|
# Substraction
|
|
for i in 0 ..< N:
|
|
ctx.mov arrT[i], arrA[i]
|
|
if i == 0:
|
|
ctx.sub arrT[0], arrB[0]
|
|
else:
|
|
ctx.sbb arrT[i], arrB[i]
|
|
# Interleaved copy the modulus to hide SBB latencies
|
|
ctx.mov arrTadd[i], arrM[i]
|
|
|
|
# Mask: undeflowed contains 0xFFFF or 0x0000
|
|
let underflowed = arrB.reuseRegister()
|
|
ctx.sbb underflowed, underflowed
|
|
|
|
# Now mask the adder, with 0 or the modulus limbs
|
|
for i in 0 ..< N:
|
|
ctx.`and` arrTadd[i], underflowed
|
|
|
|
# Add the masked modulus
|
|
for i in 0 ..< N:
|
|
if i == 0:
|
|
ctx.add arrT[0], arrTadd[0]
|
|
else:
|
|
ctx.adc arrT[i], arrTadd[i]
|
|
ctx.mov arrA[i], arrT[i]
|
|
|
|
let t = arrT.nimSymbol
|
|
let tadd = arrTadd.nimSymbol
|
|
result.add quote do:
|
|
var `t`{.noinit.}, `tadd` {.noInit.}: typeof(`a`)
|
|
result.add ctx.generate
|
|
|
|
func submod_asm*(a: var Limbs, b, M: Limbs) =
|
|
## Constant-time modular substraction
|
|
## Warning, does not handle aliasing of a and b
|
|
submod_gen(a, b, M)
|
|
|
|
# Field negation
|
|
# ------------------------------------------------------------
|
|
|
|
macro negmod_gen[N: static int](r: var Limbs[N], a, M: Limbs[N]): untyped =
|
|
## Generate an optimized modular negation kernel
|
|
|
|
result = newStmtList()
|
|
|
|
var ctx = init(Assembler_x86, BaseType)
|
|
let
|
|
arrA = init(OperandArray, nimSymbol = a, N, PointerInReg, Input)
|
|
arrR = init(OperandArray, nimSymbol = r, N, ElemsInReg, InputOutput)
|
|
# We could force M as immediate by specializing per moduli
|
|
arrM = init(OperandArray, nimSymbol = M, N, PointerInReg, Input)
|
|
|
|
# Addition
|
|
for i in 0 ..< N:
|
|
ctx.mov arrR[i], arrM[i]
|
|
if i == 0:
|
|
ctx.sub arrR[0], arrA[0]
|
|
else:
|
|
ctx.sbb arrR[i], arrA[i]
|
|
|
|
result.add ctx.generate
|
|
|
|
func negmod_asm*(r: var Limbs, a, M: Limbs) {.inline.} =
|
|
## Constant-time modular negation
|
|
negmod_gen(r, a, M)
|
|
|
|
# Sanity checks
|
|
# ----------------------------------------------------------
|
|
|
|
when isMainModule:
|
|
import ../config/type_bigint, algorithm, strutils
|
|
|
|
proc mainAdd() =
|
|
var a = [SecretWord 0xE3DF60E8F6D0AF9A'u64, SecretWord 0x7B2665C2258A7625'u64, SecretWord 0x68FC9A1D0977C8E0'u64, SecretWord 0xF3DC61ED7DE76883'u64]
|
|
var b = [SecretWord 0x78E9C2EF58BB6B78'u64, SecretWord 0x547F65BD19014254'u64, SecretWord 0x556A115819EAD4B5'u64, SecretWord 0x8CA844A546935DC3'u64]
|
|
var M = [SecretWord 0xFFFFFFFF00000001'u64, SecretWord 0x0000000000000000'u64, SecretWord 0x00000000FFFFFFFF'u64, SecretWord 0xFFFFFFFFFFFFFFFF'u64]
|
|
var s = "0x5cc923d94f8c1b11cfa5cb7f3e8bb879be66ab7423629d968084a692c47ac647"
|
|
|
|
a.reverse()
|
|
b.reverse()
|
|
M.reverse()
|
|
|
|
debugecho "--------------------------------"
|
|
debugecho "before:"
|
|
debugecho " a: ", a.toHex()
|
|
debugecho " b: ", b.toHex()
|
|
debugecho " m: ", M.toHex()
|
|
addmod_asm(a, b, M)
|
|
debugecho "after:"
|
|
debugecho " a: ", a.toHex().tolower
|
|
debugecho " s: ", s
|
|
debugecho " ok: ", a.toHex().tolower == s
|
|
|
|
a = [SecretWord 0x00935a991ca215a6'u64, SecretWord 0x5fbdac6294679337'u64, SecretWord 0x1e41793877b80f12'u64, SecretWord 0x5724cd93cb32932d'u64]
|
|
b = [SecretWord 0x19dd4ecfda64ef80'u64, SecretWord 0x92deeb1532169c3d'u64, SecretWord 0x69ce4ee28421cd30'u64, SecretWord 0x4d90ab5a40295321'u64]
|
|
M = [SecretWord 0x2523648240000001'u64, SecretWord 0xba344d8000000008'u64, SecretWord 0x6121000000000013'u64, SecretWord 0xa700000000000013'u64]
|
|
s = "0x1a70a968f7070526f29c9777c67e2f74880fc81afbd9dc42a4b578ee0b5be64e"
|
|
|
|
a.reverse()
|
|
b.reverse()
|
|
M.reverse()
|
|
|
|
debugecho "--------------------------------"
|
|
debugecho "before:"
|
|
debugecho " a: ", a.toHex()
|
|
debugecho " b: ", b.toHex()
|
|
debugecho " m: ", M.toHex()
|
|
addmod_asm(a, b, M)
|
|
debugecho "after:"
|
|
debugecho " a: ", a.toHex().tolower
|
|
debugecho " s: ", s
|
|
debugecho " ok: ", a.toHex().tolower == s
|
|
|
|
a = [SecretWord 0x1c7d810f37fc6e0b'u64, SecretWord 0xb91aba4ce339cea3'u64, SecretWord 0xd9f5571ccc4dfd1a'u64, SecretWord 0xf5906ee9df91f554'u64]
|
|
b = [SecretWord 0x18394ffe94874c9f'u64, SecretWord 0x6e8a8ad032fc5f15'u64, SecretWord 0x7533a2b46b7e9530'u64, SecretWord 0x2849996b4bb61b48'u64]
|
|
M = [SecretWord 0x2523648240000001'u64, SecretWord 0xba344d8000000008'u64, SecretWord 0x6121000000000013'u64, SecretWord 0xa700000000000013'u64]
|
|
s = "0x0f936c8b8c83baa96d70f79d16362db0ee07f9d137cc923776da08552b481089"
|
|
|
|
a.reverse()
|
|
b.reverse()
|
|
M.reverse()
|
|
|
|
debugecho "--------------------------"
|
|
debugecho "before:"
|
|
debugecho " a: ", a.toHex()
|
|
debugecho " b: ", b.toHex()
|
|
debugecho " m: ", M.toHex()
|
|
addmod_asm(a, b, M)
|
|
debugecho "after:"
|
|
debugecho " a: ", a.toHex().tolower
|
|
debugecho " s: ", s
|
|
debugecho " ok: ", a.toHex().tolower == s
|
|
|
|
a = [SecretWord 0xe9d55643'u64, SecretWord 0x580ec4cc3f91cef3'u64, SecretWord 0x11ecbb7d35b36449'u64, SecretWord 0x35535ca31c5dc2ba'u64]
|
|
b = [SecretWord 0x97f7ed94'u64, SecretWord 0xbad96eb98204a622'u64, SecretWord 0xbba94400f9a061d6'u64, SecretWord 0x60d3521a0d3dd9eb'u64]
|
|
M = [SecretWord 0xffffffff'u64, SecretWord 0xffffffffffffffff'u64, SecretWord 0xffffffff00000000'u64, SecretWord 0x0000000000000001'u64]
|
|
s = "0x0000000081cd43d812e83385c1967515cd95ff7f2f53c61f9626aebd299b9ca4"
|
|
|
|
a.reverse()
|
|
b.reverse()
|
|
M.reverse()
|
|
|
|
debugecho "--------------------------"
|
|
debugecho "before:"
|
|
debugecho " a: ", a.toHex()
|
|
debugecho " b: ", b.toHex()
|
|
debugecho " m: ", M.toHex()
|
|
addmod_asm(a, b, M)
|
|
debugecho "after:"
|
|
debugecho " a: ", a.toHex().tolower
|
|
debugecho " s: ", s
|
|
debugecho " ok: ", a.toHex().tolower == s
|
|
|
|
mainAdd()
|
|
|
|
proc mainSub() =
|
|
var a = [SecretWord 0xf9c32e89b80b17bd'u64, SecretWord 0xdbd3069d4ca0e1c3'u64, SecretWord 0x980d4c70d39d5e17'u64, SecretWord 0xd9f0252845f18c3a'u64]
|
|
var b = [SecretWord 0x215075604bfd64de'u64, SecretWord 0x36dc488149fc5d3e'u64, SecretWord 0x91fff665385d20fd'u64, SecretWord 0xe980a5a203b43179'u64]
|
|
var M = [SecretWord 0xFFFFFFFFFFFFFFFF'u64, SecretWord 0xFFFFFFFFFFFFFFFF'u64, SecretWord 0xFFFFFFFFFFFFFFFF'u64, SecretWord 0xFFFFFFFEFFFFFC2F'u64]
|
|
var s = "0xd872b9296c0db2dfa4f6be1c02a48485060d560b9b403d19f06f7f86423d5ac1"
|
|
|
|
a.reverse()
|
|
b.reverse()
|
|
M.reverse()
|
|
|
|
debugecho "--------------------------------"
|
|
debugecho "before:"
|
|
debugecho " a: ", a.toHex()
|
|
debugecho " b: ", b.toHex()
|
|
debugecho " m: ", M.toHex()
|
|
submod_asm(a, b, M)
|
|
debugecho "after:"
|
|
debugecho " a: ", a.toHex().tolower
|
|
debugecho " s: ", s
|
|
debugecho " ok: ", a.toHex().tolower == s
|
|
|
|
mainSub()
|