forgot to commit function sig change (#177)
Commit 8b5d5089cb (parent 5db30ef68d)
@@ -128,7 +128,7 @@ macro finalSub_gen*[N: static int](
 # Field addition
 # ------------------------------------------------------------

-macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit: static bool): untyped =
+macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], spareBits: static int): untyped =
   ## Generate an optimized modular addition kernel
   # Register pressure note:
   #   We could generate a kernel per modulus m by hardcoding it as immediate
@@ -165,7 +165,7 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:
     # Interleaved copy in a second buffer as well
     ctx.mov v[i], u[i]

-  if hasSparebit:
+  if spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, v)
   else:
     ctx.finalSubMayCarryImpl(
@@ -174,9 +174,9 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:

   result.add ctx.generate()

-func addmod_asm*(r: var Limbs, a, b, m: Limbs, hasSpareBit: static bool) =
+func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) =
   ## Constant-time modular addition
-  addmod_gen(r, a, b, m, hasSpareBit)
+  addmod_gen(r, a, b, m, spareBits)

 # Field substraction
 # ------------------------------------------------------------
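The kernel and its public wrapper now take the spare-bit count as a static int rather than a boolean; judging from the rest of the diff, the old `hasSpareBit = true` case corresponds to `spareBits >= 1`. A minimal caller-side sketch in Nim (the Limbs variables r, a, b, m are assumed, not part of this change):

  # Sketch only: parameter migration for addmod_asm, assuming r, a, b, m
  # are Limbs of matching length.
  addmod_asm(r, a, b, m, spareBits = 1)   # previously: hasSpareBit = true
  addmod_asm(r, a, b, m, spareBits = 0)   # previously: hasSpareBit = false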
@@ -307,7 +307,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -327,7 +327,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -347,7 +347,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -367,7 +367,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -390,7 +390,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    submod_asm(a, a, b, m, hasSpareBit = false)
+    submod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -415,7 +415,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    submod_asm(r, a, b, m, hasSpareBit = false)
+    submod_asm(r, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " r: ", r.toHex().tolower
     debugecho " s: ", s
@@ -209,8 +209,8 @@ func squareMont_CIOS_asm*[N](
       r: var Limbs[N],
       a, M: Limbs[N],
       m0ninv: BaseType,
-      hasSpareBit, skipFinalSub: static bool) =
+      spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_inline(a)
-  r.redcMont_asm_inline(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+  r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
@@ -295,8 +295,8 @@ func squareMont_CIOS_asm_adx*[N](
      r: var Limbs[N],
       a, M: Limbs[N],
       m0ninv: BaseType,
-      hasSpareBit, skipFinalSub: static bool) =
+      spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_adx_inline(a)
-  r.redcMont_asm_adx(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+  r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
@@ -34,7 +34,7 @@ macro redc2xMont_gen*[N: static int](
       a_PIR: array[N*2, SecretWord],
       M_PIR: array[N, SecretWord],
       m0ninv_REG: BaseType,
-      hasSpareBit, skipFinalSub: static bool
+      spareBits: static int, skipFinalSub: static bool
     ) =

   # No register spilling handling
@@ -153,10 +153,10 @@ macro redc2xMont_gen*[N: static int](
   # v is invalidated from now on
   let t = repackRegisters(v, u[N], u[N+1])

-  if hasSpareBit and skipFinalSub:
+  if spareBits >= 2 and skipFinalSub:
     for i in 0 ..< N:
       ctx.mov r[i], t[i]
-  elif hasSpareBit:
+  elif spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, t)
   else:
     ctx.finalSubMayCarryImpl(r, u, M, t, rax)
@@ -169,23 +169,24 @@ func redcMont_asm_inline*[N: static int](
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType,
-      hasSpareBit: static bool,
+      spareBits: static int,
       skipFinalSub: static bool = false
     ) {.inline.} =
   ## Constant-time Montgomery reduction
   ## Inline-version
-  redc2xMont_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)

 func redcMont_asm*[N: static int](
       r: var array[N, SecretWord],
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType,
-      hasSpareBit, skipFinalSub: static bool
+      spareBits: static int,
+      skipFinalSub: static bool
     ) =
   ## Constant-time Montgomery reduction
   static: doAssert UseASM_X86_64, "This requires x86-64."
-  redcMont_asm_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redcMont_asm_inline(r, a, M, m0ninv, spareBits, skipFinalSub)

 # Montgomery conversion
 # ----------------------------------------------------------
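With the dispatch above and these wrappers, the integer parameter makes the reduction policy explicit: at least 2 spare bits plus skipFinalSub hands back the partially reduced value untouched, exactly 1 spare bit still gets the carry-free final subtraction, and 0 spare bits need the carrying variant. A hypothetical caller-side view in Nim (the name a2x is assumed, not from this diff):

  # Hypothetical calls mirroring the three branches in redc2xMont_gen:
  r.redcMont_asm_inline(a2x, M, m0ninv, spareBits = 2, skipFinalSub = true)   # result copied as-is
  r.redcMont_asm_inline(a2x, M, m0ninv, spareBits = 1, skipFinalSub = false)  # finalSubNoCarryImpl path
  r.redcMont_asm_inline(a2x, M, m0ninv, spareBits = 0, skipFinalSub = false)  # finalSubMayCarryImpl path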
@@ -351,8 +352,8 @@ when isMainModule:
   var a_sqr{.noInit.}, na_sqr{.noInit.}: Limbs[2]
   var a_sqr_comba{.noInit.}, na_sqr_comba{.noInit.}: Limbs[2]

-  a_sqr.redcMont_asm(adbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
-  na_sqr.redcMont_asm(nadbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
+  a_sqr.redcMont_asm(adbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
+  na_sqr.redcMont_asm(nadbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
   a_sqr_comba.redc2xMont_Comba(adbl_sqr, M, 1)
   na_sqr_comba.redc2xMont_Comba(nadbl_sqr, M, 1)
@@ -38,7 +38,7 @@ macro redc2xMont_adx_gen[N: static int](
       a_PIR: array[N*2, SecretWord],
       M_PIR: array[N, SecretWord],
       m0ninv_REG: BaseType,
-      hasSpareBit, skipFinalSub: static bool
+      spareBits: static int, skipFinalSub: static bool
     ) =

   # No register spilling handling
@@ -131,10 +131,10 @@ macro redc2xMont_adx_gen[N: static int](

   let t = repackRegisters(v, u[N])

-  if hasSpareBit and skipFinalSub:
+  if spareBits >= 2 and skipFinalSub:
     for i in 0 ..< N:
       ctx.mov r[i], t[i]
-  elif hasSpareBit:
+  elif spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, t)
   else:
     ctx.finalSubMayCarryImpl(r, u, M, t, hi)
@@ -147,24 +147,23 @@ func redcMont_asm_adx_inline*[N: static int](
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType,
-      hasSpareBit: static bool,
+      spareBits: static int,
       skipFinalSub: static bool = false
     ) {.inline.} =
   ## Constant-time Montgomery reduction
   ## Inline-version
-  redc2xMont_adx_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)

 func redcMont_asm_adx*[N: static int](
       r: var array[N, SecretWord],
       a: array[N*2, SecretWord],
       M: array[N, SecretWord],
       m0ninv: BaseType,
-      hasSpareBit: static bool,
+      spareBits: static int,
       skipFinalSub: static bool = false
     ) =
   ## Constant-time Montgomery reduction
-  redcMont_asm_adx_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
-
+  redcMont_asm_adx_inline(r, a, M, m0ninv, spareBits, skipFinalSub)

 # Montgomery conversion
 # ----------------------------------------------------------
@@ -152,7 +152,7 @@ func setMinusOne*(a: var FF) =
 func `+=`*(a: var FF, b: FF) {.meter.} =
   ## In-place addition modulo p
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = add(a.mres, b.mres)
     overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -169,7 +169,7 @@ func `-=`*(a: var FF, b: FF) {.meter.} =
 func double*(a: var FF) {.meter.} =
   ## Double ``a`` modulo p
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = double(a.mres)
     overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -179,7 +179,7 @@ func sum*(r: var FF, a, b: FF) {.meter.} =
   ## Sum ``a`` and ``b`` into ``r`` modulo p
   ## r is initialized/overwritten
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = r.mres.sum(a.mres, b.mres)
     overflowed = overflowed or not(r.mres < FF.fieldMod())
@@ -208,7 +208,7 @@ func double*(r: var FF, a: FF) {.meter.} =
   ## Double ``a`` into ``r``
   ## `r` is initialized/overwritten
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = r.mres.double(a.mres)
     overflowed = overflowed or not(r.mres < FF.fieldMod())
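At the field level the wrappers now forward FF.getSpareBits() unchanged instead of collapsing it to a boolean with `>= 1`, so the assembly kernels can tell the 0, 1, and 2+ cases apart. As a worked example of what the count means (illustrative numbers, not taken from this diff):

  # A 254-bit prime held in 4 x 64-bit limbs uses 256 bits of storage,
  # leaving 256 - 254 = 2 spare bits; a full 256-bit modulus would leave 0.
  const storageBits = 4 * 64                       # assumed limb layout
  const modulusBits = 254                          # assumed example modulus size
  const spareBits   = storageBits - modulusBits    # = 2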
@@ -404,16 +404,16 @@ func redc2xMont*[N: static int](
   when UseASM_X86_64 and r.len <= 6:
     # ADX implies BMI2
     if ({.noSideEffect.}: hasAdx()):
-      redcMont_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+      redcMont_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
     else:
       when r.len in {3..6}:
-        redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+        redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
       else:
         redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
         # redc2xMont_Comba(r, a, M, m0ninv)
   elif UseASM_X86_64 and r.len in {3..6}:
     # TODO: Assembly faster than GCC but slower than Clang
-    redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+    redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
   else:
     redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
     # redc2xMont_Comba(r, a, M, m0ninv, skipFinalSub)
@@ -474,9 +474,9 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
       when spareBits >= 1:
         mulMont_CIOS_sparebit_asm_adx(r, a, a, M, m0ninv, skipFinalSub)
       else:
-        squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+        squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
     else:
-      squareMont_CIOS_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+      squareMont_CIOS_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
   elif UseASM_X86_64:
     var r2x {.noInit.}: Limbs[2*N]
     r2x.square(a)
@@ -129,11 +129,11 @@ func mul_fp2_complex_asm_adx*(
     d.c0.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.has1extraBit()
+    Fp.getSpareBits()
   )
   r.c1.mres.limbs.redcMont_asm_adx_inline(
     d.c1.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.has1extraBit()
+    Fp.getSpareBits()
   )
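In the Fp2 multiplication path the boolean accessor Fp.has1extraBit() gives way to Fp.getSpareBits(); by the convention used throughout this change the two should relate as has1extraBit() == (getSpareBits() >= 1), though the diff itself does not state this. A hypothetical compile-time check, assuming the old accessor is still available on a concrete Fp:

  # Hypothetical sanity check, assuming a concrete Fp type is in scope
  # and has1extraBit() was not removed elsewhere:
  static:
    doAssert Fp.has1extraBit() == (Fp.getSpareBits() >= 1)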