forgot to commit function sig change (#177)

Mamy Ratsimbazafy 2022-02-14 17:12:30 +01:00 committed by GitHub
parent 5db30ef68d
commit 8b5d5089cb
8 changed files with 42 additions and 42 deletions
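
As the title says, the signature change itself landed earlier and this commit catches up the remaining declarations and call sites: every `hasSpareBit: static bool` parameter becomes `spareBits: static int`, and boolean arguments such as `hasSpareBit = false` or `FF.getSpareBits() >= 1` become the spare-bit count itself. The integer carries strictly more information than the old flag: at least one spare bit still selects the carry-free final subtraction, and at least two spare bits combined with `skipFinalSub` let the Montgomery reduction kernels omit the final subtraction entirely (the `spareBits >= 2 and skipFinalSub` branches below).

A standalone Nim sketch of that three-way dispatch; the names are illustrative, not Constantine's:

# Standalone sketch (not Constantine code) of the reduction dispatch that
# the integer parameter enables; the old boolean could only distinguish
# the first two cases.
type ReductionPath = enum
  rpMayCarry   # spareBits == 0: final subtraction must track a carry
  rpNoCarry    # spareBits >= 1: final subtraction cannot carry out
  rpSkipped    # spareBits >= 2 and skipFinalSub: leave result in [0, 2m)

func reductionPath(spareBits: int, skipFinalSub: bool): ReductionPath =
  if spareBits >= 2 and skipFinalSub:
    rpSkipped
  elif spareBits >= 1:
    rpNoCarry
  else:
    rpMayCarry

when isMainModule:
  doAssert reductionPath(0, false) == rpMayCarry   # old hasSpareBit = false
  doAssert reductionPath(1, false) == rpNoCarry    # old hasSpareBit = true
  doAssert reductionPath(2, true)  == rpSkipped    # newly expressible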

View File

@@ -128,7 +128,7 @@ macro finalSub_gen*[N: static int](
# Field addition
# ------------------------------------------------------------
-macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit: static bool): untyped =
+macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], spareBits: static int): untyped =
## Generate an optimized modular addition kernel
# Register pressure note:
# We could generate a kernel per modulus m by hardcoding it as immediate
@@ -165,7 +165,7 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:
# Interleaved copy in a second buffer as well
ctx.mov v[i], u[i]
-if hasSparebit:
+if spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, v)
else:
ctx.finalSubMayCarryImpl(
@@ -174,9 +174,9 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:
result.add ctx.generate()
-func addmod_asm*(r: var Limbs, a, b, m: Limbs, hasSpareBit: static bool) =
+func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) =
## Constant-time modular addition
-addmod_gen(r, a, b, m, hasSpareBit)
+addmod_gen(r, a, b, m, spareBits)
# Field subtraction
# ------------------------------------------------------------
@@ -307,7 +307,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -327,7 +327,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -347,7 +347,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -367,7 +367,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -390,7 +390,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-submod_asm(a, a, b, m, hasSpareBit = false)
+submod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -415,7 +415,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-submod_asm(r, a, b, m, hasSpareBit = false)
+submod_asm(r, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " r: ", r.toHex().tolower
debugecho " s: ", s

View File

@@ -209,8 +209,8 @@ func squareMont_CIOS_asm*[N](
r: var Limbs[N],
a, M: Limbs[N],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool) =
+spareBits: static int, skipFinalSub: static bool) =
## Constant-time modular squaring
var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_inline(a)
-r.redcMont_asm_inline(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)

View File

@@ -295,8 +295,8 @@ func squareMont_CIOS_asm_adx*[N](
r: var Limbs[N],
a, M: Limbs[N],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool) =
+spareBits: static int, skipFinalSub: static bool) =
## Constant-time modular squaring
var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_adx_inline(a)
-r.redcMont_asm_adx(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)

View File

@@ -34,7 +34,7 @@ macro redc2xMont_gen*[N: static int](
a_PIR: array[N*2, SecretWord],
M_PIR: array[N, SecretWord],
m0ninv_REG: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int, skipFinalSub: static bool
) =
# No register spilling handling
@@ -153,10 +153,10 @@ macro redc2xMont_gen*[N: static int](
# v is invalidated from now on
let t = repackRegisters(v, u[N], u[N+1])
-if hasSpareBit and skipFinalSub:
+if spareBits >= 2 and skipFinalSub:
for i in 0 ..< N:
ctx.mov r[i], t[i]
-elif hasSpareBit:
+elif spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, t)
else:
ctx.finalSubMayCarryImpl(r, u, M, t, rax)
@@ -169,23 +169,24 @@ func redcMont_asm_inline*[N: static int](
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) {.inline.} =
## Constant-time Montgomery reduction
## Inline-version
-redc2xMont_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
func redcMont_asm*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int,
+skipFinalSub: static bool
) =
## Constant-time Montgomery reduction
static: doAssert UseASM_X86_64, "This requires x86-64."
-redcMont_asm_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redcMont_asm_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
# Montgomery conversion
# ----------------------------------------------------------
@@ -351,8 +352,8 @@ when isMainModule:
var a_sqr{.noInit.}, na_sqr{.noInit.}: Limbs[2]
var a_sqr_comba{.noInit.}, na_sqr_comba{.noInit.}: Limbs[2]
-a_sqr.redcMont_asm(adbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
-na_sqr.redcMont_asm(nadbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
+a_sqr.redcMont_asm(adbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
+na_sqr.redcMont_asm(nadbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
a_sqr_comba.redc2xMont_Comba(adbl_sqr, M, 1)
na_sqr_comba.redc2xMont_Comba(nadbl_sqr, M, 1)
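
The `spareBits >= 2 and skipFinalSub` branch above hands back the reduction result lazily in [0, 2m) instead of [0, m). A standalone illustration in plain Nim of why two spare bits make that safe to defer (made-up 62-bit modulus; the 2m worst-case bound on the unreduced output is assumed from the reduction algorithm):

# With two spare bits (m < 2^62), lazily reduced values in [0, 2m) fit a
# word, and even the sum of two of them stays below 2^64, so a following
# addition can run before any final subtraction. Not library code.
let m = 0x3FFF_FFFF_FFFF_FFFB'u64   # 62-bit modulus: two spare bits
let lazyA = 2'u64*m - 1'u64         # worst case after a skipped final sub
let lazyB = 2'u64*m - 2'u64
let sum = lazyA + lazyB             # 4m - 3 < 2^64
doAssert sum >= lazyA               # a wrap-around would have made sum < lazyA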

View File

@@ -38,7 +38,7 @@ macro redc2xMont_adx_gen[N: static int](
a_PIR: array[N*2, SecretWord],
M_PIR: array[N, SecretWord],
m0ninv_REG: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int, skipFinalSub: static bool
) =
# No register spilling handling
@@ -131,10 +131,10 @@ macro redc2xMont_adx_gen[N: static int](
let t = repackRegisters(v, u[N])
-if hasSpareBit and skipFinalSub:
+if spareBits >= 2 and skipFinalSub:
for i in 0 ..< N:
ctx.mov r[i], t[i]
-elif hasSpareBit:
+elif spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, t)
else:
ctx.finalSubMayCarryImpl(r, u, M, t, hi)
@@ -147,24 +147,23 @@ func redcMont_asm_adx_inline*[N: static int](
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) {.inline.} =
## Constant-time Montgomery reduction
## Inline-version
-redc2xMont_adx_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
func redcMont_asm_adx*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) =
## Constant-time Montgomery reduction
-redcMont_asm_adx_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redcMont_asm_adx_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
# Montgomery conversion
# ----------------------------------------------------------

View File

@@ -152,7 +152,7 @@ func setMinusOne*(a: var FF) =
func `+=`*(a: var FF, b: FF) {.meter.} =
## In-place addition modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = add(a.mres, b.mres)
overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -169,7 +169,7 @@ func `-=`*(a: var FF, b: FF) {.meter.} =
func double*(a: var FF) {.meter.} =
## Double ``a`` modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = double(a.mres)
overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -179,7 +179,7 @@ func sum*(r: var FF, a, b: FF) {.meter.} =
## Sum ``a`` and ``b`` into ``r`` modulo p
## r is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = r.mres.sum(a.mres, b.mres)
overflowed = overflowed or not(r.mres < FF.fieldMod())
@@ -208,7 +208,7 @@ func double*(r: var FF, a: FF) {.meter.} =
## Double ``a`` into ``r``
## `r` is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = r.mres.double(a.mres)
overflowed = overflowed or not(r.mres < FF.fieldMod())
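
Beyond the truncated lines shown, each non-assembly fallback presumably finishes with a constant-time conditional subtraction of the modulus driven by the `overflowed` flag. A standalone sketch of the branch-free masking idiom such code uses; the name and signature are illustrative, not Constantine's:

# Branch-free conditional subtraction: derive an all-ones or all-zeros mask
# from the condition, then subtract `m and mask` unconditionally, so the
# instruction trace does not depend on secret data. Illustrative only.
func csubSketch(a: var uint64, m: uint64, ctl: bool) =
  let mask = 0'u64 - uint64(ctl)   # 0xFFFF...FFFF if ctl, else 0
  a = a - (m and mask)

var x = 10'u64
x.csubSketch(7'u64, x >= 7'u64)
doAssert x == 3'u64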

View File

@@ -404,16 +404,16 @@ func redc2xMont*[N: static int](
when UseASM_X86_64 and r.len <= 6:
# ADX implies BMI2
if ({.noSideEffect.}: hasAdx()):
-redcMont_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
when r.len in {3..6}:
-redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
# redc2xMont_Comba(r, a, M, m0ninv)
elif UseASM_X86_64 and r.len in {3..6}:
# TODO: Assembly faster than GCC but slower than Clang
-redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
# redc2xMont_Comba(r, a, M, m0ninv, skipFinalSub)
@@ -474,9 +474,9 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
when spareBits >= 1:
mulMont_CIOS_sparebit_asm_adx(r, a, a, M, m0ninv, skipFinalSub)
else:
-squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
-squareMont_CIOS_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+squareMont_CIOS_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
elif UseASM_X86_64:
var r2x {.noInit.}: Limbs[2*N]
r2x.square(a)

View File

@@ -129,11 +129,11 @@ func mul_fp2_complex_asm_adx*(
d.c0.limbs2x,
Fp.fieldMod().limbs,
Fp.getNegInvModWord(),
-Fp.has1extraBit()
+Fp.getSpareBits()
)
r.c1.mres.limbs.redcMont_asm_adx_inline(
d.c1.limbs2x,
Fp.fieldMod().limbs,
Fp.getNegInvModWord(),
-Fp.has1extraBit()
+Fp.getSpareBits()
)
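
In the Fp2 complex multiplication above, the reduction kernels now receive the full spare-bit count via `Fp.getSpareBits()` rather than the boolean `Fp.has1extraBit()`, completing the same signature migration as the files before it.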