From 8b5d5089cb9c828a3f7f148c11896abbb90cee1c Mon Sep 17 00:00:00 2001
From: Mamy Ratsimbazafy
Date: Mon, 14 Feb 2022 17:12:30 +0100
Subject: [PATCH] forgot to commit function sig change (#177)

---
 .../assembly/limbs_asm_modular_x86.nim        | 20 +++++++++----------
 .../assembly/limbs_asm_mul_mont_x86.nim       |  4 ++--
 .../limbs_asm_mul_mont_x86_adx_bmi2.nim       |  4 ++--
 .../assembly/limbs_asm_redc_mont_x86.nim      | 19 +++++++++---------
 .../limbs_asm_redc_mont_x86_adx_bmi2.nim      | 15 +++++++-------
 constantine/arithmetic/finite_fields.nim      |  8 ++++----
 constantine/arithmetic/limbs_montgomery.nim   | 10 +++++-----
 .../assembly/fp2_asm_x86_adx_bmi2.nim         |  4 ++--
 8 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/constantine/arithmetic/assembly/limbs_asm_modular_x86.nim b/constantine/arithmetic/assembly/limbs_asm_modular_x86.nim
index 9fe52a5..1cffb0d 100644
--- a/constantine/arithmetic/assembly/limbs_asm_modular_x86.nim
+++ b/constantine/arithmetic/assembly/limbs_asm_modular_x86.nim
@@ -128,7 +128,7 @@ macro finalSub_gen*[N: static int](
 # Field addition
 # ------------------------------------------------------------
 
-macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit: static bool): untyped =
+macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], spareBits: static int): untyped =
   ## Generate an optimized modular addition kernel
   # Register pressure note:
   #   We could generate a kernel per modulus m by hardcoding it as immediate
@@ -165,7 +165,7 @@
     # Interleaved copy in a second buffer as well
     ctx.mov v[i], u[i]
 
-  if hasSparebit:
+  if spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, v)
   else:
     ctx.finalSubMayCarryImpl(
@@ -174,9 +174,9 @@
 
   result.add ctx.generate()
 
-func addmod_asm*(r: var Limbs, a, b, m: Limbs, hasSpareBit: static bool) =
+func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) =
   ## Constant-time modular addition
-  addmod_gen(r, a, b, m, hasSpareBit)
+  addmod_gen(r, a, b, m, spareBits)
 
 # Field substraction
 # ------------------------------------------------------------
@@ -307,7 +307,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
    debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -327,7 +327,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -347,7 +347,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -367,7 +367,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    addmod_asm(a, a, b, m, hasSpareBit = false)
+    addmod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -390,7 +390,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    submod_asm(a, a, b, m, hasSpareBit = false)
+    submod_asm(a, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " a: ", a.toHex().tolower
     debugecho " s: ", s
@@ -415,7 +415,7 @@ when isMainModule:
     debugecho " a: ", a.toHex()
     debugecho " b: ", b.toHex()
     debugecho " m: ", m.toHex()
-    submod_asm(r, a, b, m, hasSpareBit = false)
+    submod_asm(r, a, b, m, spareBits = 0)
     debugecho "after:"
     debugecho " r: ", r.toHex().tolower
     debugecho " s: ", s
diff --git a/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86.nim b/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
index 479d723..46b8947 100644
--- a/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
+++ b/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86.nim
@@ -209,8 +209,8 @@
 func squareMont_CIOS_asm*[N](
        r: var Limbs[N], a, M: Limbs[N],
        m0ninv: BaseType,
-       hasSpareBit, skipFinalSub: static bool) =
+       spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_inline(a)
-  r.redcMont_asm_inline(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+  r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
diff --git a/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim b/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
index 8ba367a..2a1811b 100644
--- a/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
+++ b/constantine/arithmetic/assembly/limbs_asm_mul_mont_x86_adx_bmi2.nim
@@ -295,8 +295,8 @@
 func squareMont_CIOS_asm_adx*[N](
        r: var Limbs[N], a, M: Limbs[N],
        m0ninv: BaseType,
-       hasSpareBit, skipFinalSub: static bool) =
+       spareBits: static int, skipFinalSub: static bool) =
   ## Constant-time modular squaring
   var r2x {.noInit.}: Limbs[2*N]
   r2x.square_asm_adx_inline(a)
-  r.redcMont_asm_adx(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+  r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
diff --git a/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86.nim b/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
index fd7c5aa..9cec0db 100644
--- a/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
+++ b/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86.nim
@@ -34,7 +34,7 @@ macro redc2xMont_gen*[N: static int](
        a_PIR: array[N*2, SecretWord],
        M_PIR: array[N, SecretWord],
        m0ninv_REG: BaseType,
-       hasSpareBit, skipFinalSub: static bool
+       spareBits: static int, skipFinalSub: static bool
      ) =
 
   # No register spilling handling
@@ -153,10 +153,10 @@
   # v is invalidated from now on
   let t = repackRegisters(v, u[N], u[N+1])
 
-  if hasSpareBit and skipFinalSub:
+  if spareBits >= 2 and skipFinalSub:
     for i in 0 ..< N:
       ctx.mov r[i], t[i]
-  elif hasSpareBit:
+  elif spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, t)
   else:
     ctx.finalSubMayCarryImpl(r, u, M, t, rax)
@@ -169,23 +169,24 @@ func redcMont_asm_inline*[N: static int](
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
-       hasSpareBit: static bool,
+       spareBits: static int,
        skipFinalSub: static bool = false
      ) {.inline.} =
   ## Constant-time Montgomery reduction
   ## Inline-version
-  redc2xMont_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 func redcMont_asm*[N: static int](
        r: var array[N, SecretWord],
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
-       hasSpareBit, skipFinalSub: static bool
+       spareBits: static int,
+       skipFinalSub: static bool
      ) =
   ## Constant-time Montgomery reduction
   static: doAssert UseASM_X86_64, "This requires x86-64."
-  redcMont_asm_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redcMont_asm_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery conversion
 # ----------------------------------------------------------
@@ -351,8 +352,8 @@ when isMainModule:
 
   var a_sqr{.noInit.}, na_sqr{.noInit.}: Limbs[2]
   var a_sqr_comba{.noInit.}, na_sqr_comba{.noInit.}: Limbs[2]
-  a_sqr.redcMont_asm(adbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
-  na_sqr.redcMont_asm(nadbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
+  a_sqr.redcMont_asm(adbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
+  na_sqr.redcMont_asm(nadbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
   a_sqr_comba.redc2xMont_Comba(adbl_sqr, M, 1)
   na_sqr_comba.redc2xMont_Comba(nadbl_sqr, M, 1)
 
diff --git a/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim b/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
index 28021be..0be05af 100644
--- a/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
+++ b/constantine/arithmetic/assembly/limbs_asm_redc_mont_x86_adx_bmi2.nim
@@ -38,7 +38,7 @@ macro redc2xMont_adx_gen[N: static int](
        a_PIR: array[N*2, SecretWord],
        M_PIR: array[N, SecretWord],
        m0ninv_REG: BaseType,
-       hasSpareBit, skipFinalSub: static bool
+       spareBits: static int, skipFinalSub: static bool
      ) =
 
   # No register spilling handling
@@ -131,10 +131,10 @@
 
   let t = repackRegisters(v, u[N])
 
-  if hasSpareBit and skipFinalSub:
+  if spareBits >= 2 and skipFinalSub:
     for i in 0 ..< N:
       ctx.mov r[i], t[i]
-  elif hasSpareBit:
+  elif spareBits >= 1:
     ctx.finalSubNoCarryImpl(r, u, M, t)
   else:
     ctx.finalSubMayCarryImpl(r, u, M, t, hi)
@@ -147,24 +147,23 @@ func redcMont_asm_adx_inline*[N: static int](
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
-       hasSpareBit: static bool,
+       spareBits: static int,
        skipFinalSub: static bool = false
      ) {.inline.} =
   ## Constant-time Montgomery reduction
   ## Inline-version
-  redc2xMont_adx_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+  redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 func redcMont_asm_adx*[N: static int](
        r: var array[N, SecretWord],
        a: array[N*2, SecretWord],
        M: array[N, SecretWord],
        m0ninv: BaseType,
-       hasSpareBit: static bool,
+       spareBits: static int,
        skipFinalSub: static bool = false
      ) =
   ## Constant-time Montgomery reduction
-  redcMont_asm_adx_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
-
+  redcMont_asm_adx_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
 
 # Montgomery conversion
 # ----------------------------------------------------------
diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim
index e8e0d07..fb3fac7 100644
--- a/constantine/arithmetic/finite_fields.nim
+++ b/constantine/arithmetic/finite_fields.nim
@@ -152,7 +152,7 @@ func setMinusOne*(a: var FF) =
 func `+=`*(a: var FF, b: FF) {.meter.} =
   ## In-place addition modulo p
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = add(a.mres, b.mres)
     overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -169,7 +169,7 @@ func `-=`*(a: var FF, b: FF) {.meter.} =
 func double*(a: var FF) {.meter.} =
   ## Double ``a`` modulo p
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = double(a.mres)
     overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -179,7 +179,7 @@ func sum*(r: var FF, a, b: FF) {.meter.} =
   ## Sum ``a`` and ``b`` into ``r`` modulo p
   ## r is initialized/overwritten
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = r.mres.sum(a.mres, b.mres)
     overflowed = overflowed or not(r.mres < FF.fieldMod())
@@ -208,7 +208,7 @@ func double*(r: var FF, a: FF) {.meter.} =
   ## Double ``a`` into ``r``
   ## `r` is initialized/overwritten
   when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-    addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+    addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
   else:
     var overflowed = r.mres.double(a.mres)
     overflowed = overflowed or not(r.mres < FF.fieldMod())
diff --git a/constantine/arithmetic/limbs_montgomery.nim b/constantine/arithmetic/limbs_montgomery.nim
index df9e1db..b8d2018 100644
--- a/constantine/arithmetic/limbs_montgomery.nim
+++ b/constantine/arithmetic/limbs_montgomery.nim
@@ -404,16 +404,16 @@
   when UseASM_X86_64 and r.len <= 6:
     # ADX implies BMI2
     if ({.noSideEffect.}: hasAdx()):
-      redcMont_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+      redcMont_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
     else:
       when r.len in {3..6}:
-        redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+        redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
       else:
         redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
         # redc2xMont_Comba(r, a, M, m0ninv)
   elif UseASM_X86_64 and r.len in {3..6}:
     # TODO: Assembly faster than GCC but slower than Clang
-    redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+    redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
   else:
     redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
     # redc2xMont_Comba(r, a, M, m0ninv, skipFinalSub)
@@ -474,9 +474,9 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
       when spareBits >= 1:
         mulMont_CIOS_sparebit_asm_adx(r, a, a, M, m0ninv, skipFinalSub)
       else:
-        squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+        squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
     else:
-      squareMont_CIOS_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+      squareMont_CIOS_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
   elif UseASM_X86_64:
     var r2x {.noInit.}: Limbs[2*N]
     r2x.square(a)
diff --git a/constantine/tower_field_extensions/assembly/fp2_asm_x86_adx_bmi2.nim b/constantine/tower_field_extensions/assembly/fp2_asm_x86_adx_bmi2.nim
index 53cf065..d8fea33 100644
--- a/constantine/tower_field_extensions/assembly/fp2_asm_x86_adx_bmi2.nim
+++ b/constantine/tower_field_extensions/assembly/fp2_asm_x86_adx_bmi2.nim
@@ -129,11 +129,11 @@ func mul_fp2_complex_asm_adx*(
     d.c0.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.has1extraBit()
+    Fp.getSpareBits()
   )
   r.c1.mres.limbs.redcMont_asm_adx_inline(
     d.c1.limbs2x,
     Fp.fieldMod().limbs,
     Fp.getNegInvModWord(),
-    Fp.has1extraBit()
+    Fp.getSpareBits()
   )
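
Note (not part of the patch above): the reduction kernels now branch on how many
spare bits the modulus leaves in the top limb, rather than on a single boolean.
Below is a minimal, hypothetical Nim sketch of that compile-time dispatch. The
names ReductionStrategy and selectStrategy are illustrative only and do not
exist in Constantine; the thresholds mirror the "spareBits >= 2 and skipFinalSub"
and "spareBits >= 1" branches in the patched redc2xMont macros.

# Illustrative sketch only, not part of the commit.
type ReductionStrategy = enum
  CopyOnly          # caller asked to skip the final subtraction and >= 2 spare bits make it safe
  FinalSubNoCarry   # >= 1 spare bit: the conditional subtraction cannot produce a carry
  FinalSubMayCarry  # general case: a potential carry out must be handled

func selectStrategy(spareBits: static int, skipFinalSub: static bool): ReductionStrategy =
  ## Compile-time selection mirroring the branch structure of the patched kernels.
  when spareBits >= 2 and skipFinalSub:
    result = CopyOnly
  elif spareBits >= 1:
    result = FinalSubNoCarry
  else:
    result = FinalSubMayCarry

when isMainModule:
  doAssert selectStrategy(spareBits = 0, skipFinalSub = false) == FinalSubMayCarry
  doAssert selectStrategy(spareBits = 1, skipFinalSub = false) == FinalSubNoCarry
  doAssert selectStrategy(spareBits = 2, skipFinalSub = true)  == CopyOnly

One visible consequence of the integer form: the old "if hasSpareBit and
skipFinalSub" path skipped the final subtraction with only one spare bit,
whereas the patched kernels require two spare bits before honoring skipFinalSub.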