forgot to commit function sig change (#177)

Mamy Ratsimbazafy 2022-02-14 17:12:30 +01:00 committed by GitHub
parent 5db30ef68d
commit 8b5d5089cb
8 changed files with 42 additions and 42 deletions
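
As the title says, the signature change itself landed earlier and this commit catches up the remaining declarations and call sites: every `hasSpareBit: static bool` parameter becomes `spareBits: static int`, and boolean arguments such as `hasSpareBit = false` or `FF.getSpareBits() >= 1` become the spare-bit count itself. The integer carries strictly more information than the old flag: at least one spare bit still selects the carry-free final subtraction, and at least two spare bits combined with `skipFinalSub` let the Montgomery reduction kernels omit the final subtraction entirely (the `spareBits >= 2 and skipFinalSub` branches below).

A standalone Nim sketch of that three-way dispatch; the names are illustrative, not Constantine's:

# Standalone sketch (not Constantine code) of the reduction dispatch that
# the integer parameter enables; the old boolean could only distinguish
# the first two cases.
type ReductionPath = enum
  rpMayCarry   # spareBits == 0: final subtraction must track a carry
  rpNoCarry    # spareBits >= 1: final subtraction cannot carry out
  rpSkipped    # spareBits >= 2 and skipFinalSub: leave result in [0, 2m)

func reductionPath(spareBits: int, skipFinalSub: bool): ReductionPath =
  if spareBits >= 2 and skipFinalSub:
    rpSkipped
  elif spareBits >= 1:
    rpNoCarry
  else:
    rpMayCarry

when isMainModule:
  doAssert reductionPath(0, false) == rpMayCarry   # old hasSpareBit = false
  doAssert reductionPath(1, false) == rpNoCarry    # old hasSpareBit = true
  doAssert reductionPath(2, true)  == rpSkipped    # newly expressible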

View File

@@ -128,7 +128,7 @@ macro finalSub_gen*[N: static int](
# Field addition
# ------------------------------------------------------------
-macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit: static bool): untyped =
+macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], spareBits: static int): untyped =
## Generate an optimized modular addition kernel
# Register pressure note:
# We could generate a kernel per modulus m by hardcoding it as immediate
@@ -165,7 +165,7 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:
# Interleaved copy in a second buffer as well
ctx.mov v[i], u[i]
-if hasSparebit:
+if spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, v)
else:
ctx.finalSubMayCarryImpl(
@@ -174,9 +174,9 @@ macro addmod_gen[N: static int](R: var Limbs[N], A, B, m: Limbs[N], hasSpareBit:
result.add ctx.generate()
-func addmod_asm*(r: var Limbs, a, b, m: Limbs, hasSpareBit: static bool) =
+func addmod_asm*(r: var Limbs, a, b, m: Limbs, spareBits: static int) =
## Constant-time modular addition
-addmod_gen(r, a, b, m, hasSpareBit)
+addmod_gen(r, a, b, m, spareBits)
# Field subtraction
# ------------------------------------------------------------
@@ -307,7 +307,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -327,7 +327,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -347,7 +347,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -367,7 +367,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-addmod_asm(a, a, b, m, hasSpareBit = false)
+addmod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -390,7 +390,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-submod_asm(a, a, b, m, hasSpareBit = false)
+submod_asm(a, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " a: ", a.toHex().tolower
debugecho " s: ", s
@@ -415,7 +415,7 @@ when isMainModule:
debugecho " a: ", a.toHex()
debugecho " b: ", b.toHex()
debugecho " m: ", m.toHex()
-submod_asm(r, a, b, m, hasSpareBit = false)
+submod_asm(r, a, b, m, spareBits = 0)
debugecho "after:"
debugecho " r: ", r.toHex().tolower
debugecho " s: ", s

View File

@@ -209,8 +209,8 @@ func squareMont_CIOS_asm*[N](
r: var Limbs[N],
a, M: Limbs[N],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool) =
+spareBits: static int, skipFinalSub: static bool) =
## Constant-time modular squaring
var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_inline(a)
-r.redcMont_asm_inline(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)

View File

@@ -295,8 +295,8 @@ func squareMont_CIOS_asm_adx*[N](
r: var Limbs[N],
a, M: Limbs[N],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool) =
+spareBits: static int, skipFinalSub: static bool) =
## Constant-time modular squaring
var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_adx_inline(a)
-r.redcMont_asm_adx(r2x, M, m0ninv, hasSpareBit, skipFinalSub)
+r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)

View File

@@ -34,7 +34,7 @@ macro redc2xMont_gen*[N: static int](
a_PIR: array[N*2, SecretWord],
M_PIR: array[N, SecretWord],
m0ninv_REG: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int, skipFinalSub: static bool
) =
# No register spilling handling
@@ -153,10 +153,10 @@ macro redc2xMont_gen*[N: static int](
# v is invalidated from now on
let t = repackRegisters(v, u[N], u[N+1])
-if hasSpareBit and skipFinalSub:
+if spareBits >= 2 and skipFinalSub:
for i in 0 ..< N:
ctx.mov r[i], t[i]
-elif hasSpareBit:
+elif spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, t)
else:
ctx.finalSubMayCarryImpl(r, u, M, t, rax)
@@ -169,23 +169,24 @@ func redcMont_asm_inline*[N: static int](
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) {.inline.} =
## Constant-time Montgomery reduction
## Inline-version
-redc2xMont_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redc2xMont_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
func redcMont_asm*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int,
+skipFinalSub: static bool
) =
## Constant-time Montgomery reduction
static: doAssert UseASM_X86_64, "This requires x86-64."
-redcMont_asm_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redcMont_asm_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
# Montgomery conversion
# ----------------------------------------------------------
@@ -351,8 +352,8 @@ when isMainModule:
var a_sqr{.noInit.}, na_sqr{.noInit.}: Limbs[2]
var a_sqr_comba{.noInit.}, na_sqr_comba{.noInit.}: Limbs[2]
-a_sqr.redcMont_asm(adbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
-na_sqr.redcMont_asm(nadbl_sqr, M, 1, hasSpareBit = false, skipFinalSub = false)
+a_sqr.redcMont_asm(adbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
+na_sqr.redcMont_asm(nadbl_sqr, M, 1, spareBits = 0, skipFinalSub = false)
a_sqr_comba.redc2xMont_Comba(adbl_sqr, M, 1)
na_sqr_comba.redc2xMont_Comba(nadbl_sqr, M, 1)
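
The `spareBits >= 2 and skipFinalSub` branch above hands back the reduction result lazily in [0, 2m) instead of [0, m). A standalone illustration in plain Nim of why two spare bits make that safe to defer (made-up 62-bit modulus; the 2m worst-case bound on the unreduced output is assumed from the reduction algorithm):

# With two spare bits (m < 2^62), lazily reduced values in [0, 2m) fit a
# word, and even the sum of two of them stays below 2^64, so a following
# addition can run before any final subtraction. Not library code.
let m = 0x3FFF_FFFF_FFFF_FFFB'u64   # 62-bit modulus: two spare bits
let lazyA = 2'u64*m - 1'u64         # worst case after a skipped final sub
let lazyB = 2'u64*m - 2'u64
let sum = lazyA + lazyB             # 4m - 3 < 2^64
doAssert sum >= lazyA               # a wrap-around would have made sum < lazyA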

View File

@@ -38,7 +38,7 @@ macro redc2xMont_adx_gen[N: static int](
a_PIR: array[N*2, SecretWord],
M_PIR: array[N, SecretWord],
m0ninv_REG: BaseType,
-hasSpareBit, skipFinalSub: static bool
+spareBits: static int, skipFinalSub: static bool
) =
# No register spilling handling
@@ -131,10 +131,10 @@ macro redc2xMont_adx_gen[N: static int](
let t = repackRegisters(v, u[N])
-if hasSpareBit and skipFinalSub:
+if spareBits >= 2 and skipFinalSub:
for i in 0 ..< N:
ctx.mov r[i], t[i]
-elif hasSpareBit:
+elif spareBits >= 1:
ctx.finalSubNoCarryImpl(r, u, M, t)
else:
ctx.finalSubMayCarryImpl(r, u, M, t, hi)
@@ -147,24 +147,23 @@ func redcMont_asm_adx_inline*[N: static int](
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) {.inline.} =
## Constant-time Montgomery reduction
## Inline-version
-redc2xMont_adx_gen(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redc2xMont_adx_gen(r, a, M, m0ninv, spareBits, skipFinalSub)
func redcMont_asm_adx*[N: static int](
r: var array[N, SecretWord],
a: array[N*2, SecretWord],
M: array[N, SecretWord],
m0ninv: BaseType,
-hasSpareBit: static bool,
+spareBits: static int,
skipFinalSub: static bool = false
) =
## Constant-time Montgomery reduction
-redcMont_asm_adx_inline(r, a, M, m0ninv, hasSpareBit, skipFinalSub)
+redcMont_asm_adx_inline(r, a, M, m0ninv, spareBits, skipFinalSub)
# Montgomery conversion
# ----------------------------------------------------------

View File

@@ -152,7 +152,7 @@ func setMinusOne*(a: var FF) =
func `+=`*(a: var FF, b: FF) {.meter.} =
## In-place addition modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(a.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = add(a.mres, b.mres)
overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -169,7 +169,7 @@ func `-=`*(a: var FF, b: FF) {.meter.} =
func double*(a: var FF) {.meter.} =
## Double ``a`` modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(a.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = double(a.mres)
overflowed = overflowed or not(a.mres < FF.fieldMod())
@@ -179,7 +179,7 @@ func sum*(r: var FF, a, b: FF) {.meter.} =
## Sum ``a`` and ``b`` into ``r`` modulo p
## r is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(r.mres.limbs, a.mres.limbs, b.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = r.mres.sum(a.mres, b.mres)
overflowed = overflowed or not(r.mres < FF.fieldMod())
@@ -208,7 +208,7 @@ func double*(r: var FF, a: FF) {.meter.} =
## Double ``a`` into ``r``
## `r` is initialized/overwritten
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
-addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits() >= 1)
+addmod_asm(r.mres.limbs, a.mres.limbs, a.mres.limbs, FF.fieldMod().limbs, FF.getSpareBits())
else:
var overflowed = r.mres.double(a.mres)
overflowed = overflowed or not(r.mres < FF.fieldMod())
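
Beyond the truncated lines shown, each non-assembly fallback presumably finishes with a constant-time conditional subtraction of the modulus driven by the `overflowed` flag. A standalone sketch of the branch-free masking idiom such code uses; the name and signature are illustrative, not Constantine's:

# Branch-free conditional subtraction: derive an all-ones or all-zeros mask
# from the condition, then subtract `m and mask` unconditionally, so the
# instruction trace does not depend on secret data. Illustrative only.
func csubSketch(a: var uint64, m: uint64, ctl: bool) =
  let mask = 0'u64 - uint64(ctl)   # 0xFFFF...FFFF if ctl, else 0
  a = a - (m and mask)

var x = 10'u64
x.csubSketch(7'u64, x >= 7'u64)
doAssert x == 3'u64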

View File

@@ -404,16 +404,16 @@ func redc2xMont*[N: static int](
when UseASM_X86_64 and r.len <= 6:
# ADX implies BMI2
if ({.noSideEffect.}: hasAdx()):
-redcMont_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
when r.len in {3..6}:
-redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
# redc2xMont_Comba(r, a, M, m0ninv)
elif UseASM_X86_64 and r.len in {3..6}:
# TODO: Assembly faster than GCC but slower than Clang
-redcMont_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+redcMont_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
redc2xMont_CIOS(r, a, M, m0ninv, skipFinalSub)
# redc2xMont_Comba(r, a, M, m0ninv, skipFinalSub)
@@ -474,9 +474,9 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
when spareBits >= 1:
mulMont_CIOS_sparebit_asm_adx(r, a, a, M, m0ninv, skipFinalSub)
else:
-squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+squareMont_CIOS_asm_adx(r, a, M, m0ninv, spareBits, skipFinalSub)
else:
-squareMont_CIOS_asm(r, a, M, m0ninv, spareBits >= 1, skipFinalSub)
+squareMont_CIOS_asm(r, a, M, m0ninv, spareBits, skipFinalSub)
elif UseASM_X86_64:
var r2x {.noInit.}: Limbs[2*N]
r2x.square(a)

View File

@@ -129,11 +129,11 @@ func mul_fp2_complex_asm_adx*(
d.c0.limbs2x,
Fp.fieldMod().limbs,
Fp.getNegInvModWord(),
-Fp.has1extraBit()
+Fp.getSpareBits()
)
r.c1.mres.limbs.redcMont_asm_adx_inline(
d.c1.limbs2x,
Fp.fieldMod().limbs,
Fp.getNegInvModWord(),
-Fp.has1extraBit()
+Fp.getSpareBits()
)
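
In the Fp2 complex multiplication above, the reduction kernels now receive the full spare-bit count via `Fp.getSpareBits()` rather than the boolean `Fp.has1extraBit()`, completing the same signature migration as the files before it.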