mirror of
https://github.com/codex-storage/constantine.git
synced 2025-02-24 08:28:09 +00:00
Fp12 over fp6 (#201)
* introduce sumprod for direct fp6_mul * change curves -> constants * forgotten constants * Full pairing using Fp2->Fp6->Fp12 towering
This commit is contained in:
parent
37354e9ca8
commit
9770b3108c
@ -18,7 +18,7 @@ import
|
||||
../constantine/math/config/curves,
|
||||
../constantine/math/arithmetic,
|
||||
../constantine/math/extension_fields,
|
||||
../constantine/math/curves/zoo_square_roots,
|
||||
../constantine/math/constants/zoo_square_roots,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe,
|
||||
./bench_blueprint
|
||||
@ -47,6 +47,20 @@ template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
|
||||
measure(iters, startTime, stopTime, startClk, stopClk, body)
|
||||
report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
func random_unsafe(rng: var RngState, a: var FpDbl) =
|
||||
## Initialize a standalone Double-Width field element
|
||||
## we don't reduce it modulo p², this is only used for benchmark
|
||||
let aHi = rng.random_unsafe(Fp[FpDbl.C])
|
||||
let aLo = rng.random_unsafe(Fp[FpDbl.C])
|
||||
for i in 0 ..< aLo.mres.limbs.len:
|
||||
a.limbs2x[i] = aLo.mres.limbs[i]
|
||||
for i in 0 ..< aHi.mres.limbs.len:
|
||||
a.limbs2x[aLo.mres.limbs.len+i] = aHi.mres.limbs[i]
|
||||
|
||||
func random_unsafe(rng: var RngState, a: var ExtensionField2x) =
|
||||
for i in 0 ..< a.coords.len:
|
||||
rng.random_unsafe(a.coords[i])
|
||||
|
||||
proc addBench*(T: typedesc, iters: int) =
|
||||
var x = rng.random_unsafe(T)
|
||||
let y = rng.random_unsafe(T)
|
||||
@ -92,21 +106,39 @@ proc sqrBench*(T: typedesc, iters: int) =
|
||||
bench("Squaring", T, iters):
|
||||
r.square(x)
|
||||
|
||||
proc mulUnrBench*(T: typedesc, iters: int) =
|
||||
proc mul2xUnrBench*(T: typedesc, iters: int) =
|
||||
var r: doublePrec(T)
|
||||
let x = rng.random_unsafe(T)
|
||||
let y = rng.random_unsafe(T)
|
||||
preventOptimAway(r)
|
||||
bench("Multiplication unreduced", T, iters):
|
||||
bench("Multiplication 2x unreduced", T, iters):
|
||||
r.prod2x(x, y)
|
||||
|
||||
proc sqrUnrBench*(T: typedesc, iters: int) =
|
||||
proc sqr2xUnrBench*(T: typedesc, iters: int) =
|
||||
var r: doublePrec(T)
|
||||
let x = rng.random_unsafe(T)
|
||||
preventOptimAway(r)
|
||||
bench("Squaring unreduced", T, iters):
|
||||
bench("Squaring 2x unreduced", T, iters):
|
||||
r.square2x(x)
|
||||
|
||||
proc rdc2xBench*(T: typedesc, iters: int) =
|
||||
var r: T
|
||||
var t: doublePrec(T)
|
||||
rng.random_unsafe(t)
|
||||
preventOptimAway(r)
|
||||
bench("Redc 2x", T, iters):
|
||||
r.redc2x(t)
|
||||
|
||||
proc sumprodBench*(T: typedesc, iters: int) =
|
||||
var r: T
|
||||
let a = rng.random_unsafe(T)
|
||||
let b = rng.random_unsafe(T)
|
||||
let u = rng.random_unsafe(T)
|
||||
let v = rng.random_unsafe(T)
|
||||
preventOptimAway(r)
|
||||
bench("Linear combination", T, iters):
|
||||
r.sumprod([a, b], [u, v])
|
||||
|
||||
proc toBigBench*(T: typedesc, iters: int) =
|
||||
var r: matchingBigInt(T.C)
|
||||
let x = rng.random_unsafe(T)
|
||||
|
@ -11,7 +11,7 @@ import
|
||||
../constantine/math/config/curves,
|
||||
../constantine/math/arithmetic,
|
||||
../constantine/math/io/io_bigints,
|
||||
../constantine/math/curves/zoo_square_roots,
|
||||
../constantine/math/constants/zoo_square_roots,
|
||||
# Helpers
|
||||
../helpers/static_for,
|
||||
./bench_fields_template
|
||||
@ -52,8 +52,11 @@ proc main() =
|
||||
mulBench(Fp[curve], Iters)
|
||||
sqrBench(Fp[curve], Iters)
|
||||
smallSeparator()
|
||||
mulUnrBench(Fp[curve], Iters)
|
||||
sqrUnrBench(Fp[curve], Iters)
|
||||
mul2xUnrBench(Fp[curve], Iters)
|
||||
sqr2xUnrBench(Fp[curve], Iters)
|
||||
rdc2xBench(Fp[curve], Iters)
|
||||
smallSeparator()
|
||||
sumprodBench(Fp[curve], Iters)
|
||||
smallSeparator()
|
||||
toBigBench(Fp[curve], Iters)
|
||||
toFieldBench(Fp[curve], Iters)
|
||||
|
@ -46,8 +46,9 @@ proc main() =
|
||||
mulBench(Fp2[curve], Iters)
|
||||
sqrBench(Fp2[curve], Iters)
|
||||
smallSeparator()
|
||||
mulUnrBench(Fp2[curve], Iters)
|
||||
sqrUnrBench(Fp2[curve], Iters)
|
||||
mul2xUnrBench(Fp2[curve], Iters)
|
||||
sqr2xUnrBench(Fp2[curve], Iters)
|
||||
rdc2xBench(Fp2[curve], Iters)
|
||||
smallSeparator()
|
||||
invBench(Fp2[curve], InvIters)
|
||||
isSquareBench(Fp2[curve], InvIters)
|
||||
|
@ -46,8 +46,9 @@ proc main() =
|
||||
mulBench(Fp4[curve], Iters)
|
||||
sqrBench(Fp4[curve], Iters)
|
||||
smallSeparator()
|
||||
mulUnrBench(Fp2[curve], Iters)
|
||||
sqrUnrBench(Fp2[curve], Iters)
|
||||
mul2xUnrBench(Fp4[curve], Iters)
|
||||
sqr2xUnrBench(Fp4[curve], Iters)
|
||||
rdc2xBench(Fp4[curve], Iters)
|
||||
smallSeparator()
|
||||
invBench(Fp4[curve], InvIters)
|
||||
separator()
|
||||
|
@ -44,8 +44,9 @@ proc main() =
|
||||
mulBench(Fp6[curve], Iters)
|
||||
sqrBench(Fp6[curve], Iters)
|
||||
smallSeparator()
|
||||
mulUnrBench(Fp6[curve], Iters)
|
||||
sqrUnrBench(Fp6[curve], Iters)
|
||||
mul2xUnrBench(Fp6[curve], Iters)
|
||||
sqr2xUnrBench(Fp6[curve], Iters)
|
||||
rdc2xBench(Fp6[curve], Iters)
|
||||
smallSeparator()
|
||||
invBench(Fp6[curve], InvIters)
|
||||
separator()
|
||||
|
@ -19,14 +19,14 @@ import
|
||||
../constantine/math/arithmetic,
|
||||
../constantine/math/extension_fields,
|
||||
../constantine/math/ec_shortweierstrass,
|
||||
../constantine/math/curves/zoo_subgroups,
|
||||
../constantine/math/constants/zoo_subgroups,
|
||||
../constantine/math/pairing/[
|
||||
cyclotomic_subgroup,
|
||||
lines_eval,
|
||||
pairing_bls12,
|
||||
pairing_bn
|
||||
],
|
||||
../constantine/math/curves/zoo_pairings,
|
||||
../constantine/math/constants/zoo_pairings,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe,
|
||||
./bench_blueprint
|
||||
|
@ -22,13 +22,13 @@ import
|
||||
ec_shortweierstrass_projective,
|
||||
ec_shortweierstrass_jacobian,
|
||||
ec_scalar_mul, ec_endomorphism_accel],
|
||||
../constantine/math/curves/zoo_subgroups,
|
||||
../constantine/math/constants/zoo_subgroups,
|
||||
../constantine/math/pairing/[
|
||||
cyclotomic_subgroup,
|
||||
pairing_bls12,
|
||||
pairing_bn
|
||||
],
|
||||
../constantine/math/curves/zoo_pairings,
|
||||
../constantine/math/constants/zoo_pairings,
|
||||
../constantine/hashes,
|
||||
../constantine/hash_to_curve/hash_to_curve,
|
||||
# Helpers
|
||||
|
@ -52,10 +52,12 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||
("tests/math/t_fp2.nim", false),
|
||||
("tests/math/t_fp2_sqrt.nim", false),
|
||||
("tests/math/t_fp4.nim", false),
|
||||
("tests/math/t_fp6_bn254_nogami.nim", false),
|
||||
("tests/math/t_fp6_bn254_snarks.nim", false),
|
||||
("tests/math/t_fp6_bls12_377.nim", false),
|
||||
("tests/math/t_fp6_bls12_381.nim", false),
|
||||
("tests/math/t_fp6_bw6_761.nim", false),
|
||||
("tests/math/t_fp12_bn254_nogami.nim", false),
|
||||
("tests/math/t_fp12_bn254_snarks.nim", false),
|
||||
("tests/math/t_fp12_bls12_377.nim", false),
|
||||
("tests/math/t_fp12_bls12_381.nim", false),
|
||||
|
@ -13,7 +13,7 @@ import
|
||||
ec_shortweierstrass,
|
||||
extension_fields,
|
||||
arithmetic,
|
||||
curves/zoo_subgroups
|
||||
constants/zoo_subgroups
|
||||
],
|
||||
./math/io/[io_bigints, io_fields],
|
||||
hashes,
|
||||
|
@ -23,7 +23,7 @@ import
|
||||
cyclotomic_subgroup,
|
||||
lines_eval
|
||||
],
|
||||
./math/curves/zoo_pairings,
|
||||
./math/constants/zoo_pairings,
|
||||
./hash_to_curve/hash_to_curve
|
||||
|
||||
# ############################################################
|
||||
|
@ -13,7 +13,7 @@ import
|
||||
./math/arithmetic/limbs_montgomery,
|
||||
./math/ec_shortweierstrass,
|
||||
./math/pairing/[pairing_bn, miller_loops, cyclotomic_subgroup],
|
||||
./math/curves/zoo_subgroups,
|
||||
./math/constants/zoo_subgroups,
|
||||
./math/io/[io_bigints, io_fields]
|
||||
|
||||
# ############################################################
|
||||
|
@ -10,7 +10,7 @@ import
|
||||
# Internals
|
||||
../platforms/abstractions,
|
||||
../math/[arithmetic, extension_fields],
|
||||
../math/curves/zoo_hash_to_curve,
|
||||
../math/constants/zoo_hash_to_curve,
|
||||
../math/elliptic/[
|
||||
ec_shortweierstrass_projective,
|
||||
ec_shortweierstrass_jacobian,
|
||||
|
@ -11,7 +11,7 @@ import
|
||||
../platforms/abstractions,
|
||||
../math/config/curves,
|
||||
../math/[arithmetic, extension_fields],
|
||||
../math/curves/zoo_hash_to_curve,
|
||||
../math/constants/zoo_hash_to_curve,
|
||||
./h2c_utilities
|
||||
|
||||
# ############################################################
|
||||
|
@ -11,7 +11,7 @@ import
|
||||
../platforms/abstractions,
|
||||
../math/config/curves,
|
||||
../math/[arithmetic, extension_fields],
|
||||
../math/curves/[zoo_hash_to_curve, zoo_subgroups],
|
||||
../math/constants/[zoo_hash_to_curve, zoo_subgroups],
|
||||
../math/ec_shortweierstrass,
|
||||
./h2c_hash_to_field,
|
||||
./h2c_map_to_isocurve_swu,
|
||||
|
@ -34,7 +34,7 @@ where code size is not an issue for example for multi-precision addition.
|
||||
|
||||
- Faster big-integer modular multiplication for most moduli\
|
||||
Gautam Botrel, Gus Gutoski, and Thomas Piellard, 2020\
|
||||
https://hackmd.io/@zkteam/modular_multiplication
|
||||
https://hackmd.io/@gnark/modular_multiplication
|
||||
|
||||
### Square roots
|
||||
|
||||
|
@ -45,8 +45,8 @@ macro mulMont_CIOS_sparebit_gen[N: static int](
|
||||
## The multiplication and reduction are further merged in the same loop
|
||||
##
|
||||
## This requires the most significant word of the Modulus
|
||||
## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
|
||||
## https://hackmd.io/@zkteam/modular_multiplication
|
||||
## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
|
||||
## https://hackmd.io/@gnark/modular_multiplication
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
@ -213,3 +213,200 @@ func squareMont_CIOS_asm*[N](
|
||||
var r2x {.noInit.}: Limbs[2*N]
|
||||
r2x.square_asm_inline(a)
|
||||
r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
|
||||
|
||||
# Montgomery Sum of Products
|
||||
# ------------------------------------------------------------
|
||||
|
||||
macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
|
||||
r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
|
||||
M_PIR: Limbs[N], m0ninv_REG: BaseType,
|
||||
skipFinalSub: static bool
|
||||
): untyped =
|
||||
## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
|
||||
## using the CIOS method
|
||||
##
|
||||
## This requires 2 spare bits in the most significant word
|
||||
## so that we can skip the intermediate reductions
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
|
||||
doAssert K <= 8, "we cannot sum more than 8 products"
|
||||
# Bounds:
|
||||
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
|
||||
# for all intent and purposes this is true since aᵢ.bᵢ is:
|
||||
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
|
||||
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
|
||||
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
|
||||
# 2. We have a high-word tN to accumulate overflows.
|
||||
# with 2 unused bits in the last word,
|
||||
# the multiplication of two last words will leave 4 unused bits
|
||||
# enough for accumulating 8 additions and overflow.
|
||||
|
||||
result = newStmtList()
|
||||
|
||||
var ctx = init(Assembler_x86, BaseType)
|
||||
let
|
||||
scratchSlots = 6
|
||||
|
||||
# We could force M as immediate by specializing per moduli
|
||||
M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
|
||||
# If N is too big, we need to spill registers. TODO.
|
||||
t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
|
||||
# MultiPurpose Register slots
|
||||
scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
|
||||
|
||||
# MUL requires RAX and RDX
|
||||
|
||||
m0ninv = Operand(
|
||||
desc: OperandDesc(
|
||||
asmId: "[m0ninv]",
|
||||
nimSymbol: m0ninv_REG,
|
||||
rm: MemOffsettable,
|
||||
constraint: Input,
|
||||
cEmit: "&" & $m0ninv_REG
|
||||
)
|
||||
)
|
||||
|
||||
# We're really constrained by register and somehow setting as memory doesn't help
|
||||
# So we store the result `r` in the scratch space and then reload it in RDX
|
||||
# before the scratchspace is used in final substraction
|
||||
a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
|
||||
b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
|
||||
tN = scratch[2] # High part of extended precision multiplication
|
||||
C = scratch[3] # Carry during reduction step
|
||||
r = scratch[4] # Stores the `r` operand
|
||||
S = scratch[5] # Mul step: Stores the carry A
|
||||
# Red step: Stores (t[0] * m0ninv) mod 2ʷ
|
||||
|
||||
# Registers used:
|
||||
# - 1 for `M`
|
||||
# - 6 for `t` (at most)
|
||||
# - 6 for `scratch`
|
||||
# - 2 for RAX and RDX
|
||||
# Total 15 out of 16
|
||||
# We can save 1 by hardcoding M as immediate (and m0ninv)
|
||||
# but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
|
||||
# We might be able to save registers by having `r` and `M` be memory operand as well
|
||||
|
||||
let tsym = t.nimSymbol
|
||||
let scratchSym = scratch.nimSymbol
|
||||
result.add quote do:
|
||||
static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
|
||||
|
||||
var `tsym`{.noInit, used.}: typeof(`r_PIR`)
|
||||
# Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
|
||||
var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
|
||||
`scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
|
||||
`scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
|
||||
`scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
|
||||
|
||||
# Algorithm
|
||||
# -----------------------------------------
|
||||
# for i=0 to N-1
|
||||
# tN := 0
|
||||
# for k=0 to K-1
|
||||
# A := 0
|
||||
# for j=0 to N-1
|
||||
# (A,t[j]) := t[j] + a[k][j]*b[k][i] + A
|
||||
# tN += A
|
||||
# m := t[0]*m0ninv mod W
|
||||
# C,_ := t[0] + m*M[0]
|
||||
# for j=1 to N-1
|
||||
# (C,t[j-1]) := t[j] + m*M[j] + C
|
||||
# t[N-1] = tN + C
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
|
||||
for i in 0 ..< N:
|
||||
# Multiplication step
|
||||
ctx.comment " Multiplication step"
|
||||
ctx.comment " tN = 0"
|
||||
ctx.`xor` tN, tN
|
||||
for k in 0 ..< K:
|
||||
template A: untyped = S
|
||||
|
||||
ctx.comment " A = 0"
|
||||
ctx.`xor` A, A
|
||||
ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A"
|
||||
ctx.mov rax, a[k, 0]
|
||||
ctx.mul rdx, rax, b[k, i], rax
|
||||
if i == 0 and k == 0: # First accumulation, overwrite t[0]
|
||||
ctx.mov t[0], rax
|
||||
else: # Accumulate in t[0]
|
||||
ctx.add t[0], rax
|
||||
ctx.adc rdx, 0
|
||||
ctx.mov A, rdx
|
||||
|
||||
for j in 1 ..< N:
|
||||
ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A"
|
||||
ctx.mov rax, a[k, j]
|
||||
ctx.mul rdx, rax, b[k, i], rax
|
||||
if i == 0 and k == 0: # First accumulation, overwrite t[0]
|
||||
ctx.mov t[j], A
|
||||
else: # Accumulate in t[0]
|
||||
ctx.add t[j], A
|
||||
ctx.adc rdx, 0
|
||||
ctx.`xor` A, A
|
||||
ctx.add t[j], rax
|
||||
ctx.adc A, rdx
|
||||
|
||||
ctx.comment " tN += A"
|
||||
ctx.add tN, A
|
||||
|
||||
# Reduction step
|
||||
ctx.comment " Reduction step"
|
||||
template m: untyped = S
|
||||
ctx.comment " m := t[0]*m0ninv mod 2ʷ"
|
||||
ctx.mov rax, m0ninv
|
||||
ctx.imul rax, t[0]
|
||||
ctx.mov m, rax
|
||||
ctx.comment " C,_ := t[0] + m*M[0]"
|
||||
ctx.`xor` C, C
|
||||
ctx.mul rdx, rax, M[0], rax
|
||||
ctx.add rax, t[0]
|
||||
ctx.adc C, rdx
|
||||
|
||||
for j in 1 ..< N:
|
||||
ctx.comment " (C,t[j-1]) := t[j] + m*M[j] + C"
|
||||
ctx.mov rax, M[j]
|
||||
ctx.mul rdx, rax, m, rax
|
||||
ctx.add C, t[j]
|
||||
ctx.adc rdx, 0
|
||||
ctx.add C, rax
|
||||
ctx.adc rdx, 0
|
||||
ctx.mov t[j-1], C
|
||||
ctx.mov C, rdx
|
||||
|
||||
ctx.comment "t[N-1] = tN + C"
|
||||
ctx.add tN, C
|
||||
ctx.mov t[N-1], tN
|
||||
|
||||
|
||||
ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
|
||||
let r2 = rax.asArrayAddr(len = N)
|
||||
|
||||
if skipFinalSub:
|
||||
ctx.comment " Copy result"
|
||||
for i in 0 ..< N:
|
||||
ctx.mov r2[i], t[i]
|
||||
else:
|
||||
ctx.comment " Final substraction"
|
||||
ctx.finalSubNoCarryImpl(
|
||||
r2, t, M,
|
||||
scratch
|
||||
)
|
||||
result.add ctx.generate()
|
||||
|
||||
func sumprodMont_CIOS_spare2bits_asm*[N, K: static int](
|
||||
r: var Limbs[N], a, b: array[K, Limbs[N]],
|
||||
M: Limbs[N], m0ninv: BaseType,
|
||||
skipFinalSub: static bool) =
|
||||
## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
|
||||
## If "skipFinalSub" is set
|
||||
## the result is in the range [0, 2M)
|
||||
## otherwise the result is in the range [0, M)
|
||||
##
|
||||
## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
|
||||
r.sumprodMont_CIOS_spare2bits_gen(a, b, M, m0ninv, skipFinalSub)
|
@ -130,7 +130,8 @@ proc partialRedx(
|
||||
t: OperandArray,
|
||||
M: OperandArray,
|
||||
m0ninv: Operand,
|
||||
lo, S: Operand
|
||||
lo: Operand or Register,
|
||||
S: Operand
|
||||
) =
|
||||
## Partial Montgomery reduction
|
||||
## For CIOS method
|
||||
@ -182,8 +183,8 @@ macro mulMont_CIOS_sparebit_adx_gen[N: static int](
|
||||
## Generate an optimized Montgomery Multiplication kernel
|
||||
## using the CIOS method
|
||||
## This requires the most significant word of the Modulus
|
||||
## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
|
||||
## https://hackmd.io/@zkteam/modular_multiplication
|
||||
## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
|
||||
## https://hackmd.io/@gnark/modular_multiplication
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
@ -308,3 +309,187 @@ func squareMont_CIOS_asm_adx*[N](
|
||||
var r2x {.noInit.}: Limbs[2*N]
|
||||
r2x.square_asm_adx_inline(a)
|
||||
r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
|
||||
|
||||
# Montgomery Sum of Products
|
||||
# ------------------------------------------------------------
|
||||
|
||||
macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
|
||||
r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
|
||||
M_PIR: Limbs[N], m0ninv_REG: BaseType,
|
||||
skipFinalSub: static bool
|
||||
): untyped =
|
||||
## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
|
||||
## using the CIOS method
|
||||
##
|
||||
## This requires 2 spare bits in the most significant word
|
||||
## so that we can skip the intermediate reductions
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
|
||||
doAssert K <= 8, "we cannot sum more than 8 products"
|
||||
# Bounds:
|
||||
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
|
||||
# for all intent and purposes this is true since aᵢ.bᵢ is:
|
||||
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
|
||||
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
|
||||
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
|
||||
# 2. We have a high-word tN to accumulate overflows.
|
||||
# with 2 unused bits in the last word,
|
||||
# the multiplication of two last words will leave 4 unused bits
|
||||
# enough for accumulating 8 additions and overflow.
|
||||
|
||||
result = newStmtList()
|
||||
|
||||
var ctx = init(Assembler_x86, BaseType)
|
||||
let
|
||||
scratchSlots = 6
|
||||
|
||||
# We could force M as immediate by specializing per moduli
|
||||
M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
|
||||
# If N is too big, we need to spill registers. TODO.
|
||||
t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
|
||||
# MultiPurpose Register slots
|
||||
scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
|
||||
|
||||
# MULX requires RDX as well
|
||||
|
||||
m0ninv = Operand(
|
||||
desc: OperandDesc(
|
||||
asmId: "[m0ninv]",
|
||||
nimSymbol: m0ninv_REG,
|
||||
rm: MemOffsettable,
|
||||
constraint: Input,
|
||||
cEmit: "&" & $m0ninv_REG
|
||||
)
|
||||
)
|
||||
|
||||
# We're really constrained by register and somehow setting as memory doesn't help
|
||||
# So we store the result `r` in the scratch space and then reload it in RDX
|
||||
# before the scratchspace is used in final substraction
|
||||
a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
|
||||
b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
|
||||
tN = scratch[2] # High part of extended precision multiplication
|
||||
C = scratch[3] # Carry during reduction step
|
||||
r = scratch[4] # Stores the `r` operand
|
||||
S = scratch[5] # Mul step: Stores the carry A
|
||||
# Red step: Stores (t[0] * m0ninv) mod 2ʷ
|
||||
|
||||
# Registers used:
|
||||
# - 1 for `M`
|
||||
# - 6 for `t` (at most)
|
||||
# - 6 for `scratch`
|
||||
# - 2 for RAX and RDX
|
||||
# Total 15 out of 16
|
||||
# We can save 1 by hardcoding M as immediate (and m0ninv)
|
||||
# but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
|
||||
# We might be able to save registers by having `r` and `M` be memory operand as well
|
||||
|
||||
let tsym = t.nimSymbol
|
||||
let scratchSym = scratch.nimSymbol
|
||||
result.add quote do:
|
||||
static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
|
||||
|
||||
var `tsym`{.noInit, used.}: typeof(`r_PIR`)
|
||||
# Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
|
||||
var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
|
||||
`scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
|
||||
`scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
|
||||
`scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
|
||||
|
||||
# Algorithm
|
||||
# -----------------------------------------
|
||||
# for i=0 to N-1
|
||||
# tN := 0
|
||||
# for k=0 to K-1
|
||||
# A := 0
|
||||
# for j=0 to N-1
|
||||
# (A,t[j]) := t[j] + a[k][j]*b[k][i] + A
|
||||
# tN += A
|
||||
# m := t[0]*m0ninv mod W
|
||||
# C,_ := t[0] + m*M[0]
|
||||
# for j=1 to N-1
|
||||
# (C,t[j-1]) := t[j] + m*M[j] + C
|
||||
# t[N-1] = tN + C
|
||||
|
||||
# No register spilling handling
|
||||
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
|
||||
|
||||
for i in 0 ..< N:
|
||||
# Multiplication step
|
||||
ctx.comment " Multiplication step"
|
||||
ctx.comment " tN = 0"
|
||||
ctx.`xor` tN, tN
|
||||
for k in 0 ..< K:
|
||||
template A: untyped = S
|
||||
|
||||
ctx.comment " A = 0"
|
||||
ctx.`xor` A, A
|
||||
ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A"
|
||||
ctx.mov rdx, b[k, i]
|
||||
if i == 0 and k == 0: # First accumulation, overwrite t[0]
|
||||
ctx.mulx t[1], t[0], a[k, 0], rdx
|
||||
else: # Accumulate in t[0]
|
||||
ctx.mulx A, rax, a[k, 0], rdx
|
||||
ctx.adcx t[0], rax
|
||||
ctx.adox t[1], A
|
||||
|
||||
for j in 1 ..< N-1:
|
||||
ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A"
|
||||
if i == 0 and k == 0:
|
||||
ctx.mulx t[j+1], rax, a[k, j], rdx
|
||||
if j == 1:
|
||||
ctx.add t[j], rax
|
||||
else:
|
||||
ctx.adc t[j], rax
|
||||
else:
|
||||
ctx.mulx A, rax, a[k, j], rdx
|
||||
ctx.adcx t[j], rax
|
||||
ctx.adox t[j+1], A
|
||||
|
||||
# Last limb
|
||||
ctx.mulx A, rax, a[k, N-1], rdx
|
||||
if i == 0 and k == 0:
|
||||
ctx.adc t[N-1], rax
|
||||
ctx.comment " tN += A"
|
||||
ctx.adc tN, A
|
||||
else:
|
||||
ctx.adcx t[N-1], rax
|
||||
ctx.comment " tN += A"
|
||||
ctx.mov rdx, 0 # Set to 0 without clearing flags
|
||||
ctx.adox tN, A
|
||||
ctx.adcx tN, rdx
|
||||
|
||||
# Reduction step
|
||||
ctx.partialRedx(
|
||||
tN, t,
|
||||
M, m0ninv,
|
||||
rax, C
|
||||
)
|
||||
|
||||
ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
|
||||
let r2 = rax.asArrayAddr(len = N)
|
||||
|
||||
if skipFinalSub:
|
||||
ctx.comment " Copy result"
|
||||
for i in 0 ..< N:
|
||||
ctx.mov r2[i], t[i]
|
||||
else:
|
||||
ctx.comment " Final substraction"
|
||||
ctx.finalSubNoCarryImpl(
|
||||
r2, t, M,
|
||||
scratch
|
||||
)
|
||||
result.add ctx.generate()
|
||||
|
||||
func sumprodMont_CIOS_spare2bits_asm_adx*[N, K: static int](
|
||||
r: var Limbs[N], a, b: array[K, Limbs[N]],
|
||||
M: Limbs[N], m0ninv: BaseType,
|
||||
skipFinalSub: static bool) =
|
||||
## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
|
||||
## If "skipFinalSub" is set
|
||||
## the result is in the range [0, 2M)
|
||||
## otherwise the result is in the range [0, M)
|
||||
##
|
||||
## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
|
||||
r.sumprodMont_CIOS_spare2bits_adx_gen(a, b, M, m0ninv, skipFinalSub)
|
@ -66,6 +66,20 @@ func squareMont*(r: var BigInt, a, M: BigInt, negInvModWord: static BaseType,
|
||||
## to avoid duplicating with Nim zero-init policy
|
||||
squareMont(r.limbs, a.limbs, M.limbs, negInvModWord, spareBits, skipFinalSub)
|
||||
|
||||
func sumprodMont*[N: static int](
|
||||
r: var BigInt,
|
||||
a, b: array[N, BigInt],
|
||||
M: BigInt, negInvModWord: static BaseType,
|
||||
spareBits: static int, skipFinalSub: static bool = false) =
|
||||
## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products) in the Montgomery domain
|
||||
# We rely on BigInt and Limbs having the same repr to avoid array copies
|
||||
sumprodMont(
|
||||
r.limbs,
|
||||
cast[ptr array[N, typeof(a[0].limbs)]](a.unsafeAddr)[],
|
||||
cast[ptr array[N, typeof(b[0].limbs)]](b.unsafeAddr)[],
|
||||
M.limbs, negInvModWord, spareBits, skipFinalSub
|
||||
)
|
||||
|
||||
func powMont*[mBits: static int](
|
||||
a: var BigInt[mBits], exponent: openarray[byte],
|
||||
M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,
|
||||
|
@ -149,6 +149,26 @@ func setMinusOne*(a: var FF) =
|
||||
# Check if the compiler optimizes it away
|
||||
a.mres = FF.getMontyPrimeMinus1()
|
||||
|
||||
|
||||
func neg*(r: var FF, a: FF) {.meter.} =
|
||||
## Negate modulo p
|
||||
when UseASM_X86_64:
|
||||
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
|
||||
else:
|
||||
# If a = 0 we need r = 0 and not r = M
|
||||
# as comparison operator assume unicity
|
||||
# of the modular representation.
|
||||
# Also make sure to handle aliasing where r.addr = a.addr
|
||||
var t {.noInit.}: FF
|
||||
let isZero = a.isZero()
|
||||
discard t.mres.diff(FF.fieldMod(), a.mres)
|
||||
t.mres.csetZero(isZero)
|
||||
r = t
|
||||
|
||||
func neg*(a: var FF) {.meter.} =
|
||||
## Negate modulo p
|
||||
a.neg(a)
|
||||
|
||||
func `+=`*(a: var FF, b: FF) {.meter.} =
|
||||
## In-place addition modulo p
|
||||
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
|
||||
@ -223,24 +243,14 @@ func square*(r: var FF, a: FF, skipFinalSub: static bool = false) {.meter.} =
|
||||
## Squaring modulo p
|
||||
r.mres.squareMont(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub)
|
||||
|
||||
func neg*(r: var FF, a: FF) {.meter.} =
|
||||
## Negate modulo p
|
||||
when UseASM_X86_64:
|
||||
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
|
||||
else:
|
||||
# If a = 0 we need r = 0 and not r = M
|
||||
# as comparison operator assume unicity
|
||||
# of the modular representation.
|
||||
# Also make sure to handle aliasing where r.addr = a.addr
|
||||
var t {.noInit.}: FF
|
||||
let isZero = a.isZero()
|
||||
discard t.mres.diff(FF.fieldMod(), a.mres)
|
||||
t.mres.csetZero(isZero)
|
||||
r = t
|
||||
|
||||
func neg*(a: var FF) {.meter.} =
|
||||
## Negate modulo p
|
||||
a.neg(a)
|
||||
func sumprod*[N: static int](r: var FF, a, b: array[N, FF], skipFinalSub: static bool = false) {.meter.} =
|
||||
## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products)
|
||||
# We rely on FF and Bigints having the same repr to avoid array copies
|
||||
r.mres.sumprodMont(
|
||||
cast[ptr array[N, typeof(a[0].mres)]](a.unsafeAddr)[],
|
||||
cast[ptr array[N, typeof(b[0].mres)]](b.unsafeAddr)[],
|
||||
FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub
|
||||
)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
|
@ -9,7 +9,7 @@
|
||||
import
|
||||
../../platforms/abstractions,
|
||||
../config/curves,
|
||||
../curves/zoo_square_roots,
|
||||
../constants/zoo_square_roots,
|
||||
./bigints, ./finite_fields, ./limbs_exgcd
|
||||
|
||||
# ############################################################
|
||||
|
@ -305,8 +305,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
|
||||
|
||||
# First iteration of [u v] [d]
|
||||
# [q r].[e]
|
||||
cd.slincombAccNoCarry(u, d[0], v, e[0])
|
||||
ce.slincombAccNoCarry(q, d[0], r, e[0])
|
||||
cd.ssumprodAccNoCarry(u, d[0], v, e[0])
|
||||
ce.ssumprodAccNoCarry(q, d[0], r, e[0])
|
||||
|
||||
# Compute me and md, multiples of M
|
||||
# such as the bottom k bits if d and e are 0
|
||||
@ -330,8 +330,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
|
||||
ce.ashr(k)
|
||||
|
||||
for i in 1 ..< N:
|
||||
cd.slincombAccNoCarry(u, d[i], v, e[i])
|
||||
ce.slincombAccNoCarry(q, d[i], r, e[i])
|
||||
cd.ssumprodAccNoCarry(u, d[i], v, e[i])
|
||||
ce.ssumprodAccNoCarry(q, d[i], r, e[i])
|
||||
cd.smulAccNoCarry(md, M[i])
|
||||
ce.smulAccNoCarry(me, M[i])
|
||||
d[i-1] = cd.lo and Max
|
||||
@ -366,8 +366,8 @@ func matVecMul_shr_k[N, E: static int](
|
||||
|
||||
# First iteration of [u v] [f]
|
||||
# [q r].[g]
|
||||
cf.slincombAccNoCarry(u, f[0], v, g[0])
|
||||
cg.slincombAccNoCarry(q, f[0], r, g[0])
|
||||
cf.ssumprodAccNoCarry(u, f[0], v, g[0])
|
||||
cg.ssumprodAccNoCarry(q, f[0], r, g[0])
|
||||
# bottom k bits are zero by construction
|
||||
debug:
|
||||
doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo)
|
||||
@ -377,8 +377,8 @@ func matVecMul_shr_k[N, E: static int](
|
||||
cg.ashr(k)
|
||||
|
||||
for i in 1 ..< N:
|
||||
cf.slincombAccNoCarry(u, f[i], v, g[i])
|
||||
cg.slincombAccNoCarry(q, f[i], r, g[i])
|
||||
cf.ssumprodAccNoCarry(u, f[i], v, g[i])
|
||||
cg.ssumprodAccNoCarry(q, f[i], r, g[i])
|
||||
f[i-1] = cf.lo and Max
|
||||
g[i-1] = cg.lo and Max
|
||||
cf.ashr(k)
|
||||
@ -496,11 +496,6 @@ func invmod*(
|
||||
#
|
||||
# Those symbols can be computed either via exponentiation (Fermat's Little Theorem)
|
||||
# or using slight modifications to the Extended Euclidean Algorithm for GCD.
|
||||
#
|
||||
# See
|
||||
# - Algorithm II.7 in Blake, Seroussi, Smart: "Elliptic Curves in Cryptography"
|
||||
# - Algorithm 5.9.2 in Bach and Shallit: "Algorithmic Number Theory"
|
||||
# - Pornin: https://github.com/pornin/x25519-cm0/blob/75a53f2/src/x25519-cm0.S#L89-L155
|
||||
|
||||
func batchedDivstepsSymbol(
|
||||
t: var TransitionMatrix,
|
||||
@ -631,7 +626,7 @@ func legendreImpl[N, E](
|
||||
|
||||
accL = (accL + accL.isOdd()) and SignedSecretWord(3)
|
||||
accL = SignedSecretWord(1)-accL
|
||||
accL.csetZero(f.isZeroMask()) # f = gcd = 1 as M is prime or f = 0 if a = 0
|
||||
accL.csetZero(f.isZeroMask())
|
||||
return SecretWord(accL)
|
||||
|
||||
func legendre*(a, M: Limbs, bits: static int): SecretWord =
|
||||
|
@ -306,11 +306,69 @@ func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub:
|
||||
discard z.csub(M, v.isNonZero() or not(z < M))
|
||||
r = z
|
||||
|
||||
func sumprodMont_CIOS_spare2bits[K: static int](
|
||||
r: var Limbs, a, b: array[K, Limbs],
|
||||
M: Limbs, m0ninv: BaseType,
|
||||
skipFinalSub: static bool = false) =
|
||||
## Compute r = ⅀aᵢ.bᵢ (mod M) (suim of products)
|
||||
## This requires 2 unused bits in the field element representation
|
||||
##
|
||||
## This maps
|
||||
## - [0, 2p) -> [0, 2p) with skipFinalSub
|
||||
## - [0, 2p) -> [0, p) without
|
||||
##
|
||||
## skipFinalSub skips the final substraction step.
|
||||
|
||||
# We want all the computation to be kept in registers
|
||||
# hence we use a temporary `t`, hoping that the compiler does the right thing™.
|
||||
var t: typeof(M) # zero-init
|
||||
const N = t.len
|
||||
# Extra word to handle carries t[N] (not there are 2 unused spare bits)
|
||||
var tN {.noInit.}: SecretWord
|
||||
|
||||
static: doAssert K <= 8, "we cannot sum more than 8 products"
|
||||
# Bounds:
|
||||
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
|
||||
# for all intent and purposes this is true since aᵢ.bᵢ is:
|
||||
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
|
||||
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
|
||||
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
|
||||
# 2. We have a high-word tN to accumulate overflows.
|
||||
# with 2 unused bits in the last word,
|
||||
# the multiplication of two last words will leave 4 unused bits
|
||||
# enough for accumulating 8 additions and overflow.
|
||||
|
||||
staticFor i, 0, N:
|
||||
tN = Zero
|
||||
staticFor k, 0, K:
|
||||
var A = Zero
|
||||
staticFor j, 0, N:
|
||||
# (A, t[j]) <- a[k][j] * b[k][i] + t[j] + A
|
||||
muladd2(A, t[j], a[k][j], b[k][i], t[j], A)
|
||||
tN += A
|
||||
|
||||
# Reduction
|
||||
# m <- (t[0] * m0ninv) mod 2ʷ
|
||||
# (C, _) <- m * M[0] + t[0]
|
||||
var C, lo = Zero
|
||||
let m = t[0] * SecretWord(m0ninv)
|
||||
muladd1(C, lo, m, M[0], t[0])
|
||||
staticFor j, 1, N:
|
||||
# (C, t[j-1]) <- m*M[j] + t[j] + C
|
||||
muladd2(C, t[j-1], m, M[j], t[j], C)
|
||||
# (_,t[N-1]) <- t[N] + C
|
||||
t[N-1] = tN + C
|
||||
|
||||
when not skipFinalSub:
|
||||
discard t.csub(M, not(t < M))
|
||||
r = t
|
||||
|
||||
|
||||
# Montgomery Squaring
|
||||
# --------------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
# There are Montgomery squaring multiplications mentioned in the litterature
|
||||
# - https://hackmd.io/@zkteam/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
|
||||
# - https://hackmd.io/@gnark/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
|
||||
# - Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards
|
||||
# Johann Großschädl, 2003
|
||||
# https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf
|
||||
@ -482,9 +540,31 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
|
||||
r.redc2xMont(r2x, M, m0ninv, spareBits, skipFinalSub)
|
||||
else:
|
||||
mulMont(r, a, a, M, m0ninv, spareBits, skipFinalSub)
|
||||
|
||||
|
||||
func sumprodMont*[N: static int](
|
||||
r: var Limbs, a, b: array[N, Limbs],
|
||||
M: Limbs, m0ninv: BaseType,
|
||||
spareBits: static int,
|
||||
skipFinalSub: static bool = false) {.inline.} =
|
||||
when spareBits >= 2:
|
||||
when UseASM_X86_64 and r.len in {2 .. 6}:
|
||||
if ({.noSideEffect.}: hasAdx()):
|
||||
r.sumprodMont_CIOS_spare2bits_asm_adx(a, b, M, m0ninv, skipFinalSub)
|
||||
else:
|
||||
r.sumprodMont_CIOS_spare2bits_asm(a, b, M, m0ninv, skipFinalSub)
|
||||
else:
|
||||
r.sumprodMont_CIOS_spare2bits(a, b, M, m0ninv, skipFinalSub)
|
||||
else:
|
||||
r.mulMont(a[0], b[0], M, m0ninv, spareBits, skipFinalSub = false)
|
||||
var ri {.noInit.}: Limbs
|
||||
for i in 1 ..< N:
|
||||
ri.mulMont(a[i], b[i], M, m0ninv, spareBits, skipFinalSub = false)
|
||||
var overflowed = SecretBool r.add(ri)
|
||||
overflowed = overflowed or not(r < M)
|
||||
discard r.csub(M, overflowed)
|
||||
|
||||
func fromMont*(r: var Limbs, a, M: Limbs,
|
||||
m0ninv: BaseType, spareBits: static int) =
|
||||
m0ninv: BaseType, spareBits: static int) {.inline.} =
|
||||
## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)
|
||||
## to the regular natural representation (mod N)
|
||||
##
|
||||
@ -512,7 +592,7 @@ func fromMont*(r: var Limbs, a, M: Limbs,
|
||||
fromMont_CIOS(r, a, M, m0ninv)
|
||||
|
||||
func getMont*(r: var Limbs, a, M, r2modM: Limbs,
|
||||
m0ninv: BaseType, spareBits: static int) =
|
||||
m0ninv: BaseType, spareBits: static int) {.inline.} =
|
||||
## Transform a bigint ``a`` from it's natural representation (mod N)
|
||||
## to a the Montgomery n-residue representation
|
||||
##
|
||||
|
@ -393,8 +393,8 @@ func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
|
||||
r.lo = SignedSecretWord UV[0]
|
||||
r.hi = SignedSecretWord UV[1]
|
||||
|
||||
func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
|
||||
## Accumulated linear combination
|
||||
func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
|
||||
## Accumulated sum of products
|
||||
## (_, hi, lo) += a*u + b*v
|
||||
## This assumes no overflowing
|
||||
var carry: Carry
|
||||
|
@ -45,6 +45,10 @@ func has_P_3mod4_primeModulus*(C: static Curve): static bool =
|
||||
## Returns true iff p ≡ 3 (mod 4)
|
||||
(BaseType(C.Mod.limbs[0]) and 3) == 3
|
||||
|
||||
func has_P_3mod8_primeModulus*(C: static Curve): static bool =
|
||||
## Returns true iff p ≡ 3 (mod 8)
|
||||
(BaseType(C.Mod.limbs[0]) and 7) == 3
|
||||
|
||||
func has_P_5mod8_primeModulus*(C: static Curve): static bool =
|
||||
## Returns true iff p ≡ 5 (mod 8)
|
||||
(BaseType(C.Mod.limbs[0]) and 7) == 5
|
||||
|
@ -15,7 +15,7 @@ import
|
||||
../ec_shortweierstrass,
|
||||
../io/io_bigints,
|
||||
../isogenies/frobenius,
|
||||
../curves/zoo_endomorphisms
|
||||
../constants/zoo_endomorphisms
|
||||
|
||||
func pow_bls12_377_abs_x[ECP: ECP_ShortW[Fp[BLS12_377], G1] or
|
||||
ECP_ShortW[Fp2[BLS12_377], G2]](
|
@ -15,7 +15,7 @@ import
|
||||
../ec_shortweierstrass,
|
||||
../io/io_bigints,
|
||||
../isogenies/frobenius,
|
||||
../curves/zoo_endomorphisms
|
||||
../constants/zoo_endomorphisms
|
||||
|
||||
func pow_bls12_381_abs_x[ECP: ECP_ShortW[Fp[BLS12_381], G1] or
|
||||
ECP_ShortW[Fp2[BLS12_381], G2]](
|
@ -12,7 +12,7 @@ import
|
||||
# Internal
|
||||
../../platforms/abstractions,
|
||||
../config/[curves, type_bigint],
|
||||
../curves/zoo_endomorphisms,
|
||||
../constants/zoo_endomorphisms,
|
||||
../arithmetic,
|
||||
../extension_fields,
|
||||
../isogenies/frobenius,
|
||||
|
@ -12,7 +12,7 @@ import
|
||||
../arithmetic,
|
||||
../extension_fields,
|
||||
../io/io_bigints,
|
||||
../curves/zoo_endomorphisms,
|
||||
../constants/zoo_endomorphisms,
|
||||
./ec_endomorphism_accel
|
||||
|
||||
# ############################################################
|
||||
|
@ -12,7 +12,7 @@ import
|
||||
../arithmetic,
|
||||
../extension_fields,
|
||||
../io/[io_fields, io_extfields],
|
||||
../curves/zoo_constants
|
||||
../constants/zoo_constants
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
|
@ -11,7 +11,7 @@ import
|
||||
./towers,
|
||||
../arithmetic,
|
||||
../config/curves,
|
||||
../curves/zoo_square_roots_fp2
|
||||
../constants/zoo_square_roots_fp2
|
||||
|
||||
# Square root
|
||||
# -----------------------------------------------------------
|
||||
|
@ -57,8 +57,8 @@ type
|
||||
CubicExt[Fp2[C]]
|
||||
|
||||
Fp12*[C: static Curve] =
|
||||
CubicExt[Fp4[C]]
|
||||
# QuadraticExt[Fp6[C]]
|
||||
# CubicExt[Fp4[C]]
|
||||
QuadraticExt[Fp6[C]]
|
||||
|
||||
template c0*(a: ExtensionField): auto =
|
||||
a.coords[0]
|
||||
@ -662,8 +662,8 @@ func prod*(r: var CubicExt, a: CubicExt, _: type NonResidue) =
|
||||
## and v³ = ξ
|
||||
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
|
||||
let t {.noInit.} = a.c2
|
||||
r.c1 = a.c0
|
||||
r.c2 = a.c1
|
||||
r.c1 = a.c0
|
||||
r.c0.prod(t, NonResidue)
|
||||
|
||||
func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} =
|
||||
@ -689,8 +689,8 @@ func prod2x*(
|
||||
## and v³ = ξ
|
||||
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
|
||||
let t {.noInit.} = a.c2
|
||||
r.c1 = a.c0
|
||||
r.c2 = a.c1
|
||||
r.c1 = a.c0
|
||||
r.c0.prod2x(t, NonResidue)
|
||||
|
||||
{.pop.} # inline
|
||||
@ -811,7 +811,7 @@ func square2x_complex(r: var QuadraticExt2x, a: Fp2) =
|
||||
# Complex multiplications
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
func prod_complex(r: var Fp2, a, b: Fp2) =
|
||||
func prod_complex(r: var Fp2, a, b: Fp2) {.used.} =
|
||||
## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
|
||||
## ``r`` is initialized/overwritten
|
||||
##
|
||||
@ -823,10 +823,7 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
|
||||
# while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
|
||||
# and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
|
||||
#
|
||||
# Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words)
|
||||
# where addition/substraction are significantly cheaper than multiplication
|
||||
#
|
||||
# So we always reframe the imaginary part using Karatsuba approach to save a multiplication
|
||||
# Hence we can consider using q Karatsuba approach to save a multiplication
|
||||
# (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
|
||||
#
|
||||
# Costs (naive implementation)
|
||||
@ -841,19 +838,12 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
|
||||
# - 2 Addition 𝔽p
|
||||
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
|
||||
static: doAssert r.fromComplexExtension()
|
||||
|
||||
var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0)
|
||||
a0b0.prod(a.c0, b.c0) # [1 Mul]
|
||||
a1b1.prod(a.c1, b.c1) # [2 Mul]
|
||||
|
||||
r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add]
|
||||
r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add]
|
||||
# aliasing: a and b unneeded now
|
||||
r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary
|
||||
|
||||
r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub]
|
||||
r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub]
|
||||
r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
|
||||
var t {.noInit.}: typeof(r)
|
||||
var na1 {.noInit.}: typeof(r.c0)
|
||||
na1.neg(a.c1)
|
||||
t.c0.sumprod([a.c0, na1], [b.c0, b.c1])
|
||||
t.c1.sumprod([a.c0, a.c1], [b.c1, b.c0])
|
||||
r = t
|
||||
|
||||
func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) =
|
||||
## Double-precision unreduced complex multiplication
|
||||
@ -983,7 +973,37 @@ func square2x_disjoint*[Fdbl, F](
|
||||
r.c1.diff2xMod(r.c1, V0)
|
||||
r.c1.diff2xMod(r.c1, V1)
|
||||
|
||||
# Generic multiplications
|
||||
# Multiplications (specializations)
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
func prodImpl_fp4o2_p3mod8[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) =
|
||||
## Returns r = a * b
|
||||
## For 𝔽p4/𝔽p2 with p ≡ 3 (mod 8),
|
||||
## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
|
||||
## and 𝔽p SNR is (1 + i)
|
||||
static: doAssert C.has_P_3mod8_primeModulus()
|
||||
var
|
||||
b10_m_b11{.noInit.}, b10_p_b11{.noInit.}: Fp[C]
|
||||
n_a01{.noInit.}, n_a11{.noInit.}: Fp[C]
|
||||
|
||||
t{.noInit.}: Fp4[C]
|
||||
|
||||
b10_m_b11.diff(b.c1.c0, b.c1.c1)
|
||||
b10_p_b11.sum(b.c1.c0, b.c1.c1)
|
||||
n_a01.neg(a.c0.c1)
|
||||
n_a11.neg(a.c1.c1)
|
||||
|
||||
t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11],
|
||||
[b.c0.c0, b.c0.c1, b10_m_b11, b10_p_b11])
|
||||
t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1],
|
||||
[b.c0.c1, b.c0.c0, b10_p_b11, b10_m_b11])
|
||||
t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11],
|
||||
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1])
|
||||
t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1],
|
||||
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0])
|
||||
r = t
|
||||
|
||||
# Multiplications (generic)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
|
||||
@ -1264,7 +1284,7 @@ func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
|
||||
|
||||
# [1 Inv, 2 Sqr, 1 Add]
|
||||
t.redc2x(V0)
|
||||
t.inv() # v1 = 1 / (a0² - β a1²)
|
||||
t.inv() # v1 = 1 / (a0² - β a1²)
|
||||
|
||||
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
|
||||
r.c0.prod(a.c0, t) # r0 = a0 / (a0² - β a1²)
|
||||
@ -1288,35 +1308,37 @@ func square2x*(r: var QuadraticExt2x, a: QuadraticExt) =
|
||||
else:
|
||||
r.square2x_disjoint(a.c0, a.c1)
|
||||
|
||||
func square_disjoint*[F](r: var QuadraticExt[F], a0, a1: F) =
|
||||
# TODO understand why Fp4[BLS12_377]
|
||||
# is so slow in the branch
|
||||
# TODO:
|
||||
# - On Fp4, we can have a.c0.c0 off by p
|
||||
# a reduction is missing
|
||||
static: doAssert not r.fromComplexExtension(), "Faster specialization not implemented"
|
||||
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.square2x_disjoint(a0, a1)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
|
||||
func square*(r: var QuadraticExt, a: QuadraticExt) =
|
||||
when r.fromComplexExtension():
|
||||
when true:
|
||||
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
|
||||
if ({.noSideEffect.}: hasAdx()):
|
||||
r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
|
||||
else:
|
||||
r.square_complex(a)
|
||||
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
|
||||
if ({.noSideEffect.}: hasAdx()):
|
||||
r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
|
||||
else:
|
||||
r.square_complex(a)
|
||||
else: # slower
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.square2x(a)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
else:
|
||||
r.square_complex(a)
|
||||
else:
|
||||
when true: # r.typeof.F.C in {BLS12_377, BW6_761}:
|
||||
when QuadraticExt.C.has_large_field_elem():
|
||||
# BW6-761 requires too many registers for Dbl width path
|
||||
r.square_generic(a)
|
||||
else:
|
||||
# TODO understand why Fp4[BLS12_377]
|
||||
# is so slow in the branch
|
||||
# TODO:
|
||||
# - On Fp4, we can have a.c0.c0 off by p
|
||||
# a reduction is missing
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.square2x_disjoint(a.c0, a.c1)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
elif QuadraticExt is Fp4[BLS12_377]:
|
||||
# TODO BLS12-377 slowness to fix
|
||||
r.square_generic(a)
|
||||
else:
|
||||
r.square_disjoint(a.c0, a.c1)
|
||||
|
||||
func square*(a: var QuadraticExt) =
|
||||
## In-place squaring
|
||||
@ -1326,26 +1348,25 @@ func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
|
||||
## Multiplication r <- a*b
|
||||
|
||||
when r.fromComplexExtension():
|
||||
when false:
|
||||
r.prod_complex(a, b)
|
||||
else: # faster
|
||||
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
|
||||
if ({.noSideEffect.}: hasAdx()):
|
||||
r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
|
||||
else:
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.prod2x_complex(a, b)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
|
||||
if ({.noSideEffect.}: hasAdx()):
|
||||
r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
|
||||
else:
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.prod2x_complex(a, b)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
else:
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.prod2x_complex(a, b)
|
||||
r.c0.redc2x(d.c0)
|
||||
r.c1.redc2x(d.c1)
|
||||
else:
|
||||
when r.typeof.F.C.has_large_field_elem():
|
||||
when QuadraticExt is Fp12 or r.typeof.F.C.has_large_field_elem():
|
||||
# BW6-761 requires too many registers for Dbl width path
|
||||
r.prod_generic(a, b)
|
||||
elif QuadraticExt is Fp4 and QuadraticExt.C.has_P_3mod8_primeModulus():
|
||||
r.prodImpl_fp4o2_p3mod8(a, b)
|
||||
else:
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)
|
||||
@ -1595,7 +1616,49 @@ func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
|
||||
r.c0.prod(m12, NonResidue)
|
||||
r.c0 += s0
|
||||
|
||||
# Multiplications
|
||||
# Multiplications (specializations)
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
func prodImpl_fp6o2_p3mod8[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) =
|
||||
## Returns r = a * b
|
||||
## For 𝔽p6/𝔽p2 with p ≡ 3 (mod 8),
|
||||
## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
|
||||
## and 𝔽p SNR is (1 + i)
|
||||
# https://eprint.iacr.org/2022/367 - Equation 8
|
||||
static: doAssert C.has_P_3mod8_primeModulus()
|
||||
var
|
||||
b10_p_b11{.noInit.}, b10_m_b11{.noInit.}: Fp[C]
|
||||
b20_p_b21{.noInit.}, b20_m_b21{.noInit.}: Fp[C]
|
||||
|
||||
n_a01{.noInit.}, n_a11{.noInit.}, n_a21{.noInit.}: Fp[C]
|
||||
|
||||
t{.noInit.}: Fp6[C]
|
||||
|
||||
b10_p_b11.sum(b.c1.c0, b.c1.c1)
|
||||
b10_m_b11.diff(b.c1.c0, b.c1.c1)
|
||||
b20_p_b21.sum(b.c2.c0, b.c2.c1)
|
||||
b20_m_b21.diff(b.c2.c0, b.c2.c1)
|
||||
|
||||
n_a01.neg(a.c0.c1)
|
||||
n_a11.neg(a.c1.c1)
|
||||
n_a21.neg(a.c2.c1)
|
||||
|
||||
t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
|
||||
[b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11])
|
||||
t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
|
||||
[b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11])
|
||||
t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
|
||||
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21])
|
||||
t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
|
||||
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21])
|
||||
t.c2.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
|
||||
[b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1])
|
||||
t.c2.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
|
||||
[b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0])
|
||||
|
||||
r = t
|
||||
|
||||
# Multiplications (generic)
|
||||
# -------------------------------------------------------------------
|
||||
|
||||
func prodImpl(r: var CubicExt, a, b: CubicExt) =
|
||||
@ -1686,6 +1749,40 @@ func prod2xImpl(r: var CubicExt2x, a, b: CubicExt) =
|
||||
# Sparse multiplication
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
func mul_sparse_by_x00*(r: var CubicExt, a: CubicExt, sparseB: auto) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
|
||||
|
||||
when typeof(sparseB) is typeof(a):
|
||||
template b(): untyped = sparseB.c0
|
||||
elif typeof(sparseB) is typeof(a.c0):
|
||||
template b(): untyped = sparseB
|
||||
else:
|
||||
{.error: "sparseB type is " & $typeof(sparseB) &
|
||||
" which does not match with either a (" & $typeof(a) &
|
||||
") or a.c0 (" & $typeof(a.c0) & ")".}
|
||||
|
||||
r.c0.prod(a.c0, b)
|
||||
r.c1.prod(a.c1, b)
|
||||
r.c2.prod(a.c2, b)
|
||||
|
||||
func mul2x_sparse_by_x00*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
|
||||
|
||||
when typeof(sparseB) is typeof(a):
|
||||
template b(): untyped = sparseB.c0
|
||||
elif typeof(sparseB) is typeof(a.c0):
|
||||
template b(): untyped = sparseB
|
||||
else:
|
||||
{.error: "sparseB type is " & $typeof(sparseB) &
|
||||
" which does not match with either a (" & $typeof(a) &
|
||||
") or a.c0 (" & $typeof(a.c0) & ")".}
|
||||
|
||||
r.c0.prod2x(a.c0, b)
|
||||
r.c1.prod2x(a.c1, b)
|
||||
r.c2.prod2x(a.c2, b)
|
||||
|
||||
func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
|
||||
## Sparse multiplication of a cubic extenion element
|
||||
## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
|
||||
@ -1718,6 +1815,171 @@ func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
|
||||
r.c1.prod(a.c0, b)
|
||||
r.c2.prod(a.c1, b)
|
||||
|
||||
func mul2x_sparse_by_0y0*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
|
||||
## Sparse multiplication of a cubic extenion element
|
||||
## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
|
||||
|
||||
when typeof(sparseB) is typeof(a):
|
||||
template b(): untyped = sparseB.c1
|
||||
elif typeof(sparseB) is typeof(a.c0):
|
||||
template b(): untyped = sparseB
|
||||
else:
|
||||
{.error: "sparseB type is " & $typeof(sparseB) &
|
||||
" which does not match with either a (" & $typeof(a) &
|
||||
") or a.c0 (" & $typeof(a.c0) & ")".}
|
||||
|
||||
r.c0.prod2x(a.c2, b)
|
||||
r.c0.prod2x(r.c0, NonResidue)
|
||||
r.c1.prod2x(a.c0, b)
|
||||
r.c2.prod2x(a.c1, b)
|
||||
|
||||
|
||||
func mul_sparse_by_xy0*[Fpkdiv3](r: var CubicExt, a: CubicExt,
|
||||
x, y: Fpkdiv3) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
|
||||
##
|
||||
## r and a must not alias
|
||||
|
||||
# v0 = a0 b0
|
||||
# v1 = a1 b1
|
||||
# v2 = a2 b2 = 0
|
||||
#
|
||||
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
|
||||
# = ξ (a1 b1 + a2 b1 - v1) + v0
|
||||
# = ξa2 b1 + v0
|
||||
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
|
||||
# = (a0 + a1) * (b0 + b1) - v0 - v1
|
||||
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
|
||||
# = a0 b0 + a2 b0 - v0 + v1
|
||||
# = a2 b0 + v1
|
||||
|
||||
static: doAssert a.c0 is Fpkdiv3
|
||||
|
||||
var
|
||||
v0 {.noInit.}: Fpkdiv3
|
||||
v1 {.noInit.}: Fpkdiv3
|
||||
|
||||
v0.prod(a.c0, x)
|
||||
v1.prod(a.c1, y)
|
||||
|
||||
r.c0.prod(a.c2, y)
|
||||
r.c0 *= NonResidue
|
||||
r.c0 += v0
|
||||
|
||||
r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
|
||||
r.c2.sum(x, y)
|
||||
r.c1 *= r.c2
|
||||
r.c1 -= v0
|
||||
r.c1 -= v1
|
||||
|
||||
r.c2.prod(a.c2, x)
|
||||
r.c2 += v1
|
||||
|
||||
|
||||
func mul2x_sparse_by_xy0*[Fpkdiv3](r: var CubicExt2x, a: CubicExt,
|
||||
x, y: Fpkdiv3) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
|
||||
##
|
||||
## r and a must not alias
|
||||
|
||||
static: doAssert a.c0 is Fpkdiv3
|
||||
|
||||
var
|
||||
V0 {.noInit.}: doubleprec(Fpkdiv3)
|
||||
V1 {.noInit.}: doubleprec(Fpkdiv3)
|
||||
t0{.noInit.}: Fpkdiv3
|
||||
t1{.noInit.}: Fpkdiv3
|
||||
|
||||
V0.prod2x(a.c0, x)
|
||||
V1.prod2x(a.c1, y)
|
||||
|
||||
r.c0.prod2x(a.c2, y)
|
||||
r.c0.prod2x(r.c0, NonResidue)
|
||||
r.c0.sum2xMod(r.c0, V0)
|
||||
|
||||
t0.sum(a.c0, a.c1)
|
||||
t1.sum(x, y)
|
||||
r.c1.prod2x(t0, t1)
|
||||
r.c1.diff2xMod(r.c1, V0)
|
||||
r.c1.diff2xMod(r.c1, V1)
|
||||
|
||||
r.c2.prod2x(a.c2, x)
|
||||
r.c2.sum2xMod(r.c2, V1)
|
||||
|
||||
func mul_sparse_by_0yz*[Fpkdiv3](r: var CubicExt, a: CubicExt, y, z: Fpkdiv3) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
|
||||
##
|
||||
## r and a must not alias
|
||||
|
||||
# v0 = a0 b0 = 0
|
||||
# v1 = a1 b1
|
||||
# v2 = a2 b2
|
||||
#
|
||||
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
|
||||
# = ξ ((a1 + a2) * (b1 + b2) - v1 - v2)
|
||||
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
|
||||
# = a0 b1 + a1 b1 - v1 + ξ v2
|
||||
# = a0 b1 + ξ v2
|
||||
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
|
||||
# = a0 b2 + a2 b2 - v0 - v2 + v1
|
||||
# = a0 b2 + v1
|
||||
|
||||
static: doAssert a.c0 is Fpkdiv3
|
||||
|
||||
var
|
||||
v1 {.noInit.}: Fpkdiv3
|
||||
v2 {.noInit.}: Fpkdiv3
|
||||
|
||||
v1.prod(a.c1, y)
|
||||
v2.prod(a.c2, z)
|
||||
|
||||
r.c1.sum(a.c1, a.c2)
|
||||
r.c2.sum( y, z)
|
||||
r.c0.prod(r.c1, r.c2)
|
||||
r.c0 -= v1
|
||||
r.c0 -= v2
|
||||
r.c0 *= NonResidue
|
||||
|
||||
r.c1.prod(a.c0, y)
|
||||
v2 *= NonResidue
|
||||
r.c1 += v2
|
||||
|
||||
r.c2.prod(a.c0, z)
|
||||
r.c2 += v1
|
||||
|
||||
func mul2x_sparse_by_0yz*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, y, z: Fpkdiv3) =
|
||||
## Sparse multiplication of a cubic extension element
|
||||
## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
|
||||
##
|
||||
## r and a must not alias
|
||||
static: doAssert a.c0 is Fpkdiv3
|
||||
|
||||
var
|
||||
V1 {.noInit.}: doubleprec(Fpkdiv3)
|
||||
V2 {.noInit.}: doubleprec(Fpkdiv3)
|
||||
t1 {.noInit.}: Fpkdiv3
|
||||
t2 {.noInit.}: Fpkdiv3
|
||||
|
||||
V1.prod2x(a.c1, y)
|
||||
V2.prod2x(a.c2, z)
|
||||
|
||||
t1.sum(a.c1, a.c2)
|
||||
t2.sum( y, z)
|
||||
r.c0.prod2x(t1, t2)
|
||||
r.c0.diff2xMod(r.c0, V1)
|
||||
r.c0.diff2xMod(r.c0, V2)
|
||||
r.c0.prod2x(r.c0, NonResidue)
|
||||
|
||||
r.c1.prod2x(a.c0, y)
|
||||
V2.prod2x(V2, NonResidue)
|
||||
r.c1.sum2xMod(r.c1, V2)
|
||||
|
||||
r.c2.prod2x(a.c0, z)
|
||||
r.c2.sum2xMod(r.c2, V1)
|
||||
|
||||
# Inversion
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
@ -1856,6 +2118,8 @@ func prod*(r: var CubicExt, a, b: CubicExt) =
|
||||
## Out-of-place multiplication
|
||||
when CubicExt.C.has_large_field_elem():
|
||||
r.prodImpl(a, b)
|
||||
elif r is Fp6 and CubicExt.C.has_P_3mod8_primeModulus():
|
||||
r.prodImpl_fp6o2_p3mod8(a, b)
|
||||
else:
|
||||
var d {.noInit.}: doublePrec(typeof(r))
|
||||
d.prod2x(a, b)
|
||||
|
@ -33,7 +33,7 @@ proc spaces*(num: int): string =
|
||||
for i in 0 ..< num:
|
||||
result[i] = ' '
|
||||
|
||||
func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian) =
|
||||
func appendHex*(accum: var string, f: ExtensionField, indent = 0, order: static Endianness = bigEndian) =
|
||||
## Hex accumulator
|
||||
accum.add static($f.typeof.genericHead() & '(')
|
||||
staticFor i, 0, f.coords.len:
|
||||
@ -46,7 +46,7 @@ func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, ord
|
||||
accum.appendHex(f.coords[i], indent+2, order)
|
||||
accum.add ")"
|
||||
|
||||
func toHex*(f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian): string =
|
||||
func toHex*(f: ExtensionField, indent = 0, order: static Endianness = bigEndian): string =
|
||||
## Stringify a tower field element to hex.
|
||||
## Note. Leading zeros are not removed.
|
||||
## Result is prefixed with 0x
|
||||
|
@ -11,7 +11,7 @@ import
|
||||
../../platforms/primitives,
|
||||
../arithmetic,
|
||||
../extension_fields,
|
||||
../curves/zoo_frobenius
|
||||
../constants/zoo_frobenius
|
||||
|
||||
# Frobenius Map
|
||||
# ------------------------------------------------------------
|
||||
@ -79,17 +79,24 @@ func frobenius_map*[C](r: var Fp6[C], a: Fp6[C], k: static int = 1) {.inline.} =
|
||||
func frobenius_map*[C](r: var Fp12[C], a: Fp12[C], k: static int = 1) {.inline.} =
|
||||
## Computes a^(pᵏ)
|
||||
## The p-power frobenius automorphism on 𝔽p12
|
||||
static: doAssert r.c0 is Fp4
|
||||
staticFor i, 0, r.coords.len:
|
||||
staticFor j, 0, r.coords[0].coords.len:
|
||||
r.coords[i].coords[j].frobenius_map(a.coords[i].coords[j], k)
|
||||
|
||||
r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
|
||||
r.c0.c1.mulCheckSparse frobMapConst(C, 3, k)
|
||||
r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
|
||||
r.c1.c1.mulCheckSparse frobMapConst(C, 4, k)
|
||||
r.c2.c0.mulCheckSparse frobMapConst(C, 2, k)
|
||||
r.c2.c1.mulCheckSparse frobMapConst(C, 5, k)
|
||||
when r.c0 is Fp4:
|
||||
r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
|
||||
r.c0.c1.mulCheckSparse frobMapConst(C, 3, k)
|
||||
r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
|
||||
r.c1.c1.mulCheckSparse frobMapConst(C, 4, k)
|
||||
r.c2.c0.mulCheckSparse frobMapConst(C, 2, k)
|
||||
r.c2.c1.mulCheckSparse frobMapConst(C, 5, k)
|
||||
else:
|
||||
r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
|
||||
r.c0.c1.mulCheckSparse frobMapConst(C, 2, k)
|
||||
r.c0.c2.mulCheckSparse frobMapConst(C, 4, k)
|
||||
r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
|
||||
r.c1.c1.mulCheckSparse frobMapConst(C, 3, k)
|
||||
r.c1.c2.mulCheckSparse frobMapConst(C, 5, k)
|
||||
|
||||
# ψ (Psi) - Untwist-Frobenius-Twist Endomorphisms on twisted curves
|
||||
# -----------------------------------------------------------------
|
||||
|
@ -215,7 +215,7 @@ func cyclotomic_inv*[FT](r: var FT, a: FT) {.meter.} =
|
||||
## consequently `a` MUST be unitary
|
||||
r.conj(a)
|
||||
|
||||
func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} =
|
||||
func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) =
|
||||
## Square `a` into `r`
|
||||
## `a` MUST be in the cyclotomic subgroup
|
||||
## consequently `a` MUST be unitary
|
||||
@ -224,47 +224,147 @@ func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} =
|
||||
# Granger, Scott, 2009
|
||||
# https://eprint.iacr.org/2009/565.pdf
|
||||
|
||||
# Cubic extension field
|
||||
# A = 3a² − 2 ̄a
|
||||
# B = 3 √i c² + 2 ̄b
|
||||
# C = 3b² − 2 ̄c
|
||||
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0)
|
||||
|
||||
template a0: untyped = a.c0.c0
|
||||
template a1: untyped = a.c0.c1
|
||||
template a2: untyped = a.c1.c0
|
||||
template a3: untyped = a.c1.c1
|
||||
template a4: untyped = a.c2.c0
|
||||
template a5: untyped = a.c2.c1
|
||||
|
||||
v0.square(a.c0)
|
||||
v1.square(a.c1)
|
||||
v2.square(a.c2)
|
||||
|
||||
# From here on, r aliasing with a is only for the first operation
|
||||
# and only read/write the exact same coordinates
|
||||
|
||||
# 3v₀₀ - 2a₀
|
||||
r.c0.c0.diff(v0.c0, a0)
|
||||
r.c0.c0.double()
|
||||
r.c0.c0 += v0.c0
|
||||
# 3v₀₁ + 2a₁
|
||||
r.c0.c1.sum(v0.c1, a1)
|
||||
r.c0.c1.double()
|
||||
r.c0.c1 += v0.c1
|
||||
# 3v₁₀ - 2a₄
|
||||
r.c2.c0.diff(v1.c0, a4)
|
||||
r.c2.c0.double()
|
||||
r.c2.c0 += v1.c0
|
||||
# 3v₁₁ + 2b₅
|
||||
r.c2.c1.sum(v1.c1, a5)
|
||||
r.c2.c1.double()
|
||||
r.c2.c1 += v1.c1
|
||||
|
||||
# Now B = 3 √i c² + 2 ̄b
|
||||
# beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
|
||||
|
||||
# 3 (√i c²)₀ + 2a₂
|
||||
v2.c1 *= NonResidue
|
||||
r.c1.c0.sum(v2.c1, a2)
|
||||
r.c1.c0.double()
|
||||
r.c1.c0 += v2.c1
|
||||
|
||||
# 3 (√i c²)₁ - 2a₃
|
||||
r.c1.c1.diff(v2.c0, a3)
|
||||
r.c1.c1.double()
|
||||
r.c1.c1 += v2.c0
|
||||
|
||||
func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt[F]) =
|
||||
## Square `a` into `r`
|
||||
## `a` MUST be in the cyclotomic subgroup
|
||||
## consequently `a` MUST be unitary
|
||||
# Mapping between towering schemes
|
||||
# --------------------------------
|
||||
#
|
||||
# canonical <=> cubic over quadratic <=> quadratic over cubic
|
||||
# c₀ <=> a₀ <=> b₀
|
||||
# c₁ <=> a₂ <=> b₃
|
||||
# c₂ <=> a₄ <=> b₁
|
||||
# c₃ <=> a₁ <=> b₄
|
||||
# c₄ <=> a₃ <=> b₂
|
||||
# c₅ <=> a₅ <=> b₅
|
||||
#
|
||||
# Hence, this formula for a cubic extension field
|
||||
# A = 3a² − 2 ̄a
|
||||
# B = 3 √i c² + 2 ̄b
|
||||
# C = 3b² − 2 ̄c
|
||||
#
|
||||
# becomes
|
||||
# A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄)
|
||||
# B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
|
||||
# C = (b₁, b₅) = 3(b₃, b₂)² - 2(b₁, -b₅)
|
||||
#
|
||||
# with
|
||||
# v₀ = (b₀, b₄) = (a.c0.c0, a.c1.c1)
|
||||
# v₁ = (b₃, b₂) = (a.c1.c0, a.c0.c2)
|
||||
# v₂ = (b₁, b₅) = (a.c0.c1, a.c1.c2)
|
||||
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: QuadraticExt[typeof(r.c0.c0)]
|
||||
|
||||
template b0: untyped = a.c0.c0
|
||||
template b1: untyped = a.c0.c1
|
||||
template b2: untyped = a.c0.c2
|
||||
template b3: untyped = a.c1.c0
|
||||
template b4: untyped = a.c1.c1
|
||||
template b5: untyped = a.c1.c2
|
||||
|
||||
v0.square_disjoint(b0, b4)
|
||||
v1.square_disjoint(b3, b2)
|
||||
v2.square_disjoint(b1, b5)
|
||||
|
||||
# From here on, r aliasing with a is only for the first operation
|
||||
# and only read/write the exact same coordinates
|
||||
|
||||
# 3v₀₀ - 2b₀
|
||||
r.c0.c0.diff(v0.c0, b0)
|
||||
r.c0.c0.double()
|
||||
r.c0.c0 += v0.c0
|
||||
# 3v₁₀ - 2b₁
|
||||
r.c0.c1.diff(v1.c0, b1)
|
||||
r.c0.c1.double()
|
||||
r.c0.c1 += v1.c0
|
||||
# 3v₀₁ + 2b₄
|
||||
r.c1.c1.sum(v0.c1, b4)
|
||||
r.c1.c1.double()
|
||||
r.c1.c1 += v0.c1
|
||||
# 3v₁₁ + 2b₅
|
||||
r.c1.c2.sum(v1.c1, b5)
|
||||
r.c1.c2.double()
|
||||
r.c1.c2 += v1.c1
|
||||
|
||||
# Now B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
|
||||
# beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
|
||||
|
||||
# 3 (√i (b₁, b₅)²)₀ + 2b₃
|
||||
v2.c1 *= NonResidue
|
||||
r.c1.c0.sum(v2.c1, b3)
|
||||
r.c1.c0.double()
|
||||
r.c1.c0 += v2.c1
|
||||
|
||||
# 3 (√i (b₁, b₅)²)₁ - 2b₃
|
||||
r.c0.c2.diff(v2.c0, b2)
|
||||
r.c0.c2.double()
|
||||
r.c0.c2 += v2.c0
|
||||
|
||||
func cyclotomic_square*[FT](r: var FT, a: FT) {.inline, meter.} =
|
||||
## Square `a` into `r`
|
||||
## `a` MUST be in the cyclotomic subgroup
|
||||
## consequently `a` MUST be unitary
|
||||
#
|
||||
# Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions
|
||||
# Granger, Scott, 2009
|
||||
# https://eprint.iacr.org/2009/565.pdf
|
||||
when a is CubicExt:
|
||||
# Cubic extension field
|
||||
# A = 3a² − 2 ̄a
|
||||
# B = 3 √i c² + 2 ̄b
|
||||
# C = 3b² − 2 ̄c
|
||||
var t0{.noinit.}, t1{.noinit.}: typeof(a.c0)
|
||||
|
||||
t0.square(a.c0) # t0 = a²
|
||||
t1.double(t0) # t1 = 2a²
|
||||
t1 += t0 # t1 = 3a²
|
||||
|
||||
t0.conj(a.c0) # t0 = ̄a
|
||||
t0.double() # t0 = 2 ̄a
|
||||
r.c0.diff(t1, t0) # r0 = 3a² − 2 ̄a
|
||||
|
||||
# Aliasing: a.c0 unused
|
||||
|
||||
t0.square(a.c2) # t0 = c²
|
||||
t0 *= NonResidue # t0 = √i c²
|
||||
t1.double(t0) # t1 = 2 √i c²
|
||||
t0 += t1 # t0 = 3 √i c²
|
||||
|
||||
t1.square(a.c1) # t1 = b²
|
||||
|
||||
r.c1.conj(a.c1) # r1 = ̄b
|
||||
r.c1.double() # r1 = 2 ̄b
|
||||
r.c1 += t0 # r1 = 3 √i c² + 2 ̄b
|
||||
|
||||
# Aliasing: a.c1 unused
|
||||
|
||||
t0.double(t1) # t0 = 2b²
|
||||
t0 += t1 # t0 = 3b²
|
||||
|
||||
t1.conj(a.c2) # r2 = ̄c
|
||||
t1.double() # r2 = 2 ̄c
|
||||
r.c2.diff(t0, t1) # r2 = 3b² - 2 ̄c
|
||||
|
||||
r.cyclotomic_square_cube_over_quad(a)
|
||||
else:
|
||||
{.error: "Not implemented".}
|
||||
r.cyclotomic_square_quad_over_cube(a)
|
||||
|
||||
func cyclotomic_square*[FT](a: var FT) {.meter.} =
|
||||
func cyclotomic_square*[FT](a: var FT) {.inline.} =
|
||||
## Square `a` into `r`
|
||||
## `a` MUST be in the cyclotomic subgroup
|
||||
## consequently `a` MUST be unitary
|
||||
@ -560,7 +660,10 @@ func fromFpk*[Fpkdiv6, Fpk](
|
||||
{.error: "a must be a sextic extension field".}
|
||||
elif a is QuadraticExt:
|
||||
when a.c0 is CubicExt:
|
||||
{.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".}
|
||||
g.g2 = a.c1.c0
|
||||
g.g3 = a.c0.c2
|
||||
g.g4 = a.c0.c1
|
||||
g.g5 = a.c1.c2
|
||||
else:
|
||||
{.error: "a must be a sextic extension field".}
|
||||
else:
|
||||
@ -584,7 +687,12 @@ func asFpk*[Fpkdiv6, Fpk](
|
||||
{.error: "a must be a sextic extension field".}
|
||||
elif a is QuadraticExt:
|
||||
when a.c0 is CubicExt:
|
||||
{.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".}
|
||||
a.c0.c0 = g0
|
||||
a.c0.c1 = g.g4
|
||||
a.c0.c2 = g.g3
|
||||
a.c1.c0 = g.g2
|
||||
a.c1.c1 = g1
|
||||
a.c1.c2 = g.g5
|
||||
else:
|
||||
{.error: "a must be a sextic extension field".}
|
||||
else:
|
||||
@ -598,9 +706,6 @@ func cyclotomic_exp_compressed*[N: static int, Fpk](
|
||||
## Exponentiation is done least-signigicant bits first
|
||||
## `squarings` represents the number of squarings
|
||||
## to do before the next multiplication.
|
||||
##
|
||||
## `accumSquarings` stores the accumulated squarings so far
|
||||
## iff N != 1
|
||||
|
||||
type Fpkdiv6 = typeof(a.c0.c0)
|
||||
|
||||
|
@ -17,7 +17,7 @@ import
|
||||
ec_shortweierstrass_projective
|
||||
],
|
||||
../io/io_extfields,
|
||||
../curves/zoo_constants
|
||||
../constants/zoo_constants
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
@ -390,44 +390,19 @@ func line_add*[F1, F2](
|
||||
# D-Twist
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func mul_by_line_xy0*[Fpkdiv2, Fpkdiv6](
|
||||
r: var Fpkdiv2,
|
||||
a: Fpkdiv2,
|
||||
x, y: Fpkdiv6) =
|
||||
## Sparse multiplication of an 𝔽pᵏᐟ² element
|
||||
## with coordinates (a₀, a₁, a₂) by a line (x, y, 0)
|
||||
## The z coordinates in the line will be ignored.
|
||||
## `r` and `a` must not alias
|
||||
|
||||
static:
|
||||
doAssert a is CubicExt
|
||||
doAssert a.c0 is Fpkdiv6
|
||||
|
||||
var
|
||||
v0 {.noInit.}: Fpkdiv6
|
||||
v1 {.noInit.}: Fpkdiv6
|
||||
|
||||
v0.prod(a.c0, x)
|
||||
v1.prod(a.c1, y)
|
||||
r.c0.prod(a.c2, y)
|
||||
r.c0 *= SexticNonResidue
|
||||
r.c0 += v0
|
||||
|
||||
r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
|
||||
r.c2.sum(x, y)
|
||||
r.c1 *= r.c2
|
||||
r.c1 -= v0
|
||||
r.c1 -= v1
|
||||
|
||||
r.c2.prod(a.c2, x)
|
||||
r.c2 += v1
|
||||
|
||||
func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
|
||||
func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element coming from an D-Twist line function
|
||||
## With a quadratic over cubic towering (Fp2 -> Fp6 -> Fp12)
|
||||
## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
|
||||
## The sparse element is represented by a packed Line type
|
||||
## with coordinate (a,b,c) matching 𝔽pᵏ coordinates ab00c0
|
||||
## with coordinate (a,b,c) matching 𝔽pᵏ coordinates a00bc0
|
||||
|
||||
# a0b0 = (a00, a01, a02).( x, 0, 0)
|
||||
# a1b1 = (a10, a11, a12).( y, z, 0)
|
||||
#
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = a0 b1 + a1 b0 (Schoolbook)
|
||||
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
@ -436,27 +411,290 @@ func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
|
||||
|
||||
type Fpkdiv2 = typeof(f.c0)
|
||||
|
||||
var
|
||||
v0 {.noInit.}: Fpkdiv2
|
||||
v1 {.noInit.}: Fpkdiv2
|
||||
v2 {.noInit.}: Line[Fpkdiv6]
|
||||
v3 {.noInit.}: Fpkdiv2
|
||||
when Fpk.C.has_large_field_elem():
|
||||
var
|
||||
v0 {.noInit.}: Fpkdiv2
|
||||
v1 {.noInit.}: Fpkdiv2
|
||||
t {.noInit.}: Fpkdiv6
|
||||
|
||||
v0.mul_by_line_xy0(f.c0, l.a, l.b)
|
||||
v1.mul_sparse_by_0y0(f.c1, l.c)
|
||||
v1.sum(f.c0, f.c1)
|
||||
t.sum(l.a, l.b)
|
||||
v0.mul_sparse_by_xy0(v1, t, l.c)
|
||||
|
||||
v2.x = l.a
|
||||
v2.y.sum(l.b, l.c)
|
||||
f.c1 += f.c0
|
||||
v3.mul_by_line_xy0(f.c1, v2.x, v2.y)
|
||||
v3 -= v0
|
||||
f.c1.diff(v3, v1)
|
||||
v1.mul_sparse_by_xy0(f.c1, l.b, l.c)
|
||||
|
||||
v3.c0.prod(v1.c2, SexticNonResidue)
|
||||
v3.c0 += v0.c0
|
||||
v3.c1.sum(v0.c1, v1.c0)
|
||||
v3.c2.sum(v0.c2, v1.c1)
|
||||
f.c0 = v3
|
||||
f.c1.diff(v0, v1)
|
||||
|
||||
v0.mul_sparse_by_x00(f.c0, l.a)
|
||||
f.c1 -= v0
|
||||
|
||||
v1 *= NonResidue
|
||||
f.c0.sum(v0, v1)
|
||||
|
||||
else: # Lazy reduction
|
||||
var
|
||||
V0 {.noInit.}: doubleprec(Fpkdiv2)
|
||||
V1 {.noInit.}: doubleprec(Fpkdiv2)
|
||||
V {.noInit.}: doubleprec(Fpkdiv2)
|
||||
t {.noInit.}: Fpkdiv6
|
||||
u {.noInit.}: Fpkdiv2
|
||||
|
||||
u.sum(f.c0, f.c1)
|
||||
t.sum(l.a, l.b)
|
||||
V.mul2x_sparse_by_xy0(u, t, l.c)
|
||||
V1.mul2x_sparse_by_xy0(f.c1, l.b, l.c)
|
||||
V0.mul2x_sparse_by_x00(f.c0, l.a)
|
||||
|
||||
V.diff2xMod(V, V1)
|
||||
V.diff2xMod(V, V0)
|
||||
f.c1.redc2x(V)
|
||||
|
||||
V1.prod2x(V1, NonResidue)
|
||||
V0.sum2xMod(V0, V1)
|
||||
f.c0.redc2x(V0)
|
||||
|
||||
func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
|
||||
## Multiply 2 lines together
|
||||
## The result is sparse in f.
|
||||
# Preambule
|
||||
#
|
||||
# A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
|
||||
# (a0', a1', 0).(b0', b1', 0)
|
||||
#
|
||||
# r0' = a0'b0'
|
||||
# r1' = a0'b1 + a1'b0'
|
||||
# or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
|
||||
# r2' = a1'b1
|
||||
#
|
||||
# Note¹ that the Karatsuba does not reuse terms!
|
||||
#
|
||||
# In the following equations (taken from quad extension implementation)
|
||||
# a0 = ( x , 0 , 0)
|
||||
# a1 = ( y , z , 0)
|
||||
# b0 = ( x', 0, 0)
|
||||
# b1 = ( y', z', 0)
|
||||
#
|
||||
# a0b0 = ( x, 0, 0).( x', 0 , 0) = (xx', 0, 0 )
|
||||
# a1b1 = ( y, z, 0).( y', z', 0) = (yy', yz'+zy', zz')
|
||||
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = a0 b1 + a1 b0 (Schoolbook)
|
||||
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
|
||||
#
|
||||
# Note² that Karatsuba doesn't help as a0 and b0 are actually very cheap (x, 0, 0)
|
||||
#
|
||||
# Replacing by the coordinates in the lower tower we get
|
||||
#
|
||||
# r0 = (xx'+ξzz', yy', yz'+zy')
|
||||
# r1 = ( x, 0, 0)( y', z', 0) + ( y , z , 0)( x', 0, 0)
|
||||
# = (xy'+yx', xz'+zx', 0)
|
||||
#
|
||||
# Now we can apply Karasuba¹² for a total of 6 multiplications
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
|
||||
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
|
||||
|
||||
XX.prod2x(l0.a, l1.a)
|
||||
YY.prod2x(l0.b, l1.b)
|
||||
ZZ.prod2x(l0.c, l1.c)
|
||||
|
||||
var V{.noInit.}: doublePrec(Fpkdiv6)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
|
||||
|
||||
# r0 = (xx'+ξzz', yy', yz'+zy')
|
||||
# r00 = xx'+ξzz''
|
||||
# r01 = yy'
|
||||
# r02 = yz'+zy' = (y+z)(y'+z') - yy' - zz'
|
||||
V.prod2x(ZZ, NonResidue)
|
||||
V.sum2xMod(V,XX)
|
||||
f.c0.c0.redc2x(V)
|
||||
|
||||
f.c0.c1.redc2x(YY)
|
||||
|
||||
t0.sum(l0.b, l0.c)
|
||||
t1.sum(l1.b, l1.c)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, YY)
|
||||
V.diff2xMod(V, ZZ)
|
||||
f.c0.c2.redc2x(V)
|
||||
|
||||
# r1 = (xy'+yx', xz'+zx', 0)
|
||||
# r10 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
|
||||
# r11 = xz'+zx' = (x+z)(x'+z') - xx' - zz'
|
||||
# r12 = 0
|
||||
t0.sum(l0.a, l0.b)
|
||||
t1.sum(l1.a, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, XX)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c1.c0.redc2x(V)
|
||||
|
||||
t0.sum(l0.a, l0.c)
|
||||
t1.sum(l1.a, l1.c)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, XX)
|
||||
V.diff2xMod(V, ZZ)
|
||||
f.c1.c1.redc2x(V)
|
||||
|
||||
f.c1.c2.setZero()
|
||||
|
||||
# M-Twist
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func mul_sparse_by_line_cb00a0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element coming from an M-Twist line function
|
||||
## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
|
||||
## The sparse element is represented by a packed Line type
|
||||
## with coordinate (a,b,c) matching 𝔽pᵏ coordinates cb00a0
|
||||
|
||||
# a0b0 = (a00, a01, a02).( z, y, 0)
|
||||
# a1b1 = (a10, a11, a12).( 0, x, 0)
|
||||
#
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = a0 b1 + a1 b0 (Schoolbook)
|
||||
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == M_Twist
|
||||
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
|
||||
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv2 = typeof(f.c0)
|
||||
|
||||
when Fpk.C.has_large_field_elem():
|
||||
var
|
||||
v0 {.noInit.}: Fpkdiv2
|
||||
v1 {.noInit.}: Fpkdiv2
|
||||
t {.noInit.}: Fpkdiv6
|
||||
|
||||
v1.sum(f.c0, f.c1)
|
||||
t.sum(l.b, l.a)
|
||||
v0.mul_sparse_by_xy0(v1, l.c, t)
|
||||
|
||||
v1.mul_sparse_by_0y0(f.c1, l.a)
|
||||
|
||||
f.c1.diff(v0, v1)
|
||||
|
||||
v0.mul_sparse_by_xy0(f.c0, l.c, l.b)
|
||||
f.c1 -= v0
|
||||
|
||||
v1 *= NonResidue
|
||||
f.c0.sum(v0, v1)
|
||||
else: # Lazy reduction
|
||||
var
|
||||
V0 {.noInit.}: doubleprec(Fpkdiv2)
|
||||
V1 {.noInit.}: doubleprec(Fpkdiv2)
|
||||
V {.noInit.}: doubleprec(Fpkdiv2)
|
||||
t {.noInit.}: Fpkdiv6
|
||||
u {.noInit.}: Fpkdiv2
|
||||
|
||||
u.sum(f.c0, f.c1)
|
||||
t.sum(l.b, l.a)
|
||||
V.mul2x_sparse_by_xy0(u, l.c, t)
|
||||
V1.mul2x_sparse_by_0y0(f.c1, l.a)
|
||||
V0.mul2x_sparse_by_xy0(f.c0, l.c, l.b)
|
||||
|
||||
V.diff2xMod(V, V1)
|
||||
V.diff2xMod(V, V0)
|
||||
f.c1.redc2x(V)
|
||||
|
||||
V1.prod2x(V1, NonResidue)
|
||||
V0.sum2xMod(V0, V1)
|
||||
f.c0.redc2x(V0)
|
||||
|
||||
func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
|
||||
## Multiply 2 lines together
|
||||
## The result is sparse in f.c1.c0
|
||||
# Preambule
|
||||
#
|
||||
# A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
|
||||
# (a0', a1', 0).(b0', b1', 0)
|
||||
#
|
||||
# r0' = a0'b0'
|
||||
# r1' = a0'b1 + a1'b0'
|
||||
# or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
|
||||
# r2' = a1'b1
|
||||
#
|
||||
# Note¹ that the Karatsuba does not reuse terms!
|
||||
#
|
||||
# In the following equations (taken from quad extension implementation)
|
||||
# a0 = ( z , y , 0)
|
||||
# a1 = ( 0 , x , 0)
|
||||
# b0 = ( z', y', 0)
|
||||
# b1 = ( 0 , x', 0)
|
||||
#
|
||||
# a0b0 = ( z, y, 0).(z', y', 0) = (zz', zy'+yz', yy')
|
||||
# a1b1 = ( 0, x, 0).( 0, x', 0) = ( 0, 0, xx')
|
||||
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = a0 b1 + a1 b0 (Schoolbook)
|
||||
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
|
||||
#
|
||||
# Note² that Karatsuba doesn't help as a1 and b1 are actually very cheap (0, x, 0)
|
||||
#
|
||||
# Replacing by the coordinates in the lower tower we get
|
||||
#
|
||||
# r0 = (zz'+ξxx', zy'+z'y, yy')
|
||||
# r1 = (z, y, 0)(0, x', 0) + (z', y', 0)(0, x, 0)
|
||||
# = (0, zx'+xz', yx'+xy')
|
||||
#
|
||||
# Now we can apply Karasuba¹² for a total of 6 multiplications
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == M_Twist
|
||||
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
|
||||
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
|
||||
|
||||
XX.prod2x(l0.a, l1.a)
|
||||
YY.prod2x(l0.b, l1.b)
|
||||
ZZ.prod2x(l0.c, l1.c)
|
||||
|
||||
var V{.noInit.}: doublePrec(Fpkdiv6)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
|
||||
|
||||
# r0 = (zz'+ξxx', zy'+z'y, yy')
|
||||
# r00 = zz'+ξxx'
|
||||
# r01 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
|
||||
# r02 = yy'
|
||||
V.prod2x(XX, NonResidue)
|
||||
V.sum2xMod(V, ZZ)
|
||||
f.c0.c0.redc2x(V)
|
||||
|
||||
t0.sum(l0.c, l0.b)
|
||||
t1.sum(l1.c, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c0.c1.redc2x(V)
|
||||
|
||||
f.c0.c2.redc2x(YY)
|
||||
|
||||
# r1 = (0, zx'+xz', yx'+xy')
|
||||
# r10 = 0
|
||||
# r11 = zx'+xz' = (z+x)(z'+x') - zz' - xx'
|
||||
# r12 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
|
||||
f.c1.c0.setZero()
|
||||
|
||||
t0.sum(l0.c, l0.a)
|
||||
t1.sum(l1.c, l1.a)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, XX)
|
||||
f.c1.c1.redc2x(V)
|
||||
|
||||
t0.sum(l0.a, l0.b)
|
||||
t1.sum(l1.a, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, XX)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c1.c2.redc2x(V)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -556,16 +794,16 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
|
||||
## Multiply 2 lines together
|
||||
## The result is sparse in f.c1.c1
|
||||
# In the following equations (taken from cubic extension implementation)
|
||||
# a0 = (x0, z0)
|
||||
# a1 = (y0, 0)
|
||||
# a2 = ( 0, 0)
|
||||
# b0 = (x1, z1)
|
||||
# b1 = (y1, 0)
|
||||
# b2 = ( 0, 0)
|
||||
# a0 = ( x, z)
|
||||
# a1 = ( y, 0)
|
||||
# a2 = ( 0, 0)
|
||||
# b0 = (x', z')
|
||||
# b1 = (y', 0)
|
||||
# b2 = ( 0, 0)
|
||||
#
|
||||
# v0 = a0 b0 = (x0, z0).(x1, z1)
|
||||
# v1 = a1 b1 = (y0, 0).(y1, 0)
|
||||
# v2 = a2 b2 = ( 0, 0).( 0, 0)
|
||||
# v0 = a0 b0 = (x, z).(x', z')
|
||||
# v1 = a1 b1 = (y, 0).(y', 0)
|
||||
# v2 = a2 b2 = (0, 0).( 0, 0)
|
||||
#
|
||||
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
|
||||
# = ξ (a1 b1 + a2 b1 - v1) + v0
|
||||
@ -575,106 +813,65 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
|
||||
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
|
||||
# = a0 b0 - v0 + v1
|
||||
# = v1
|
||||
#
|
||||
# r1 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
|
||||
#
|
||||
# If we dive deeper:
|
||||
# r1 = a0b1 + a1b0 = (xy'+yx', zy'+yz') (Schoolbook - 4 muls)
|
||||
# r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
|
||||
# r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
|
||||
#
|
||||
# The substraction are all computed as part of v0 and v1
|
||||
# hence r1 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
|
||||
doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv3 = typeof(f.c0)
|
||||
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
|
||||
|
||||
var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3)
|
||||
var V1{.noInit.}: doublePrec(Fpkdiv6)
|
||||
XX.prod2x(l0.a, l1.a)
|
||||
YY.prod2x(l0.b, l1.b)
|
||||
ZZ.prod2x(l0.c, l1.c)
|
||||
|
||||
V0.prod2x_disjoint(l0.a, l0.c, l1.a, l1.c) # a0 b0 = (x0, z0).(x1, z1)
|
||||
V1.prod2x(l0.b, l1.b) # a1 b1 = (y0, 0).(y1, 0)
|
||||
var V{.noInit.}: doublePrec(Fpkdiv6)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
|
||||
|
||||
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1
|
||||
f.c1.c0.sum(l0.a, l0.b) # x0 + y0
|
||||
f.c1.c1.sum(l1.a, l1.b) # x1 + y1
|
||||
f2x.prod2x_disjoint(f.c1.c0, l0.c, f.c1.c1, l1.c) # (x0 + y0, z0)(x1 + y1, z1) = (a0 + a1) * (b0 + b1)
|
||||
f2x.diff2xMod(f2x, V0)
|
||||
f2x.c0.diff2xMod(f2x.c0, V1)
|
||||
f.c1.redc2x(f2x)
|
||||
# r0 = a0 b0 = (x, z).(x', z')
|
||||
# r00 = xx' + βzz'
|
||||
# r01 = (x+z)(x'+z') - zz' - xx'
|
||||
V.prod2x(ZZ, NonResidue)
|
||||
V.sum2xMod(V, XX)
|
||||
f.c0.c0.redc2x(V)
|
||||
|
||||
t0.sum(l0.a, l0.c)
|
||||
t1.sum(l1.a, l1.c)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, XX)
|
||||
f.c0.c1.redc2x(V)
|
||||
|
||||
# r0 = v0
|
||||
f.c0.redc2x(V0)
|
||||
# r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
|
||||
# r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
|
||||
t0.sum(l0.a, l0.b)
|
||||
t1.sum(l1.a, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, XX)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c1.c0.redc2x(V)
|
||||
|
||||
# r2 = v1
|
||||
f.c2.c0.redc2x(V1)
|
||||
t0.sum(l0.c, l0.b)
|
||||
t1.sum(l1.c, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c1.c1.redc2x(V)
|
||||
|
||||
# r2 = v2 = (y, 0).(y', 0) = (yy', 0)
|
||||
f.c2.c0.redc2x(YY)
|
||||
f.c2.c1.setZero()
|
||||
|
||||
func mul_sparse_by_abcdefghij00*[Fpk](
|
||||
a: var Fpk, b: Fpk) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element abcdefghij00
|
||||
## with each representing 𝔽pᵏᐟ⁶ coordinate
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
|
||||
doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv3 = typeof(a.c0)
|
||||
|
||||
# In the following equations (taken from cubic extension implementation)
|
||||
# b0 = (b00, b01)
|
||||
# b1 = (b10, b11)
|
||||
# b2 = (b20, 0)
|
||||
#
|
||||
# v0 = a0 b0 = (f00, f01).(b00, b01)
|
||||
# v1 = a1 b1 = (f10, f11).(b10, b11)
|
||||
# v2 = a2 b2 = (f20, f21).(b20, 0)
|
||||
#
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
|
||||
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
|
||||
|
||||
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
|
||||
var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
|
||||
var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
|
||||
|
||||
V0.prod2x(a.c0, b.c0)
|
||||
V1.prod2x(a.c1, b.c1)
|
||||
V2.mul2x_sparse_by_x0(a.c2, b.c2)
|
||||
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
t0.sum(a.c1, a.c2)
|
||||
t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0)
|
||||
f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
|
||||
f2x.diff2xMod(f2x, V1)
|
||||
f2x.diff2xMod(f2x, V2)
|
||||
f2x.prod2x(f2x, NonResidue)
|
||||
f2x.sum2xMod(f2x, V0)
|
||||
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
|
||||
t0.sum(a.c0, a.c1)
|
||||
t1.sum(b.c0, b.c1)
|
||||
g2x.prod2x(t0, t1)
|
||||
g2x.diff2xMod(g2x, V0)
|
||||
g2x.diff2xMod(g2x, V1)
|
||||
|
||||
# r₂ = (a₀ + a₂) and (b₀ + b₂)
|
||||
t0.sum(a.c0, a.c2)
|
||||
t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0)
|
||||
|
||||
# Now we are aliasing free
|
||||
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
a.c0.redc2x(f2x)
|
||||
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
|
||||
f2x.prod2x(V2, NonResidue)
|
||||
g2x.sum2xMod(g2x, f2x)
|
||||
a.c1.redc2x(g2x)
|
||||
|
||||
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
|
||||
f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
|
||||
f2x.diff2xMod(f2x, V0)
|
||||
f2x.diff2xMod(f2x, V2)
|
||||
f2x.sum2xMod(f2x, V1)
|
||||
a.c2.redc2x(f2x)
|
||||
|
||||
# M-Twist
|
||||
# ------------------------------------------------------------
|
||||
|
||||
@ -769,16 +966,16 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
|
||||
## Multiply 2 lines together
|
||||
## The result is sparse in f.c1.c1
|
||||
# In the following equations (taken from cubic extension implementation)
|
||||
# a0 = (z0, x0)
|
||||
# a0 = ( z, x)
|
||||
# a1 = ( 0, 0)
|
||||
# a2 = (y0, 0)
|
||||
# b0 = (z1, x1)
|
||||
# a2 = ( y, 0)
|
||||
# b0 = (z', x')
|
||||
# b1 = ( 0, 0)
|
||||
# b2 = (y0, 0)
|
||||
# b2 = (y', 0)
|
||||
#
|
||||
# v0 = a0 b0 = (z0, x0).(z1, x1)
|
||||
# v1 = a1 b1 = ( 0, 0).( 0, 0)
|
||||
# v2 = a2 b2 = (y0, 0).(y1, 0)
|
||||
# v0 = a0 b0 = (z, x).(z', x')
|
||||
# v1 = a1 b1 = (0, 0).(0 , 0)
|
||||
# v2 = a2 b2 = (y, 0).(y', 0)
|
||||
#
|
||||
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
|
||||
# = ξ (a1 b2 + a2 b2 - v2) + v0
|
||||
@ -788,36 +985,221 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
|
||||
# = ξ v2
|
||||
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
|
||||
# = (a0 + a2) * (b0 + b2) - v0 - v2
|
||||
#
|
||||
# r2 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
|
||||
#
|
||||
# If we dive deeper:
|
||||
# r2 = a0b2 + a2b0 = (zy'+yz', xy'+x'y) (Schoolbook - 4 muls)
|
||||
# r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
|
||||
# r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
|
||||
#
|
||||
# The substraction are all computed as part of v0 and v2
|
||||
# hence r2 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == M_Twist
|
||||
doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
|
||||
doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv3 = typeof(f.c0)
|
||||
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
|
||||
|
||||
var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3)
|
||||
var V2{.noInit.}: doublePrec(Fpkdiv6)
|
||||
XX.prod2x(l0.a, l1.a)
|
||||
YY.prod2x(l0.b, l1.b)
|
||||
ZZ.prod2x(l0.c, l1.c)
|
||||
|
||||
V0.prod2x_disjoint(l0.c, l0.a, l1.c, l1.a) # a0 b0 = (z0, x0).(z1, x1)
|
||||
V2.prod2x(l0.b, l1.b) # a2 b2 = (y0, 0).(y1, 0)
|
||||
var V{.noInit.}: doublePrec(Fpkdiv6)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
|
||||
|
||||
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2
|
||||
f.c2.c0.sum(l0.b, l0.c) # y0 + z0
|
||||
f.c2.c1.sum(l1.b, l1.c) # y1 + z1
|
||||
f2x.prod2x_disjoint(f.c2.c0, l0.a, f.c2.c1, l1.a) # (z0 + y0, x0).(z1 + y1, x1) = (a0 + a2) * (b0 + b2)
|
||||
f2x.diff2xMod(f2x, V0) # (a0 + a2) * (b0 + b2) - v0
|
||||
f2x.c0.diff2xMod(f2x.c0, V2) # (a0 + a2) * (b0 + b2) - v0 - v2
|
||||
f.c2.redc2x(f2x)
|
||||
# r0 = a0 b0 = (z, x).(z', x')
|
||||
# r00 = zz' + βxx'
|
||||
# r01 = (z+x)(z'+x') - zz' - xx'
|
||||
V.prod2x(XX, NonResidue)
|
||||
V.sum2xMod(V, ZZ)
|
||||
f.c0.c0.redc2x(V)
|
||||
|
||||
t0.sum(l0.c, l0.a)
|
||||
t1.sum(l1.c, l1.a)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, XX)
|
||||
f.c0.c1.redc2x(V)
|
||||
|
||||
# r1 = ξ v2
|
||||
f.c1.c1.redc2x(V2)
|
||||
# r1 = ξ v2 = ξ(y, 0).(y', 0) = ξ(yy', 0) = (0, yy')
|
||||
f.c1.c0.setZero()
|
||||
f.c1.c1.redc2x(YY)
|
||||
|
||||
# r0 = v0
|
||||
f.c0.redc2x(V0)
|
||||
# r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
|
||||
# r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
|
||||
t0.sum(l0.c, l0.b)
|
||||
t1.sum(l1.c, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, ZZ)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c2.c0.redc2x(V)
|
||||
|
||||
func mul_sparse_by_abcd00efghij*[Fpk](
|
||||
t0.sum(l0.a, l0.b)
|
||||
t1.sum(l1.a, l1.b)
|
||||
V.prod2x(t0, t1)
|
||||
V.diff2xMod(V, XX)
|
||||
V.diff2xMod(V, YY)
|
||||
f.c2.c1.redc2x(V)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Sparse multiplication
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
func mul_sparse_by_abcdefghij00_quad_over_cube*[Fpk](
|
||||
a: var Fpk, b: Fpk) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element abcdefghij00
|
||||
## with each representing 𝔽pᵏᐟ⁶ coordinate
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
|
||||
doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv2 = typeof(a.c0)
|
||||
|
||||
# In the following equations (taken from quadratic extension implementation)
|
||||
# b0 = (b00, b01, b02)
|
||||
# b1 = (b10, b11, 0)
|
||||
#
|
||||
# v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
|
||||
# v1 = a1 b1 = (a10, a11, a12).(b10, b11, 0)
|
||||
#
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
|
||||
|
||||
when Fpk.C.has_large_field_elem():
|
||||
var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
|
||||
|
||||
# v2 <- (a0 + a1)(b0 + b1)
|
||||
v0.sum(a.c0, a.c1)
|
||||
v1.c0.sum(b.c0.c0, b.c1.c0)
|
||||
v1.c1.sum(b.c0.c1, b.c1.c1)
|
||||
v1.c2 = b.c0.c2
|
||||
v2.prod(v0, v1)
|
||||
|
||||
# v0 <- a0 b0
|
||||
# v1 <- a1 b1
|
||||
v0.prod(a.c0, b.c0)
|
||||
v1.mul_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
|
||||
|
||||
# aliasing: a and b unneeded now
|
||||
|
||||
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
|
||||
a.c1.diff(v2, v0)
|
||||
a.c1 -= v1
|
||||
|
||||
# r0 <- a0 b0 + β a1 b1
|
||||
v1 *= NonResidue
|
||||
a.c0.sum(v0, v1)
|
||||
|
||||
else: # Lazy reduction
|
||||
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
|
||||
|
||||
# v2 <- (a0 + a1)(b0 + b1)
|
||||
t0.sum(a.c0, a.c1)
|
||||
t1.c0.sum(b.c0.c0, b.c1.c0)
|
||||
t1.c1.sum(b.c0.c1, b.c1.c1)
|
||||
t1.c2 = b.c0.c2
|
||||
V2.prod2x(t0, t1)
|
||||
|
||||
# v0 <- a0 b0
|
||||
# v1 <- a1 b1
|
||||
V0.prod2x(a.c0, b.c0)
|
||||
V1.mul2x_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
|
||||
|
||||
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
|
||||
V2.diff2xMod(V2, V0)
|
||||
V2.diff2xMod(V2, V1)
|
||||
a.c1.redc2x(V2)
|
||||
|
||||
# r0 <- a0 b0 + β a1 b1
|
||||
V1.prod2x(V1, NonResidue)
|
||||
V2.sum2xMod(V0, V1)
|
||||
a.c0.redc2x(V2)
|
||||
|
||||
func mul_sparse_by_abcdef00ghij_quad_over_cube*[Fpk](
|
||||
a: var Fpk, b: Fpk) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element abcdef00ghij
|
||||
## with each representing 𝔽pᵏᐟ⁶ coordinate
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == M_Twist
|
||||
doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
|
||||
doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv2 = typeof(a.c0)
|
||||
|
||||
# In the following equations (taken from quadratic extension implementation)
|
||||
# b0 = (b00, b01, b02)
|
||||
# b1 = ( 0, b11, b12)
|
||||
#
|
||||
# v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
|
||||
# v1 = a1 b1 = (a10, a11, a12).( 0, b11, b12)
|
||||
#
|
||||
# r0 = a0 b0 + ξ a1 b1
|
||||
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
|
||||
|
||||
when Fpk.C.has_large_field_elem():
|
||||
var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
|
||||
|
||||
# v2 <- (a0 + a1)(b0 + b1)
|
||||
v0.sum(a.c0, a.c1)
|
||||
v1.c0 = b.c0.c0
|
||||
v1.c1.sum(b.c0.c1, b.c1.c1)
|
||||
v1.c2.sum(b.c0.c2, b.c1.c2)
|
||||
v2.prod(v0, v1)
|
||||
|
||||
# v0 <- a0 b0
|
||||
# v1 <- a1 b1
|
||||
v0.prod(a.c0, b.c0)
|
||||
v1.mul_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
|
||||
|
||||
# aliasing: a and b unneeded now
|
||||
|
||||
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
|
||||
a.c1.diff(v2, v0)
|
||||
a.c1 -= v1
|
||||
|
||||
# r0 <- a0 b0 + β a1 b1
|
||||
v1 *= NonResidue
|
||||
a.c0.sum(v0, v1)
|
||||
|
||||
else: # Lazy reduction
|
||||
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
|
||||
var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
|
||||
|
||||
# v2 <- (a0 + a1)(b0 + b1)
|
||||
t0.sum(a.c0, a.c1)
|
||||
t1.c0 = b.c0.c0
|
||||
t1.c1.sum(b.c0.c1, b.c1.c1)
|
||||
t1.c2.sum(b.c0.c2, b.c1.c2)
|
||||
V2.prod2x(t0, t1)
|
||||
|
||||
# v0 <- a0 b0
|
||||
# v1 <- a1 b1
|
||||
V0.prod2x(a.c0, b.c0)
|
||||
V1.mul2x_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
|
||||
|
||||
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
|
||||
V2.diff2xMod(V2, V0)
|
||||
V2.diff2xMod(V2, V1)
|
||||
a.c1.redc2x(V2)
|
||||
|
||||
# r0 <- a0 b0 + β a1 b1
|
||||
V1.prod2x(V1, NonResidue)
|
||||
V2.sum2xMod(V0, V1)
|
||||
a.c0.redc2x(V2)
|
||||
|
||||
|
||||
func mul_sparse_by_abcd00efghij_cube_over_quad*[Fpk](
|
||||
a: var Fpk, b: Fpk) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element abcd00efghij
|
||||
@ -888,15 +1270,92 @@ func mul_sparse_by_abcd00efghij*[Fpk](
|
||||
f2x.sum2xMod(f2x, V1)
|
||||
a.c2.redc2x(f2x)
|
||||
|
||||
func mul_sparse_by_abcdefghij00_cube_over_quad*[Fpk](
|
||||
a: var Fpk, b: Fpk) =
|
||||
## Sparse multiplication of an 𝔽pᵏ element
|
||||
## by a sparse 𝔽pᵏ element abcdefghij00
|
||||
## with each representing 𝔽pᵏᐟ⁶ coordinate
|
||||
|
||||
static:
|
||||
doAssert Fpk.C.getSexticTwist() == D_Twist
|
||||
doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
|
||||
doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
|
||||
|
||||
type Fpkdiv3 = typeof(a.c0)
|
||||
|
||||
# In the following equations (taken from cubic extension implementation)
|
||||
# b0 = (b00, b01)
|
||||
# b1 = (b10, b11)
|
||||
# b2 = (b20, 0)
|
||||
#
|
||||
# v0 = a0 b0 = (f00, f01).(b00, b01)
|
||||
# v1 = a1 b1 = (f10, f11).(b10, b11)
|
||||
# v2 = a2 b2 = (f20, f21).(b20, 0)
|
||||
#
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
|
||||
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
|
||||
|
||||
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
|
||||
var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
|
||||
var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
|
||||
|
||||
V0.prod2x(a.c0, b.c0)
|
||||
V1.prod2x(a.c1, b.c1)
|
||||
V2.mul2x_sparse_by_x0(a.c2, b.c2)
|
||||
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
t0.sum(a.c1, a.c2)
|
||||
t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0)
|
||||
f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
|
||||
f2x.diff2xMod(f2x, V1)
|
||||
f2x.diff2xMod(f2x, V2)
|
||||
f2x.prod2x(f2x, NonResidue)
|
||||
f2x.sum2xMod(f2x, V0)
|
||||
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
|
||||
t0.sum(a.c0, a.c1)
|
||||
t1.sum(b.c0, b.c1)
|
||||
g2x.prod2x(t0, t1)
|
||||
g2x.diff2xMod(g2x, V0)
|
||||
g2x.diff2xMod(g2x, V1)
|
||||
|
||||
# r₂ = (a₀ + a₂) and (b₀ + b₂)
|
||||
t0.sum(a.c0, a.c2)
|
||||
t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0)
|
||||
|
||||
# Now we are aliasing free
|
||||
|
||||
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
|
||||
a.c0.redc2x(f2x)
|
||||
|
||||
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
|
||||
f2x.prod2x(V2, NonResidue)
|
||||
g2x.sum2xMod(g2x, f2x)
|
||||
a.c1.redc2x(g2x)
|
||||
|
||||
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
|
||||
f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
|
||||
f2x.diff2xMod(f2x, V0)
|
||||
f2x.diff2xMod(f2x, V2)
|
||||
f2x.sum2xMod(f2x, V1)
|
||||
a.c2.redc2x(f2x)
|
||||
|
||||
# Dispatch
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func mul_by_line*[Fpk, Fpkdiv6](f: var Fpk, line: Line[Fpkdiv6]) {.inline.} =
|
||||
## Multiply an element of Fp12 by a sparse line function
|
||||
when Fpk.C.getSexticTwist() == D_Twist:
|
||||
f.mul_sparse_by_line_acb000(line)
|
||||
when f is CubicExt:
|
||||
f.mul_sparse_by_line_acb000(line)
|
||||
else:
|
||||
f.mul_sparse_by_line_a00bc0(line)
|
||||
elif Fpk.C.getSexticTwist() == M_Twist:
|
||||
f.mul_sparse_by_line_ca00b0(line)
|
||||
when f is CubicExt:
|
||||
f.mul_sparse_by_line_ca00b0(line)
|
||||
else:
|
||||
f.mul_sparse_by_line_cb00a0(line)
|
||||
else:
|
||||
{.error: "A line function assumes that the curve has a twist".}
|
||||
|
||||
@ -905,18 +1364,30 @@ func prod_from_2_lines*[Fpk, Fpkdiv6](f: var Fpk, line0, line1: Line[Fpkdiv6]) {
|
||||
## and store the result in f
|
||||
## f is overwritten
|
||||
when Fpk.C.getSexticTwist() == D_Twist:
|
||||
f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1)
|
||||
when f is CubicExt:
|
||||
f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1)
|
||||
else:
|
||||
f.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1)
|
||||
elif Fpk.C.getSexticTwist() == M_Twist:
|
||||
f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1)
|
||||
when f is CubicExt:
|
||||
f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1)
|
||||
else:
|
||||
f.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1)
|
||||
else:
|
||||
{.error: "A line function assumes that the curve has a twist".}
|
||||
|
||||
func mul_by_prod_of_2_lines*[Fpk](f: var Fpk, g: Fpk) {.inline.} =
|
||||
## Multiply f by the somewhat sparse product of 2 lines
|
||||
when Fpk.C.getSexticTwist() == D_Twist:
|
||||
f.mul_sparse_by_abcdefghij00(g)
|
||||
when f is CubicExt:
|
||||
f.mul_sparse_by_abcdefghij00_cube_over_quad(g)
|
||||
else:
|
||||
f.mul_sparse_by_abcdefghij00_quad_over_cube(g)
|
||||
elif Fpk.C.getSexticTwist() == M_Twist:
|
||||
f.mul_sparse_by_abcd00efghij(g)
|
||||
when f is CubicExt:
|
||||
f.mul_sparse_by_abcd00efghij_cube_over_quad(g)
|
||||
else:
|
||||
f.mul_sparse_by_abcdef00ghij_quad_over_cube(g)
|
||||
else:
|
||||
{.error: "A line function assumes that the curve has a twist".}
|
||||
|
||||
|
@ -15,7 +15,7 @@ import
|
||||
ec_shortweierstrass_projective
|
||||
],
|
||||
../isogenies/frobenius,
|
||||
../curves/zoo_pairings,
|
||||
../constants/zoo_pairings,
|
||||
../arithmetic,
|
||||
./cyclotomic_subgroup,
|
||||
./lines_eval,
|
||||
|
@ -15,7 +15,7 @@ import
|
||||
ec_shortweierstrass_projective
|
||||
],
|
||||
../isogenies/frobenius,
|
||||
../curves/zoo_pairings,
|
||||
../constants/zoo_pairings,
|
||||
./lines_eval,
|
||||
./cyclotomic_subgroup,
|
||||
./miller_loops
|
||||
|
@ -15,7 +15,7 @@ import
|
||||
ec_shortweierstrass_projective
|
||||
],
|
||||
../isogenies/frobenius,
|
||||
../curves/zoo_pairings,
|
||||
../constants/zoo_pairings,
|
||||
./lines_eval,
|
||||
./miller_loops
|
||||
|
||||
|
@ -74,6 +74,7 @@ type
|
||||
kRegister
|
||||
kFromArray
|
||||
kArrayAddr
|
||||
k2dArrayAddr
|
||||
|
||||
Operand* = object
|
||||
desc*: OperandDesc
|
||||
@ -84,6 +85,9 @@ type
|
||||
offset: int
|
||||
of kArrayAddr:
|
||||
buf: seq[Operand]
|
||||
of k2dArrayAddr:
|
||||
dims: array[2, int]
|
||||
buf2d: seq[Operand]
|
||||
|
||||
OperandDesc* = ref object
|
||||
asmId*: string # [a] - ASM id
|
||||
@ -141,11 +145,17 @@ proc `[]`*(opArray: OperandArray, index: int): Operand =
|
||||
func `[]`*(opArray: var OperandArray, index: int): var Operand =
|
||||
opArray.buf[index]
|
||||
|
||||
func `[]`*(arrayAddr: Operand, index: int): Operand =
|
||||
arrayAddr.buf[index]
|
||||
func `[]`*(arrAddr: Operand, index: int): Operand =
|
||||
arrAddr.buf[index]
|
||||
|
||||
func `[]`*(arrayAddr: var Operand, index: int): var Operand =
|
||||
arrayAddr.buf[index]
|
||||
func `[]`*(arrAddr: var Operand, index: int): var Operand =
|
||||
arrAddr.buf[index]
|
||||
|
||||
func `[]`*(arr2dAddr: Operand, i, j: int): Operand =
|
||||
arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
|
||||
|
||||
func `[]`*(arr2dAddr: var Operand, i, j: int): var Operand =
|
||||
arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
|
||||
|
||||
func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x86 =
|
||||
result.wordSize = sizeof(Word)
|
||||
@ -236,6 +246,22 @@ func asArrayAddr*(op: Register, len: int): Operand =
|
||||
offset: i
|
||||
)
|
||||
|
||||
func as2dArrayAddr*(op: Operand, rows, cols: int): Operand =
|
||||
## Use the value stored in an operand as an array address
|
||||
doAssert op.desc.rm in {Reg, PointerInReg, ElemsInReg}+SpecificRegisters
|
||||
result = Operand(
|
||||
kind: k2dArrayAddr,
|
||||
desc: nil,
|
||||
dims: [rows, cols],
|
||||
buf2d: newSeq[Operand](rows*cols)
|
||||
)
|
||||
for i in 0 ..< rows*cols:
|
||||
result.buf2d[i] = Operand(
|
||||
desc: op.desc,
|
||||
kind: kFromArray,
|
||||
offset: i
|
||||
)
|
||||
|
||||
# Code generation
|
||||
# ------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@ -344,7 +370,7 @@ func generate*(a: Assembler_x86): NimNode =
|
||||
|
||||
func getStrOffset(a: Assembler_x86, op: Operand): string =
|
||||
if op.kind != kFromArray:
|
||||
if op.kind == kArrayAddr:
|
||||
if op.kind in {kArrayAddr, k2dArrayAddr}:
|
||||
# We are operating on an array pointer
|
||||
# instead of array elements
|
||||
if op.buf[0].desc.constraint == ClobberedRegister:
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
import
|
||||
../math/[ec_shortweierstrass, pairings],
|
||||
../math/curves/zoo_generators,
|
||||
../math/constants/zoo_generators,
|
||||
../hash_to_curve/hash_to_curve,
|
||||
../hashes
|
||||
|
||||
|
@ -12,7 +12,7 @@ import
|
||||
../constantine/math/config/[common, curves],
|
||||
../constantine/math/[arithmetic, extension_fields],
|
||||
../constantine/math/elliptic/ec_shortweierstrass_projective,
|
||||
../constantine/math/curves/zoo_subgroups,
|
||||
../constantine/math/constants/zoo_subgroups,
|
||||
../constantine/math/pairing/pairing_bls12,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe
|
||||
|
@ -27,7 +27,7 @@ import
|
||||
ec_twistededwards_projective,
|
||||
ec_scalar_mul],
|
||||
../../constantine/math/io/[io_bigints, io_fields, io_ec],
|
||||
../../constantine/math/curves/zoo_subgroups,
|
||||
../../constantine/math/constants/zoo_subgroups,
|
||||
# Test utilities
|
||||
../../helpers/prng_unsafe,
|
||||
./support/ec_reference_scalar_mult
|
||||
|
@ -28,7 +28,7 @@ echo "test_finite_fields_mulsquare xoshiro512** seed: ", seed
|
||||
static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option"
|
||||
|
||||
proc sanity(C: static Curve) =
|
||||
test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]":
|
||||
test "Squaring 0,1,2 with " & $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]":
|
||||
block: # 0² mod
|
||||
var n: Fp[C]
|
||||
|
||||
@ -310,3 +310,74 @@ suite "Modular squaring - bugs highlighted by property-based testing":
|
||||
check:
|
||||
bool(a2mul == expected)
|
||||
bool(a2sqr == expected)
|
||||
|
||||
|
||||
proc random_sumprod(C: static Curve, N: static int) =
|
||||
template sumprod_test(random_instancer: untyped) =
|
||||
block:
|
||||
var a: array[N, Fp[C]]
|
||||
var b: array[N, Fp[C]]
|
||||
|
||||
for i in 0 ..< N:
|
||||
a[i] = rng.random_instancer(Fp[C])
|
||||
b[i] = rng.random_instancer(Fp[C])
|
||||
|
||||
var r, r_ref, t: Fp[C]
|
||||
|
||||
r_ref.prod(a[0], b[0])
|
||||
for i in 1 ..< N:
|
||||
t.prod(a[i], b[i])
|
||||
r_ref += t
|
||||
|
||||
r.sumprod(a, b)
|
||||
|
||||
doAssert bool(r == r_ref)
|
||||
|
||||
template sumProdMax() =
|
||||
block:
|
||||
var a: array[N, Fp[C]]
|
||||
var b: array[N, Fp[C]]
|
||||
|
||||
for i in 0 ..< N:
|
||||
a[i].setMinusOne()
|
||||
b[i].setMinusOne()
|
||||
|
||||
var r, r_ref, t: Fp[C]
|
||||
|
||||
r_ref.prod(a[0], b[0])
|
||||
for i in 1 ..< N:
|
||||
t.prod(a[i], b[i])
|
||||
r_ref += t
|
||||
|
||||
r.sumprod(a, b)
|
||||
|
||||
doAssert bool(r == r_ref)
|
||||
|
||||
sumprod_test(random_unsafe)
|
||||
sumprod_test(randomHighHammingWeight)
|
||||
sumprod_test(random_long01Seq)
|
||||
sumProdMax()
|
||||
|
||||
suite "Random sum products is consistent with naive " & " [" & $WordBitwidth & "-bit mode]":
|
||||
|
||||
const MaxLength = 8
|
||||
test "Random sum products mod P-224]":
|
||||
for _ in 0 ..< Iters:
|
||||
staticFor N, 2, MaxLength:
|
||||
random_sumprod(P224, N)
|
||||
test "Random sum products mod BN254_Nogami]":
|
||||
for _ in 0 ..< Iters:
|
||||
staticFor N, 2, MaxLength:
|
||||
random_sumprod(BN254_Nogami, N)
|
||||
test "Random sum products mod BN254_Snarks]":
|
||||
for _ in 0 ..< Iters:
|
||||
staticFor N, 2, MaxLength:
|
||||
random_sumprod(BN254_Snarks, N)
|
||||
test "Random sum products mod BLS12_377]":
|
||||
for _ in 0 ..< Iters:
|
||||
staticFor N, 2, MaxLength:
|
||||
random_sumprod(BLS12_377, N)
|
||||
test "Random sum products mod BLS12_381]":
|
||||
for _ in 0 ..< Iters:
|
||||
staticFor N, 2, MaxLength:
|
||||
random_sumprod(BLS12_381, N)
|
26
tests/math/t_fp12_bn254_nogami.nim
Normal file
26
tests/math/t_fp12_bn254_nogami.nim
Normal file
@ -0,0 +1,26 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
# Internals
|
||||
../../constantine/math/extension_fields,
|
||||
../../constantine/math/config/curves,
|
||||
# Test utilities
|
||||
./t_fp_tower_template
|
||||
|
||||
const TestCurves = [
|
||||
BN254_Nogami,
|
||||
]
|
||||
|
||||
runTowerTests(
|
||||
ExtDegree = 12,
|
||||
Iters = 12,
|
||||
TestCurves = TestCurves,
|
||||
moduleName = "test_fp12_" & $BN254_Nogami,
|
||||
testSuiteDesc = "𝔽p12 = 𝔽p6[w] " & $BN254_Nogami
|
||||
)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user