Fp12 over fp6 (#201)

* introduce sumprod for direct fp6_mul

* change curves -> constants

* forgotten constants

* Full pairing using Fp2->Fp6->Fp12 towering
This commit is contained in:
Mamy Ratsimbazafy 2022-08-14 09:48:10 +02:00 committed by GitHub
parent 37354e9ca8
commit 9770b3108c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
106 changed files with 2141 additions and 431 deletions

View File

@ -18,7 +18,7 @@ import
../constantine/math/config/curves, ../constantine/math/config/curves,
../constantine/math/arithmetic, ../constantine/math/arithmetic,
../constantine/math/extension_fields, ../constantine/math/extension_fields,
../constantine/math/curves/zoo_square_roots, ../constantine/math/constants/zoo_square_roots,
# Helpers # Helpers
../helpers/prng_unsafe, ../helpers/prng_unsafe,
./bench_blueprint ./bench_blueprint
@ -47,6 +47,20 @@ template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
measure(iters, startTime, stopTime, startClk, stopClk, body) measure(iters, startTime, stopTime, startClk, stopClk, body)
report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters) report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters)
func random_unsafe(rng: var RngState, a: var FpDbl) =
## Initialize a standalone Double-Width field element
## we don't reduce it modulo p², this is only used for benchmark
let aHi = rng.random_unsafe(Fp[FpDbl.C])
let aLo = rng.random_unsafe(Fp[FpDbl.C])
for i in 0 ..< aLo.mres.limbs.len:
a.limbs2x[i] = aLo.mres.limbs[i]
for i in 0 ..< aHi.mres.limbs.len:
a.limbs2x[aLo.mres.limbs.len+i] = aHi.mres.limbs[i]
func random_unsafe(rng: var RngState, a: var ExtensionField2x) =
for i in 0 ..< a.coords.len:
rng.random_unsafe(a.coords[i])
proc addBench*(T: typedesc, iters: int) = proc addBench*(T: typedesc, iters: int) =
var x = rng.random_unsafe(T) var x = rng.random_unsafe(T)
let y = rng.random_unsafe(T) let y = rng.random_unsafe(T)
@ -92,21 +106,39 @@ proc sqrBench*(T: typedesc, iters: int) =
bench("Squaring", T, iters): bench("Squaring", T, iters):
r.square(x) r.square(x)
proc mulUnrBench*(T: typedesc, iters: int) = proc mul2xUnrBench*(T: typedesc, iters: int) =
var r: doublePrec(T) var r: doublePrec(T)
let x = rng.random_unsafe(T) let x = rng.random_unsafe(T)
let y = rng.random_unsafe(T) let y = rng.random_unsafe(T)
preventOptimAway(r) preventOptimAway(r)
bench("Multiplication unreduced", T, iters): bench("Multiplication 2x unreduced", T, iters):
r.prod2x(x, y) r.prod2x(x, y)
proc sqrUnrBench*(T: typedesc, iters: int) = proc sqr2xUnrBench*(T: typedesc, iters: int) =
var r: doublePrec(T) var r: doublePrec(T)
let x = rng.random_unsafe(T) let x = rng.random_unsafe(T)
preventOptimAway(r) preventOptimAway(r)
bench("Squaring unreduced", T, iters): bench("Squaring 2x unreduced", T, iters):
r.square2x(x) r.square2x(x)
proc rdc2xBench*(T: typedesc, iters: int) =
var r: T
var t: doublePrec(T)
rng.random_unsafe(t)
preventOptimAway(r)
bench("Redc 2x", T, iters):
r.redc2x(t)
proc sumprodBench*(T: typedesc, iters: int) =
var r: T
let a = rng.random_unsafe(T)
let b = rng.random_unsafe(T)
let u = rng.random_unsafe(T)
let v = rng.random_unsafe(T)
preventOptimAway(r)
bench("Linear combination", T, iters):
r.sumprod([a, b], [u, v])
proc toBigBench*(T: typedesc, iters: int) = proc toBigBench*(T: typedesc, iters: int) =
var r: matchingBigInt(T.C) var r: matchingBigInt(T.C)
let x = rng.random_unsafe(T) let x = rng.random_unsafe(T)

View File

@ -11,7 +11,7 @@ import
../constantine/math/config/curves, ../constantine/math/config/curves,
../constantine/math/arithmetic, ../constantine/math/arithmetic,
../constantine/math/io/io_bigints, ../constantine/math/io/io_bigints,
../constantine/math/curves/zoo_square_roots, ../constantine/math/constants/zoo_square_roots,
# Helpers # Helpers
../helpers/static_for, ../helpers/static_for,
./bench_fields_template ./bench_fields_template
@ -52,8 +52,11 @@ proc main() =
mulBench(Fp[curve], Iters) mulBench(Fp[curve], Iters)
sqrBench(Fp[curve], Iters) sqrBench(Fp[curve], Iters)
smallSeparator() smallSeparator()
mulUnrBench(Fp[curve], Iters) mul2xUnrBench(Fp[curve], Iters)
sqrUnrBench(Fp[curve], Iters) sqr2xUnrBench(Fp[curve], Iters)
rdc2xBench(Fp[curve], Iters)
smallSeparator()
sumprodBench(Fp[curve], Iters)
smallSeparator() smallSeparator()
toBigBench(Fp[curve], Iters) toBigBench(Fp[curve], Iters)
toFieldBench(Fp[curve], Iters) toFieldBench(Fp[curve], Iters)

View File

@ -46,8 +46,9 @@ proc main() =
mulBench(Fp2[curve], Iters) mulBench(Fp2[curve], Iters)
sqrBench(Fp2[curve], Iters) sqrBench(Fp2[curve], Iters)
smallSeparator() smallSeparator()
mulUnrBench(Fp2[curve], Iters) mul2xUnrBench(Fp2[curve], Iters)
sqrUnrBench(Fp2[curve], Iters) sqr2xUnrBench(Fp2[curve], Iters)
rdc2xBench(Fp2[curve], Iters)
smallSeparator() smallSeparator()
invBench(Fp2[curve], InvIters) invBench(Fp2[curve], InvIters)
isSquareBench(Fp2[curve], InvIters) isSquareBench(Fp2[curve], InvIters)

View File

@ -46,8 +46,9 @@ proc main() =
mulBench(Fp4[curve], Iters) mulBench(Fp4[curve], Iters)
sqrBench(Fp4[curve], Iters) sqrBench(Fp4[curve], Iters)
smallSeparator() smallSeparator()
mulUnrBench(Fp2[curve], Iters) mul2xUnrBench(Fp4[curve], Iters)
sqrUnrBench(Fp2[curve], Iters) sqr2xUnrBench(Fp4[curve], Iters)
rdc2xBench(Fp4[curve], Iters)
smallSeparator() smallSeparator()
invBench(Fp4[curve], InvIters) invBench(Fp4[curve], InvIters)
separator() separator()

View File

@ -44,8 +44,9 @@ proc main() =
mulBench(Fp6[curve], Iters) mulBench(Fp6[curve], Iters)
sqrBench(Fp6[curve], Iters) sqrBench(Fp6[curve], Iters)
smallSeparator() smallSeparator()
mulUnrBench(Fp6[curve], Iters) mul2xUnrBench(Fp6[curve], Iters)
sqrUnrBench(Fp6[curve], Iters) sqr2xUnrBench(Fp6[curve], Iters)
rdc2xBench(Fp6[curve], Iters)
smallSeparator() smallSeparator()
invBench(Fp6[curve], InvIters) invBench(Fp6[curve], InvIters)
separator() separator()

View File

@ -19,14 +19,14 @@ import
../constantine/math/arithmetic, ../constantine/math/arithmetic,
../constantine/math/extension_fields, ../constantine/math/extension_fields,
../constantine/math/ec_shortweierstrass, ../constantine/math/ec_shortweierstrass,
../constantine/math/curves/zoo_subgroups, ../constantine/math/constants/zoo_subgroups,
../constantine/math/pairing/[ ../constantine/math/pairing/[
cyclotomic_subgroup, cyclotomic_subgroup,
lines_eval, lines_eval,
pairing_bls12, pairing_bls12,
pairing_bn pairing_bn
], ],
../constantine/math/curves/zoo_pairings, ../constantine/math/constants/zoo_pairings,
# Helpers # Helpers
../helpers/prng_unsafe, ../helpers/prng_unsafe,
./bench_blueprint ./bench_blueprint

View File

@ -22,13 +22,13 @@ import
ec_shortweierstrass_projective, ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian, ec_shortweierstrass_jacobian,
ec_scalar_mul, ec_endomorphism_accel], ec_scalar_mul, ec_endomorphism_accel],
../constantine/math/curves/zoo_subgroups, ../constantine/math/constants/zoo_subgroups,
../constantine/math/pairing/[ ../constantine/math/pairing/[
cyclotomic_subgroup, cyclotomic_subgroup,
pairing_bls12, pairing_bls12,
pairing_bn pairing_bn
], ],
../constantine/math/curves/zoo_pairings, ../constantine/math/constants/zoo_pairings,
../constantine/hashes, ../constantine/hashes,
../constantine/hash_to_curve/hash_to_curve, ../constantine/hash_to_curve/hash_to_curve,
# Helpers # Helpers

View File

@ -52,10 +52,12 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
("tests/math/t_fp2.nim", false), ("tests/math/t_fp2.nim", false),
("tests/math/t_fp2_sqrt.nim", false), ("tests/math/t_fp2_sqrt.nim", false),
("tests/math/t_fp4.nim", false), ("tests/math/t_fp4.nim", false),
("tests/math/t_fp6_bn254_nogami.nim", false),
("tests/math/t_fp6_bn254_snarks.nim", false), ("tests/math/t_fp6_bn254_snarks.nim", false),
("tests/math/t_fp6_bls12_377.nim", false), ("tests/math/t_fp6_bls12_377.nim", false),
("tests/math/t_fp6_bls12_381.nim", false), ("tests/math/t_fp6_bls12_381.nim", false),
("tests/math/t_fp6_bw6_761.nim", false), ("tests/math/t_fp6_bw6_761.nim", false),
("tests/math/t_fp12_bn254_nogami.nim", false),
("tests/math/t_fp12_bn254_snarks.nim", false), ("tests/math/t_fp12_bn254_snarks.nim", false),
("tests/math/t_fp12_bls12_377.nim", false), ("tests/math/t_fp12_bls12_377.nim", false),
("tests/math/t_fp12_bls12_381.nim", false), ("tests/math/t_fp12_bls12_381.nim", false),

View File

@ -13,7 +13,7 @@ import
ec_shortweierstrass, ec_shortweierstrass,
extension_fields, extension_fields,
arithmetic, arithmetic,
curves/zoo_subgroups constants/zoo_subgroups
], ],
./math/io/[io_bigints, io_fields], ./math/io/[io_bigints, io_fields],
hashes, hashes,

View File

@ -23,7 +23,7 @@ import
cyclotomic_subgroup, cyclotomic_subgroup,
lines_eval lines_eval
], ],
./math/curves/zoo_pairings, ./math/constants/zoo_pairings,
./hash_to_curve/hash_to_curve ./hash_to_curve/hash_to_curve
# ############################################################ # ############################################################

View File

@ -13,7 +13,7 @@ import
./math/arithmetic/limbs_montgomery, ./math/arithmetic/limbs_montgomery,
./math/ec_shortweierstrass, ./math/ec_shortweierstrass,
./math/pairing/[pairing_bn, miller_loops, cyclotomic_subgroup], ./math/pairing/[pairing_bn, miller_loops, cyclotomic_subgroup],
./math/curves/zoo_subgroups, ./math/constants/zoo_subgroups,
./math/io/[io_bigints, io_fields] ./math/io/[io_bigints, io_fields]
# ############################################################ # ############################################################

View File

@ -10,7 +10,7 @@ import
# Internals # Internals
../platforms/abstractions, ../platforms/abstractions,
../math/[arithmetic, extension_fields], ../math/[arithmetic, extension_fields],
../math/curves/zoo_hash_to_curve, ../math/constants/zoo_hash_to_curve,
../math/elliptic/[ ../math/elliptic/[
ec_shortweierstrass_projective, ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian, ec_shortweierstrass_jacobian,

View File

@ -11,7 +11,7 @@ import
../platforms/abstractions, ../platforms/abstractions,
../math/config/curves, ../math/config/curves,
../math/[arithmetic, extension_fields], ../math/[arithmetic, extension_fields],
../math/curves/zoo_hash_to_curve, ../math/constants/zoo_hash_to_curve,
./h2c_utilities ./h2c_utilities
# ############################################################ # ############################################################

View File

@ -11,7 +11,7 @@ import
../platforms/abstractions, ../platforms/abstractions,
../math/config/curves, ../math/config/curves,
../math/[arithmetic, extension_fields], ../math/[arithmetic, extension_fields],
../math/curves/[zoo_hash_to_curve, zoo_subgroups], ../math/constants/[zoo_hash_to_curve, zoo_subgroups],
../math/ec_shortweierstrass, ../math/ec_shortweierstrass,
./h2c_hash_to_field, ./h2c_hash_to_field,
./h2c_map_to_isocurve_swu, ./h2c_map_to_isocurve_swu,

View File

@ -34,7 +34,7 @@ where code size is not an issue for example for multi-precision addition.
- Faster big-integer modular multiplication for most moduli\ - Faster big-integer modular multiplication for most moduli\
Gautam Botrel, Gus Gutoski, and Thomas Piellard, 2020\ Gautam Botrel, Gus Gutoski, and Thomas Piellard, 2020\
https://hackmd.io/@zkteam/modular_multiplication https://hackmd.io/@gnark/modular_multiplication
### Square roots ### Square roots

View File

@ -45,8 +45,8 @@ macro mulMont_CIOS_sparebit_gen[N: static int](
## The multiplication and reduction are further merged in the same loop ## The multiplication and reduction are further merged in the same loop
## ##
## This requires the most significant word of the Modulus ## This requires the most significant word of the Modulus
## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) ## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
## https://hackmd.io/@zkteam/modular_multiplication ## https://hackmd.io/@gnark/modular_multiplication
# No register spilling handling # No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
@ -213,3 +213,200 @@ func squareMont_CIOS_asm*[N](
var r2x {.noInit.}: Limbs[2*N] var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_inline(a) r2x.square_asm_inline(a)
r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub) r.redcMont_asm_inline(r2x, M, m0ninv, spareBits, skipFinalSub)
# Montgomery Sum of Products
# ------------------------------------------------------------
macro sumprodMont_CIOS_spare2bits_gen[N, K: static int](
r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
M_PIR: Limbs[N], m0ninv_REG: BaseType,
skipFinalSub: static bool
): untyped =
## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
## using the CIOS method
##
## This requires 2 spare bits in the most significant word
## so that we can skip the intermediate reductions
# No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
doAssert K <= 8, "we cannot sum more than 8 products"
# Bounds:
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
# for all intent and purposes this is true since aᵢ.bᵢ is:
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
# 2. We have a high-word tN to accumulate overflows.
# with 2 unused bits in the last word,
# the multiplication of two last words will leave 4 unused bits
# enough for accumulating 8 additions and overflow.
result = newStmtList()
var ctx = init(Assembler_x86, BaseType)
let
scratchSlots = 6
# We could force M as immediate by specializing per moduli
M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
# If N is too big, we need to spill registers. TODO.
t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
# MultiPurpose Register slots
scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
# MUL requires RAX and RDX
m0ninv = Operand(
desc: OperandDesc(
asmId: "[m0ninv]",
nimSymbol: m0ninv_REG,
rm: MemOffsettable,
constraint: Input,
cEmit: "&" & $m0ninv_REG
)
)
# We're really constrained by register and somehow setting as memory doesn't help
# So we store the result `r` in the scratch space and then reload it in RDX
# before the scratchspace is used in final substraction
a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
tN = scratch[2] # High part of extended precision multiplication
C = scratch[3] # Carry during reduction step
r = scratch[4] # Stores the `r` operand
S = scratch[5] # Mul step: Stores the carry A
# Red step: Stores (t[0] * m0ninv) mod 2ʷ
# Registers used:
# - 1 for `M`
# - 6 for `t` (at most)
# - 6 for `scratch`
# - 2 for RAX and RDX
# Total 15 out of 16
# We can save 1 by hardcoding M as immediate (and m0ninv)
# but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
# We might be able to save registers by having `r` and `M` be memory operand as well
let tsym = t.nimSymbol
let scratchSym = scratch.nimSymbol
result.add quote do:
static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
var `tsym`{.noInit, used.}: typeof(`r_PIR`)
# Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
`scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
`scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
`scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
# Algorithm
# -----------------------------------------
# for i=0 to N-1
# tN := 0
# for k=0 to K-1
# A := 0
# for j=0 to N-1
# (A,t[j]) := t[j] + a[k][j]*b[k][i] + A
# tN += A
# m := t[0]*m0ninv mod W
# C,_ := t[0] + m*M[0]
# for j=1 to N-1
# (C,t[j-1]) := t[j] + m*M[j] + C
# t[N-1] = tN + C
# No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
for i in 0 ..< N:
# Multiplication step
ctx.comment " Multiplication step"
ctx.comment " tN = 0"
ctx.`xor` tN, tN
for k in 0 ..< K:
template A: untyped = S
ctx.comment " A = 0"
ctx.`xor` A, A
ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A"
ctx.mov rax, a[k, 0]
ctx.mul rdx, rax, b[k, i], rax
if i == 0 and k == 0: # First accumulation, overwrite t[0]
ctx.mov t[0], rax
else: # Accumulate in t[0]
ctx.add t[0], rax
ctx.adc rdx, 0
ctx.mov A, rdx
for j in 1 ..< N:
ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A"
ctx.mov rax, a[k, j]
ctx.mul rdx, rax, b[k, i], rax
if i == 0 and k == 0: # First accumulation, overwrite t[0]
ctx.mov t[j], A
else: # Accumulate in t[0]
ctx.add t[j], A
ctx.adc rdx, 0
ctx.`xor` A, A
ctx.add t[j], rax
ctx.adc A, rdx
ctx.comment " tN += A"
ctx.add tN, A
# Reduction step
ctx.comment " Reduction step"
template m: untyped = S
ctx.comment " m := t[0]*m0ninv mod 2ʷ"
ctx.mov rax, m0ninv
ctx.imul rax, t[0]
ctx.mov m, rax
ctx.comment " C,_ := t[0] + m*M[0]"
ctx.`xor` C, C
ctx.mul rdx, rax, M[0], rax
ctx.add rax, t[0]
ctx.adc C, rdx
for j in 1 ..< N:
ctx.comment " (C,t[j-1]) := t[j] + m*M[j] + C"
ctx.mov rax, M[j]
ctx.mul rdx, rax, m, rax
ctx.add C, t[j]
ctx.adc rdx, 0
ctx.add C, rax
ctx.adc rdx, 0
ctx.mov t[j-1], C
ctx.mov C, rdx
ctx.comment "t[N-1] = tN + C"
ctx.add tN, C
ctx.mov t[N-1], tN
ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
let r2 = rax.asArrayAddr(len = N)
if skipFinalSub:
ctx.comment " Copy result"
for i in 0 ..< N:
ctx.mov r2[i], t[i]
else:
ctx.comment " Final substraction"
ctx.finalSubNoCarryImpl(
r2, t, M,
scratch
)
result.add ctx.generate()
func sumprodMont_CIOS_spare2bits_asm*[N, K: static int](
r: var Limbs[N], a, b: array[K, Limbs[N]],
M: Limbs[N], m0ninv: BaseType,
skipFinalSub: static bool) =
## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
## If "skipFinalSub" is set
## the result is in the range [0, 2M)
## otherwise the result is in the range [0, M)
##
## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
r.sumprodMont_CIOS_spare2bits_gen(a, b, M, m0ninv, skipFinalSub)

View File

@ -130,7 +130,8 @@ proc partialRedx(
t: OperandArray, t: OperandArray,
M: OperandArray, M: OperandArray,
m0ninv: Operand, m0ninv: Operand,
lo, S: Operand lo: Operand or Register,
S: Operand
) = ) =
## Partial Montgomery reduction ## Partial Montgomery reduction
## For CIOS method ## For CIOS method
@ -182,8 +183,8 @@ macro mulMont_CIOS_sparebit_adx_gen[N: static int](
## Generate an optimized Montgomery Multiplication kernel ## Generate an optimized Montgomery Multiplication kernel
## using the CIOS method ## using the CIOS method
## This requires the most significant word of the Modulus ## This requires the most significant word of the Modulus
## M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) ## M[^1] < high(SecretWord) shr 1 (i.e. less than 0b01111...1111)
## https://hackmd.io/@zkteam/modular_multiplication ## https://hackmd.io/@gnark/modular_multiplication
# No register spilling handling # No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs." doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
@ -308,3 +309,187 @@ func squareMont_CIOS_asm_adx*[N](
var r2x {.noInit.}: Limbs[2*N] var r2x {.noInit.}: Limbs[2*N]
r2x.square_asm_adx_inline(a) r2x.square_asm_adx_inline(a)
r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub) r.redcMont_asm_adx(r2x, M, m0ninv, spareBits, skipFinalSub)
# Montgomery Sum of Products
# ------------------------------------------------------------
macro sumprodMont_CIOS_spare2bits_adx_gen[N, K: static int](
r_PIR: var Limbs[N], a_PIR, b_PIR: array[K, Limbs[N]],
M_PIR: Limbs[N], m0ninv_REG: BaseType,
skipFinalSub: static bool
): untyped =
## Generate an optimized Montgomery merged sum of products ⅀aᵢ.bᵢ kernel
## using the CIOS method
##
## This requires 2 spare bits in the most significant word
## so that we can skip the intermediate reductions
# No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
doAssert K <= 8, "we cannot sum more than 8 products"
# Bounds:
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
# for all intent and purposes this is true since aᵢ.bᵢ is:
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
# 2. We have a high-word tN to accumulate overflows.
# with 2 unused bits in the last word,
# the multiplication of two last words will leave 4 unused bits
# enough for accumulating 8 additions and overflow.
result = newStmtList()
var ctx = init(Assembler_x86, BaseType)
let
scratchSlots = 6
# We could force M as immediate by specializing per moduli
M = init(OperandArray, nimSymbol = M_PIR, N, PointerInReg, Input)
# If N is too big, we need to spill registers. TODO.
t = init(OperandArray, nimSymbol = ident"t", N, ElemsInReg, Output_EarlyClobber)
# MultiPurpose Register slots
scratch = init(OperandArray, nimSymbol = ident"scratch", scratchSlots, ElemsInReg, InputOutput_EnsureClobber)
# MULX requires RDX as well
m0ninv = Operand(
desc: OperandDesc(
asmId: "[m0ninv]",
nimSymbol: m0ninv_REG,
rm: MemOffsettable,
constraint: Input,
cEmit: "&" & $m0ninv_REG
)
)
# We're really constrained by register and somehow setting as memory doesn't help
# So we store the result `r` in the scratch space and then reload it in RDX
# before the scratchspace is used in final substraction
a = scratch[0].as2dArrayAddr(rows = K, cols = N) # Store the `a` operand
b = scratch[1].as2dArrayAddr(rows = K, cols = N) # Store the `b` operand
tN = scratch[2] # High part of extended precision multiplication
C = scratch[3] # Carry during reduction step
r = scratch[4] # Stores the `r` operand
S = scratch[5] # Mul step: Stores the carry A
# Red step: Stores (t[0] * m0ninv) mod 2ʷ
# Registers used:
# - 1 for `M`
# - 6 for `t` (at most)
# - 6 for `scratch`
# - 2 for RAX and RDX
# Total 15 out of 16
# We can save 1 by hardcoding M as immediate (and m0ninv)
# but this prevent reusing the same code for multiple curves like BLS12-377 and BLS12-381
# We might be able to save registers by having `r` and `M` be memory operand as well
let tsym = t.nimSymbol
let scratchSym = scratch.nimSymbol
result.add quote do:
static: doAssert: sizeof(SecretWord) == sizeof(ByteAddress)
var `tsym`{.noInit, used.}: typeof(`r_PIR`)
# Assumes 64-bit limbs on 64-bit arch (or you can't store an address)
var `scratchSym` {.noInit.}: Limbs[`scratchSlots`]
`scratchSym`[0] = cast[SecretWord](`a_PIR`[0][0].unsafeAddr)
`scratchSym`[1] = cast[SecretWord](`b_PIR`[0][0].unsafeAddr)
`scratchSym`[4] = cast[SecretWord](`r_PIR`[0].unsafeAddr)
# Algorithm
# -----------------------------------------
# for i=0 to N-1
# tN := 0
# for k=0 to K-1
# A := 0
# for j=0 to N-1
# (A,t[j]) := t[j] + a[k][j]*b[k][i] + A
# tN += A
# m := t[0]*m0ninv mod W
# C,_ := t[0] + m*M[0]
# for j=1 to N-1
# (C,t[j-1]) := t[j] + m*M[j] + C
# t[N-1] = tN + C
# No register spilling handling
doAssert N <= 6, "The Assembly-optimized montgomery multiplication requires at most 6 limbs."
for i in 0 ..< N:
# Multiplication step
ctx.comment " Multiplication step"
ctx.comment " tN = 0"
ctx.`xor` tN, tN
for k in 0 ..< K:
template A: untyped = S
ctx.comment " A = 0"
ctx.`xor` A, A
ctx.comment " (A,t[0]) := t[0] + a[k][0]*b[k][i] + A"
ctx.mov rdx, b[k, i]
if i == 0 and k == 0: # First accumulation, overwrite t[0]
ctx.mulx t[1], t[0], a[k, 0], rdx
else: # Accumulate in t[0]
ctx.mulx A, rax, a[k, 0], rdx
ctx.adcx t[0], rax
ctx.adox t[1], A
for j in 1 ..< N-1:
ctx.comment " (A,t[j]) := t[j] + a[k][j]*b[k][i] + A"
if i == 0 and k == 0:
ctx.mulx t[j+1], rax, a[k, j], rdx
if j == 1:
ctx.add t[j], rax
else:
ctx.adc t[j], rax
else:
ctx.mulx A, rax, a[k, j], rdx
ctx.adcx t[j], rax
ctx.adox t[j+1], A
# Last limb
ctx.mulx A, rax, a[k, N-1], rdx
if i == 0 and k == 0:
ctx.adc t[N-1], rax
ctx.comment " tN += A"
ctx.adc tN, A
else:
ctx.adcx t[N-1], rax
ctx.comment " tN += A"
ctx.mov rdx, 0 # Set to 0 without clearing flags
ctx.adox tN, A
ctx.adcx tN, rdx
# Reduction step
ctx.partialRedx(
tN, t,
M, m0ninv,
rax, C
)
ctx.mov rax, r # move r away from scratchspace that will be used for final substraction
let r2 = rax.asArrayAddr(len = N)
if skipFinalSub:
ctx.comment " Copy result"
for i in 0 ..< N:
ctx.mov r2[i], t[i]
else:
ctx.comment " Final substraction"
ctx.finalSubNoCarryImpl(
r2, t, M,
scratch
)
result.add ctx.generate()
func sumprodMont_CIOS_spare2bits_asm_adx*[N, K: static int](
r: var Limbs[N], a, b: array[K, Limbs[N]],
M: Limbs[N], m0ninv: BaseType,
skipFinalSub: static bool) =
## Sum of products ⅀aᵢ.bᵢ in the Montgomery domain
## If "skipFinalSub" is set
## the result is in the range [0, 2M)
## otherwise the result is in the range [0, M)
##
## This procedure can only be called if the modulus doesn't use the full bitwidth of its underlying representation
r.sumprodMont_CIOS_spare2bits_adx_gen(a, b, M, m0ninv, skipFinalSub)

View File

@ -66,6 +66,20 @@ func squareMont*(r: var BigInt, a, M: BigInt, negInvModWord: static BaseType,
## to avoid duplicating with Nim zero-init policy ## to avoid duplicating with Nim zero-init policy
squareMont(r.limbs, a.limbs, M.limbs, negInvModWord, spareBits, skipFinalSub) squareMont(r.limbs, a.limbs, M.limbs, negInvModWord, spareBits, skipFinalSub)
func sumprodMont*[N: static int](
r: var BigInt,
a, b: array[N, BigInt],
M: BigInt, negInvModWord: static BaseType,
spareBits: static int, skipFinalSub: static bool = false) =
## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products) in the Montgomery domain
# We rely on BigInt and Limbs having the same repr to avoid array copies
sumprodMont(
r.limbs,
cast[ptr array[N, typeof(a[0].limbs)]](a.unsafeAddr)[],
cast[ptr array[N, typeof(b[0].limbs)]](b.unsafeAddr)[],
M.limbs, negInvModWord, spareBits, skipFinalSub
)
func powMont*[mBits: static int]( func powMont*[mBits: static int](
a: var BigInt[mBits], exponent: openarray[byte], a: var BigInt[mBits], exponent: openarray[byte],
M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int, M, one: BigInt[mBits], negInvModWord: static BaseType, windowSize: static int,

View File

@ -149,6 +149,26 @@ func setMinusOne*(a: var FF) =
# Check if the compiler optimizes it away # Check if the compiler optimizes it away
a.mres = FF.getMontyPrimeMinus1() a.mres = FF.getMontyPrimeMinus1()
func neg*(r: var FF, a: FF) {.meter.} =
## Negate modulo p
when UseASM_X86_64:
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs)
else:
# If a = 0 we need r = 0 and not r = M
# as comparison operator assume unicity
# of the modular representation.
# Also make sure to handle aliasing where r.addr = a.addr
var t {.noInit.}: FF
let isZero = a.isZero()
discard t.mres.diff(FF.fieldMod(), a.mres)
t.mres.csetZero(isZero)
r = t
func neg*(a: var FF) {.meter.} =
## Negate modulo p
a.neg(a)
func `+=`*(a: var FF, b: FF) {.meter.} = func `+=`*(a: var FF, b: FF) {.meter.} =
## In-place addition modulo p ## In-place addition modulo p
when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling when UseASM_X86_64 and a.mres.limbs.len <= 6: # TODO: handle spilling
@ -223,24 +243,14 @@ func square*(r: var FF, a: FF, skipFinalSub: static bool = false) {.meter.} =
## Squaring modulo p ## Squaring modulo p
r.mres.squareMont(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub) r.mres.squareMont(a.mres, FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub)
func neg*(r: var FF, a: FF) {.meter.} = func sumprod*[N: static int](r: var FF, a, b: array[N, FF], skipFinalSub: static bool = false) {.meter.} =
## Negate modulo p ## Compute r <- ⅀aᵢ.bᵢ (mod M) (sum of products)
when UseASM_X86_64: # We rely on FF and Bigints having the same repr to avoid array copies
negmod_asm(r.mres.limbs, a.mres.limbs, FF.fieldMod().limbs) r.mres.sumprodMont(
else: cast[ptr array[N, typeof(a[0].mres)]](a.unsafeAddr)[],
# If a = 0 we need r = 0 and not r = M cast[ptr array[N, typeof(b[0].mres)]](b.unsafeAddr)[],
# as comparison operator assume unicity FF.fieldMod(), FF.getNegInvModWord(), FF.getSpareBits(), skipFinalSub
# of the modular representation. )
# Also make sure to handle aliasing where r.addr = a.addr
var t {.noInit.}: FF
let isZero = a.isZero()
discard t.mres.diff(FF.fieldMod(), a.mres)
t.mres.csetZero(isZero)
r = t
func neg*(a: var FF) {.meter.} =
## Negate modulo p
a.neg(a)
# ############################################################ # ############################################################
# #

View File

@ -9,7 +9,7 @@
import import
../../platforms/abstractions, ../../platforms/abstractions,
../config/curves, ../config/curves,
../curves/zoo_square_roots, ../constants/zoo_square_roots,
./bigints, ./finite_fields, ./limbs_exgcd ./bigints, ./finite_fields, ./limbs_exgcd
# ############################################################ # ############################################################

View File

@ -305,8 +305,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
# First iteration of [u v] [d] # First iteration of [u v] [d]
# [q r].[e] # [q r].[e]
cd.slincombAccNoCarry(u, d[0], v, e[0]) cd.ssumprodAccNoCarry(u, d[0], v, e[0])
ce.slincombAccNoCarry(q, d[0], r, e[0]) ce.ssumprodAccNoCarry(q, d[0], r, e[0])
# Compute me and md, multiples of M # Compute me and md, multiples of M
# such as the bottom k bits if d and e are 0 # such as the bottom k bits if d and e are 0
@ -330,8 +330,8 @@ func matVecMul_shr_k_mod_M[N, E: static int](
ce.ashr(k) ce.ashr(k)
for i in 1 ..< N: for i in 1 ..< N:
cd.slincombAccNoCarry(u, d[i], v, e[i]) cd.ssumprodAccNoCarry(u, d[i], v, e[i])
ce.slincombAccNoCarry(q, d[i], r, e[i]) ce.ssumprodAccNoCarry(q, d[i], r, e[i])
cd.smulAccNoCarry(md, M[i]) cd.smulAccNoCarry(md, M[i])
ce.smulAccNoCarry(me, M[i]) ce.smulAccNoCarry(me, M[i])
d[i-1] = cd.lo and Max d[i-1] = cd.lo and Max
@ -366,8 +366,8 @@ func matVecMul_shr_k[N, E: static int](
# First iteration of [u v] [f] # First iteration of [u v] [f]
# [q r].[g] # [q r].[g]
cf.slincombAccNoCarry(u, f[0], v, g[0]) cf.ssumprodAccNoCarry(u, f[0], v, g[0])
cg.slincombAccNoCarry(q, f[0], r, g[0]) cg.ssumprodAccNoCarry(q, f[0], r, g[0])
# bottom k bits are zero by construction # bottom k bits are zero by construction
debug: debug:
doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo) doAssert BaseType(cf.lo and Max) == 0, "bottom k bits should be 0, cf.lo: " & $BaseType(cf.lo)
@ -377,8 +377,8 @@ func matVecMul_shr_k[N, E: static int](
cg.ashr(k) cg.ashr(k)
for i in 1 ..< N: for i in 1 ..< N:
cf.slincombAccNoCarry(u, f[i], v, g[i]) cf.ssumprodAccNoCarry(u, f[i], v, g[i])
cg.slincombAccNoCarry(q, f[i], r, g[i]) cg.ssumprodAccNoCarry(q, f[i], r, g[i])
f[i-1] = cf.lo and Max f[i-1] = cf.lo and Max
g[i-1] = cg.lo and Max g[i-1] = cg.lo and Max
cf.ashr(k) cf.ashr(k)
@ -496,11 +496,6 @@ func invmod*(
# #
# Those symbols can be computed either via exponentiation (Fermat's Little Theorem) # Those symbols can be computed either via exponentiation (Fermat's Little Theorem)
# or using slight modifications to the Extended Euclidean Algorithm for GCD. # or using slight modifications to the Extended Euclidean Algorithm for GCD.
#
# See
# - Algorithm II.7 in Blake, Seroussi, Smart: "Elliptic Curves in Cryptography"
# - Algorithm 5.9.2 in Bach and Shallit: "Algorithmic Number Theory"
# - Pornin: https://github.com/pornin/x25519-cm0/blob/75a53f2/src/x25519-cm0.S#L89-L155
func batchedDivstepsSymbol( func batchedDivstepsSymbol(
t: var TransitionMatrix, t: var TransitionMatrix,
@ -631,7 +626,7 @@ func legendreImpl[N, E](
accL = (accL + accL.isOdd()) and SignedSecretWord(3) accL = (accL + accL.isOdd()) and SignedSecretWord(3)
accL = SignedSecretWord(1)-accL accL = SignedSecretWord(1)-accL
accL.csetZero(f.isZeroMask()) # f = gcd = 1 as M is prime or f = 0 if a = 0 accL.csetZero(f.isZeroMask())
return SecretWord(accL) return SecretWord(accL)
func legendre*(a, M: Limbs, bits: static int): SecretWord = func legendre*(a, M: Limbs, bits: static int): SecretWord =

View File

@ -306,11 +306,69 @@ func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub:
discard z.csub(M, v.isNonZero() or not(z < M)) discard z.csub(M, v.isNonZero() or not(z < M))
r = z r = z
func sumprodMont_CIOS_spare2bits[K: static int](
r: var Limbs, a, b: array[K, Limbs],
M: Limbs, m0ninv: BaseType,
skipFinalSub: static bool = false) =
## Compute r = ⅀aᵢ.bᵢ (mod M) (suim of products)
## This requires 2 unused bits in the field element representation
##
## This maps
## - [0, 2p) -> [0, 2p) with skipFinalSub
## - [0, 2p) -> [0, p) without
##
## skipFinalSub skips the final substraction step.
# We want all the computation to be kept in registers
# hence we use a temporary `t`, hoping that the compiler does the right thing™.
var t: typeof(M) # zero-init
const N = t.len
# Extra word to handle carries t[N] (not there are 2 unused spare bits)
var tN {.noInit.}: SecretWord
static: doAssert K <= 8, "we cannot sum more than 8 products"
# Bounds:
# 1. To ensure mapping in [0, 2p), we need ⅀aᵢ.bᵢ <=pR
# for all intent and purposes this is true since aᵢ.bᵢ is:
# if reduced inputs: (p-1).(p-1) = p²-2p+1 which would allow more than p sums
# if unreduced inputs: (2p-1).(2p-1) = 4p²-4p+1,
# with 4p < R due to the 2 unused bits constraint so more than p sums are allowed
# 2. We have a high-word tN to accumulate overflows.
# with 2 unused bits in the last word,
# the multiplication of two last words will leave 4 unused bits
# enough for accumulating 8 additions and overflow.
staticFor i, 0, N:
tN = Zero
staticFor k, 0, K:
var A = Zero
staticFor j, 0, N:
# (A, t[j]) <- a[k][j] * b[k][i] + t[j] + A
muladd2(A, t[j], a[k][j], b[k][i], t[j], A)
tN += A
# Reduction
# m <- (t[0] * m0ninv) mod 2ʷ
# (C, _) <- m * M[0] + t[0]
var C, lo = Zero
let m = t[0] * SecretWord(m0ninv)
muladd1(C, lo, m, M[0], t[0])
staticFor j, 1, N:
# (C, t[j-1]) <- m*M[j] + t[j] + C
muladd2(C, t[j-1], m, M[j], t[j], C)
# (_,t[N-1]) <- t[N] + C
t[N-1] = tN + C
when not skipFinalSub:
discard t.csub(M, not(t < M))
r = t
# Montgomery Squaring # Montgomery Squaring
# -------------------------------------------------------------------------------------------------------------------- # --------------------------------------------------------------------------------------------------------------------
# #
# There are Montgomery squaring multiplications mentioned in the litterature # There are Montgomery squaring multiplications mentioned in the litterature
# - https://hackmd.io/@zkteam/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111) # - https://hackmd.io/@gnark/modular_multiplication if M[^1] < high(SecretWord) shr 2 (i.e. less than 0b00111...1111)
# - Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards # - Architectural Support for Long Integer Modulo Arithmetic on Risc-Based Smart Cards
# Johann Großschädl, 2003 # Johann Großschädl, 2003
# https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf # https://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=95950BAC26A728114431C0C7B425E022?doi=10.1.1.115.3276&rep=rep1&type=pdf
@ -482,9 +540,31 @@ func squareMont*[N](r: var Limbs[N], a, M: Limbs[N],
r.redc2xMont(r2x, M, m0ninv, spareBits, skipFinalSub) r.redc2xMont(r2x, M, m0ninv, spareBits, skipFinalSub)
else: else:
mulMont(r, a, a, M, m0ninv, spareBits, skipFinalSub) mulMont(r, a, a, M, m0ninv, spareBits, skipFinalSub)
func sumprodMont*[N: static int](
r: var Limbs, a, b: array[N, Limbs],
M: Limbs, m0ninv: BaseType,
spareBits: static int,
skipFinalSub: static bool = false) {.inline.} =
when spareBits >= 2:
when UseASM_X86_64 and r.len in {2 .. 6}:
if ({.noSideEffect.}: hasAdx()):
r.sumprodMont_CIOS_spare2bits_asm_adx(a, b, M, m0ninv, skipFinalSub)
else:
r.sumprodMont_CIOS_spare2bits_asm(a, b, M, m0ninv, skipFinalSub)
else:
r.sumprodMont_CIOS_spare2bits(a, b, M, m0ninv, skipFinalSub)
else:
r.mulMont(a[0], b[0], M, m0ninv, spareBits, skipFinalSub = false)
var ri {.noInit.}: Limbs
for i in 1 ..< N:
ri.mulMont(a[i], b[i], M, m0ninv, spareBits, skipFinalSub = false)
var overflowed = SecretBool r.add(ri)
overflowed = overflowed or not(r < M)
discard r.csub(M, overflowed)
func fromMont*(r: var Limbs, a, M: Limbs, func fromMont*(r: var Limbs, a, M: Limbs,
m0ninv: BaseType, spareBits: static int) = m0ninv: BaseType, spareBits: static int) {.inline.} =
## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N) ## Transform a bigint ``a`` from it's Montgomery N-residue representation (mod N)
## to the regular natural representation (mod N) ## to the regular natural representation (mod N)
## ##
@ -512,7 +592,7 @@ func fromMont*(r: var Limbs, a, M: Limbs,
fromMont_CIOS(r, a, M, m0ninv) fromMont_CIOS(r, a, M, m0ninv)
func getMont*(r: var Limbs, a, M, r2modM: Limbs, func getMont*(r: var Limbs, a, M, r2modM: Limbs,
m0ninv: BaseType, spareBits: static int) = m0ninv: BaseType, spareBits: static int) {.inline.} =
## Transform a bigint ``a`` from it's natural representation (mod N) ## Transform a bigint ``a`` from it's natural representation (mod N)
## to a the Montgomery n-residue representation ## to a the Montgomery n-residue representation
## ##

View File

@ -393,8 +393,8 @@ func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
r.lo = SignedSecretWord UV[0] r.lo = SignedSecretWord UV[0]
r.hi = SignedSecretWord UV[1] r.hi = SignedSecretWord UV[1]
func slincombAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}= func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
## Accumulated linear combination ## Accumulated sum of products
## (_, hi, lo) += a*u + b*v ## (_, hi, lo) += a*u + b*v
## This assumes no overflowing ## This assumes no overflowing
var carry: Carry var carry: Carry

View File

@ -45,6 +45,10 @@ func has_P_3mod4_primeModulus*(C: static Curve): static bool =
## Returns true iff p ≡ 3 (mod 4) ## Returns true iff p ≡ 3 (mod 4)
(BaseType(C.Mod.limbs[0]) and 3) == 3 (BaseType(C.Mod.limbs[0]) and 3) == 3
func has_P_3mod8_primeModulus*(C: static Curve): static bool =
## Returns true iff p ≡ 3 (mod 8)
(BaseType(C.Mod.limbs[0]) and 7) == 3
func has_P_5mod8_primeModulus*(C: static Curve): static bool = func has_P_5mod8_primeModulus*(C: static Curve): static bool =
## Returns true iff p ≡ 5 (mod 8) ## Returns true iff p ≡ 5 (mod 8)
(BaseType(C.Mod.limbs[0]) and 7) == 5 (BaseType(C.Mod.limbs[0]) and 7) == 5

View File

@ -15,7 +15,7 @@ import
../ec_shortweierstrass, ../ec_shortweierstrass,
../io/io_bigints, ../io/io_bigints,
../isogenies/frobenius, ../isogenies/frobenius,
../curves/zoo_endomorphisms ../constants/zoo_endomorphisms
func pow_bls12_377_abs_x[ECP: ECP_ShortW[Fp[BLS12_377], G1] or func pow_bls12_377_abs_x[ECP: ECP_ShortW[Fp[BLS12_377], G1] or
ECP_ShortW[Fp2[BLS12_377], G2]]( ECP_ShortW[Fp2[BLS12_377], G2]](

View File

@ -15,7 +15,7 @@ import
../ec_shortweierstrass, ../ec_shortweierstrass,
../io/io_bigints, ../io/io_bigints,
../isogenies/frobenius, ../isogenies/frobenius,
../curves/zoo_endomorphisms ../constants/zoo_endomorphisms
func pow_bls12_381_abs_x[ECP: ECP_ShortW[Fp[BLS12_381], G1] or func pow_bls12_381_abs_x[ECP: ECP_ShortW[Fp[BLS12_381], G1] or
ECP_ShortW[Fp2[BLS12_381], G2]]( ECP_ShortW[Fp2[BLS12_381], G2]](

View File

@ -12,7 +12,7 @@ import
# Internal # Internal
../../platforms/abstractions, ../../platforms/abstractions,
../config/[curves, type_bigint], ../config/[curves, type_bigint],
../curves/zoo_endomorphisms, ../constants/zoo_endomorphisms,
../arithmetic, ../arithmetic,
../extension_fields, ../extension_fields,
../isogenies/frobenius, ../isogenies/frobenius,

View File

@ -12,7 +12,7 @@ import
../arithmetic, ../arithmetic,
../extension_fields, ../extension_fields,
../io/io_bigints, ../io/io_bigints,
../curves/zoo_endomorphisms, ../constants/zoo_endomorphisms,
./ec_endomorphism_accel ./ec_endomorphism_accel
# ############################################################ # ############################################################

View File

@ -12,7 +12,7 @@ import
../arithmetic, ../arithmetic,
../extension_fields, ../extension_fields,
../io/[io_fields, io_extfields], ../io/[io_fields, io_extfields],
../curves/zoo_constants ../constants/zoo_constants
# ############################################################ # ############################################################
# #

View File

@ -11,7 +11,7 @@ import
./towers, ./towers,
../arithmetic, ../arithmetic,
../config/curves, ../config/curves,
../curves/zoo_square_roots_fp2 ../constants/zoo_square_roots_fp2
# Square root # Square root
# ----------------------------------------------------------- # -----------------------------------------------------------

View File

@ -57,8 +57,8 @@ type
CubicExt[Fp2[C]] CubicExt[Fp2[C]]
Fp12*[C: static Curve] = Fp12*[C: static Curve] =
CubicExt[Fp4[C]] # CubicExt[Fp4[C]]
# QuadraticExt[Fp6[C]] QuadraticExt[Fp6[C]]
template c0*(a: ExtensionField): auto = template c0*(a: ExtensionField): auto =
a.coords[0] a.coords[0]
@ -662,8 +662,8 @@ func prod*(r: var CubicExt, a: CubicExt, _: type NonResidue) =
## and v³ = ξ ## and v³ = ξ
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
let t {.noInit.} = a.c2 let t {.noInit.} = a.c2
r.c1 = a.c0
r.c2 = a.c1 r.c2 = a.c1
r.c1 = a.c0
r.c0.prod(t, NonResidue) r.c0.prod(t, NonResidue)
func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} = func `*=`*(a: var CubicExt, _: type NonResidue) {.inline.} =
@ -689,8 +689,8 @@ func prod2x*(
## and v³ = ξ ## and v³ = ξ
## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v² ## (c0 + c1 v + c2 v²) v => ξ c2 + c0 v + c1 v²
let t {.noInit.} = a.c2 let t {.noInit.} = a.c2
r.c1 = a.c0
r.c2 = a.c1 r.c2 = a.c1
r.c1 = a.c0
r.c0.prod2x(t, NonResidue) r.c0.prod2x(t, NonResidue)
{.pop.} # inline {.pop.} # inline
@ -811,7 +811,7 @@ func square2x_complex(r: var QuadraticExt2x, a: Fp2) =
# Complex multiplications # Complex multiplications
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
func prod_complex(r: var Fp2, a, b: Fp2) = func prod_complex(r: var Fp2, a, b: Fp2) {.used.} =
## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r`` ## Return a * b in 𝔽p2 = 𝔽p[𝑖] in ``r``
## ``r`` is initialized/overwritten ## ``r`` is initialized/overwritten
## ##
@ -823,10 +823,7 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
# while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction) # while addition has cost O(3n) (n for addition, n for overflow, n for conditional substraction)
# and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition) # and substraction has cost O(2n) (n for substraction + underflow, n for conditional addition)
# #
# Even for 256-bit primes, we are looking at always a minimum of n=5 limbs (with 2^63 words) # Hence we can consider using q Karatsuba approach to save a multiplication
# where addition/substraction are significantly cheaper than multiplication
#
# So we always reframe the imaginary part using Karatsuba approach to save a multiplication
# (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 ) # (a0, a1) (b0, b1) => (a0 b0 - a1 b1) + 𝑖( (a0 + a1)(b0 + b1) - a0 b0 - a1 b1 )
# #
# Costs (naive implementation) # Costs (naive implementation)
@ -841,19 +838,12 @@ func prod_complex(r: var Fp2, a, b: Fp2) =
# - 2 Addition 𝔽p # - 2 Addition 𝔽p
# Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary) # Stack: 6 * ModulusBitSize (4x 𝔽p element + 2x named temporaries + 1 in-place multiplication temporary)
static: doAssert r.fromComplexExtension() static: doAssert r.fromComplexExtension()
var t {.noInit.}: typeof(r)
var a0b0 {.noInit.}, a1b1 {.noInit.}: typeof(r.c0) var na1 {.noInit.}: typeof(r.c0)
a0b0.prod(a.c0, b.c0) # [1 Mul] na1.neg(a.c1)
a1b1.prod(a.c1, b.c1) # [2 Mul] t.c0.sumprod([a.c0, na1], [b.c0, b.c1])
t.c1.sumprod([a.c0, a.c1], [b.c1, b.c0])
r.c0.sum(a.c0, a.c1) # r0 = (a0 + a1) # [2 Mul, 1 Add] r = t
r.c1.sum(b.c0, b.c1) # r1 = (b0 + b1) # [2 Mul, 2 Add]
# aliasing: a and b unneeded now
r.c1 *= r.c0 # r1 = (b0 + b1)(a0 + a1) # [3 Mul, 2 Add] - 𝔽p temporary
r.c0.diff(a0b0, a1b1) # r0 = a0 b0 - a1 b1 # [3 Mul, 2 Add, 1 Sub]
r.c1 -= a0b0 # r1 = (b0 + b1)(a0 + a1) - a0b0 # [3 Mul, 2 Add, 2 Sub]
r.c1 -= a1b1 # r1 = (b0 + b1)(a0 + a1) - a0b0 - a1b1 # [3 Mul, 2 Add, 3 Sub]
func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) = func prod2x_complex(r: var QuadraticExt2x, a, b: Fp2) =
## Double-precision unreduced complex multiplication ## Double-precision unreduced complex multiplication
@ -983,7 +973,37 @@ func square2x_disjoint*[Fdbl, F](
r.c1.diff2xMod(r.c1, V0) r.c1.diff2xMod(r.c1, V0)
r.c1.diff2xMod(r.c1, V1) r.c1.diff2xMod(r.c1, V1)
# Generic multiplications # Multiplications (specializations)
# -------------------------------------------------------------------
func prodImpl_fp4o2_p3mod8[C: static Curve](r: var Fp4[C], a, b: Fp4[C]) =
## Returns r = a * b
## For 𝔽p4/𝔽p2 with p ≡ 3 (mod 8),
## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
## and 𝔽p SNR is (1 + i)
static: doAssert C.has_P_3mod8_primeModulus()
var
b10_m_b11{.noInit.}, b10_p_b11{.noInit.}: Fp[C]
n_a01{.noInit.}, n_a11{.noInit.}: Fp[C]
t{.noInit.}: Fp4[C]
b10_m_b11.diff(b.c1.c0, b.c1.c1)
b10_p_b11.sum(b.c1.c0, b.c1.c1)
n_a01.neg(a.c0.c1)
n_a11.neg(a.c1.c1)
t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11],
[b.c0.c0, b.c0.c1, b10_m_b11, b10_p_b11])
t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1],
[b.c0.c1, b.c0.c0, b10_p_b11, b10_m_b11])
t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11],
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1])
t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1],
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0])
r = t
# Multiplications (generic)
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) = func prod_generic(r: var QuadraticExt, a, b: QuadraticExt) =
@ -1264,7 +1284,7 @@ func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
# [1 Inv, 2 Sqr, 1 Add] # [1 Inv, 2 Sqr, 1 Add]
t.redc2x(V0) t.redc2x(V0)
t.inv() # v1 = 1 / (a0² - β a1²) t.inv() # v1 = 1 / (a0² - β a1²)
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg] # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
r.c0.prod(a.c0, t) # r0 = a0 / (a0² - β a1²) r.c0.prod(a.c0, t) # r0 = a0 / (a0² - β a1²)
@ -1288,35 +1308,37 @@ func square2x*(r: var QuadraticExt2x, a: QuadraticExt) =
else: else:
r.square2x_disjoint(a.c0, a.c1) r.square2x_disjoint(a.c0, a.c1)
func square_disjoint*[F](r: var QuadraticExt[F], a0, a1: F) =
# TODO understand why Fp4[BLS12_377]
# is so slow in the branch
# TODO:
# - On Fp4, we can have a.c0.c0 off by p
# a reduction is missing
static: doAssert not r.fromComplexExtension(), "Faster specialization not implemented"
var d {.noInit.}: doublePrec(typeof(r))
d.square2x_disjoint(a0, a1)
r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1)
func square*(r: var QuadraticExt, a: QuadraticExt) = func square*(r: var QuadraticExt, a: QuadraticExt) =
when r.fromComplexExtension(): when r.fromComplexExtension():
when true: when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit():
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem() and r.typeof.has1extraBit(): if ({.noSideEffect.}: hasAdx()):
if ({.noSideEffect.}: hasAdx()): r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
r.coords.sqrx_complex_sparebit_asm_adx(a.coords)
else:
r.square_complex(a)
else: else:
r.square_complex(a) r.square_complex(a)
else: # slower else:
var d {.noInit.}: doublePrec(typeof(r)) r.square_complex(a)
d.square2x(a)
r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1)
else: else:
when true: # r.typeof.F.C in {BLS12_377, BW6_761}: when QuadraticExt.C.has_large_field_elem():
# BW6-761 requires too many registers for Dbl width path # BW6-761 requires too many registers for Dbl width path
r.square_generic(a) r.square_generic(a)
else: elif QuadraticExt is Fp4[BLS12_377]:
# TODO understand why Fp4[BLS12_377] # TODO BLS12-377 slowness to fix
# is so slow in the branch r.square_generic(a)
# TODO: else:
# - On Fp4, we can have a.c0.c0 off by p r.square_disjoint(a.c0, a.c1)
# a reduction is missing
var d {.noInit.}: doublePrec(typeof(r))
d.square2x_disjoint(a.c0, a.c1)
r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1)
func square*(a: var QuadraticExt) = func square*(a: var QuadraticExt) =
## In-place squaring ## In-place squaring
@ -1326,26 +1348,25 @@ func prod*(r: var QuadraticExt, a, b: QuadraticExt) =
## Multiplication r <- a*b ## Multiplication r <- a*b
when r.fromComplexExtension(): when r.fromComplexExtension():
when false: when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
r.prod_complex(a, b) if ({.noSideEffect.}: hasAdx()):
else: # faster r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
when UseASM_X86_64 and not QuadraticExt.C.has_large_field_elem():
if ({.noSideEffect.}: hasAdx()):
r.coords.mul_fp2_complex_asm_adx(a.coords, b.coords)
else:
var d {.noInit.}: doublePrec(typeof(r))
d.prod2x_complex(a, b)
r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1)
else: else:
var d {.noInit.}: doublePrec(typeof(r)) var d {.noInit.}: doublePrec(typeof(r))
d.prod2x_complex(a, b) d.prod2x_complex(a, b)
r.c0.redc2x(d.c0) r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1) r.c1.redc2x(d.c1)
else:
var d {.noInit.}: doublePrec(typeof(r))
d.prod2x_complex(a, b)
r.c0.redc2x(d.c0)
r.c1.redc2x(d.c1)
else: else:
when r.typeof.F.C.has_large_field_elem(): when QuadraticExt is Fp12 or r.typeof.F.C.has_large_field_elem():
# BW6-761 requires too many registers for Dbl width path # BW6-761 requires too many registers for Dbl width path
r.prod_generic(a, b) r.prod_generic(a, b)
elif QuadraticExt is Fp4 and QuadraticExt.C.has_P_3mod8_primeModulus():
r.prodImpl_fp4o2_p3mod8(a, b)
else: else:
var d {.noInit.}: doublePrec(typeof(r)) var d {.noInit.}: doublePrec(typeof(r))
d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1) d.prod2x_disjoint(a.c0, a.c1, b.c0, b.c1)
@ -1595,7 +1616,49 @@ func square_Chung_Hasan_SQR3(r: var CubicExt, a: CubicExt) =
r.c0.prod(m12, NonResidue) r.c0.prod(m12, NonResidue)
r.c0 += s0 r.c0 += s0
# Multiplications # Multiplications (specializations)
# -------------------------------------------------------------------
func prodImpl_fp6o2_p3mod8[C: static Curve](r: var Fp6[C], a, b: Fp6[C]) =
## Returns r = a * b
## For 𝔽p6/𝔽p2 with p ≡ 3 (mod 8),
## hence 𝔽p QNR is 𝑖 = √-1 as p ≡ 3 (mod 8) implies p ≡ 3 (mod 4)
## and 𝔽p SNR is (1 + i)
# https://eprint.iacr.org/2022/367 - Equation 8
static: doAssert C.has_P_3mod8_primeModulus()
var
b10_p_b11{.noInit.}, b10_m_b11{.noInit.}: Fp[C]
b20_p_b21{.noInit.}, b20_m_b21{.noInit.}: Fp[C]
n_a01{.noInit.}, n_a11{.noInit.}, n_a21{.noInit.}: Fp[C]
t{.noInit.}: Fp6[C]
b10_p_b11.sum(b.c1.c0, b.c1.c1)
b10_m_b11.diff(b.c1.c0, b.c1.c1)
b20_p_b21.sum(b.c2.c0, b.c2.c1)
b20_m_b21.diff(b.c2.c0, b.c2.c1)
n_a01.neg(a.c0.c1)
n_a11.neg(a.c1.c1)
n_a21.neg(a.c2.c1)
t.c0.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
[b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21, b10_m_b11, b10_p_b11])
t.c0.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21, b10_p_b11, b10_m_b11])
t.c1.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
[b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1, b20_m_b21, b20_p_b21])
t.c1.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0, b20_p_b21, b20_m_b21])
t.c2.c0.sumprod([a.c0.c0, n_a01, a.c1.c0, n_a11, a.c2.c0, n_a21],
[b.c2.c0, b.c2.c1, b.c1.c0, b.c1.c1, b.c0.c0, b.c0.c1])
t.c2.c1.sumprod([a.c0.c0, a.c0.c1, a.c1.c0, a.c1.c1, a.c2.c0, a.c2.c1],
[b.c2.c1, b.c2.c0, b.c1.c1, b.c1.c0, b.c0.c1, b.c0.c0])
r = t
# Multiplications (generic)
# ------------------------------------------------------------------- # -------------------------------------------------------------------
func prodImpl(r: var CubicExt, a, b: CubicExt) = func prodImpl(r: var CubicExt, a, b: CubicExt) =
@ -1686,6 +1749,40 @@ func prod2xImpl(r: var CubicExt2x, a, b: CubicExt) =
# Sparse multiplication # Sparse multiplication
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
func mul_sparse_by_x00*(r: var CubicExt, a: CubicExt, sparseB: auto) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
when typeof(sparseB) is typeof(a):
template b(): untyped = sparseB.c0
elif typeof(sparseB) is typeof(a.c0):
template b(): untyped = sparseB
else:
{.error: "sparseB type is " & $typeof(sparseB) &
" which does not match with either a (" & $typeof(a) &
") or a.c0 (" & $typeof(a.c0) & ")".}
r.c0.prod(a.c0, b)
r.c1.prod(a.c1, b)
r.c2.prod(a.c2, b)
func mul2x_sparse_by_x00*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (b₀, b0, 0)
when typeof(sparseB) is typeof(a):
template b(): untyped = sparseB.c0
elif typeof(sparseB) is typeof(a.c0):
template b(): untyped = sparseB
else:
{.error: "sparseB type is " & $typeof(sparseB) &
" which does not match with either a (" & $typeof(a) &
") or a.c0 (" & $typeof(a.c0) & ")".}
r.c0.prod2x(a.c0, b)
r.c1.prod2x(a.c1, b)
r.c2.prod2x(a.c2, b)
func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) = func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
## Sparse multiplication of a cubic extenion element ## Sparse multiplication of a cubic extenion element
## with coordinates (a₀, a₁, a₂) by (0, b₁, 0) ## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
@ -1718,6 +1815,171 @@ func mul_sparse_by_0y0*(r: var CubicExt, a: CubicExt, sparseB: auto) =
r.c1.prod(a.c0, b) r.c1.prod(a.c0, b)
r.c2.prod(a.c1, b) r.c2.prod(a.c1, b)
func mul2x_sparse_by_0y0*(r: var CubicExt2x, a: CubicExt, sparseB: auto) =
## Sparse multiplication of a cubic extenion element
## with coordinates (a₀, a₁, a₂) by (0, b₁, 0)
when typeof(sparseB) is typeof(a):
template b(): untyped = sparseB.c1
elif typeof(sparseB) is typeof(a.c0):
template b(): untyped = sparseB
else:
{.error: "sparseB type is " & $typeof(sparseB) &
" which does not match with either a (" & $typeof(a) &
") or a.c0 (" & $typeof(a.c0) & ")".}
r.c0.prod2x(a.c2, b)
r.c0.prod2x(r.c0, NonResidue)
r.c1.prod2x(a.c0, b)
r.c2.prod2x(a.c1, b)
func mul_sparse_by_xy0*[Fpkdiv3](r: var CubicExt, a: CubicExt,
x, y: Fpkdiv3) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
##
## r and a must not alias
# v0 = a0 b0
# v1 = a1 b1
# v2 = a2 b2 = 0
#
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
# = ξ (a1 b1 + a2 b1 - v1) + v0
# = ξa2 b1 + v0
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
# = (a0 + a1) * (b0 + b1) - v0 - v1
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
# = a0 b0 + a2 b0 - v0 + v1
# = a2 b0 + v1
static: doAssert a.c0 is Fpkdiv3
var
v0 {.noInit.}: Fpkdiv3
v1 {.noInit.}: Fpkdiv3
v0.prod(a.c0, x)
v1.prod(a.c1, y)
r.c0.prod(a.c2, y)
r.c0 *= NonResidue
r.c0 += v0
r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
r.c2.sum(x, y)
r.c1 *= r.c2
r.c1 -= v0
r.c1 -= v1
r.c2.prod(a.c2, x)
r.c2 += v1
func mul2x_sparse_by_xy0*[Fpkdiv3](r: var CubicExt2x, a: CubicExt,
x, y: Fpkdiv3) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (b₀, b₁, 0)
##
## r and a must not alias
static: doAssert a.c0 is Fpkdiv3
var
V0 {.noInit.}: doubleprec(Fpkdiv3)
V1 {.noInit.}: doubleprec(Fpkdiv3)
t0{.noInit.}: Fpkdiv3
t1{.noInit.}: Fpkdiv3
V0.prod2x(a.c0, x)
V1.prod2x(a.c1, y)
r.c0.prod2x(a.c2, y)
r.c0.prod2x(r.c0, NonResidue)
r.c0.sum2xMod(r.c0, V0)
t0.sum(a.c0, a.c1)
t1.sum(x, y)
r.c1.prod2x(t0, t1)
r.c1.diff2xMod(r.c1, V0)
r.c1.diff2xMod(r.c1, V1)
r.c2.prod2x(a.c2, x)
r.c2.sum2xMod(r.c2, V1)
func mul_sparse_by_0yz*[Fpkdiv3](r: var CubicExt, a: CubicExt, y, z: Fpkdiv3) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
##
## r and a must not alias
# v0 = a0 b0 = 0
# v1 = a1 b1
# v2 = a2 b2
#
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
# = ξ ((a1 + a2) * (b1 + b2) - v1 - v2)
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1 + ξ v2
# = a0 b1 + a1 b1 - v1 + ξ v2
# = a0 b1 + ξ v2
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
# = a0 b2 + a2 b2 - v0 - v2 + v1
# = a0 b2 + v1
static: doAssert a.c0 is Fpkdiv3
var
v1 {.noInit.}: Fpkdiv3
v2 {.noInit.}: Fpkdiv3
v1.prod(a.c1, y)
v2.prod(a.c2, z)
r.c1.sum(a.c1, a.c2)
r.c2.sum( y, z)
r.c0.prod(r.c1, r.c2)
r.c0 -= v1
r.c0 -= v2
r.c0 *= NonResidue
r.c1.prod(a.c0, y)
v2 *= NonResidue
r.c1 += v2
r.c2.prod(a.c0, z)
r.c2 += v1
func mul2x_sparse_by_0yz*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, y, z: Fpkdiv3) =
## Sparse multiplication of a cubic extension element
## with coordinates (a₀, a₁, a₂) by (0, b₁, b₂)
##
## r and a must not alias
static: doAssert a.c0 is Fpkdiv3
var
V1 {.noInit.}: doubleprec(Fpkdiv3)
V2 {.noInit.}: doubleprec(Fpkdiv3)
t1 {.noInit.}: Fpkdiv3
t2 {.noInit.}: Fpkdiv3
V1.prod2x(a.c1, y)
V2.prod2x(a.c2, z)
t1.sum(a.c1, a.c2)
t2.sum( y, z)
r.c0.prod2x(t1, t2)
r.c0.diff2xMod(r.c0, V1)
r.c0.diff2xMod(r.c0, V2)
r.c0.prod2x(r.c0, NonResidue)
r.c1.prod2x(a.c0, y)
V2.prod2x(V2, NonResidue)
r.c1.sum2xMod(r.c1, V2)
r.c2.prod2x(a.c0, z)
r.c2.sum2xMod(r.c2, V1)
# Inversion # Inversion
# ---------------------------------------------------------------------- # ----------------------------------------------------------------------
@ -1856,6 +2118,8 @@ func prod*(r: var CubicExt, a, b: CubicExt) =
## Out-of-place multiplication ## Out-of-place multiplication
when CubicExt.C.has_large_field_elem(): when CubicExt.C.has_large_field_elem():
r.prodImpl(a, b) r.prodImpl(a, b)
elif r is Fp6 and CubicExt.C.has_P_3mod8_primeModulus():
r.prodImpl_fp6o2_p3mod8(a, b)
else: else:
var d {.noInit.}: doublePrec(typeof(r)) var d {.noInit.}: doublePrec(typeof(r))
d.prod2x(a, b) d.prod2x(a, b)

View File

@ -33,7 +33,7 @@ proc spaces*(num: int): string =
for i in 0 ..< num: for i in 0 ..< num:
result[i] = ' ' result[i] = ' '
func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian) = func appendHex*(accum: var string, f: ExtensionField, indent = 0, order: static Endianness = bigEndian) =
## Hex accumulator ## Hex accumulator
accum.add static($f.typeof.genericHead() & '(') accum.add static($f.typeof.genericHead() & '(')
staticFor i, 0, f.coords.len: staticFor i, 0, f.coords.len:
@ -46,7 +46,7 @@ func appendHex*(accum: var string, f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, ord
accum.appendHex(f.coords[i], indent+2, order) accum.appendHex(f.coords[i], indent+2, order)
accum.add ")" accum.add ")"
func toHex*(f: Fp2 or Fp4 or Fp6 or Fp12, indent = 0, order: static Endianness = bigEndian): string = func toHex*(f: ExtensionField, indent = 0, order: static Endianness = bigEndian): string =
## Stringify a tower field element to hex. ## Stringify a tower field element to hex.
## Note. Leading zeros are not removed. ## Note. Leading zeros are not removed.
## Result is prefixed with 0x ## Result is prefixed with 0x

View File

@ -11,7 +11,7 @@ import
../../platforms/primitives, ../../platforms/primitives,
../arithmetic, ../arithmetic,
../extension_fields, ../extension_fields,
../curves/zoo_frobenius ../constants/zoo_frobenius
# Frobenius Map # Frobenius Map
# ------------------------------------------------------------ # ------------------------------------------------------------
@ -79,17 +79,24 @@ func frobenius_map*[C](r: var Fp6[C], a: Fp6[C], k: static int = 1) {.inline.} =
func frobenius_map*[C](r: var Fp12[C], a: Fp12[C], k: static int = 1) {.inline.} = func frobenius_map*[C](r: var Fp12[C], a: Fp12[C], k: static int = 1) {.inline.} =
## Computes a^(pᵏ) ## Computes a^(pᵏ)
## The p-power frobenius automorphism on 𝔽p12 ## The p-power frobenius automorphism on 𝔽p12
static: doAssert r.c0 is Fp4
staticFor i, 0, r.coords.len: staticFor i, 0, r.coords.len:
staticFor j, 0, r.coords[0].coords.len: staticFor j, 0, r.coords[0].coords.len:
r.coords[i].coords[j].frobenius_map(a.coords[i].coords[j], k) r.coords[i].coords[j].frobenius_map(a.coords[i].coords[j], k)
r.c0.c0.mulCheckSparse frobMapConst(C, 0, k) when r.c0 is Fp4:
r.c0.c1.mulCheckSparse frobMapConst(C, 3, k) r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
r.c1.c0.mulCheckSparse frobMapConst(C, 1, k) r.c0.c1.mulCheckSparse frobMapConst(C, 3, k)
r.c1.c1.mulCheckSparse frobMapConst(C, 4, k) r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
r.c2.c0.mulCheckSparse frobMapConst(C, 2, k) r.c1.c1.mulCheckSparse frobMapConst(C, 4, k)
r.c2.c1.mulCheckSparse frobMapConst(C, 5, k) r.c2.c0.mulCheckSparse frobMapConst(C, 2, k)
r.c2.c1.mulCheckSparse frobMapConst(C, 5, k)
else:
r.c0.c0.mulCheckSparse frobMapConst(C, 0, k)
r.c0.c1.mulCheckSparse frobMapConst(C, 2, k)
r.c0.c2.mulCheckSparse frobMapConst(C, 4, k)
r.c1.c0.mulCheckSparse frobMapConst(C, 1, k)
r.c1.c1.mulCheckSparse frobMapConst(C, 3, k)
r.c1.c2.mulCheckSparse frobMapConst(C, 5, k)
# ψ (Psi) - Untwist-Frobenius-Twist Endomorphisms on twisted curves # ψ (Psi) - Untwist-Frobenius-Twist Endomorphisms on twisted curves
# ----------------------------------------------------------------- # -----------------------------------------------------------------

View File

@ -215,7 +215,7 @@ func cyclotomic_inv*[FT](r: var FT, a: FT) {.meter.} =
## consequently `a` MUST be unitary ## consequently `a` MUST be unitary
r.conj(a) r.conj(a)
func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} = func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) =
## Square `a` into `r` ## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup ## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary ## consequently `a` MUST be unitary
@ -224,47 +224,147 @@ func cyclotomic_square*[FT](r: var FT, a: FT) {.meter.} =
# Granger, Scott, 2009 # Granger, Scott, 2009
# https://eprint.iacr.org/2009/565.pdf # https://eprint.iacr.org/2009/565.pdf
# Cubic extension field
# A = 3a² 2 ̄a
# B = 3 √i c² + 2 ̄b
# C = 3b² 2 ̄c
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0)
template a0: untyped = a.c0.c0
template a1: untyped = a.c0.c1
template a2: untyped = a.c1.c0
template a3: untyped = a.c1.c1
template a4: untyped = a.c2.c0
template a5: untyped = a.c2.c1
v0.square(a.c0)
v1.square(a.c1)
v2.square(a.c2)
# From here on, r aliasing with a is only for the first operation
# and only read/write the exact same coordinates
# 3v₀₀ - 2a₀
r.c0.c0.diff(v0.c0, a0)
r.c0.c0.double()
r.c0.c0 += v0.c0
# 3v₀₁ + 2a₁
r.c0.c1.sum(v0.c1, a1)
r.c0.c1.double()
r.c0.c1 += v0.c1
# 3v₁₀ - 2a₄
r.c2.c0.diff(v1.c0, a4)
r.c2.c0.double()
r.c2.c0 += v1.c0
# 3v₁₁ + 2b₅
r.c2.c1.sum(v1.c1, a5)
r.c2.c1.double()
r.c2.c1 += v1.c1
# Now B = 3 √i c² + 2 ̄b
# beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
# 3 (√i c²)₀ + 2a₂
v2.c1 *= NonResidue
r.c1.c0.sum(v2.c1, a2)
r.c1.c0.double()
r.c1.c0 += v2.c1
# 3 (√i c²)₁ - 2a₃
r.c1.c1.diff(v2.c0, a3)
r.c1.c1.double()
r.c1.c1 += v2.c0
func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt[F]) =
## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
# Mapping between towering schemes
# --------------------------------
#
# canonical <=> cubic over quadratic <=> quadratic over cubic
# c₀ <=> a₀ <=> b₀
# c₁ <=> a₂ <=> b₃
# c₂ <=> a₄ <=> b₁
# c₃ <=> a₁ <=> b₄
# c₄ <=> a₃ <=> b₂
# c₅ <=> a₅ <=> b₅
#
# Hence, this formula for a cubic extension field
# A = 3a² 2 ̄a
# B = 3 √i c² + 2 ̄b
# C = 3b² 2 ̄c
#
# becomes
# A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄)
# B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
# C = (b₁, b₅) = 3(b₃, b₂)² - 2(b₁, -b₅)
#
# with
# v₀ = (b₀, b₄) = (a.c0.c0, a.c1.c1)
# v₁ = (b₃, b₂) = (a.c1.c0, a.c0.c2)
# v₂ = (b₁, b₅) = (a.c0.c1, a.c1.c2)
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: QuadraticExt[typeof(r.c0.c0)]
template b0: untyped = a.c0.c0
template b1: untyped = a.c0.c1
template b2: untyped = a.c0.c2
template b3: untyped = a.c1.c0
template b4: untyped = a.c1.c1
template b5: untyped = a.c1.c2
v0.square_disjoint(b0, b4)
v1.square_disjoint(b3, b2)
v2.square_disjoint(b1, b5)
# From here on, r aliasing with a is only for the first operation
# and only read/write the exact same coordinates
# 3v₀₀ - 2b₀
r.c0.c0.diff(v0.c0, b0)
r.c0.c0.double()
r.c0.c0 += v0.c0
# 3v₁₀ - 2b₁
r.c0.c1.diff(v1.c0, b1)
r.c0.c1.double()
r.c0.c1 += v1.c0
# 3v₀₁ + 2b₄
r.c1.c1.sum(v0.c1, b4)
r.c1.c1.double()
r.c1.c1 += v0.c1
# 3v₁₁ + 2b₅
r.c1.c2.sum(v1.c1, b5)
r.c1.c2.double()
r.c1.c2 += v1.c1
# Now B = (b₃, b₂) = 3 √i(b₁, b₅)² + 2(b₃, -b₂)
# beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i
# 3 (√i (b₁, b₅)²)₀ + 2b₃
v2.c1 *= NonResidue
r.c1.c0.sum(v2.c1, b3)
r.c1.c0.double()
r.c1.c0 += v2.c1
# 3 (√i (b₁, b₅)²)₁ - 2b₃
r.c0.c2.diff(v2.c0, b2)
r.c0.c2.double()
r.c0.c2 += v2.c0
func cyclotomic_square*[FT](r: var FT, a: FT) {.inline, meter.} =
## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary
#
# Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions
# Granger, Scott, 2009
# https://eprint.iacr.org/2009/565.pdf
when a is CubicExt: when a is CubicExt:
# Cubic extension field r.cyclotomic_square_cube_over_quad(a)
# A = 3a² 2 ̄a
# B = 3 √i c² + 2 ̄b
# C = 3b² 2 ̄c
var t0{.noinit.}, t1{.noinit.}: typeof(a.c0)
t0.square(a.c0) # t0 = a²
t1.double(t0) # t1 = 2a²
t1 += t0 # t1 = 3a²
t0.conj(a.c0) # t0 = ̄a
t0.double() # t0 = 2 ̄a
r.c0.diff(t1, t0) # r0 = 3a² 2 ̄a
# Aliasing: a.c0 unused
t0.square(a.c2) # t0 = c²
t0 *= NonResidue # t0 = √i c²
t1.double(t0) # t1 = 2 √i c²
t0 += t1 # t0 = 3 √i c²
t1.square(a.c1) # t1 = b²
r.c1.conj(a.c1) # r1 = ̄b
r.c1.double() # r1 = 2 ̄b
r.c1 += t0 # r1 = 3 √i c² + 2 ̄b
# Aliasing: a.c1 unused
t0.double(t1) # t0 = 2b²
t0 += t1 # t0 = 3b²
t1.conj(a.c2) # r2 = ̄c
t1.double() # r2 = 2 ̄c
r.c2.diff(t0, t1) # r2 = 3b² - 2 ̄c
else: else:
{.error: "Not implemented".} r.cyclotomic_square_quad_over_cube(a)
func cyclotomic_square*[FT](a: var FT) {.meter.} = func cyclotomic_square*[FT](a: var FT) {.inline.} =
## Square `a` into `r` ## Square `a` into `r`
## `a` MUST be in the cyclotomic subgroup ## `a` MUST be in the cyclotomic subgroup
## consequently `a` MUST be unitary ## consequently `a` MUST be unitary
@ -560,7 +660,10 @@ func fromFpk*[Fpkdiv6, Fpk](
{.error: "a must be a sextic extension field".} {.error: "a must be a sextic extension field".}
elif a is QuadraticExt: elif a is QuadraticExt:
when a.c0 is CubicExt: when a.c0 is CubicExt:
{.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".} g.g2 = a.c1.c0
g.g3 = a.c0.c2
g.g4 = a.c0.c1
g.g5 = a.c1.c2
else: else:
{.error: "a must be a sextic extension field".} {.error: "a must be a sextic extension field".}
else: else:
@ -584,7 +687,12 @@ func asFpk*[Fpkdiv6, Fpk](
{.error: "a must be a sextic extension field".} {.error: "a must be a sextic extension field".}
elif a is QuadraticExt: elif a is QuadraticExt:
when a.c0 is CubicExt: when a.c0 is CubicExt:
{.error: "𝔽pᵏᐟ⁶ -> 𝔽pᵏᐟ³ -> 𝔽pᵏ towering (quadratic over cubic) is not implemented.".} a.c0.c0 = g0
a.c0.c1 = g.g4
a.c0.c2 = g.g3
a.c1.c0 = g.g2
a.c1.c1 = g1
a.c1.c2 = g.g5
else: else:
{.error: "a must be a sextic extension field".} {.error: "a must be a sextic extension field".}
else: else:
@ -598,9 +706,6 @@ func cyclotomic_exp_compressed*[N: static int, Fpk](
## Exponentiation is done least-signigicant bits first ## Exponentiation is done least-signigicant bits first
## `squarings` represents the number of squarings ## `squarings` represents the number of squarings
## to do before the next multiplication. ## to do before the next multiplication.
##
## `accumSquarings` stores the accumulated squarings so far
## iff N != 1
type Fpkdiv6 = typeof(a.c0.c0) type Fpkdiv6 = typeof(a.c0.c0)

View File

@ -17,7 +17,7 @@ import
ec_shortweierstrass_projective ec_shortweierstrass_projective
], ],
../io/io_extfields, ../io/io_extfields,
../curves/zoo_constants ../constants/zoo_constants
# No exceptions allowed # No exceptions allowed
{.push raises: [].} {.push raises: [].}
@ -390,44 +390,19 @@ func line_add*[F1, F2](
# D-Twist # D-Twist
# ------------------------------------------------------------ # ------------------------------------------------------------
func mul_by_line_xy0*[Fpkdiv2, Fpkdiv6]( func mul_sparse_by_line_a00bc0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
r: var Fpkdiv2,
a: Fpkdiv2,
x, y: Fpkdiv6) =
## Sparse multiplication of an 𝔽pᵏᐟ² element
## with coordinates (a₀, a₁, a₂) by a line (x, y, 0)
## The z coordinates in the line will be ignored.
## `r` and `a` must not alias
static:
doAssert a is CubicExt
doAssert a.c0 is Fpkdiv6
var
v0 {.noInit.}: Fpkdiv6
v1 {.noInit.}: Fpkdiv6
v0.prod(a.c0, x)
v1.prod(a.c1, y)
r.c0.prod(a.c2, y)
r.c0 *= SexticNonResidue
r.c0 += v0
r.c1.sum(a.c0, a.c1) # Error when r and a alias as r.c0 was updated
r.c2.sum(x, y)
r.c1 *= r.c2
r.c1 -= v0
r.c1 -= v1
r.c2.prod(a.c2, x)
r.c2 += v1
func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
## Sparse multiplication of an 𝔽pᵏ element ## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element coming from an D-Twist line function ## by a sparse 𝔽pᵏ element coming from an D-Twist line function
## With a quadratic over cubic towering (Fp2 -> Fp6 -> Fp12) ## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
## The sparse element is represented by a packed Line type ## The sparse element is represented by a packed Line type
## with coordinate (a,b,c) matching 𝔽pᵏ coordinates ab00c0 ## with coordinate (a,b,c) matching 𝔽pᵏ coordinates a00bc0
# a0b0 = (a00, a01, a02).( x, 0, 0)
# a1b1 = (a10, a11, a12).( y, z, 0)
#
# r0 = a0 b0 + ξ a1 b1
# r1 = a0 b1 + a1 b0 (Schoolbook)
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
static: static:
doAssert Fpk.C.getSexticTwist() == D_Twist doAssert Fpk.C.getSexticTwist() == D_Twist
@ -436,27 +411,290 @@ func mul_sparse_by_line_ab00c0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
type Fpkdiv2 = typeof(f.c0) type Fpkdiv2 = typeof(f.c0)
var when Fpk.C.has_large_field_elem():
v0 {.noInit.}: Fpkdiv2 var
v1 {.noInit.}: Fpkdiv2 v0 {.noInit.}: Fpkdiv2
v2 {.noInit.}: Line[Fpkdiv6] v1 {.noInit.}: Fpkdiv2
v3 {.noInit.}: Fpkdiv2 t {.noInit.}: Fpkdiv6
v0.mul_by_line_xy0(f.c0, l.a, l.b) v1.sum(f.c0, f.c1)
v1.mul_sparse_by_0y0(f.c1, l.c) t.sum(l.a, l.b)
v0.mul_sparse_by_xy0(v1, t, l.c)
v2.x = l.a v1.mul_sparse_by_xy0(f.c1, l.b, l.c)
v2.y.sum(l.b, l.c)
f.c1 += f.c0
v3.mul_by_line_xy0(f.c1, v2.x, v2.y)
v3 -= v0
f.c1.diff(v3, v1)
v3.c0.prod(v1.c2, SexticNonResidue) f.c1.diff(v0, v1)
v3.c0 += v0.c0
v3.c1.sum(v0.c1, v1.c0) v0.mul_sparse_by_x00(f.c0, l.a)
v3.c2.sum(v0.c2, v1.c1) f.c1 -= v0
f.c0 = v3
v1 *= NonResidue
f.c0.sum(v0, v1)
else: # Lazy reduction
var
V0 {.noInit.}: doubleprec(Fpkdiv2)
V1 {.noInit.}: doubleprec(Fpkdiv2)
V {.noInit.}: doubleprec(Fpkdiv2)
t {.noInit.}: Fpkdiv6
u {.noInit.}: Fpkdiv2
u.sum(f.c0, f.c1)
t.sum(l.a, l.b)
V.mul2x_sparse_by_xy0(u, t, l.c)
V1.mul2x_sparse_by_xy0(f.c1, l.b, l.c)
V0.mul2x_sparse_by_x00(f.c0, l.a)
V.diff2xMod(V, V1)
V.diff2xMod(V, V0)
f.c1.redc2x(V)
V1.prod2x(V1, NonResidue)
V0.sum2xMod(V0, V1)
f.c0.redc2x(V0)
func prod_x00yz0_x00yz0_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
## Multiply 2 lines together
## The result is sparse in f.
# Preambule
#
# A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
# (a0', a1', 0).(b0', b1', 0)
#
# r0' = a0'b0'
# r1' = a0'b1 + a1'b0'
# or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
# r2' = a1'b1
#
# Note¹ that the Karatsuba does not reuse terms!
#
# In the following equations (taken from quad extension implementation)
# a0 = ( x , 0 , 0)
# a1 = ( y , z , 0)
# b0 = ( x', 0, 0)
# b1 = ( y', z', 0)
#
# a0b0 = ( x, 0, 0).( x', 0 , 0) = (xx', 0, 0 )
# a1b1 = ( y, z, 0).( y', z', 0) = (yy', yz'+zy', zz')
# r0 = a0 b0 + ξ a1 b1
# r1 = a0 b1 + a1 b0 (Schoolbook)
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# Note² that Karatsuba doesn't help as a0 and b0 are actually very cheap (x, 0, 0)
#
# Replacing by the coordinates in the lower tower we get
#
# r0 = (xx'+ξzz', yy', yz'+zy')
# r1 = ( x, 0, 0)( y', z', 0) + ( y , z , 0)( x', 0, 0)
# = (xy'+yx', xz'+zx', 0)
#
# Now we can apply Karasuba¹² for a total of 6 multiplications
static:
doAssert Fpk.C.getSexticTwist() == D_Twist
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
XX.prod2x(l0.a, l1.a)
YY.prod2x(l0.b, l1.b)
ZZ.prod2x(l0.c, l1.c)
var V{.noInit.}: doublePrec(Fpkdiv6)
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
# r0 = (xx'+ξzz', yy', yz'+zy')
# r00 = xx'+ξzz''
# r01 = yy'
# r02 = yz'+zy' = (y+z)(y'+z') - yy' - zz'
V.prod2x(ZZ, NonResidue)
V.sum2xMod(V,XX)
f.c0.c0.redc2x(V)
f.c0.c1.redc2x(YY)
t0.sum(l0.b, l0.c)
t1.sum(l1.b, l1.c)
V.prod2x(t0, t1)
V.diff2xMod(V, YY)
V.diff2xMod(V, ZZ)
f.c0.c2.redc2x(V)
# r1 = (xy'+yx', xz'+zx', 0)
# r10 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
# r11 = xz'+zx' = (x+z)(x'+z') - xx' - zz'
# r12 = 0
t0.sum(l0.a, l0.b)
t1.sum(l1.a, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, XX)
V.diff2xMod(V, YY)
f.c1.c0.redc2x(V)
t0.sum(l0.a, l0.c)
t1.sum(l1.a, l1.c)
V.prod2x(t0, t1)
V.diff2xMod(V, XX)
V.diff2xMod(V, ZZ)
f.c1.c1.redc2x(V)
f.c1.c2.setZero()
# M-Twist
# ------------------------------------------------------------
func mul_sparse_by_line_cb00a0*[Fpk, Fpkdiv6](f: var Fpk, l: Line[Fpkdiv6]) =
## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element coming from an M-Twist line function
## with a cubic over quadratic towering (𝔽p2 -> 𝔽p6 -> 𝔽p12)
## The sparse element is represented by a packed Line type
## with coordinate (a,b,c) matching 𝔽pᵏ coordinates cb00a0
# a0b0 = (a00, a01, a02).( z, y, 0)
# a1b1 = (a10, a11, a12).( 0, x, 0)
#
# r0 = a0 b0 + ξ a1 b1
# r1 = a0 b1 + a1 b0 (Schoolbook)
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
static:
doAssert Fpk.C.getSexticTwist() == M_Twist
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv2 = typeof(f.c0)
when Fpk.C.has_large_field_elem():
var
v0 {.noInit.}: Fpkdiv2
v1 {.noInit.}: Fpkdiv2
t {.noInit.}: Fpkdiv6
v1.sum(f.c0, f.c1)
t.sum(l.b, l.a)
v0.mul_sparse_by_xy0(v1, l.c, t)
v1.mul_sparse_by_0y0(f.c1, l.a)
f.c1.diff(v0, v1)
v0.mul_sparse_by_xy0(f.c0, l.c, l.b)
f.c1 -= v0
v1 *= NonResidue
f.c0.sum(v0, v1)
else: # Lazy reduction
var
V0 {.noInit.}: doubleprec(Fpkdiv2)
V1 {.noInit.}: doubleprec(Fpkdiv2)
V {.noInit.}: doubleprec(Fpkdiv2)
t {.noInit.}: Fpkdiv6
u {.noInit.}: Fpkdiv2
u.sum(f.c0, f.c1)
t.sum(l.b, l.a)
V.mul2x_sparse_by_xy0(u, l.c, t)
V1.mul2x_sparse_by_0y0(f.c1, l.a)
V0.mul2x_sparse_by_xy0(f.c0, l.c, l.b)
V.diff2xMod(V, V1)
V.diff2xMod(V, V0)
f.c1.redc2x(V)
V1.prod2x(V1, NonResidue)
V0.sum2xMod(V0, V1)
f.c0.redc2x(V0)
func prod_zy00x0_zy00x0_into_abcdef00ghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Line[Fpkdiv6]) =
## Multiply 2 lines together
## The result is sparse in f.c1.c0
# Preambule
#
# A sparse xy0 by xy0 multiplication in 𝔽pᵏᐟ² (cubic) would be
# (a0', a1', 0).(b0', b1', 0)
#
# r0' = a0'b0'
# r1' = a0'b1 + a1'b0'
# or (a0'+b1')(a1'+b0')-a0'a1'-b0'b1'
# r2' = a1'b1
#
# Note¹ that the Karatsuba does not reuse terms!
#
# In the following equations (taken from quad extension implementation)
# a0 = ( z , y , 0)
# a1 = ( 0 , x , 0)
# b0 = ( z', y', 0)
# b1 = ( 0 , x', 0)
#
# a0b0 = ( z, y, 0).(z', y', 0) = (zz', zy'+yz', yy')
# a1b1 = ( 0, x, 0).( 0, x', 0) = ( 0, 0, xx')
# r0 = a0 b0 + ξ a1 b1
# r1 = a0 b1 + a1 b0 (Schoolbook)
# = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1 (Karatsuba)
#
# Note² that Karatsuba doesn't help as a1 and b1 are actually very cheap (0, x, 0)
#
# Replacing by the coordinates in the lower tower we get
#
# r0 = (zz'+ξxx', zy'+z'y, yy')
# r1 = (z, y, 0)(0, x', 0) + (z', y', 0)(0, x, 0)
# = (0, zx'+xz', yx'+xy')
#
# Now we can apply Karasuba¹² for a total of 6 multiplications
static:
doAssert Fpk.C.getSexticTwist() == M_Twist
doAssert f is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
doAssert f.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
XX.prod2x(l0.a, l1.a)
YY.prod2x(l0.b, l1.b)
ZZ.prod2x(l0.c, l1.c)
var V{.noInit.}: doublePrec(Fpkdiv6)
var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
# r0 = (zz'+ξxx', zy'+z'y, yy')
# r00 = zz'+ξxx'
# r01 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
# r02 = yy'
V.prod2x(XX, NonResidue)
V.sum2xMod(V, ZZ)
f.c0.c0.redc2x(V)
t0.sum(l0.c, l0.b)
t1.sum(l1.c, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, YY)
f.c0.c1.redc2x(V)
f.c0.c2.redc2x(YY)
# r1 = (0, zx'+xz', yx'+xy')
# r10 = 0
# r11 = zx'+xz' = (z+x)(z'+x') - zz' - xx'
# r12 = xy'+yx' = (x+y)(x'+y') - xx' - yy'
f.c1.c0.setZero()
t0.sum(l0.c, l0.a)
t1.sum(l1.c, l1.a)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, XX)
f.c1.c1.redc2x(V)
t0.sum(l0.a, l0.b)
t1.sum(l1.a, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, XX)
V.diff2xMod(V, YY)
f.c1.c2.redc2x(V)
# ############################################################ # ############################################################
# #
@ -556,16 +794,16 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
## Multiply 2 lines together ## Multiply 2 lines together
## The result is sparse in f.c1.c1 ## The result is sparse in f.c1.c1
# In the following equations (taken from cubic extension implementation) # In the following equations (taken from cubic extension implementation)
# a0 = (x0, z0) # a0 = ( x, z)
# a1 = (y0, 0) # a1 = ( y, 0)
# a2 = ( 0, 0) # a2 = ( 0, 0)
# b0 = (x1, z1) # b0 = (x', z')
# b1 = (y1, 0) # b1 = (y', 0)
# b2 = ( 0, 0) # b2 = ( 0, 0)
# #
# v0 = a0 b0 = (x0, z0).(x1, z1) # v0 = a0 b0 = (x, z).(x', z')
# v1 = a1 b1 = (y0, 0).(y1, 0) # v1 = a1 b1 = (y, 0).(y', 0)
# v2 = a2 b2 = ( 0, 0).( 0, 0) # v2 = a2 b2 = (0, 0).( 0, 0)
# #
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
# = ξ (a1 b1 + a2 b1 - v1) + v0 # = ξ (a1 b1 + a2 b1 - v1) + v0
@ -575,106 +813,65 @@ func prod_xzy000_xzy000_into_abcdefghij00*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
# = a0 b0 - v0 + v1 # = a0 b0 - v0 + v1
# = v1 # = v1
#
# r1 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
#
# If we dive deeper:
# r1 = a0b1 + a1b0 = (xy'+yx', zy'+yz') (Schoolbook - 4 muls)
# r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
# r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
#
# The substraction are all computed as part of v0 and v1
# hence r1 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
static: static:
doAssert Fpk.C.getSexticTwist() == D_Twist doAssert Fpk.C.getSexticTwist() == D_Twist
doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv3 = typeof(f.c0) var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3) XX.prod2x(l0.a, l1.a)
var V1{.noInit.}: doublePrec(Fpkdiv6) YY.prod2x(l0.b, l1.b)
ZZ.prod2x(l0.c, l1.c)
V0.prod2x_disjoint(l0.a, l0.c, l1.a, l1.c) # a0 b0 = (x0, z0).(x1, z1) var V{.noInit.}: doublePrec(Fpkdiv6)
V1.prod2x(l0.b, l1.b) # a1 b1 = (y0, 0).(y1, 0) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
# r1 = (a0 + a1) * (b0 + b1) - v0 - v1 # r0 = a0 b0 = (x, z).(x', z')
f.c1.c0.sum(l0.a, l0.b) # x0 + y0 # r00 = xx' + βzz'
f.c1.c1.sum(l1.a, l1.b) # x1 + y1 # r01 = (x+z)(x'+z') - zz' - xx'
f2x.prod2x_disjoint(f.c1.c0, l0.c, f.c1.c1, l1.c) # (x0 + y0, z0)(x1 + y1, z1) = (a0 + a1) * (b0 + b1) V.prod2x(ZZ, NonResidue)
f2x.diff2xMod(f2x, V0) V.sum2xMod(V, XX)
f2x.c0.diff2xMod(f2x.c0, V1) f.c0.c0.redc2x(V)
f.c1.redc2x(f2x)
t0.sum(l0.a, l0.c)
t1.sum(l1.a, l1.c)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, XX)
f.c0.c1.redc2x(V)
# r0 = v0 # r10 = zy'+yz' = (x+y)(x'+y') - xx' - yy'
f.c0.redc2x(V0) # r11 = xy'+yx' = (z+y)(z'+y') - zz' - yy'
t0.sum(l0.a, l0.b)
t1.sum(l1.a, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, XX)
V.diff2xMod(V, YY)
f.c1.c0.redc2x(V)
# r2 = v1 t0.sum(l0.c, l0.b)
f.c2.c0.redc2x(V1) t1.sum(l1.c, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, YY)
f.c1.c1.redc2x(V)
# r2 = v2 = (y, 0).(y', 0) = (yy', 0)
f.c2.c0.redc2x(YY)
f.c2.c1.setZero() f.c2.c1.setZero()
func mul_sparse_by_abcdefghij00*[Fpk](
a: var Fpk, b: Fpk) =
## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element abcdefghij00
## with each representing 𝔽pᵏᐟ⁶ coordinate
static:
doAssert Fpk.C.getSexticTwist() == D_Twist
doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv3 = typeof(a.c0)
# In the following equations (taken from cubic extension implementation)
# b0 = (b00, b01)
# b1 = (b10, b11)
# b2 = (b20, 0)
#
# v0 = a0 b0 = (f00, f01).(b00, b01)
# v1 = a1 b1 = (f10, f11).(b10, b11)
# v2 = a2 b2 = (f20, f21).(b20, 0)
#
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
V0.prod2x(a.c0, b.c0)
V1.prod2x(a.c1, b.c1)
V2.mul2x_sparse_by_x0(a.c2, b.c2)
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
t0.sum(a.c1, a.c2)
t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0)
f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
f2x.diff2xMod(f2x, V1)
f2x.diff2xMod(f2x, V2)
f2x.prod2x(f2x, NonResidue)
f2x.sum2xMod(f2x, V0)
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
t0.sum(a.c0, a.c1)
t1.sum(b.c0, b.c1)
g2x.prod2x(t0, t1)
g2x.diff2xMod(g2x, V0)
g2x.diff2xMod(g2x, V1)
# r₂ = (a₀ + a₂) and (b₀ + b₂)
t0.sum(a.c0, a.c2)
t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0)
# Now we are aliasing free
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
a.c0.redc2x(f2x)
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
f2x.prod2x(V2, NonResidue)
g2x.sum2xMod(g2x, f2x)
a.c1.redc2x(g2x)
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
f2x.diff2xMod(f2x, V0)
f2x.diff2xMod(f2x, V2)
f2x.sum2xMod(f2x, V1)
a.c2.redc2x(f2x)
# M-Twist # M-Twist
# ------------------------------------------------------------ # ------------------------------------------------------------
@ -769,16 +966,16 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
## Multiply 2 lines together ## Multiply 2 lines together
## The result is sparse in f.c1.c1 ## The result is sparse in f.c1.c1
# In the following equations (taken from cubic extension implementation) # In the following equations (taken from cubic extension implementation)
# a0 = (z0, x0) # a0 = ( z, x)
# a1 = ( 0, 0) # a1 = ( 0, 0)
# a2 = (y0, 0) # a2 = ( y, 0)
# b0 = (z1, x1) # b0 = (z', x')
# b1 = ( 0, 0) # b1 = ( 0, 0)
# b2 = (y0, 0) # b2 = (y', 0)
# #
# v0 = a0 b0 = (z0, x0).(z1, x1) # v0 = a0 b0 = (z, x).(z', x')
# v1 = a1 b1 = ( 0, 0).( 0, 0) # v1 = a1 b1 = (0, 0).(0 , 0)
# v2 = a2 b2 = (y0, 0).(y1, 0) # v2 = a2 b2 = (y, 0).(y', 0)
# #
# r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0 # r0 = ξ ((a1 + a2) * (b1 + b2) - v1 - v2) + v0
# = ξ (a1 b2 + a2 b2 - v2) + v0 # = ξ (a1 b2 + a2 b2 - v2) + v0
@ -788,36 +985,221 @@ func prod_zx00y0_zx00y0_into_abcd00efghij*[Fpk, Fpkdiv6](f: var Fpk, l0, l1: Lin
# = ξ v2 # = ξ v2
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1 # r2 = (a0 + a2) * (b0 + b2) - v0 - v2 + v1
# = (a0 + a2) * (b0 + b2) - v0 - v2 # = (a0 + a2) * (b0 + b2) - v0 - v2
#
# r2 is computed in 3 extra muls in the base field 𝔽pᵏᐟ⁶ (Karatsuba product in quadratic field)
#
# If we dive deeper:
# r2 = a0b2 + a2b0 = (zy'+yz', xy'+x'y) (Schoolbook - 4 muls)
# r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
# r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
#
# The substraction are all computed as part of v0 and v2
# hence r2 can be compute for 2 extra muls in 𝔽pᵏᐟ⁶ only
static: static:
doAssert Fpk.C.getSexticTwist() == M_Twist doAssert Fpk.C.getSexticTwist() == M_Twist
doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³" doAssert f is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶" doAssert f.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv3 = typeof(f.c0) var XX{.noInit.}, YY{.noInit.}, ZZ{.noInit.}: doublePrec(Fpkdiv6)
var V0{.noInit.}, f2x{.noInit.}: doublePrec(Fpkdiv3) XX.prod2x(l0.a, l1.a)
var V2{.noInit.}: doublePrec(Fpkdiv6) YY.prod2x(l0.b, l1.b)
ZZ.prod2x(l0.c, l1.c)
V0.prod2x_disjoint(l0.c, l0.a, l1.c, l1.a) # a0 b0 = (z0, x0).(z1, x1) var V{.noInit.}: doublePrec(Fpkdiv6)
V2.prod2x(l0.b, l1.b) # a2 b2 = (y0, 0).(y1, 0) var t0{.noInit.}, t1{.noInit.}: Fpkdiv6
# r2 = (a0 + a2) * (b0 + b2) - v0 - v2 # r0 = a0 b0 = (z, x).(z', x')
f.c2.c0.sum(l0.b, l0.c) # y0 + z0 # r00 = zz' + βxx'
f.c2.c1.sum(l1.b, l1.c) # y1 + z1 # r01 = (z+x)(z'+x') - zz' - xx'
f2x.prod2x_disjoint(f.c2.c0, l0.a, f.c2.c1, l1.a) # (z0 + y0, x0).(z1 + y1, x1) = (a0 + a2) * (b0 + b2) V.prod2x(XX, NonResidue)
f2x.diff2xMod(f2x, V0) # (a0 + a2) * (b0 + b2) - v0 V.sum2xMod(V, ZZ)
f2x.c0.diff2xMod(f2x.c0, V2) # (a0 + a2) * (b0 + b2) - v0 - v2 f.c0.c0.redc2x(V)
f.c2.redc2x(f2x)
t0.sum(l0.c, l0.a)
t1.sum(l1.c, l1.a)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, XX)
f.c0.c1.redc2x(V)
# r1 = ξ v2 # r1 = ξ v2 = ξ(y, 0).(y', 0) = ξ(yy', 0) = (0, yy')
f.c1.c1.redc2x(V2)
f.c1.c0.setZero() f.c1.c0.setZero()
f.c1.c1.redc2x(YY)
# r0 = v0 # r20 = zy'+z'y = (z+y)(z'+y') - zz' - yy'
f.c0.redc2x(V0) # r21 = xy'+x'y = (x+y)(x'+y') - xx' - yy'
t0.sum(l0.c, l0.b)
t1.sum(l1.c, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, ZZ)
V.diff2xMod(V, YY)
f.c2.c0.redc2x(V)
func mul_sparse_by_abcd00efghij*[Fpk]( t0.sum(l0.a, l0.b)
t1.sum(l1.a, l1.b)
V.prod2x(t0, t1)
V.diff2xMod(V, XX)
V.diff2xMod(V, YY)
f.c2.c1.redc2x(V)
# ############################################################
#
# Sparse multiplication
#
# ############################################################
func mul_sparse_by_abcdefghij00_quad_over_cube*[Fpk](
a: var Fpk, b: Fpk) =
## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element abcdefghij00
## with each representing 𝔽pᵏᐟ⁶ coordinate
static:
doAssert Fpk.C.getSexticTwist() == D_Twist
doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv2 = typeof(a.c0)
# In the following equations (taken from quadratic extension implementation)
# b0 = (b00, b01, b02)
# b1 = (b10, b11, 0)
#
# v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
# v1 = a1 b1 = (a10, a11, a12).(b10, b11, 0)
#
# r0 = a0 b0 + ξ a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
when Fpk.C.has_large_field_elem():
var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
# v2 <- (a0 + a1)(b0 + b1)
v0.sum(a.c0, a.c1)
v1.c0.sum(b.c0.c0, b.c1.c0)
v1.c1.sum(b.c0.c1, b.c1.c1)
v1.c2 = b.c0.c2
v2.prod(v0, v1)
# v0 <- a0 b0
# v1 <- a1 b1
v0.prod(a.c0, b.c0)
v1.mul_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
# aliasing: a and b unneeded now
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
a.c1.diff(v2, v0)
a.c1 -= v1
# r0 <- a0 b0 + β a1 b1
v1 *= NonResidue
a.c0.sum(v0, v1)
else: # Lazy reduction
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
# v2 <- (a0 + a1)(b0 + b1)
t0.sum(a.c0, a.c1)
t1.c0.sum(b.c0.c0, b.c1.c0)
t1.c1.sum(b.c0.c1, b.c1.c1)
t1.c2 = b.c0.c2
V2.prod2x(t0, t1)
# v0 <- a0 b0
# v1 <- a1 b1
V0.prod2x(a.c0, b.c0)
V1.mul2x_sparse_by_xy0(a.c1, b.c1.c0, b.c1.c1)
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
V2.diff2xMod(V2, V0)
V2.diff2xMod(V2, V1)
a.c1.redc2x(V2)
# r0 <- a0 b0 + β a1 b1
V1.prod2x(V1, NonResidue)
V2.sum2xMod(V0, V1)
a.c0.redc2x(V2)
func mul_sparse_by_abcdef00ghij_quad_over_cube*[Fpk](
a: var Fpk, b: Fpk) =
## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element abcdef00ghij
## with each representing 𝔽pᵏᐟ⁶ coordinate
static:
doAssert Fpk.C.getSexticTwist() == M_Twist
doAssert a is QuadraticExt, "This assumes 𝔽pᵏ as a quadratic extension of 𝔽pᵏᐟ²"
doAssert a.c0 is CubicExt, "This assumes 𝔽pᵏᐟ² as a cubic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv2 = typeof(a.c0)
# In the following equations (taken from quadratic extension implementation)
# b0 = (b00, b01, b02)
# b1 = ( 0, b11, b12)
#
# v0 = a0 b0 = (a00, a01, a02).(b00, b01, b02)
# v1 = a1 b1 = (a10, a11, a12).( 0, b11, b12)
#
# r0 = a0 b0 + ξ a1 b1
# r1 = (a0 + a1) (b0 + b1) - a0 b0 - a1 b1
when Fpk.C.has_large_field_elem():
var v0 {.noInit.}, v1 {.noInit.}, v2 {.noInit.}: Fpkdiv2
# v2 <- (a0 + a1)(b0 + b1)
v0.sum(a.c0, a.c1)
v1.c0 = b.c0.c0
v1.c1.sum(b.c0.c1, b.c1.c1)
v1.c2.sum(b.c0.c2, b.c1.c2)
v2.prod(v0, v1)
# v0 <- a0 b0
# v1 <- a1 b1
v0.prod(a.c0, b.c0)
v1.mul_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
# aliasing: a and b unneeded now
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
a.c1.diff(v2, v0)
a.c1 -= v1
# r0 <- a0 b0 + β a1 b1
v1 *= NonResidue
a.c0.sum(v0, v1)
else: # Lazy reduction
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noInit.}: doublePrec(Fpkdiv2)
var t0{.noInit.}, t1{.noInit.}: Fpkdiv2
# v2 <- (a0 + a1)(b0 + b1)
t0.sum(a.c0, a.c1)
t1.c0 = b.c0.c0
t1.c1.sum(b.c0.c1, b.c1.c1)
t1.c2.sum(b.c0.c2, b.c1.c2)
V2.prod2x(t0, t1)
# v0 <- a0 b0
# v1 <- a1 b1
V0.prod2x(a.c0, b.c0)
V1.mul2x_sparse_by_0yz(a.c1, b.c1.c1, b.c1.c2)
# r1 <- (a0 + a1)(b0 + b1) - a0 b0 - a1 b1
V2.diff2xMod(V2, V0)
V2.diff2xMod(V2, V1)
a.c1.redc2x(V2)
# r0 <- a0 b0 + β a1 b1
V1.prod2x(V1, NonResidue)
V2.sum2xMod(V0, V1)
a.c0.redc2x(V2)
func mul_sparse_by_abcd00efghij_cube_over_quad*[Fpk](
a: var Fpk, b: Fpk) = a: var Fpk, b: Fpk) =
## Sparse multiplication of an 𝔽pᵏ element ## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element abcd00efghij ## by a sparse 𝔽pᵏ element abcd00efghij
@ -888,15 +1270,92 @@ func mul_sparse_by_abcd00efghij*[Fpk](
f2x.sum2xMod(f2x, V1) f2x.sum2xMod(f2x, V1)
a.c2.redc2x(f2x) a.c2.redc2x(f2x)
func mul_sparse_by_abcdefghij00_cube_over_quad*[Fpk](
a: var Fpk, b: Fpk) =
## Sparse multiplication of an 𝔽pᵏ element
## by a sparse 𝔽pᵏ element abcdefghij00
## with each representing 𝔽pᵏᐟ⁶ coordinate
static:
doAssert Fpk.C.getSexticTwist() == D_Twist
doAssert a is CubicExt, "This assumes 𝔽pᵏ as a cubic extension of 𝔽pᵏᐟ³"
doAssert a.c0 is QuadraticExt, "This assumes 𝔽pᵏᐟ³ as a quadratic extension of 𝔽pᵏᐟ⁶"
type Fpkdiv3 = typeof(a.c0)
# In the following equations (taken from cubic extension implementation)
# b0 = (b00, b01)
# b1 = (b10, b11)
# b2 = (b20, 0)
#
# v0 = a0 b0 = (f00, f01).(b00, b01)
# v1 = a1 b1 = (f10, f11).(b10, b11)
# v2 = a2 b2 = (f20, f21).(b20, 0)
#
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
var V0 {.noInit.}, V1 {.noInit.}, V2 {.noinit.}: doublePrec(Fpkdiv3)
var t0 {.noInit.}, t1 {.noInit.}: Fpkdiv3
var f2x{.noInit.}, g2x {.noinit.}: doublePrec(Fpkdiv3)
V0.prod2x(a.c0, b.c0)
V1.prod2x(a.c1, b.c1)
V2.mul2x_sparse_by_x0(a.c2, b.c2)
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
t0.sum(a.c1, a.c2)
t1.c0.sum(b.c1.c0, b.c2.c0) # b₂ = (b20, 0)
f2x.prod2x_disjoint(t0, t1.c0, b.c1.c1) # (a₁ + a₂).(b₁ + b₂)
f2x.diff2xMod(f2x, V1)
f2x.diff2xMod(f2x, V2)
f2x.prod2x(f2x, NonResidue)
f2x.sum2xMod(f2x, V0)
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁
t0.sum(a.c0, a.c1)
t1.sum(b.c0, b.c1)
g2x.prod2x(t0, t1)
g2x.diff2xMod(g2x, V0)
g2x.diff2xMod(g2x, V1)
# r₂ = (a₀ + a₂) and (b₀ + b₂)
t0.sum(a.c0, a.c2)
t1.c0.sum(b.c0.c0, b.c2.c0) # b₂ = (b20, 0)
# Now we are aliasing free
# r₀ = ξ ((a₁ + a₂)(b₁ + b₂) - v₁ - v₂) + v₀
a.c0.redc2x(f2x)
# r₁ = (a₀ + a₁) * (b₀ + b₁) - v₀ - v₁ + β v₂
f2x.prod2x(V2, NonResidue)
g2x.sum2xMod(g2x, f2x)
a.c1.redc2x(g2x)
# r₂ = (a₀ + a₂) * (b₀ + b₂) - v₀ - v₂ + v₁
f2x.prod2x_disjoint(t0, t1.c0, b.c0.c1)
f2x.diff2xMod(f2x, V0)
f2x.diff2xMod(f2x, V2)
f2x.sum2xMod(f2x, V1)
a.c2.redc2x(f2x)
# Dispatch # Dispatch
# ------------------------------------------------------------ # ------------------------------------------------------------
func mul_by_line*[Fpk, Fpkdiv6](f: var Fpk, line: Line[Fpkdiv6]) {.inline.} = func mul_by_line*[Fpk, Fpkdiv6](f: var Fpk, line: Line[Fpkdiv6]) {.inline.} =
## Multiply an element of Fp12 by a sparse line function ## Multiply an element of Fp12 by a sparse line function
when Fpk.C.getSexticTwist() == D_Twist: when Fpk.C.getSexticTwist() == D_Twist:
f.mul_sparse_by_line_acb000(line) when f is CubicExt:
f.mul_sparse_by_line_acb000(line)
else:
f.mul_sparse_by_line_a00bc0(line)
elif Fpk.C.getSexticTwist() == M_Twist: elif Fpk.C.getSexticTwist() == M_Twist:
f.mul_sparse_by_line_ca00b0(line) when f is CubicExt:
f.mul_sparse_by_line_ca00b0(line)
else:
f.mul_sparse_by_line_cb00a0(line)
else: else:
{.error: "A line function assumes that the curve has a twist".} {.error: "A line function assumes that the curve has a twist".}
@ -905,18 +1364,30 @@ func prod_from_2_lines*[Fpk, Fpkdiv6](f: var Fpk, line0, line1: Line[Fpkdiv6]) {
## and store the result in f ## and store the result in f
## f is overwritten ## f is overwritten
when Fpk.C.getSexticTwist() == D_Twist: when Fpk.C.getSexticTwist() == D_Twist:
f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1) when f is CubicExt:
f.prod_xzy000_xzy000_into_abcdefghij00(line0, line1)
else:
f.prod_x00yz0_x00yz0_into_abcdefghij00(line0, line1)
elif Fpk.C.getSexticTwist() == M_Twist: elif Fpk.C.getSexticTwist() == M_Twist:
f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1) when f is CubicExt:
f.prod_zx00y0_zx00y0_into_abcd00efghij(line0, line1)
else:
f.prod_zy00x0_zy00x0_into_abcdef00ghij(line0, line1)
else: else:
{.error: "A line function assumes that the curve has a twist".} {.error: "A line function assumes that the curve has a twist".}
func mul_by_prod_of_2_lines*[Fpk](f: var Fpk, g: Fpk) {.inline.} = func mul_by_prod_of_2_lines*[Fpk](f: var Fpk, g: Fpk) {.inline.} =
## Multiply f by the somewhat sparse product of 2 lines ## Multiply f by the somewhat sparse product of 2 lines
when Fpk.C.getSexticTwist() == D_Twist: when Fpk.C.getSexticTwist() == D_Twist:
f.mul_sparse_by_abcdefghij00(g) when f is CubicExt:
f.mul_sparse_by_abcdefghij00_cube_over_quad(g)
else:
f.mul_sparse_by_abcdefghij00_quad_over_cube(g)
elif Fpk.C.getSexticTwist() == M_Twist: elif Fpk.C.getSexticTwist() == M_Twist:
f.mul_sparse_by_abcd00efghij(g) when f is CubicExt:
f.mul_sparse_by_abcd00efghij_cube_over_quad(g)
else:
f.mul_sparse_by_abcdef00ghij_quad_over_cube(g)
else: else:
{.error: "A line function assumes that the curve has a twist".} {.error: "A line function assumes that the curve has a twist".}

View File

@ -15,7 +15,7 @@ import
ec_shortweierstrass_projective ec_shortweierstrass_projective
], ],
../isogenies/frobenius, ../isogenies/frobenius,
../curves/zoo_pairings, ../constants/zoo_pairings,
../arithmetic, ../arithmetic,
./cyclotomic_subgroup, ./cyclotomic_subgroup,
./lines_eval, ./lines_eval,

View File

@ -15,7 +15,7 @@ import
ec_shortweierstrass_projective ec_shortweierstrass_projective
], ],
../isogenies/frobenius, ../isogenies/frobenius,
../curves/zoo_pairings, ../constants/zoo_pairings,
./lines_eval, ./lines_eval,
./cyclotomic_subgroup, ./cyclotomic_subgroup,
./miller_loops ./miller_loops

View File

@ -15,7 +15,7 @@ import
ec_shortweierstrass_projective ec_shortweierstrass_projective
], ],
../isogenies/frobenius, ../isogenies/frobenius,
../curves/zoo_pairings, ../constants/zoo_pairings,
./lines_eval, ./lines_eval,
./miller_loops ./miller_loops

View File

@ -74,6 +74,7 @@ type
kRegister kRegister
kFromArray kFromArray
kArrayAddr kArrayAddr
k2dArrayAddr
Operand* = object Operand* = object
desc*: OperandDesc desc*: OperandDesc
@ -84,6 +85,9 @@ type
offset: int offset: int
of kArrayAddr: of kArrayAddr:
buf: seq[Operand] buf: seq[Operand]
of k2dArrayAddr:
dims: array[2, int]
buf2d: seq[Operand]
OperandDesc* = ref object OperandDesc* = ref object
asmId*: string # [a] - ASM id asmId*: string # [a] - ASM id
@ -141,11 +145,17 @@ proc `[]`*(opArray: OperandArray, index: int): Operand =
func `[]`*(opArray: var OperandArray, index: int): var Operand = func `[]`*(opArray: var OperandArray, index: int): var Operand =
opArray.buf[index] opArray.buf[index]
func `[]`*(arrayAddr: Operand, index: int): Operand = func `[]`*(arrAddr: Operand, index: int): Operand =
arrayAddr.buf[index] arrAddr.buf[index]
func `[]`*(arrayAddr: var Operand, index: int): var Operand = func `[]`*(arrAddr: var Operand, index: int): var Operand =
arrayAddr.buf[index] arrAddr.buf[index]
func `[]`*(arr2dAddr: Operand, i, j: int): Operand =
arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
func `[]`*(arr2dAddr: var Operand, i, j: int): var Operand =
arr2dAddr.buf2d[i*arr2dAddr.dims[1] + j]
func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x86 = func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x86 =
result.wordSize = sizeof(Word) result.wordSize = sizeof(Word)
@ -236,6 +246,22 @@ func asArrayAddr*(op: Register, len: int): Operand =
offset: i offset: i
) )
func as2dArrayAddr*(op: Operand, rows, cols: int): Operand =
## Use the value stored in an operand as an array address
doAssert op.desc.rm in {Reg, PointerInReg, ElemsInReg}+SpecificRegisters
result = Operand(
kind: k2dArrayAddr,
desc: nil,
dims: [rows, cols],
buf2d: newSeq[Operand](rows*cols)
)
for i in 0 ..< rows*cols:
result.buf2d[i] = Operand(
desc: op.desc,
kind: kFromArray,
offset: i
)
# Code generation # Code generation
# ------------------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------
@ -344,7 +370,7 @@ func generate*(a: Assembler_x86): NimNode =
func getStrOffset(a: Assembler_x86, op: Operand): string = func getStrOffset(a: Assembler_x86, op: Operand): string =
if op.kind != kFromArray: if op.kind != kFromArray:
if op.kind == kArrayAddr: if op.kind in {kArrayAddr, k2dArrayAddr}:
# We are operating on an array pointer # We are operating on an array pointer
# instead of array elements # instead of array elements
if op.buf[0].desc.constraint == ClobberedRegister: if op.buf[0].desc.constraint == ClobberedRegister:

View File

@ -8,7 +8,7 @@
import import
../math/[ec_shortweierstrass, pairings], ../math/[ec_shortweierstrass, pairings],
../math/curves/zoo_generators, ../math/constants/zoo_generators,
../hash_to_curve/hash_to_curve, ../hash_to_curve/hash_to_curve,
../hashes ../hashes

View File

@ -12,7 +12,7 @@ import
../constantine/math/config/[common, curves], ../constantine/math/config/[common, curves],
../constantine/math/[arithmetic, extension_fields], ../constantine/math/[arithmetic, extension_fields],
../constantine/math/elliptic/ec_shortweierstrass_projective, ../constantine/math/elliptic/ec_shortweierstrass_projective,
../constantine/math/curves/zoo_subgroups, ../constantine/math/constants/zoo_subgroups,
../constantine/math/pairing/pairing_bls12, ../constantine/math/pairing/pairing_bls12,
# Helpers # Helpers
../helpers/prng_unsafe ../helpers/prng_unsafe

View File

@ -27,7 +27,7 @@ import
ec_twistededwards_projective, ec_twistededwards_projective,
ec_scalar_mul], ec_scalar_mul],
../../constantine/math/io/[io_bigints, io_fields, io_ec], ../../constantine/math/io/[io_bigints, io_fields, io_ec],
../../constantine/math/curves/zoo_subgroups, ../../constantine/math/constants/zoo_subgroups,
# Test utilities # Test utilities
../../helpers/prng_unsafe, ../../helpers/prng_unsafe,
./support/ec_reference_scalar_mult ./support/ec_reference_scalar_mult

View File

@ -28,7 +28,7 @@ echo "test_finite_fields_mulsquare xoshiro512** seed: ", seed
static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option" static: doAssert defined(testingCurves), "This modules requires the -d:testingCurves compile option"
proc sanity(C: static Curve) = proc sanity(C: static Curve) =
test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]": test "Squaring 0,1,2 with " & $Curve(C) & " [FastSquaring = " & $(Fp[C].getSpareBits() >= 2) & "]":
block: # 0² mod block: # 0² mod
var n: Fp[C] var n: Fp[C]
@ -310,3 +310,74 @@ suite "Modular squaring - bugs highlighted by property-based testing":
check: check:
bool(a2mul == expected) bool(a2mul == expected)
bool(a2sqr == expected) bool(a2sqr == expected)
proc random_sumprod(C: static Curve, N: static int) =
template sumprod_test(random_instancer: untyped) =
block:
var a: array[N, Fp[C]]
var b: array[N, Fp[C]]
for i in 0 ..< N:
a[i] = rng.random_instancer(Fp[C])
b[i] = rng.random_instancer(Fp[C])
var r, r_ref, t: Fp[C]
r_ref.prod(a[0], b[0])
for i in 1 ..< N:
t.prod(a[i], b[i])
r_ref += t
r.sumprod(a, b)
doAssert bool(r == r_ref)
template sumProdMax() =
block:
var a: array[N, Fp[C]]
var b: array[N, Fp[C]]
for i in 0 ..< N:
a[i].setMinusOne()
b[i].setMinusOne()
var r, r_ref, t: Fp[C]
r_ref.prod(a[0], b[0])
for i in 1 ..< N:
t.prod(a[i], b[i])
r_ref += t
r.sumprod(a, b)
doAssert bool(r == r_ref)
sumprod_test(random_unsafe)
sumprod_test(randomHighHammingWeight)
sumprod_test(random_long01Seq)
sumProdMax()
suite "Random sum products is consistent with naive " & " [" & $WordBitwidth & "-bit mode]":
const MaxLength = 8
test "Random sum products mod P-224]":
for _ in 0 ..< Iters:
staticFor N, 2, MaxLength:
random_sumprod(P224, N)
test "Random sum products mod BN254_Nogami]":
for _ in 0 ..< Iters:
staticFor N, 2, MaxLength:
random_sumprod(BN254_Nogami, N)
test "Random sum products mod BN254_Snarks]":
for _ in 0 ..< Iters:
staticFor N, 2, MaxLength:
random_sumprod(BN254_Snarks, N)
test "Random sum products mod BLS12_377]":
for _ in 0 ..< Iters:
staticFor N, 2, MaxLength:
random_sumprod(BLS12_377, N)
test "Random sum products mod BLS12_381]":
for _ in 0 ..< Iters:
staticFor N, 2, MaxLength:
random_sumprod(BLS12_381, N)

View File

@ -0,0 +1,26 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/extension_fields,
../../constantine/math/config/curves,
# Test utilities
./t_fp_tower_template
const TestCurves = [
BN254_Nogami,
]
runTowerTests(
ExtDegree = 12,
Iters = 12,
TestCurves = TestCurves,
moduleName = "test_fp12_" & $BN254_Nogami,
testSuiteDesc = "𝔽p12 = 𝔽p6[w] " & $BN254_Nogami
)

Some files were not shown because too many files have changed in this diff Show More