Multi-Scalar-Multiplication / Linear combination (#220)

* unoptimized msm

* MSM: reorder loops

* add a signed windowed recoding technique

* improve wNAF table access

* use batchAffine

* revamp EC tests

* MSM signed digit support

* refactor MSM: recode signed ahead of time

* missing test vector

* refactor allocs and Alloca sideeffect

* add an endomorphism threshold

* Add Jacobian extended coordinates

* refactor recodings, prepare for parallelizable on-the-fly signed recoding

* recoding changes, introduce proper NAF for pairings

* more pairings refactoring, introduce miller accumulator for EVM

* some optim to the addchain miller loop

* start optimizing multi-pairing

* finish multi-miller loop refactoring

* minor tuning

* MSM: signed encoding suitable for parallelism (no precompute)

* cleanup signed window encoding

* add prefetching

* add metering

* properly init result to infinity

* comment on prefetching

* introduce vartime inversion for batch additions

* fix JacExt infinity conversion

* add batchAffine for MSM, though slower than JacExtended at the moment

* add a batch affine scheduler for MSM

* Add Multi-Scalar-Multiplication endomorphism acceleration

* some tuning

* signed integer fixes + 32-bit + tuning

* Some more tuning

* common msm bench + don't use affine for c < 9

* nit
This commit is contained in:
Mamy Ratsimbazafy 2023-02-16 12:45:05 +01:00 committed by GitHub
parent 082cd1deb9
commit e5612f5705
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
143 changed files with 18968 additions and 1295 deletions

View File

@ -12,7 +12,8 @@ import
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended],
# Helpers
./bench_elliptic_template
@ -46,33 +47,32 @@ proc main() =
const curve = AvailableCurves[i]
addBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
addBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
addBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
separator()
affFromProjBench(ECP_ShortW_Prj[Fp[curve], G1], MulIters)
affFromJacBench(ECP_ShortW_Jac[Fp[curve], G1], MulIters)
separator()
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp[curve], G1], MulIters)
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp[curve], G1], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromProjBatchBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp[curve], G1], MulIters)
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp[curve], G1], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromProjBatchBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], window = 5, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], window = 5, MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromJacBatchBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
scalarMulEndo(ECP_ShortW_Prj[Fp[curve], G1], MulIters)
scalarMulEndoWindow(ECP_ShortW_Prj[Fp[curve], G1], MulIters)
scalarMulEndo(ECP_ShortW_Jac[Fp[curve], G1], MulIters)
scalarMulEndoWindow(ECP_ShortW_Jac[Fp[curve], G1], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromJacBatchBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
separator()

View File

@ -14,6 +14,7 @@ import
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended,
ec_shortweierstrass_batch_ops_parallel],
../constantine/platforms/threadpool/threadpool,
# Helpers
@ -38,7 +39,7 @@ proc multiAddParallelBench*(EC: typedesc, numPoints: int, iters: int) =
var tp = Threadpool.new()
bench("EC parallel batch add (" & align($tp.numThreads, 2) & " threads) " & $EC.G & " (" & $numPoints & " points)", EC, iters):
tp.sum_batch_vartime_parallel(r, points)
tp.sum_reduce_vartime_parallel(r, points)
tp.shutdown()
@ -57,41 +58,55 @@ const AvailableCurves = [
BLS12_381,
]
# const testNumPoints = [10, 100, 1000, 10000, 100000]
const testNumPoints = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 65536, 131072]
proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
addBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
addBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Prj[Fp[curve], G1], Iters)
addBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_Jac[Fp[curve], G1], Iters)
addBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
doublingBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
mixedAddBench(ECP_ShortW_JacExt[Fp[curve], G1], Iters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddParallelBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
for numPoints in [10, 100, 1000, 10000, 100000, 1000000]:
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddParallelBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, batchIters)
separator()
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_JacExt[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
multiAddBench(ECP_ShortW_JacExt[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
separator()
main()

View File

@ -0,0 +1,60 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_scalar_mul,
ec_multi_scalar_mul],
../constantine/math/constants/zoo_subgroups,
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_template,
./bench_blueprint
# ############################################################
#
# Benchmark of multi-scalar multiplication (MSM)
# on the G1 group of Short Weierstrass elliptic curves
# in Jacobian coordinates
#
# ############################################################
const Iters = 10_000
const AvailableCurves = [
BLS12_381,
]
# const testNumPoints = [10, 100, 1000, 10000, 100000]
const testNumPoints = [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192,
16384, 32768, 65536, 131072, 262144]
# Entry point of the BLS12-381 MSM benchmark: for every curve listed in
# `AvailableCurves`, runs `msmBench` on Jacobian G1 points once per input size
# in `testNumPoints`.
proc main() =
separator()
# `curve` is bound as a `const`, so `i` has to be known at compile time;
# presumably `staticFor` unrolls the loop to provide that.
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
separator()
# Projective-coordinates run, currently disabled.
# for numPoints in testNumPoints:
# let batchIters = max(1, Iters div numPoints)
# msmBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, batchIters)
# separator()
# separator()
for numPoints in testNumPoints:
# Fewer repetitions as the input grows, but never fewer than one.
let batchIters = max(1, Iters div numPoints)
msmBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, batchIters)
separator()
separator()
main()
notes()

View File

@ -0,0 +1,59 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_scalar_mul,
ec_multi_scalar_mul],
../constantine/math/constants/zoo_subgroups,
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_template,
./bench_blueprint
# ############################################################
#
# Benchmark of multi-scalar multiplication (MSM)
# on the G1 group of Short Weierstrass elliptic curves
# in Jacobian coordinates
#
# ############################################################
const Iters = 10_000
const AvailableCurves = [
BN254_Snarks,
]
const testNumPoints = [10, 100, 1000, 10000, 100000]
# const testNumPoints = [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
# Entry point of the BN254_Snarks MSM benchmark: for every curve listed in
# `AvailableCurves`, runs `msmBench` on Jacobian G1 points once per input size
# in `testNumPoints`.
proc main() =
separator()
# `curve` is bound as a `const`, so `i` has to be known at compile time;
# presumably `staticFor` unrolls the loop to provide that.
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
separator()
# Projective-coordinates run, currently disabled.
# for numPoints in testNumPoints:
# let batchIters = max(1, Iters div numPoints)
# msmBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, batchIters)
# separator()
# separator()
for numPoints in testNumPoints:
# Fewer repetitions as the input grows, but never fewer than one.
let batchIters = max(1, Iters div numPoints)
msmBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, batchIters)
separator()
separator()
main()
notes()

View File

@ -0,0 +1,81 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
# Helpers
./bench_elliptic_template
# ############################################################
#
# Benchmark of scalar multiplication on the G1 group of
# Short Weierstrass elliptic curves
# in (homogeneous) projective and Jacobian coordinates
#
# ############################################################
const Iters = 10_000
const MulIters = 100
const AvailableCurves = [
# P224,
BN254_Nogami,
BN254_Snarks,
# Edwards25519,
# P256,
# Secp256k1,
Pallas,
Vesta,
BLS12_377,
BLS12_381,
]
# Entry point of the G1 scalar multiplication benchmark: for every curve in
# `AvailableCurves`, runs each scalar multiplication benchmark (double-and-add,
# min Hamming weight recoding, generic windowed, wNAF, endomorphism) on both
# projective and Jacobian points, with `MulIters` repetitions each.
proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
# Scalars are 64-bit here; the trailing comment keeps the curve-order
# bitwidth alternative at hand.
const bits = 64 # curve.getCurveOrderBitwidth()
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp[curve], G1], bits, MulIters)
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp[curve], G1], bits, MulIters)
separator()
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp[curve], G1], bits, MulIters)
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp[curve], G1], bits, MulIters)
separator()
# Window sizes 2 to 5 are benchmarked for each coordinate system.
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 5, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 5, MulIters)
separator()
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 2, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 3, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 4, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp[curve], G1], bits, window = 5, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 2, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 3, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 4, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp[curve], G1], bits, window = 5, MulIters)
separator()
# Compile-time guard: while `bits` is 64 the endomorphism benchmarks below
# are not compiled in.
when bits >= 196: # All endomorphisms constants are below this threshold
scalarMulEndo( ECP_ShortW_Prj[Fp[curve], G1], bits, MulIters)
scalarMulEndoWindow(ECP_ShortW_Prj[Fp[curve], G1], bits, MulIters)
scalarMulEndo( ECP_ShortW_Jac[Fp[curve], G1], bits, MulIters)
scalarMulEndoWindow(ECP_ShortW_Jac[Fp[curve], G1], bits, MulIters)
separator()
separator()
main()
notes()

View File

@ -13,7 +13,8 @@ import
../constantine/math/extension_fields,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended],
# Helpers
./bench_elliptic_template,
# Standard library
@ -47,31 +48,32 @@ proc main() =
const curve = AvailableCurves[i]
addBench(ECP_ShortW_Prj[Fp2[curve], G2], Iters)
addBench(ECP_ShortW_Jac[Fp2[curve], G2], Iters)
addBench(ECP_ShortW_JacExt[Fp2[curve], G2], Iters)
mixedAddBench(ECP_ShortW_Prj[Fp2[curve], G2], Iters)
mixedAddBench(ECP_ShortW_Jac[Fp2[curve], G2], Iters)
mixedAddBench(ECP_ShortW_JacExt[Fp2[curve], G2], Iters)
doublingBench(ECP_ShortW_Prj[Fp2[curve], G2], Iters)
doublingBench(ECP_ShortW_Jac[Fp2[curve], G2], Iters)
doublingBench(ECP_ShortW_JacExt[Fp2[curve], G2], Iters)
separator()
affFromProjBench(ECP_ShortW_Prj[Fp2[curve], G2], MulIters)
affFromJacBench(ECP_ShortW_Jac[Fp2[curve], G2], MulIters)
separator()
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp2[curve], G2], MulIters)
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp2[curve], G2], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromProjBatchBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp2[curve], G2], MulIters)
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp2[curve], G2], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromProjBatchBench(ECP_ShortW_Prj[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], window = 5, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], window = 5, MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromJacBatchBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = false, batchIters)
separator()
scalarMulEndo(ECP_ShortW_Prj[Fp2[curve], G2], MulIters)
scalarMulEndo(ECP_ShortW_Jac[Fp2[curve], G2], MulIters)
for numPoints in [10, 100, 1000, 10000]:
let batchIters = max(1, Iters div numPoints)
affFromJacBatchBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, useBatching = true, batchIters)
separator()
separator()

View File

@ -0,0 +1,80 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/extension_fields,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
# Helpers
./bench_elliptic_template,
# Standard library
std/strutils
# ############################################################
#
# Benchmark of scalar multiplication on the G2 group of
# Short Weierstrass elliptic curves
# in (homogeneous) projective and Jacobian coordinates
#
# ############################################################
const Iters = 10_000
const MulIters = 500
const AvailableCurves = [
# P224,
BN254_Nogami,
BN254_Snarks,
# Edwards25519,
# P256,
# Secp256k1,
BLS12_377,
BLS12_381,
]
# Entry point of the G2 scalar multiplication benchmark: for every curve in
# `AvailableCurves`, runs each scalar multiplication benchmark (double-and-add,
# min Hamming weight recoding, generic windowed, wNAF, endomorphism) on both
# projective and Jacobian points over Fp2, with `MulIters` repetitions each.
proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
# Scalars are 64-bit here; the trailing comment keeps the curve-order
# bitwidth alternative at hand.
const bits = 64 # curve.getCurveOrderBitwidth()
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, MulIters)
scalarMulUnsafeDoubleAddBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, MulIters)
separator()
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, MulIters)
scalarMulUnsafeMinHammingWeightRecodingBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, MulIters)
separator()
# Window sizes 2 to 5 are benchmarked for each coordinate system.
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 5, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 2, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 3, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 4, MulIters)
scalarMulGenericBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 5, MulIters)
separator()
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 2, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 3, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 4, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Prj[Fp2[curve], G2], bits, window = 5, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 2, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 3, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 4, MulIters)
scalarMulUnsafeWNAFBench(ECP_ShortW_Jac[Fp2[curve], G2], bits, window = 5, MulIters)
separator()
# Compile-time guard: while `bits` is 64 the endomorphism benchmarks below
# are not compiled in.
when bits >= 196: # All endomorphisms constants are below this threshold
scalarMulEndo(ECP_ShortW_Prj[Fp2[curve], G2], bits, MulIters)
scalarMulEndo(ECP_ShortW_Jac[Fp2[curve], G2], bits, MulIters)
separator()
separator()
main()
notes()

View File

@ -22,19 +22,21 @@ import
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended,
ec_shortweierstrass_batch_ops,
ec_scalar_mul, ec_endomorphism_accel],
../constantine/math/constants/zoo_subgroups,
# Helpers
../helpers/prng_unsafe,
./platforms,
./bench_blueprint,
# Reference unsafe scalar multiplication
../tests/math/support/ec_reference_scalar_mult
../constantine/math/elliptic/ec_scalar_mul_vartime
export notes
export abstractions # generic sandwich on SecretBool and SecretBool in Jacobian sum
proc separator*() = separator(177)
proc separator*() = separator(206)
macro fixEllipticDisplay(EC: typedesc): untyped =
# At compile-time, enums are integers and their display is buggy
@ -50,18 +52,28 @@ proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int6
let ns = inNanoseconds((stop-start) div iters)
let throughput = 1e9 / float64(ns)
when SupportsGetTicks:
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
echo &"{op:<80} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>12} ns/op {(stopClk - startClk) div iters:>12} CPU cycles (approx)"
else:
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"
echo &"{op:<80} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>12} ns/op"
template bench*(op: string, EC: typedesc, iters: int, body: untyped): untyped =
measure(iters, startTime, stopTime, startClk, stopClk, body)
report(op, fixEllipticDisplay(EC), startTime, stopTime, startClk, stopClk, iters)
# In-place addition of two extended Jacobian points: forwards to `sum_vartime`
# with `P` as both destination and left operand.
func `+=`[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_JacExt[F, G]) {.inline.}=
P.sum_vartime(P, Q)
# In-place mixed addition of an affine point into an extended Jacobian point:
# forwards to `madd_vartime` with `P` as both destination and left operand.
func `+=`[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_Aff[F, G]) {.inline.}=
P.madd_vartime(P, Q)
proc addBench*(EC: typedesc, iters: int) =
  ## Benchmarks the addition of two random points of type `EC`.
  ##
  ## Extended Jacobian points are measured through `sum_vartime` and labelled
  ## "vartime"; every other point type goes through `sum`.
  var acc {.noInit.}: EC
  let
    lhs = rng.random_unsafe(EC)
    rhs = rng.random_unsafe(EC)

  when EC isnot ECP_ShortW_JacExt:
    bench("EC Add " & $EC.G, EC, iters):
      acc.sum(lhs, rhs)
  else:
    bench("EC Add vartime " & $EC.G, EC, iters):
      acc.sum_vartime(lhs, rhs)
@ -71,6 +83,11 @@ proc mixedAddBench*(EC: typedesc, iters: int) =
let Q = rng.random_unsafe(EC)
var Qaff: ECP_ShortW_Aff[EC.F, EC.G]
Qaff.affine(Q)
when EC is ECP_ShortW_JacExt:
bench("EC Mixed Addition vartime " & $EC.G, EC, iters):
r.madd_vartime(P, Qaff)
else:
bench("EC Mixed Addition " & $EC.G, EC, iters):
r.madd(P, Qaff)
@ -92,11 +109,40 @@ proc affFromJacBench*(EC: typedesc, iters: int) =
bench("EC Jacobian to Affine " & $EC.G, EC, iters):
r.affine(P)
proc scalarMulGenericBench*(EC: typedesc, window: static int, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
proc affBatchBenchImpl(EC: typedesc, coordsName: string, numPoints: int, useBatching: bool, iters: int) =
  ## Shared driver for the "<coordinates> to Affine" conversion benchmarks.
  ##
  ## Fills a sequence with `numPoints` random points of type `EC`, then
  ## measures converting all of them to affine coordinates, either with one
  ## `batchAffine` call (`useBatching = true`) or with one `affine` call per
  ## point. `coordsName` is only used in the printed benchmark label.
  var r = newSeq[affine(EC)](numPoints)
  var points = newSeq[EC](numPoints)

  for i in 0 ..< numPoints:
    points[i] = rng.random_unsafe(EC)

  let labelTail = $EC.G & " (" & $numPoints & " points)"

  if useBatching:
    bench("EC " & coordsName & " to Affine - batched " & labelTail, EC, iters):
      r.asUnchecked().batchAffine(points.asUnchecked(), numPoints)
  else:
    bench("EC " & coordsName & " to Affine - unbatched " & labelTail, EC, iters):
      for i in 0 ..< numPoints:
        r[i].affine(points[i])

proc affFromProjBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmarks the conversion of `numPoints` points to affine coordinates,
  ## batched or one by one, under the "Projective" label.
  affBatchBenchImpl(EC, "Projective", numPoints, useBatching, iters)

proc affFromJacBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
  ## Benchmarks the conversion of `numPoints` points to affine coordinates,
  ## batched or one by one, under the "Jacobian" label.
  affBatchBenchImpl(EC, "Jacobian", numPoints, useBatching, iters)
proc scalarMulGenericBench*(EC: typedesc, bits, window: static int, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
var P = rng.random_unsafe(EC)
P.clearCofactor()
let exponent = rng.random_unsafe(BigInt[bits])
@ -104,11 +150,10 @@ proc scalarMulGenericBench*(EC: typedesc, window: static int, iters: int) =
r = P
r.scalarMulGeneric(exponent, window)
proc scalarMulEndo*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
proc scalarMulEndo*(EC: typedesc, bits: static int, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
var P = rng.random_unsafe(EC)
P.clearCofactor()
let exponent = rng.random_unsafe(BigInt[bits])
@ -116,11 +161,10 @@ proc scalarMulEndo*(EC: typedesc, iters: int) =
r = P
r.scalarMulEndo(exponent)
proc scalarMulEndoWindow*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
proc scalarMulEndoWindow*(EC: typedesc, bits: static int, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
var P = rng.random_unsafe(EC)
P.clearCofactor()
let exponent = rng.random_unsafe(BigInt[bits])
@ -131,29 +175,38 @@ proc scalarMulEndoWindow*(EC: typedesc, iters: int) =
else:
{.error: "Not implemented".}
proc scalarMulUnsafeDoubleAddBench*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
proc scalarMulUnsafeDoubleAddBench*(EC: typedesc, bits: static int, iters: int) =
var r {.noInit.}: EC
let P = rng.random_unsafe(EC) # TODO: clear cofactor
var P = rng.random_unsafe(EC)
P.clearCofactor()
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe reference DoubleAdd)", EC, iters):
r = P
r.unsafe_ECmul_double_add(exponent)
proc scalarMulUnsafeMinHammingWeightRecodingBench*(EC: typedesc, iters: int) =
const bits = EC.F.C.getCurveOrderBitwidth()
r.scalarMul_doubleAdd_vartime(exponent)
proc scalarMulUnsafeMinHammingWeightRecodingBench*(EC: typedesc, bits: static int, iters: int) =
var r {.noInit.}: EC
var P = rng.random_unsafe(EC) # TODO: clear cofactor
var P = rng.random_unsafe(EC)
P.clearCofactor()
let exponent = rng.random_unsafe(BigInt[bits])
bench("EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe min Hamming Weight recoding)", EC, iters):
r = P
r.unsafe_ECmul_minHammingWeight(exponent)
r.scalarMul_minHammingWeight_vartime(exponent)
proc scalarMulUnsafeWNAFBench*(EC: typedesc, bits, window: static int, iters: int) =
  ## Benchmarks `scalarMul_minHammingWeight_windowed_vartime` with the given
  ## `window` on a random point of type `EC` and a random `bits`-bit scalar.
  ##
  ## The point has `clearCofactor` applied before the measurement starts.
  var acc {.noInit.}: EC
  var base = rng.random_unsafe(EC)
  base.clearCofactor()

  let scalar = rng.random_unsafe(BigInt[bits])
  let label = "EC ScalarMul " & $bits & "-bit " & $EC.G & " (unsafe wNAF-" & $window & ")"

  bench(label, EC, iters):
    # Restart from the same base point on every repetition.
    acc = base
    acc.scalarMul_minHammingWeight_windowed_vartime(scalar, window)
proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
@ -165,9 +218,61 @@ proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int)
if useBatching:
bench("EC Multi Add batched " & $EC.G & " (" & $numPoints & " points)", EC, iters):
r.sum_batch_vartime(points)
r.sum_reduce_vartime(points)
else:
bench("EC Multi Mixed-Add unbatched " & $EC.G & " (" & $numPoints & " points)", EC, iters):
r.setInf()
for i in 0 ..< numPoints:
r += points[i]
proc msmBench*(EC: typedesc, numPoints: int, iters: int) =
  ## Benchmarks three ways of computing the linear combination
  ## `sum(scalars[i] * points[i])` over `numPoints` random pairs and prints
  ## the speedup ratios between them:
  ##
  ## 1. naive: one `scalarMul` per pair, accumulated with `+=`
  ##    (skipped above `naiveMaxPoints` pairs),
  ## 2. `multiScalarMul_reference_vartime`,
  ## 3. `multiScalarMul_vartime`.
  ##
  ## Scalars have the bit width of the curve order; points have
  ## `clearCofactor` applied and are stored in affine coordinates.
  ##
  ## The timestamps used for the speedup ratios are taken around each `bench`
  ## call, so each span covers the whole measurement and dividing it by
  ## `iters` yields a per-run average. This assumes `bench` executes its body
  ## `iters` times, which is how its own report normalises timings. The span
  ## also includes the time `bench` spends printing its report line.
  const bits = EC.F.C.getCurveOrderBitwidth()
  const naiveMaxPoints = 100000 # the naive method is not run above this size

  var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
  var scalars = newSeq[BigInt[bits]](numPoints)

  for i in 0 ..< numPoints:
    var tmp = rng.random_unsafe(EC)
    tmp.clearCofactor()
    points[i].affine(tmp)
    scalars[i] = rng.random_unsafe(BigInt[bits])

  var r{.noInit.}: EC
  var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt: MonoTime

  let runNaive = numPoints <= naiveMaxPoints

  if runNaive:
    startNaive = getMonotime()
    bench("EC scalar muls " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      var tmp: EC
      r.setInf()
      for i in 0 ..< points.len:
        tmp.fromAffine(points[i])
        tmp.scalarMul(scalars[i])
        r += tmp
    stopNaive = getMonotime()

  block:
    startMSMbaseline = getMonotime()
    bench("EC multi-scalar-mul baseline " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      r.multiScalarMul_reference_vartime(scalars, points)
    stopMSMbaseline = getMonotime()

  block:
    startMSMopt = getMonotime()
    bench("EC multi-scalar-mul optimized " & align($numPoints, 7) & " (scalars " & $bits & "-bit, points) pairs ", EC, iters):
      r.multiScalarMul_vartime(scalars, points)
    stopMSMopt = getMonotime()

  # Average duration of one run, in nanoseconds.
  let perfNaive = inNanoseconds((stopNaive-startNaive) div iters)
  let perfMSMbaseline = inNanoseconds((stopMSMbaseline-startMSMbaseline) div iters)
  let perfMSMopt = inNanoseconds((stopMSMopt-startMSMopt) div iters)

  if runNaive:
    let speedupBaseline = float(perfNaive) / float(perfMSMbaseline)
    echo &"Speedup ratio baseline over naive linear combination: {speedupBaseline:>6.3f}x"

    let speedupOpt = float(perfNaive) / float(perfMSMopt)
    echo &"Speedup ratio optimized over naive linear combination: {speedupOpt:>6.3f}x"

  let speedupOptBaseline = float(perfMSMbaseline) / float(perfMSMopt)
  echo &"Speedup ratio optimized over baseline linear combination: {speedupOptBaseline:>6.3f}x"

View File

@ -160,6 +160,13 @@ proc invBench*(T: typedesc, iters: int) =
bench("Inversion (constant-time)", T, iters):
r.inv(x)
proc invVartimeBench*(T: typedesc, iters: int) =
  ## Benchmarks `inv_vartime` on a random element of type `T`.
  ##
  ## The output variable is passed to `preventOptimAway` before the
  ## measurement starts.
  var inverse: T
  let operand = rng.random_unsafe(T)
  preventOptimAway(inverse)

  bench("Inversion (variable-time)", T, iters):
    inverse.inv_vartime(operand)
proc isSquareBench*(T: typedesc, iters: int) =
let x = rng.random_unsafe(T)
bench("isSquare (constant-time)", T, iters):

View File

@ -61,6 +61,7 @@ proc main() =
toFieldBench(Fp[curve], Iters)
smallSeparator()
invBench(Fp[curve], ExponentIters)
invVartimeBench(Fp[curve], ExponentIters)
isSquareBench(Fp[curve], ExponentIters)
sqrtBench(Fp[curve], ExponentIters)
sqrtRatioBench(Fp[curve], ExponentIters)

View File

@ -40,6 +40,7 @@ proc main() =
mulBench(Fp12[curve], Iters)
sqrBench(Fp12[curve], Iters)
invBench(Fp12[curve], InvIters)
invVartimeBench(Fp12[curve], InvIters)
separator()
main()

View File

@ -48,6 +48,7 @@ proc main() =
rdc2xBench(Fp2[curve], Iters)
smallSeparator()
invBench(Fp2[curve], InvIters)
invVartimeBench(Fp2[curve], InvIters)
isSquareBench(Fp2[curve], InvIters)
sqrtBench(Fp2[curve], InvIters)
separator()

View File

@ -48,6 +48,7 @@ proc main() =
rdc2xBench(Fp4[curve], Iters)
smallSeparator()
invBench(Fp4[curve], InvIters)
invVartimeBench(Fp4[curve], InvIters)
separator()
main()

View File

@ -46,6 +46,7 @@ proc main() =
rdc2xBench(Fp6[curve], Iters)
smallSeparator()
invBench(Fp6[curve], InvIters)
invVartimeBench(Fp6[curve], InvIters)
separator()
main()

View File

@ -184,7 +184,7 @@ proc millerLoopBLS12Bench*(C: static Curve, iters: int) =
var f: Fp12[C]
bench("Miller Loop BLS12", C, iters):
f.millerLoopGenericBLS12(P, Q)
f.millerLoopGenericBLS12(Q, P)
proc millerLoopBNBench*(C: static Curve, iters: int) =
let
@ -193,7 +193,7 @@ proc millerLoopBNBench*(C: static Curve, iters: int) =
var f: Fp12[C]
bench("Miller Loop BN", C, iters):
f.millerLoopGenericBN(P, Q)
f.millerLoopGenericBN(Q, P)
proc finalExpEasyBench*(C: static Curve, iters: int) =
var r = rng.random_unsafe(Fp12[C])

View File

@ -172,7 +172,7 @@ proc millerLoopBLS12Bench*(C: static Curve, iters: int) =
var f: Fp12[C]
bench("Miller Loop BLS12", C, iters):
f.millerLoopGenericBLS12(P, Q)
f.millerLoopGenericBLS12(Q, P)
proc millerLoopBNBench*(C: static Curve, iters: int) =
let
@ -181,7 +181,7 @@ proc millerLoopBNBench*(C: static Curve, iters: int) =
var f: Fp12[C]
bench("Miller Loop BN", C, iters):
f.millerLoopGenericBN(P, Q)
f.millerLoopGenericBN(Q, P)
proc finalExpBLS12Bench*(C: static Curve, iters: int) =
var r = rng.random_unsafe(Fp12[C])

View File

@ -99,15 +99,18 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# ("tests/math/t_ec_shortw_prj_g1_add_double.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mul_sanity.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mul_distri.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mul_vs_ref.nim", false),
("tests/math/t_ec_shortw_prj_g1_mul_vs_ref.nim", false),
# ("tests/math/t_ec_shortw_prj_g1_mixed_add.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_add_double.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mul_sanity.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mul_distri.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mul_vs_ref.nim", false),
("tests/math/t_ec_shortw_jac_g1_mul_vs_ref.nim", false),
# ("tests/math/t_ec_shortw_jac_g1_mixed_add.nim", false),
("tests/math/t_ec_shortw_jacext_g1_add_double.nim", false),
("tests/math/t_ec_shortw_jacext_g1_mixed_add.nim", false),
# ("tests/math/t_ec_twedwards_prj_add_double", false),
# ("tests/math/t_ec_twedwards_prj_mul_sanity", false),
# ("tests/math/t_ec_twedwards_prj_mul_distri", false),
@ -118,49 +121,49 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# ("tests/math/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mixed_add_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_381.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_377.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mixed_add_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_add_double_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_sanity_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_distri_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bw6_761.nim", false),
("tests/math/t_ec_shortw_prj_g2_mul_vs_ref_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_prj_g2_mixed_add_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bn254_snarks.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mixed_add_bn254_snarks.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_381.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_381.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_377.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mixed_add_bls12_377.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_add_double_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_sanity_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_distri_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bw6_761.nim", false),
("tests/math/t_ec_shortw_jac_g2_mul_vs_ref_bw6_761.nim", false),
# ("tests/math/t_ec_shortw_jac_g2_mixed_add_bw6_761.nim", false),
# Elliptic curve arithmetic vs Sagemath
@ -179,8 +182,11 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# Elliptic curve arithmetic - batch computation
# ----------------------------------------------------------
("tests/math/t_ec_shortw_prj_g1_batch_add.nim", false),
("tests/math/t_ec_shortw_jac_g1_batch_add.nim", false),
("tests/math/t_ec_shortw_prj_g1_sum_reduce.nim", false),
("tests/math/t_ec_shortw_jac_g1_sum_reduce.nim", false),
("tests/math/t_ec_shortw_jacext_g1_sum_reduce.nim", false),
("tests/math/t_ec_shortw_prj_g1_msm.nim", false),
("tests/math/t_ec_shortw_jac_g1_msm.nim", false),
# Subgroups and cofactors
# ----------------------------------------------------------
@ -210,6 +216,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
# ----------------------------------------------------------
("tests/math/t_pairing_bn254_nogami_multi.nim", false),
("tests/math/t_pairing_bn254_snarks_multi.nim", false),
("tests/math/t_pairing_bls12_377_multi.nim", false),
("tests/math/t_pairing_bls12_381_multi.nim", false),
# Prime order fields
@ -258,8 +265,12 @@ const benchDesc = [
"bench_fp6",
"bench_fp12",
"bench_ec_g1",
"bench_ec_g1_scalar_mul",
"bench_ec_g1_batch",
"bench_ec_g1_msm_bn254_snarks",
"bench_ec_g1_msm_bls12_381",
"bench_ec_g2",
"bench_ec_g2_scalar_mul",
"bench_pairing_bls12_377",
"bench_pairing_bls12_381",
"bench_pairing_bn254_nogami",
@ -826,7 +837,58 @@ task bench_ec_g1_batch_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (
runBench("bench_ec_g1_batch", "gcc", useAsm = false)
task bench_ec_g1_batch_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - Clang no Assembly":
runBench("bench_ec_g1", "clang", useAsm = false)
runBench("bench_ec_g1_batch", "clang", useAsm = false)
# Elliptic curve G1 - scalar multiplication
# ------------------------------------------
# One task per toolchain configuration. Each one forwards the
# "bench_ec_g1_scalar_mul" benchmark name to `runBench`, optionally
# followed by the compiler name and `useAsm = false` for the no-assembly builds.
task bench_ec_g1_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - Default compiler":
  runBench("bench_ec_g1_scalar_mul")
task bench_ec_g1_scalar_mul_gcc, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - GCC":
  runBench("bench_ec_g1_scalar_mul", "gcc")
task bench_ec_g1_scalar_mul_clang, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - Clang":
  runBench("bench_ec_g1_scalar_mul", "clang")
task bench_ec_g1_scalar_mul_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - GCC no Assembly":
  runBench("bench_ec_g1_scalar_mul", "gcc", useAsm = false)
task bench_ec_g1_scalar_mul_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - Clang no Assembly":
  runBench("bench_ec_g1_scalar_mul", "clang", useAsm = false)
# Elliptic curve G1 - Multi-scalar-mul
# ------------------------------------------
# Multi-scalar-multiplication benchmarks, one family per curve
# (BN254-Snarks, then BLS12-381). Within a family there is one task per
# toolchain configuration, each forwarding to `runBench`.
task bench_ec_g1_msm_bn254_snarks, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BN254-Snarks - Default compiler":
  runBench("bench_ec_g1_msm_bn254_snarks")
task bench_ec_g1_msm_bn254_snarks_gcc, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BN254-Snarks - GCC":
  runBench("bench_ec_g1_msm_bn254_snarks", "gcc")
task bench_ec_g1_msm_bn254_snarks_clang, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BN254-Snarks - Clang":
  runBench("bench_ec_g1_msm_bn254_snarks", "clang")
task bench_ec_g1_msm_bn254_snarks_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BN254-Snarks - GCC no Assembly":
  runBench("bench_ec_g1_msm_bn254_snarks", "gcc", useAsm = false)
task bench_ec_g1_msm_bn254_snarks_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BN254-Snarks - Clang no Assembly":
  runBench("bench_ec_g1_msm_bn254_snarks", "clang", useAsm = false)
task bench_ec_g1_msm_bls12_381, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BLS12-381 - Default compiler":
  runBench("bench_ec_g1_msm_bls12_381")
task bench_ec_g1_msm_bls12_381_gcc, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BLS12-381 - GCC":
  runBench("bench_ec_g1_msm_bls12_381", "gcc")
task bench_ec_g1_msm_bls12_381_clang, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BLS12-381 - Clang":
  runBench("bench_ec_g1_msm_bls12_381", "clang")
task bench_ec_g1_msm_bls12_381_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BLS12-381 - GCC no Assembly":
  runBench("bench_ec_g1_msm_bls12_381", "gcc", useAsm = false)
task bench_ec_g1_msm_bls12_381_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾1 (Multi-Scalar-Mul) for BLS12-381 - Clang no Assembly":
  runBench("bench_ec_g1_msm_bls12_381", "clang", useAsm = false)
# Elliptic curve G2
# ------------------------------------------
@ -846,6 +908,24 @@ task bench_ec_g2_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - GCC n
task bench_ec_g2_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 - Clang no Assembly":
runBench("bench_ec_g2", "clang", useAsm = false)
# Elliptic curve G2 - scalar multiplication
# ------------------------------------------
# 𝔾2 scalar multiplication benchmarks, one task per toolchain configuration.
# The descriptions name the operation actually benchmarked
# ("bench_ec_g2_scalar_mul" is single scalar multiplication, not MSM).
task bench_ec_g2_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾2 (Scalar Multiplication) - Default compiler":
  runBench("bench_ec_g2_scalar_mul")
task bench_ec_g2_scalar_mul_gcc, "Run benchmark on Elliptic Curve group 𝔾2 (Scalar Multiplication) - GCC":
  runBench("bench_ec_g2_scalar_mul", "gcc")
task bench_ec_g2_scalar_mul_clang, "Run benchmark on Elliptic Curve group 𝔾2 (Scalar Multiplication) - Clang":
  runBench("bench_ec_g2_scalar_mul", "clang")
task bench_ec_g2_scalar_mul_gcc_noasm, "Run benchmark on Elliptic Curve group 𝔾2 (Scalar Multiplication) - GCC no Assembly":
  runBench("bench_ec_g2_scalar_mul", "gcc", useAsm = false)
task bench_ec_g2_scalar_mul_clang_noasm, "Run benchmark on Elliptic Curve group 𝔾2 (Scalar Multiplication) - Clang no Assembly":
  runBench("bench_ec_g2_scalar_mul", "clang", useAsm = false)
# Pairings
# ------------------------------------------

View File

@ -12,7 +12,7 @@ import
./math/[arithmetic, extension_fields],
./math/arithmetic/limbs_montgomery,
./math/ec_shortweierstrass,
./math/pairings/[pairings_bn, miller_loops, cyclotomic_subgroups],
./math/pairings/[pairings_generic, miller_accumulators],
./math/constants/zoo_subgroups,
./math/io/[io_bigints, io_fields]
@ -86,8 +86,7 @@ func fromRawCoords(
return cttEVM_Success
func eth_evm_ecadd*(
r: var array[64, byte], inputs: openarray[byte]): CttEVMStatus =
func eth_evm_ecadd*(r: var array[64, byte], inputs: openarray[byte]): CttEVMStatus =
## Elliptic Curve addition on BN254_Snarks
## (also called alt_bn128 in Ethereum specs
## and bn256 in Ethereum tests)
@ -142,8 +141,7 @@ func eth_evm_ecadd*(
aff.y, bigEndian
)
func eth_evm_ecmul*(
r: var array[64, byte], inputs: openarray[byte]): CttEVMStatus =
func eth_evm_ecmul*(r: var array[64, byte], inputs: openarray[byte]): CttEVMStatus =
## Elliptic Curve multiplication on BN254_Snarks
## (also called alt_bn128 in Ethereum specs
## and bn256 in Ethereum tests)
@ -192,7 +190,6 @@ func eth_evm_ecmul*(
# which is 31.5% faster than plain windowed scalar multiplication
# at the low cost of a modular reduction.
var sprime{.noInit.}: typeof(smod.mres)
# Due to mismatch between the BigInt[256] input and the rest being BigInt[254]
# we use the low-level getMont instead of 'fromBig'
getMont(smod.mres.limbs, s.limbs,
@ -200,8 +197,7 @@ func eth_evm_ecmul*(
Fr[BN254_Snarks].getR2modP().limbs,
Fr[BN254_Snarks].getNegInvModWord(),
Fr[BN254_Snarks].getSpareBits())
sprime = smod.toBig()
P.scalarMul(sprime)
P.scalarMul(smod.toBig())
else:
P.scalarMul(s)
@ -323,10 +319,13 @@ func eth_evm_ecpairing*(
r[r.len-1] = byte 1
return
var gt0{.noInit.}, gt1{.noInit.}: Fp12[BN254_Snarks]
var P{.noInit.}: ECP_ShortW_Aff[Fp[BN254_Snarks], G1]
var Q{.noInit.}: ECP_ShortW_Aff[Fp2[BN254_Snarks], G2]
var acc {.noInit.}: MillerAccumulator[Fp[BN254_Snarks], Fp2[BN254_Snarks], Fp12[BN254_Snarks]]
acc.init()
var foundInfinity = false
for i in 0 ..< N:
let pos = i*192
@ -348,15 +347,18 @@ func eth_evm_ecpairing*(
if statusQ != cttEVM_Success:
return statusQ
gt1.millerLoopGenericBN(P, Q)
if i == 0:
gt0 = gt1
else:
gt0 *= gt1
let regular = acc.update(P, Q)
if not regular:
foundInfinity = true
gt0.finalExpEasy()
gt0.finalExpHard_BN()
if foundInfinity: # pairing with infinity returns 1, hence no need to compute the following
r[r.len-1] = byte 1
return
var gt {.noinit.}: Fp12[BN254_Snarks]
acc.finish(gt)
gt.finalExp()
zeroMem(r.addr, r.sizeof())
if gt0.isOne().bool:
if gt.isOne().bool:
r[r.len-1] = byte 1

View File

@ -61,7 +61,7 @@ export BigInt
# https://github.com/mratsim/constantine/issues/15
# No exceptions allowed
{.push raises: [].}
{.push raises: [], checks: off.}
{.push inline.}
# Initialization
@ -354,7 +354,26 @@ func setBit*[bits: static int](a: var BigInt[bits], index: int) =
let shifted = One shl (index and SelectMask)
slot[] = slot[] or shifted
# Multiplication by small cosntants
func getWindowAt*(a: BigInt, bitIndex: int, windowSize: static int): SecretWord {.inline.} =
  ## Extract `windowSize` bits of `a` starting at bit `bitIndex`
  ## (counted from the least significant bit).
  ##
  ## The window may straddle two limbs; bits that would lie past
  ## the last limb are read as zero.
  static: doAssert windowSize <= WordBitWidth
  const
    LimbShift = log2_vartime(WordBitWidth.uint32)
    InLimbMask = WordBitWidth - 1
    WindowMask = SecretWord((1 shl windowSize) - 1)
  let
    limbIdx = bitIndex shr LimbShift     # which limb holds the first bit
    offset = bitIndex and InLimbMask     # bit position inside that limb
    lowPart = a.limbs[limbIdx] shr offset

  # The branch depends only on public indices, not on secret data,
  # so it does not break constant-time behaviour.
  if offset + windowSize > WordBitWidth and limbIdx+1 < a.limbs.len:
    # Window spills into the next limb: splice its low bits on top
    result = SecretWord(lowPart or (a.limbs[limbIdx+1] shl (WordBitWidth-offset))) and WindowMask
  else:
    result = SecretWord(lowPart) and WindowMask
# Multiplication by small constants
# ------------------------------------------------------------
func `*=`*(a: var BigInt, b: static int) =
@ -491,22 +510,93 @@ func invmod*[bits](r: var BigInt[bits], a, M: BigInt[bits]) =
one.setOne()
r.invmod(a, one, M)
{.pop.} # inline
# ############################################################
#
# **Variable-Time**
#
# ############################################################
{.push inline.}
func invmod_vartime*[bits](
       r: var BigInt[bits],
       a, F, M: BigInt[bits]) {.tags: [VarTime].} =
  ## Variable-time scaled modular inversion:
  ##   r ≡ F.a⁻¹ (mod M)
  ##
  ## Requirements stated by the contract:
  ## - M MUST be odd (it does not need to be prime)
  ## - ``a`` MUST be less than M
  invmod_vartime(r.limbs, a.limbs, F.limbs, M.limbs, bits)
func invmod_vartime*[bits](
       r: var BigInt[bits],
       a: BigInt[bits],
       F, M: static BigInt[bits]) {.tags: [VarTime].} =
  ## Variable-time scaled modular inversion with the factor `F`
  ## and the modulus `M` known at compile-time:
  ##   r ≡ F.a⁻¹ (mod M)
  ##
  ## Requirements stated by the contract:
  ## - M MUST be odd (it does not need to be prime)
  ## - ``a`` MUST be less than M
  invmod_vartime(r.limbs, a.limbs, F.limbs, M.limbs, bits)
func invmod_vartime*[bits](r: var BigInt[bits], a, M: BigInt[bits]) {.tags: [VarTime].} =
  ## Variable-time modular inversion, r ≡ a⁻¹ (mod M).
  ## Implemented as the scaled inversion with a factor of one.
  ##
  ## The modulus ``M`` MUST be odd
  var unit {.noInit.}: BigInt[bits]
  setOne(unit)
  invmod_vartime(r, a, unit, M)
{.pop.}
# ############################################################
#
# Recoding
#
# ############################################################
#
# Literature
#
# - Elliptic Curves in Cryptography
# Blake, Seroussi, Smart, 1999
#
# - Efficient Arithmetic on Koblitz Curves
# Jerome A. Solinas, 2000
# https://decred.org/research/solinas2000.pdf
#
# - Optimal Left-to-Right Binary Signed-Digit Recoding
# Joye, Yen, 2000
# https://marcjoye.github.io/papers/JY00sd2r.pdf
#
# - Guide to Elliptic Curve Cryptography
# Hankerson, Menezes, Vanstone, 2004
#
# - Signed Binary Representations Revisited
# Katsuyuki Okeya, Katja Schmidt-Samoa, Christian Spahn, and Tsuyoshi Takagi, 2004
# https://eprint.iacr.org/2004/195.pdf
#
# - Some Explicit Formulae of NAF and its Left-to-Right Analogue
# Dong-Guk Han, Tetsuya Izu, and Tsuyoshi Takagi
# https://eprint.iacr.org/2005/384.pdf
#
# See also on Booth encoding and Modified Booth Encoding (bit-pair recoding)
# - https://www.ece.ucdavis.edu/~bbaas/281/notes/Handout.booth.pdf
# - https://vulms.vu.edu.pk/Courses/CS501/Downloads/Booth%20and%20bit%20pair%20encoding.pdf
# - https://vulms.vu.edu.pk/Courses/CS501/Downloads/Bit-Pair%20Recoding.pdf
# - http://www.ecs.umass.edu/ece/koren/arith/simulator/ModBooth/
iterator recoding_l2r_vartime*(a: BigInt): int8 =
iterator recoding_l2r_signed_vartime*[bits: static int](a: BigInt[bits]): int8 =
## This is a minimum-Hamming-Weight left-to-right recoding.
## It outputs signed {-1, 0, 1} bits from MSB to LSB
## with minimal Hamming Weight to minimize operations
## in Miller Loop and vartime scalar multiplications
## in Miller Loops and vartime scalar multiplications
##
## Tagged vartime as it returns an int8
## - Optimal Left-to-Right Binary Signed-Digit Recoding
## Joye, Yen, 2000
## https://marcjoye.github.io/papers/JY00sd2r.pdf
## ⚠️ While the recoding is constant-time,
## usage of this recoding is intended vartime
# As the caller is copy-pasted at each yield
# we rework the algorithm so that we have a single yield point
@ -514,12 +604,12 @@ iterator recoding_l2r_vartime*(a: BigInt): int8 =
var bi, bi1, ri, ri1, ri2: int8
var i = a.bits
var i = bits
while true:
if i == a.bits: # We rely on compiler to hoist this branch out of the loop.
if i == bits: # We rely on compiler to hoist this branch out of the loop.
ri = 0
ri1 = int8 a.bit(a.bits-1)
ri2 = int8 a.bit(a.bits-2)
ri1 = int8 a.bit(bits-1)
ri2 = int8 a.bit(bits-2)
bi = 0
else:
bi = bi1
@ -531,12 +621,225 @@ iterator recoding_l2r_vartime*(a: BigInt): int8 =
ri2 = int8 a.bit(i-2)
bi1 = (bi + ri1 + ri2) shr 1
yield -2*bi + ri + bi1
let r = -2*bi + ri + bi1
yield r
if i > 0:
if i != 0:
i -= 1
else:
break
{.pop.} # inline
func recode_l2r_signed_vartime*[bits: static int](
       recoded: var array[bits+1, SomeSignedInt], a: BigInt[bits]): int {.tags:[VarTime].} =
  ## Materialize the left-to-right (MSB to LSB) signed recoding of `a`
  ## into `recoded`, most significant digit first.
  ## Returns how many digits were written.
  type Digit = SomeSignedInt
  for d in recoding_l2r_signed_vartime(a):
    recoded[result] = Digit(d)
    result += 1
iterator recoding_r2l_signed_vartime*[bits: static int](a: BigInt[bits]): int8 =
  ## This is a minimum-Hamming-Weight right-to-left recoding.
  ## It outputs signed {-1, 0, 1} bits from LSB to MSB
  ## with minimal Hamming Weight to minimize operations
  ## in Miller Loops and vartime scalar multiplications
  ##
  ## Exactly `bits + 1` digits are yielded (the loop runs for i in 0 .. bits).
  ##
  ## ⚠️ While the recoding is constant-time,
  ##   usage of this recoding is intended vartime
  ##
  ## Implementation uses 2-NAF
  # This is equivalent to `var r = (3a - a); if (r and 1) == 0: r shr 1`
  # ci/ci1: carry into the current/next position
  # ri/ri1: bit of `a` at the current/next position
  var ci, ci1, ri, ri1: int8

  var i = 0
  while i <= bits:
    if i == 0: # We rely on compiler to hoist this branch out of the loop.
      ri = int8 a.bit(0)
      ri1 = int8 a.bit(1)
      ci = 0
    else:
      # Slide the one-bit lookahead window by one position
      ci = ci1
      ri = ri1
      if i >= bits - 1:
        # Past the top bit of `a`: pad with zero
        ri1 = 0
      else:
        ri1 = int8 a.bit(i+1)

    ci1 = (ci + ri + ri1) shr 1
    # Single yield point: the for-loop body of the caller is pasted here
    let r = ci + ri - 2*ci1
    yield r

    i += 1
func recode_r2l_signed_vartime*[bits: static int](
       recoded: var array[bits+1, SomeSignedInt], a: BigInt[bits]): int {.tags:[VarTime].} =
  ## Materialize the right-to-left (LSB to MSB) signed recoding of `a`
  ## into `recoded`, least significant digit first.
  ## Returns how many digits were written.
  type Digit = SomeSignedInt
  for d in recoding_r2l_signed_vartime(a):
    recoded[result] = Digit(d)
    result += 1
iterator recoding_r2l_signed_window_vartime*(a: BigInt, windowLogSize: int): int {.tags:[VarTime].} =
  ## This is a minimum-Hamming-Weight right-to-left windowed recoding with the following properties
  ## 1. The most significant non-zero bit is positive.
  ## 2. Among any w consecutive digits, at most one is non-zero.
  ## 3. Each non-zero digit is odd and less than 2ʷ⁻¹ in absolute value.
  ## 4. The length of the recoding is at most BigInt.bits + 1
  ##
  ## This returns input one digit at a time and not the whole window.
  ##
  ## ⚠️ not constant-time

  let sMax = 1 shl (windowLogSize - 1) # 2ʷ⁻¹, also the top bit of a window
  let uMax = sMax + sMax               # 2ʷ
  let mask = uMax - 1                  # selects the w low bits

  # Work on a local mutable copy, it is consumed (shifted right) digit by digit
  var a {.noInit.} = a
  var zeroes = 0 # pending zero digits still to be yielded

  while true:
    # 1. Count zeroes in LSB
    var ctz = 0
    for i in 0 ..< a.limbs.len:
      let ai = a.limbs[i]
      if ai.isZero().bool:
        ctz += WordBitWidth
      else:
        ctz += BaseType(ai).countTrailingZeroBits_vartime().int
        break

    # 2. Remove them
    if ctz >= WordBitWidth:
      # Whole-limb part of the shift: move limbs down and zero-fill the top
      let wordOffset = int(ctz shr log2_vartime(uint32 WordBitWidth))
      for i in 0 ..< a.limbs.len-wordOffset:
        a.limbs[i] = a.limbs[i+wordOffset]
      for i in a.limbs.len-wordOffset ..< a.limbs.len:
        a.limbs[i] = Zero
      ctz = ctz and (WordBitWidth-1)
      zeroes += wordOffset * WordBitWidth
    if ctz > 0:
      # Remaining sub-limb part of the shift
      a.shiftRight(ctz)
      zeroes += ctz

    # 3. Yield - We merge yield points with a goto-based state machine
    # Nim copy-pastes the iterator for-loop body at yield points, we don't want to duplicate code
    # hence we need a single yield point

    type State = enum
      StatePrepareYield
      StateYield
      StateExit

    var yieldVal = 0                   # a pending zero digit by default
    var nextState = StatePrepareYield  # where to go after the yield

    var state {.goto.} = StatePrepareYield
    case state
    of StatePrepareYield:
      # 3.a Yield zeroes
      zeroes -= 1
      if zeroes >= 0:
        state = StateYield # goto StateYield

      # 3.b Yield the least significant window
      var lsw = a.limbs[0].int and mask # signed is important
      a.shiftRight(windowLogSize)
      if (lsw and sMax) != 0: # MSB of window set
        a += One              # Lend 2ʷ to next digit
        lsw -= uMax           # push from [0, 2ʷ) to [-2ʷ⁻¹, 2ʷ⁻¹)

      # The w-1 digits following a window digit are zeros
      zeroes = windowLogSize-1
      yieldVal = lsw
      nextState = StateExit
      # Fall through StateYield

    of StateYield:
      yield yieldVal
      case nextState
      of StatePrepareYield: state = StatePrepareYield
      of StateExit: state = StateExit
      else: unreachable()

    of StateExit:
      # Stop once the whole input has been consumed,
      # otherwise go around the outer loop again
      if a.isZero().bool:
        break
func recode_r2l_signed_window_vartime*[bits: static int](
       naf: var array[bits+1, SomeSignedInt], a: BigInt[bits], window: int): int {.tags:[VarTime].} =
  ## Minimum Hamming-Weight windowed NAF recoding of `a`,
  ## stored least significant digit first.
  ## Returns how many digits were written.
  ##
  ## `naf` holds one digit per entry, not one window per entry.
  type Digit = SomeSignedInt
  for d in recoding_r2l_signed_window_vartime(a, window):
    naf[result] = Digit(d)
    inc result
func signedWindowEncoding(digit: SecretWord, bitsize: static int): tuple[val: SecretWord, neg: SecretBool] {.inline.} =
  ## Signed (Booth) window encoding of `digit`.
  ##
  ## `digit` carries `bitsize+1` bits: the window [bitᵢ₊ₙ..bitᵢ]
  ## plus, in its lowest position, the bit just right of it (bitᵢ₋₁).
  ## Only the window [bitᵢ₊ₙ..bitᵢ] is encoded.
  ##
  ## The idea is the binary analogue of 999 = 1000 - 1:
  ## a run of 1's is replaced by 1...-1,
  ## i.e. 0111 becomes 1 0 0 -1
  ##
  ## Returns the absolute value of the signed digit and its sign flag.
  ##
  ## Notes:
  ## - Unlike NAF this is not a minimum weight encoding
  ## - With constant-time scalar multiplication, or with the large
  ##   bucket windows of multi-scalar-multiplication,
  ##   a minimum weight encoding might not save operations anyway
  ## - Contrary to NAF and wNAF there is no carry to propagate,
  ##   so windows can be encoded independently: suitable for
  ##   parallelization without encoding precomputation, and for GPUs
  const MagnitudeMask = SecretWord((1 shl bitsize) - 1)

  let isNeg = SecretBool(digit shr bitsize)  # top bit of the window
  let signMask = -SecretWord(isNeg)          # all-ones if negative, else zero
  let halved = (digit + One) shr 1           # Lookup bitᵢ₋₁, flip series of 1's

  result.neg = isNeg
  # conditional negation gives the absolute value, then keep `bitsize` bits
  result.val = ((halved + signMask) xor signMask) and MagnitudeMask
func getSignedFullWindowAt*(a: BigInt, bitIndex: int, windowSize: static int): tuple[val: SecretWord, neg: SecretBool] {.inline.} =
  ## Signed encoding of the `windowSize`-bit window of `a`
  ## that starts at bit `bitIndex`.
  ##
  ## Precondition (checked in debug mode only):
  ##   bitIndex != 0 and bitIndex mod windowSize == 0
  debug:
    doAssert (bitIndex != 0) and (bitIndex mod windowSize) == 0

  # Booth encoding needs one extra bit on the right of the window,
  # hence the read starts one bit lower and is one bit wider.
  signedWindowEncoding(getWindowAt(a, bitIndex-1, windowSize+1), windowSize)
func getSignedBottomWindow*(a: BigInt, windowSize: static int): tuple[val: SecretWord, neg: SecretBool] {.inline.} =
  ## Signed encoding of the least significant `windowSize`-bit window of `a`.
  # There is no bit on the right of bit 0: shifting left by one
  # supplies the implicit 0 that Booth encoding expects there.
  let padded = getWindowAt(a, 0, windowSize) shl 1
  signedWindowEncoding(padded, windowSize)
func getSignedTopWindow*(a: BigInt, topIndex: int, excess: static int): tuple[val: SecretWord, neg: SecretBool] {.inline.} =
  ## Signed encoding of the top (most significant) window of `a`,
  ## a partial window of `excess` bits starting at bit `topIndex`.
  ##
  ## NOTE(review): presumably `topIndex` is non-zero and a multiple of the
  ## regular window size, like in `getSignedFullWindowAt` - confirm with callers.
  # Read the `excess` window bits plus the bit on their right.
  # Encoding with a width of `excess+1` places an implicit 0
  # on the left of the MSB.
  let withLookbehind = getWindowAt(a, topIndex-1, excess+1)
  signedWindowEncoding(withLookbehind, excess+1)
{.pop.} # raises no exceptions

View File

@ -550,9 +550,6 @@ template mulCheckSparse*(a: var Fp, b: Fp) =
else:
a *= b
{.pop.} # inline
{.pop.} # raises no exceptions
# ############################################################
#
# Field arithmetic ergonomic macros
@ -595,3 +592,27 @@ macro addchain*(fn: untyped): untyped =
result[^1] = body
# echo result.toStrLit()
# ############################################################
#
# **Variable-Time**
#
# ############################################################
func inv_vartime*(r: var FF, a: FF) {.tags: [VarTime].} =
  ## Variable-time inversion modulo p: `r` receives the inverse of `a`.
  ##
  ## Documented contract: the inverse of 0 is 0.
  ## Incidentally this spares an extra check when converting
  ## Jacobian and Projective elliptic curve coordinates to affine.
  # The R² (mod p) scaling factor is folded into the inversion itself
  invmod_vartime(r.mres, a.mres, FF.getR2modP(), FF.fieldMod())
func inv_vartime*(a: var FF) {.tags: [VarTime].} =
  ## In-place variable-time inversion modulo p.
  ##
  ## Documented contract: the inverse of 0 is 0.
  ## Incidentally this spares an extra check when converting
  ## Jacobian and Projective elliptic curve coordinates to affine.
  inv_vartime(a, a)

View File

@ -7,11 +7,12 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/[abstractions, signed_secret_words],
../../platforms/abstractions,
./limbs, ./limbs_unsaturated
# No exceptions allowed
{.push raises: [].}
{.push checks: off.}
# ############################################################
#
@ -342,18 +343,24 @@ func matVecMul_shr_k_mod_M[N, E: static int](
d[N-1] = cd.lo
e[N-1] = ce.lo
func matVecMul_shr_k[N, E: static int](
template matVecMul_shr_k_impl(
t: TransitionMatrix,
f, g: var LimbsUnsaturated[N, E],
f, g: var LimbsUnsaturated,
Excess: static int,
numLimbsLeft: int or static int,
k: static int
) =
## Compute
##
## [u v] [f]
## [q r].[g] / 2ᵏ
##
## Template so that it can be specialized
## when iteration number is fixed and compiler can unroll, in constant-time case
## or variable and the full buffer might not be used (vartime)
static: doAssert k == WordBitWidth - E
const Max = SignedSecretWord(MaxWord shr E)
static: doAssert k == WordBitWidth - Excess
const Max = SignedSecretWord(MaxWord shr Excess)
let
u = t.u
@ -376,7 +383,7 @@ func matVecMul_shr_k[N, E: static int](
cf.ashr(k)
cg.ashr(k)
for i in 1 ..< N:
for i in 1 ..< numLimbsLeft:
cf.ssumprodAccNoCarry(u, f[i], v, g[i])
cg.ssumprodAccNoCarry(q, f[i], r, g[i])
f[i-1] = cf.lo and Max
@ -384,8 +391,11 @@ func matVecMul_shr_k[N, E: static int](
cf.ashr(k)
cg.ashr(k)
f[N-1] = cf.lo
g[N-1] = cg.lo
f[numLimbsLeft-1] = cf.lo
g[numLimbsLeft-1] = cg.lo
func matVecMul_shr_k[N, E: static int](t: TransitionMatrix, f, g: var LimbsUnsaturated[N, E], k: static int) =
  ## Full-length specialization of `matVecMul_shr_k_impl`:
  ## the limb count `N` and the excess `E` are taken from the type of `f` and `g`.
  matVecMul_shr_k_impl(t, f, g, E, N, k)
func invmodImpl[N, E](
a: var LimbsUnsaturated[N, E],
@ -666,3 +676,217 @@ func legendre*(a: Limbs, M: static Limbs, bits: static int): SecretWord =
a2.fromPackedRepr(a)
legendreImpl(a2, m2, k, bits)
# ############################################################
#
# Variable-time optimizations
#
# ############################################################
const NegInvMod256 = [
  # Stores tab[i div 2] = -i⁻¹ (mod 256), with i odd
  # See "invModBitwidth" on "Dumas iterations"
  # ax ≡ 1 (mod 2ᵏ) <=> ax(2 - ax) ≡ 1 (mod 2^(2k))
  # a⁻¹ (mod 256) = a(2-a²)
  #
  # NOTE(review): the entries equal -(a(2-a²) mod 256). That value is only
  # guaranteed to be the inverse modulo 64, e.g. for a = 3 the entry is -235
  # and 3*235 = 705 ≡ 1 (mod 64) but ≡ 193 (mod 256).
  # This is sufficient for `batchedDivsteps_vartime`, which masks both the
  # index and the product to 6 bits, but the table must not be used
  # for a full 8-bit inverse without being regenerated.
  -1, -235, -141, -183, -57, -227, -133, -239,
  -241, -91, -253, -167, -41, -83, -245, -223,
  -225, -203, -109, -151, -25, -195, -101, -207,
  -209, -59, -221, -135, -9, -51, -213, -191,
  -193, -171, -77, -119, -249, -163, -69, -175,
  -177, -27, -189, -103, -233, -19, -181, -159,
  -161, -139, -45, -87, -217, -131, -37, -143,
  -145, -251, -157, -71, -201, -243, -149, -127,
  -129, -107, -13, -55, -185, -99, -5, -111,
  -113, -219, -125, -39, -169, -211, -117, -95,
  -97, -75, -237, -23, -153, -67, -229, -79,
  -81, -187, -93, -7, -137, -179, -85, -63,
  -65, -43, -205, -247, -121, -35, -197, -47,
  -49, -155, -61, -231, -105, -147, -53, -31,
  -33, -11, -173, -215, -89, -3, -165, -15,
  -17, -123, -29, -199, -73, -115, -21, -255]
func batchedDivsteps_vartime(
       t: var TransitionMatrix,
       eta: SignedSecretWord,
       f0, g0: SecretWord,
       k: static int
     ): SignedSecretWord {.tags:[Vartime].} =
  ## Bernstein-Yang eta (-delta) batch of divsteps
  ## **Variable-Time**
  ##
  ## Output:
  ## - return eta for the next batch of divsteps
  ## - mutate t, the transition matrix to apply the whole batch
  ##   of `k` divsteps at once
  ##   t is scaled by 2ᵏ
  ##
  ## Input:
  ## - f0, bottom limb of f
  ## - g0, bottom limb of g
  ## - k, the maximum batch size, transition matrix is scaled by 2ᵏ

  template swapNeg(a, b) =
    # (a, b) <- (b, -a)
    var tmp = -a
    a = b
    b = tmp

  var
    # The transition matrix [u v; q r] starts as the identity
    u = One
    v = Zero
    q = Zero
    r = One
    f = f0
    g = g0

    # Local, raw signed integer copies: shadow the `eta` parameter
    eta = cast[SignedBaseType](eta)
    bitsLeft = cast[SignedBaseType](k)

  while true:
    # Count zeros up to bitsLeft and process a batch of divsteps up to that number
    # The sentinel bit at position `bitsLeft` caps the count when g has no set bit below it.
    let zeros = (g.BaseType or (1.BaseType shl bitsLeft)).countTrailingZeroBits_vartime()
    g = g shr zeros
    u = u shl zeros
    v = v shl zeros
    eta -= cast[SignedBaseType](zeros)
    bitsLeft -= cast[SignedBaseType](zeros)

    if bitsLeft == 0:
      break

    # Now process, the 1's.
    if eta < 0:
      eta = -eta
      swapNeg(f, g)
      swapNeg(u, q)
      swapNeg(v, r)

    # We process up to 6 1's at once
    const mask6 = SecretWord((1 shl 6) - 1)
    let limit = min(eta+1, bitsLeft)
    let maskLimit = (MaxWord shr (WordBitWidth - limit)) and mask6
    # Find the multiple of f to add to cancel the bottom min(limit, 6) bits of g
    let w = (g * SecretWord NegInvMod256[int((f and mask6) shr 1)]) and maskLimit

    # Next iteration will have at least 6 0's to process at once
    g += f*w
    q += u*w
    r += v*w

  # Publish the accumulated matrix
  t.u = SignedSecretWord u
  t.v = SignedSecretWord v
  t.q = SignedSecretWord q
  t.r = SignedSecretWord r
  return SignedSecretWord(eta)
func matVecMul_shr_k_partial(t: TransitionMatrix, f, g: var LimbsUnsaturated, len: int, k: static int) =
  ## Matrix-Vector multiplication with top part of f and g being zeros
  ##
  ## Only the first `len` limbs of `f` and `g` are processed;
  ## `len` is a runtime value, unlike in the full-length `matVecMul_shr_k`.
  matVecMul_shr_k_impl(t, f, g, LimbsUnsaturated.Excess, len, k)
func isZero_vartime(a: LimbsUnsaturated, limbsLeft: int): bool {.tags:[VarTime].} =
  ## True iff the first `limbsLeft` limbs of `a` are all zero.
  ## Variable-time: returns at the first non-zero limb.
  result = true
  for i in 0 ..< limbsLeft:
    if a[i].int != 0:
      return false
func discardUnusedLimb_vartime[N, E: static int](limbsLeft: var int, f, g: var LimbsUnsaturated[N, E]) {.tags:[VarTime].} =
  ## Shrink the working length by one limb when the top limb of both
  ## `f` and `g` carries nothing but the sign.
  ## The sign is then propagated down into the new top limb.
  if limbsLeft == 1:
    return

  let
    top = limbsLeft - 1
    fTop = f[top]
    gTop = g[top]
    # Each term is 0 if the limb has nothing left but its sign
    # (i.e. it is 11..11 if negative, 00..00 if positive)
    leftover = (fTop xor fTop.isNegMask()) or (gTop xor gTop.isNegMask())

  if cast[SignedBaseType](leftover) == 0:
    # Fold the sign into the excess bits of the limb below
    f[top-1] = f[top-1] or fTop.lshl(WordBitWidth-E)
    g[top-1] = g[top-1] or gTop.lshl(WordBitWidth-E)
    limbsLeft = top
func invmodImpl_vartime[N, E: static int](
       a: var LimbsUnsaturated[N, E],
       F, M: LimbsUnsaturated[N, E],
       invMod2powK: SecretWord,
       k, bits: static int) {.tags:[VarTime].} =
  ## **Variable-time** modular inversion using the Bernstein-Yang algorithm
  ##   a <- F.a⁻¹ (mod M), computed in place.
  ##
  ## Each iteration batches `k` division steps into a transition matrix,
  ## then applies it to (d, e) modulo M and to (f, g).
  ## The loop stops once the working limbs of `g` are all zero.
  ## `bits` is not used by this body.
  var
    d{.noInit.}, e{.noInit.}: LimbsUnsaturated[N, E]
    f{.noInit.}, g{.noInit.}: LimbsUnsaturated[N, E]

  setZero(d)
  e = F
  f = M
  g = a

  # eta = -delta
  var eta = cast[SignedSecretWord](-1)
  # Number of limbs of f and g still in use, shrinks as the values get smaller
  var activeLimbs = N

  while true:
    var transition{.noInit.}: TransitionMatrix

    # Transition matrix for the next batch of divsteps, and updated eta
    eta = batchedDivsteps_vartime(transition, eta, SecretWord f[0], SecretWord g[0], k)

    # [u v]    [d]
    # [q r]/2ᵏ.[e] mod M
    matVecMul_shr_k_mod_M(transition, d, e, k, M, invMod2powK)

    # [u v]    [f]
    # [q r]/2ᵏ.[g]
    matVecMul_shr_k_partial(transition, f, g, activeLimbs, k)

    if isZero_vartime(g, activeLimbs):
      break
    discardUnusedLimb_vartime(activeLimbs, f, g)

  # The sign of the top working limb of f drives the final canonicalization of d
  canonicalize(d, signMask = isNegMask(f[activeLimbs-1]), M)
  a = d
func invmod_vartime*(
       r: var Limbs, a: Limbs,
       F, M: Limbs, bits: static int) {.tags:[VarTime].} =
  ## Scaled modular inverse of ``a`` modulo M, in **variable-time**
  ##   r ≡ F.a⁻¹ (mod M)
  ##
  ## M MUST be odd, M does not need to be prime.
  ## ``a`` MUST be less than M.
  const
    Excess = 2
    k = WordBitWidth - Excess
    NumUnsatWords = (bits + k - 1) div k
  type Unsat = LimbsUnsaturated[NumUnsatWords, Excess]

  # Move every operand to the unsaturated representation
  var mUnsat {.noInit.}, fUnsat {.noInit.}, aUnsat {.noInit.}: Unsat
  mUnsat.fromPackedRepr(M)
  fUnsat.fromPackedRepr(F)
  aUnsat.fromPackedRepr(a)

  # Derived from the lowest limb of M and k
  let m0invK = SecretWord invMod2powK(BaseType M[0], k)

  invmodImpl_vartime(aUnsat, fUnsat, mUnsat, m0invK, k, bits)
  r.fromUnsatRepr(aUnsat)
func invmod_vartime*(
       r: var Limbs, a: Limbs,
       F, M: static Limbs, bits: static int) {.tags:[VarTime].} =
  ## Scaled modular inverse of ``a`` modulo M, in **variable-time**
  ##   r ≡ F.a⁻¹ (mod M)
  ##
  ## Overload for a factor F and a modulus M known at compile-time:
  ## their unsaturated representations and the inverse of M[0]
  ## are evaluated as constants.
  ##
  ## M MUST be odd, M does not need to be prime.
  ## ``a`` MUST be less than M.
  const
    Excess = 2
    k = WordBitWidth - Excess
    NumUnsatWords = (bits + k - 1) div k
    # Everything that depends only on F and M is a constant
    mUnsat = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(M)
    fUnsat = LimbsUnsaturated[NumUnsatWords, Excess].fromPackedRepr(F)
    m0invK = SecretWord invMod2powK(BaseType M[0], k)

  var aUnsat {.noInit.}: LimbsUnsaturated[NumUnsatWords, Excess]
  aUnsat.fromPackedRepr(a)
  invmodImpl_vartime(aUnsat, fUnsat, mUnsat, m0invK, k, bits)
  r.fromUnsatRepr(aUnsat)

View File

@ -6,7 +6,7 @@
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import ../../platforms/[abstractions, signed_secret_words]
import ../../platforms/abstractions
type
LimbsUnsaturated*[N, Excess: static int] = object

View File

@ -198,6 +198,9 @@ declareCurves:
modulus: "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"
order: "0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141"
orderBitwidth: 256
eq_form: ShortWeierstrass
coef_a: 0
coef_b: 7
curve BLS12_377:
# Zexe curve
# (p41) https://eprint.iacr.org/2018/962.pdf

View File

@ -13,8 +13,7 @@ import
../extension_fields,
../elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective],
../pairings/[cyclotomic_subgroups, miller_loops],
../isogenies/frobenius,
../../platforms/allocs
../isogenies/frobenius
# Slow generic implementation
# ------------------------------------------------------------
@ -22,8 +21,7 @@ import
# The bit count must be exact for the Miller loop
const BLS12_377_pairing_ate_param* = block:
# BLS12 Miller loop is parametrized by u
# +1 to bitlength so that we can mul by 3 for NAF encoding
BigInt[64+1].fromHex"0x8508c00000000001"
BigInt[64].fromHex"0x8508c00000000001"
const BLS12_377_pairing_ate_param_isNeg* = false
@ -65,7 +63,7 @@ func millerLoopAddchain*(
Qs: ptr UncheckedArray[ECP_ShortW_Aff[Fp2[BLS12_377], G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[BLS12_377], G1]],
N: int
) =
) {.noInline.} =
## Miller Loop for BLS12-377 curve
## Computes f{u,Q}(P) with u the BLS curve parameter

View File

@ -105,7 +105,7 @@ func clearCofactorReference*(P: var ECP_ShortW_Prj[Fp2[BLS12_377], G2]) {.inline
# BLS12 G1
# ------------------------------------------------------------
func clearCofactorFast*(P: var ECP_ShortW_Prj[Fp[BLS12_377], G1]) =
func clearCofactorFast*(P: var ECP_ShortW[Fp[BLS12_377], G1]) =
## Clear the cofactor of BLS12_377 G1
##
## Wahby et al "Fast and simple constant-time hashing to the BLS12-377 elliptic curve", https://eprint.iacr.org/2019/403
@ -144,7 +144,7 @@ func clearCofactorFast*(P: var ECP_ShortW_Prj[Fp[BLS12_377], G1]) =
# with Psi (ψ) - untwist-Frobenius-Twist function
# and x the curve BLS parameter
func clearCofactorFast*(P: var ECP_ShortW_Prj[Fp2[BLS12_377], G2]) =
func clearCofactorFast*(P: var ECP_ShortW[Fp2[BLS12_377], G2]) =
## Clear the cofactor of BLS12_377 G2
## Optimized using endomorphisms
## P -> [x²-x-1]P + [x-1] ψ(P) + ψ²([2]P)
@ -172,7 +172,7 @@ func clearCofactorFast*(P: var ECP_ShortW_Prj[Fp2[BLS12_377], G2]) =
#
# ############################################################
func isInSubgroup*(P: ECP_ShortW_Prj[Fp[BLS12_377], G1]): SecretBool =
func isInSubgroup*(P: ECP_ShortW[Fp[BLS12_377], G1]): SecretBool =
## Returns true if P is in G1 subgroup, i.e. P is a point of order r.
## A point may be on a curve but not on the prime order r subgroup.
## Not checking subgroup exposes a protocol to small subgroup attacks.
@ -182,7 +182,7 @@ func isInSubgroup*(P: ECP_ShortW_Prj[Fp[BLS12_377], G1]): SecretBool =
# A note on group membership tests for G1, G2 and GT
# on BLS pairing-friendly curves
# P is in the G1 subgroup iff ϕ(P) == [-u²](P)
var t0{.noInit.}, t1{.noInit.}: ECP_ShortW_Prj[Fp[BLS12_377], G1]
var t0{.noInit.}, t1{.noInit.}: typeof(P)
# [-u²]P
t0.pow_bls12_377_x(P)
@ -195,7 +195,7 @@ func isInSubgroup*(P: ECP_ShortW_Prj[Fp[BLS12_377], G1]): SecretBool =
return t0 == t1
func isInSubgroup*(P: ECP_ShortW_Prj[Fp2[BLS12_377], G2]): SecretBool =
func isInSubgroup*(P: ECP_ShortW[Fp2[BLS12_377], G2]): SecretBool =
## Returns true if P is in G2 subgroup, i.e. P is a point of order r.
## A point may be on a curve but not on the prime order r subgroup.
## Not checking subgroup exposes a protocol to small subgroup attacks.
@ -205,8 +205,29 @@ func isInSubgroup*(P: ECP_ShortW_Prj[Fp2[BLS12_377], G2]): SecretBool =
# A note on group membership tests for G1, G2 and GT
# on BLS pairing-friendly curves
# P is in the G2 subgroup iff ψ(P) == [u](P)
var t0{.noInit.}, t1{.noInit.}: ECP_ShortW_Prj[Fp2[BLS12_377], G2]
var t0{.noInit.}, t1{.noInit.}: typeof(P)
t0.pow_bls12_377_x(P) # [u]P
t1.frobenius_psi(P) # ψ(P)
return t0 == t1
func isInSubgroup*(P: ECP_ShortW_Aff[Fp[BLS12_377], G1]): SecretBool =
  ## Returns true if P is in 𝔾1 subgroup, i.e. P is a point of order r.
  ## A point may be on a curve but not on the prime order r subgroup.
  ## Not checking subgroup exposes a protocol to small subgroup attacks.
  ##
  ## The affine point is first converted to projective coordinates,
  ## then checked with the corresponding `isInSubgroup` overload.
  ##
  ## Warning ⚠: Assumes that P is on curve
  var lifted{.noInit.}: ECP_ShortW_Prj[Fp[BLS12_377], G1]
  lifted.fromAffine(P)
  result = lifted.isInSubgroup()
func isInSubgroup*(P: ECP_ShortW_Aff[Fp2[BLS12_377], G2]): SecretBool =
  ## Returns true if P is in 𝔾2 subgroup, i.e. P is a point of order r.
  ## A point may be on a curve but not on the prime order r subgroup.
  ## Not checking subgroup exposes a protocol to small subgroup attacks.
  ##
  ## The affine point is first converted to Jacobian coordinates,
  ## then checked with the corresponding `isInSubgroup` overload.
  ##
  ## Warning ⚠: Assumes that P is on curve
  var lifted{.noInit.}: ECP_ShortW_Jac[Fp2[BLS12_377], G2]
  lifted.fromAffine(P)
  result = lifted.isInSubgroup()

View File

@ -13,8 +13,7 @@ import
../extension_fields,
../elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective],
../pairings/[cyclotomic_subgroups, miller_loops],
../isogenies/frobenius,
../../platforms/allocs
../isogenies/frobenius
# Slow generic implementation
# ------------------------------------------------------------
@ -22,8 +21,7 @@ import
# The bit count must be exact for the Miller loop
const BLS12_381_pairing_ate_param* = block:
# BLS12 Miller loop is parametrized by u
# +2 to bitlength so that we can mul by 3 for NAF encoding
BigInt[64+2].fromHex"0xd201000000010000"
BigInt[64].fromHex"0xd201000000010000"
const BLS12_381_pairing_ate_param_isNeg* = true
@ -66,7 +64,7 @@ func millerLoopAddchain*(
Qs: ptr UncheckedArray[ECP_ShortW_Aff[Fp2[BLS12_381], G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[BLS12_381], G1]],
N: int
) =
) {.noInline.} =
## Generic Miller Loop for BLS12 curve
## Computes f{u,Q}(P) with u the BLS curve parameter

View File

@ -166,7 +166,7 @@ func clearCofactorFast*(P: var ECP_ShortW[Fp2[BLS12_381], G2]) =
#
# ############################################################
func isInSubgroup*(P: ECP_ShortW_Jac[Fp[BLS12_381], G1] or ECP_ShortW_Prj[Fp[BLS12_381], G1]): SecretBool =
func isInSubgroup*(P: ECP_ShortW[Fp[BLS12_381], G1]): SecretBool =
## Returns true if P is in 𝔾1 subgroup, i.e. P is a point of order r.
## A point may be on a curve but not on the prime order r subgroup.
## Not checking subgroup exposes a protocol to small subgroup attacks.
@ -189,7 +189,7 @@ func isInSubgroup*(P: ECP_ShortW_Jac[Fp[BLS12_381], G1] or ECP_ShortW_Prj[Fp[BLS
return t0 == t1
func isInSubgroup*(P: ECP_ShortW_Jac[Fp2[BLS12_381], G2] or ECP_ShortW_Prj[Fp2[BLS12_381], G2]): SecretBool =
func isInSubgroup*(P: ECP_ShortW[Fp2[BLS12_381], G2]): SecretBool =
## Returns true if P is in 𝔾2 subgroup, i.e. P is a point of order r.
## A point may be on a curve but not on the prime order r subgroup.
## Not checking subgroup exposes a protocol to small subgroup attacks.

View File

@ -13,8 +13,7 @@ import
../extension_fields,
../elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective],
../pairings/[cyclotomic_subgroups, miller_loops],
../isogenies/frobenius,
../../platforms/allocs
../isogenies/frobenius
# Slow generic implementation
# ------------------------------------------------------------
@ -22,8 +21,7 @@ import
# The bit count must be exact for the Miller loop
const BN254_Nogami_pairing_ate_param* = block:
# BN Miller loop is parametrized by 6u+2
# +2 to bitlength so that we can mul by 3 for NAF encoding
BigInt[65+2].fromHex"0x18300000000000004"
BigInt[65].fromHex"0x18300000000000004"
const BN254_Nogami_pairing_ate_param_isNeg* = true
@ -56,16 +54,17 @@ func millerLoopAddchain*(
# Negative AteParam
f.conj()
T.neg()
# Ate pairing for BN curves needs adjustment after basic Miller loop
f.millerCorrectionBN(T, Q, P, BN254_Nogami_pairing_ate_param_isNeg)
f.millerCorrectionBN(T, Q, P)
func millerLoopAddchain*(
f: var Fp12[BN254_Nogami],
Qs: ptr UncheckedArray[ECP_ShortW_Aff[Fp2[BN254_Nogami], G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[BN254_Nogami], G1]],
N: int
) =
) {.noInline.} =
## Miller Loop for BN254-Nogami curve
## Computes f{6u+2,Q}(P) with u the BN curve parameter
var Ts = allocStackArray(ECP_ShortW_Prj[Fp2[BN254_Nogami], G2], N)
@ -78,9 +77,11 @@ func millerLoopAddchain*(
# Negative AteParam
f.conj()
for i in 0 ..< N:
Ts[i].neg()
for i in 0 ..< N:
f.millerCorrectionBN(Ts[i], Qs[i], Ps[i], BN254_Nogami_pairing_ate_param_isNeg)
f.millerCorrectionBN(Ts[i], Qs[i], Ps[i])
func cycl_exp_by_curve_param*(
r: var Fp12[BN254_Nogami], a: Fp12[BN254_Nogami],

View File

@ -20,8 +20,7 @@ import
# The bit count must be exact for the Miller loop
const BN254_Snarks_pairing_ate_param* = block:
# BN Miller loop is parametrized by 6u+2
# +2 to bitlength so that we can mul by 3 for NAF encoding
BigInt[65+2].fromHex"0x19d797039be763ba8"
BigInt[65].fromHex"0x19d797039be763ba8"
const BN254_Snarks_pairing_ate_param_isNeg* = false

View File

@ -20,8 +20,7 @@ import
# 1st part: f_{u+1,Q}(P)
const BW6_761_pairing_ate_param_1_unopt* = block:
# BW6-761 unoptimized Miller loop first part is parametrized by u+1
# +1 to bitlength so that we can mul by 3 for NAF encoding
BigInt[64+1].fromHex"0x8508c00000000002"
BigInt[64].fromHex"0x8508c00000000002"
const BW6_761_pairing_ate_param_1_unopt_isNeg* = false

View File

@ -19,7 +19,7 @@ import
#
# ############################################################
func clearCofactorReference*(P: var ECP_ShortW_Prj[Fp[Pallas], G1]) {.inline.} =
func clearCofactorReference*(P: var ECP_ShortW[Fp[Pallas], G1]) {.inline.} =
## Clear the cofactor of Pallas G1
## The Pasta curves have a prime-order group so this is a no-op
discard

View File

@ -0,0 +1,37 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../platforms/abstractions,
../config/curves,
../arithmetic,
../ec_shortweierstrass
# ############################################################
#
# Clear Cofactor - Naive
#
# ############################################################
func clearCofactorReference*(P: var ECP_ShortW[Fp[Secp256k1], G1]) {.inline.} =
  ## Clear the cofactor of Secp256k1
  ## The secp256k1 curve has a prime-order group so this is a no-op:
  ## `P` is left untouched.
  discard
# ############################################################
#
# Subgroup checks
#
# ############################################################
func isInSubgroup*(P: ECP_ShortW[Fp[Secp256k1], G1]): SecretBool {.inline.} =
  ## Subgroup check for Secp256k1.
  ## This is a no-op, all points on curve are in the correct subgroup,
  ## so the answer is always `CtTrue` and `P` is not inspected.
  ##
  ## Warning ⚠: Assumes that P is on curve
  result = CtTrue

View File

@ -19,7 +19,7 @@ import
#
# ############################################################
func clearCofactorReference*(P: var ECP_ShortW_Prj[Fp[Vesta], G1]) {.inline.} =
func clearCofactorReference*(P: var ECP_ShortW[Fp[Vesta], G1]) {.inline.} =
## Clear the cofactor of Vesta G1
## The Pasta curves have a prime-order group so this is a no-op
discard

View File

@ -49,3 +49,10 @@ func hasEndomorphismAcceleration*(C: static Curve): bool =
Pallas,
Vesta
}
const EndomorphismThreshold* = 196
## We use subtraction by maximum infinity norm coefficient
## to split scalars for endomorphisms
## For small scalars the subtraction will overflow
##
## TODO: implement an alternative way to split scalars.

View File

@ -15,14 +15,16 @@ import
./bn254_snarks_subgroups,
./bw6_761_subgroups,
./pallas_subgroups,
./vesta_subgroups
./vesta_subgroups,
./secp256k1_subgroups
export
bls12_377_subgroups,
bls12_381_subgroups,
bn254_nogami_subgroups,
bn254_snarks_subgroups,
bw6_761_subgroups
bw6_761_subgroups,
secp256k1_subgroups
func clearCofactor*[ECP](P: var ECP) {.inline.} =
## Clear the cofactor of a point on the curve

View File

@ -58,6 +58,8 @@ func decomposeEndo*[M, scalBits, L: static int](
## We need to test the mini scalar, which is 65 bits so 2 Fp so about 2 cycles
## and negate it as well.
static: doAssert scalBits >= L, "Cannot decompose a scalar smaller than a mini-scalar or the decomposition coefficient"
# Equal when no window or no negative handling, greater otherwise
static: doAssert L >= (scalBits + M - 1) div M + 1
const w = F.C.getCurveOrderBitwidth().wordsRequired()
@ -493,7 +495,7 @@ func scalarMulGLV_m2w2*[scalBits; EC](
mixin affine
type ECaff = affine(EC)
const C = P0.F.C # curve
static: doAssert: scalBits == C.getCurveOrderBitwidth()
static: doAssert: scalBits <= C.getCurveOrderBitwidth()
# 1. Compute endomorphisms
when P0.G == G1:

View File

@ -0,0 +1,411 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import ./ec_multi_scalar_mul_scheduler,
./ec_endomorphism_accel,
../constants/zoo_endomorphisms
export bestBucketBitSize
# No exceptions allowed in core cryptographic operations
{.push raises: [].}
{.push checks: off.}
# ########################################################### #
# #
# Multi Scalar Multiplication #
# #
# ########################################################### #
# Multi-scalar-multiplication is the primary bottleneck in all zero-knowledge proofs and polynomial commitment schemes.
# In particular, those are at the heart of zk-rollups to bundle a large amount of blockchain transactions.
# They may have to add tens of millions of elliptic curve points to generate proofs,
# requiring powerful machines, GPUs or even FPGAs implementations.
#
# Multi-scalar multiplication does a linear combination of
# R = [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
#
# The current iteration is a reference baseline before evaluating and adding various optimizations
# (scalar recoding, change of coordinate systems, bucket sizing, sorting ...)
#
# See the literature references at the top of `ec_multi_scalar_mul_scheduler.nim`
func multiScalarMulImpl_reference_vartime[F, G; bits: static int](
       r: var ECP_ShortW[F, G],
       coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       N: int, c: static int) {.tags:[VarTime, HeapAlloc].} =
  ## Reference MSM kernel, instantiated per bucket bit length `c`.
  ## Straightforward translation of BDLO12, section 4:
  ## for each c-bit window, points are sorted in buckets by their digit,
  ## buckets are reduced to a per-window sum,
  ## and window sums are combined with c doublings per window.
  ##
  ## Scratch space for the buckets and the per-window sums is heap-allocated
  ## and released before returning.

  # Prologue
  # --------
  type EC = typeof(r)
  const
    numBuckets = (1 shl c) - 1 # bucket 0 is unused
    numWindows = (bits + c - 1) div c
    lastBucket = numBuckets - 1

  let miniMSMs = allocHeapArray(EC, numWindows)
  let buckets = allocHeapArray(EC, numBuckets)

  # Algorithm
  # ---------
  for w in 0 ..< numWindows:
    # Start every window from empty buckets
    for i in 0 ..< numBuckets:
      buckets[i].setInf()

    # 1. Bucket accumulation. Cost: n - (2ᶜ-1) => n points in 2ᶜ-1 buckets, first point per bucket is just copied
    #    Each point lands in the bucket matching its digit in the current window.
    let windowStart = w*c
    for j in 0 ..< N:
      let digit = cast[int](coefs[j].getWindowAt(windowStart, c))
      if digit != 0: # bucket 0 is unused, no need to add [0]Pⱼ
        buckets[digit-1] += points[j]

    # 2. Bucket reduction. Cost: 2x(2ᶜ-2) => 2 additions per 2ᶜ-1 bucket, last bucket is just copied
    #    Buckets hold ordered subset sums, the window sum is
    #      [1]S₁ + [2]S₂ + [3]S₃ + ... + [2ᶜ-1]S_{2ᶜ-1}
    #    computed with running sums, from the highest bucket down.
    var runningSum{.noInit.}, windowSum{.noInit.}: EC
    runningSum = buckets[lastBucket]
    windowSum = buckets[lastBucket]

    for k in countdown(lastBucket-1, 0):
      runningSum += buckets[k] # S_top, then S_top + S_top₋₁, ...
      windowSum += runningSum  # S_top, then [2]S_top + S_top₋₁, ...

    miniMSMs[w] = windowSum

  # 3. Final reduction. Cost: (b/c - 1)x(c+1) => b/c windows, first is copied, c doublings + 1 addition per window
  r = miniMSMs[numWindows-1]
  for w in countdown(numWindows-2, 0):
    for _ in 0 ..< c:
      r.double()
    r += miniMSMs[w]

  # Cleanup
  # -------
  freeHeap(buckets)
  freeHeap(miniMSMs)
func multiScalarMul_reference_vartime*[EC](r: var EC, coefs: openArray[BigInt], points: openArray[ECP_ShortW_Aff]) {.tags:[VarTime, HeapAlloc].} =
  ## Multiscalar multiplication (reference implementation):
  ##   r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
  ##
  ## The bucket bit length is picked at runtime by `bestBucketBitSize`
  ## (unsigned buckets, no manual tuning) and then dispatched
  ## to the kernel instantiated for that compile-time value.
  debug: doAssert coefs.len == points.len

  let N = points.len
  let coefs = coefs.asUnchecked()
  let points = points.asUnchecked()
  let c = bestBucketBitSize(N, BigInt.bits, useSignedBuckets = false, useManualTuning = false)

  template viaWindow(bucketBits: static int) =
    # The kernel needs the bucket bit length as a static parameter
    multiScalarMulImpl_reference_vartime(r, coefs, points, N, bucketBits)

  case c
  of  2: viaWindow(2)
  of  3: viaWindow(3)
  of  4: viaWindow(4)
  of  5: viaWindow(5)
  of  6: viaWindow(6)
  of  7: viaWindow(7)
  of  8: viaWindow(8)
  of  9: viaWindow(9)
  of 10: viaWindow(10)
  of 11: viaWindow(11)
  of 12: viaWindow(12)
  of 13: viaWindow(13)
  of 14: viaWindow(14)
  of 15: viaWindow(15)
  of 16: viaWindow(16)
  of 17: viaWindow(17)
  of 18: viaWindow(18)
  of 19: viaWindow(19)
  of 20: viaWindow(20)
  of 21: viaWindow(21)
  else:
    unreachable()
# ########################################################### #
# #
# Multi Scalar Multiplication #
# Optimized versions #
# #
# ########################################################### #
#
# Multi-Scalar-Mul is the largest bottleneck in Zero-Knowledge-Proofs protocols
# There are ways to avoid FFTs, none to avoid Multi-Scalar-Multiplication
# Hence optimizing it is worth millions, see https://zprize.io
func accumulate[F, G](buckets: ptr UncheckedArray[ECP_ShortW_JacExt[F, G]], val: SecretWord, negate: SecretBool, point: ECP_ShortW_Aff[F, G]) {.inline, meter.} =
  ## Add `point` to (or subtract it from, when `negate` is set)
  ## the bucket selected by the digit `val`.
  ## A zero digit is skipped. Digit `v` maps to `buckets[v-1]`.
  let digit = BaseType(val)
  if digit != 0: # Skip [0]P
    if negate.bool:
      buckets[digit-1] -= point
    else:
      buckets[digit-1] += point
func bucketReduce[EC](r: var EC, buckets: ptr UncheckedArray[EC], numBuckets: static int) {.meter.} =
  ## Reduce the buckets into `r` with running sums, from the last bucket
  ## down to the first, so that `buckets[i]` ends up weighted by `i+1`.
  ## Every bucket is reset to infinity once consumed.
  # We interleave reduction with zero-ing the bucket to use instruction-level parallelism
  const topIdx = numBuckets - 1

  var runningSum{.noInit.}: typeof(r)
  runningSum = buckets[topIdx]
  r = buckets[topIdx]
  buckets[topIdx].setInf()

  for idx in countdown(topIdx-1, 0):
    runningSum += buckets[idx]
    r += runningSum
    buckets[idx].setInf()
# Compile-time selector for how a mini-MSM extracts the signed window
# of each coefficient, and whether it is followed by doublings.
type MiniMsmKind = enum
  kTopWindow    # window read with `getSignedTopWindow(top, excess)`
  kFullWindow   # window read with `getSignedFullWindowAt(bitIndex, c)`
  kBottomWindow # window read with `getSignedBottomWindow(c)`, not followed by doublings
func miniMSM_jacext[F, G; bits: static int](
       r: var ECP_ShortW[F, G],
       buckets: ptr UncheckedArray[ECP_ShortW_JacExt[F, G]],
       bitIndex: int, miniMsmKind: static MiniMsmKind, c: static int,
       coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], N: int) {.meter.} =
  ## Apply a mini-Multi-Scalar-Multiplication on [bitIndex, bitIndex+window)
  ## slice of all (coef, point) pairs and fold the result into `r`.
  ##
  ## Buckets use extended Jacobian coordinates and are left at infinity
  ## by the reduction step. Unless this is the bottom window,
  ## `r` is doubled `c` times afterwards to make room for the next window.
  ##
  ## `coefs[0]` and `points[N-1]` are read unconditionally: N MUST be at least 1.
  const excess = bits mod c
  const top = bits - excess

  template getSignedWindow(j : int): tuple[val: SecretWord, neg: SecretBool] =
    when miniMsmKind == kBottomWindow: coefs[j].getSignedBottomWindow(c)
    elif miniMsmKind == kTopWindow:    coefs[j].getSignedTopWindow(top, excess)
    else:                              coefs[j].getSignedFullWindowAt(bitIndex, c)

  # 1. Bucket Accumulation
  #    Software pipeline: the digit of pair j+1 is decoded, and its bucket prefetched,
  #    before pair j is accumulated.
  var (curVal, curNeg) = getSignedWindow(0)
  for j in 0 ..< N-1:
    let (nextVal, nextNeg) = getSignedWindow(j+1)
    if nextVal.BaseType != 0:
      # In cryptography, points are indistinguishable from random
      # hence, without prefetching, accessing the next bucket is a guaranteed cache miss
      prefetchLarge(buckets[nextVal.BaseType-1].addr, Write, HighTemporalLocality, maxCacheLines = 2)
    accumulate(buckets, curVal, curNeg, points[j])
    curVal = nextVal
    curNeg = nextNeg
  accumulate(buckets, curVal, curNeg, points[N-1])

  # 2. Bucket Reduction
  var sliceSum{.noinit.}: ECP_ShortW_JacExt[F, G]
  bucketReduce(sliceSum, buckets, numBuckets = 1 shl (c-1))

  # 3. Mini-MSM on the slice [bitIndex, bitIndex+window)
  var windowSum{.noInit.}: typeof(r)
  fromJacobianExtended_vartime(windowSum, sliceSum)
  r += windowSum

  when miniMsmKind != kBottomWindow:
    for _ in 0 ..< c:
      r.double()
func multiScalarMulJacExt_vartime[F, G; bits: static int](
       r: var ECP_ShortW[F, G],
       coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       N: int, c: static int) {.tags:[VarTime, HeapAlloc], meter.} =
  ## Multiscalar multiplication:
  ##   r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
  ##
  ## Signed-window bucket method with 2ᶜ⁻¹ extended Jacobian buckets,
  ## processing windows from the most significant to the least significant.

  # Setup
  # -----
  type EcBucket = ECP_ShortW_JacExt[F, G]
  const
    numBuckets = 1 shl (c-1)
    excess = bits mod c
    top = bits - excess

  let buckets = allocHeapArray(EcBucket, numBuckets)
  zeroMem(buckets[0].addr, sizeof(EcBucket) * numBuckets)

  # Algorithm
  # ---------
  r.setInf()
  var bitIndex = top

  # Prologue: partial top window, only when `bits` is not a multiple of `c`
  if excess != 0 and bitIndex != 0:
    miniMSM_jacext(r, buckets, bitIndex, kTopWindow, c, coefs, points, N)
    bitIndex -= c

  # Steady state: full windows, down to (but excluding) bit 0
  while bitIndex != 0:
    miniMSM_jacext(r, buckets, bitIndex, kFullWindow, c, coefs, points, N)
    bitIndex -= c

  # Epilogue: bottom window, `bitIndex` is 0 here
  miniMSM_jacext(r, buckets, bitIndex, kBottomWindow, c, coefs, points, N)

  # Cleanup
  # -------
  freeHeap(buckets)
func miniMSM_affine[NumBuckets, QueueLen, F, G; bits: static int](
       r: var ECP_ShortW[F, G],
       sched: var Scheduler[NumBuckets, QueueLen, F, G],
       bitIndex: int, miniMsmKind: static MiniMsmKind, c: static int,
       coefs: ptr UncheckedArray[BigInt[bits]], N: int) {.meter.} =
  ## Apply a mini-Multi-Scalar-Multiplication on [bitIndex, bitIndex+window)
  ## slice of all (coef, point) pairs and fold the result into `r`.
  ##
  ## Accumulation goes through the scheduler `sched` instead of
  ## adding directly into buckets. The top window is rejected at compile-time.
  ## Unless this is the bottom window, `r` is doubled `c` times afterwards.
  ##
  ## `coefs[0]` is read unconditionally: N MUST be at least 1.
  const excess = bits mod c
  const top = bits - excess
  static: doAssert miniMsmKind != kTopWindow, "The top window is smaller in bits which increases collisions in scheduler."

  sched.buckets[].init()

  template getSignedWindow(j : int): tuple[val: SecretWord, neg: SecretBool] =
    when miniMsmKind == kBottomWindow: coefs[j].getSignedBottomWindow(c)
    elif miniMsmKind == kTopWindow:    coefs[j].getSignedTopWindow(top, excess)
    else:                              coefs[j].getSignedFullWindowAt(bitIndex, c)

  # 1. Bucket Accumulation
  #    Software pipeline: descriptor j+1 is built and prefetched before descriptor j is scheduled.
  var pending: ScheduledPoint = scheduledPointDescriptor(0, getSignedWindow(0))
  for j in 0 ..< N-1:
    let upcoming: ScheduledPoint = scheduledPointDescriptor(j+1, getSignedWindow(j+1))
    sched.prefetch(upcoming)
    sched.schedule(pending)
    pending = upcoming
  sched.schedule(pending)
  sched.flushPendingAndReset()

  # 2. Bucket Reduction
  var sliceSum{.noInit.}: ECP_ShortW_JacExt[F, G]
  bucketReduce(sliceSum, sched.buckets[])

  # 3. Mini-MSM on the slice [bitIndex, bitIndex+window)
  var windowSum{.noInit.}: typeof(r)
  fromJacobianExtended_vartime(windowSum, sliceSum)
  r += windowSum

  when miniMsmKind != kBottomWindow:
    for _ in 0 ..< c:
      r.double()
func multiScalarMulAffine_vartime[F, G; bits: static int](
       r: var ECP_ShortW[F, G],
       coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       N: int, c: static int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} =
  ## Multiscalar multiplication:
  ##   r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
  ##
  ## Full and bottom windows are accumulated through the affine scheduler,
  ## the partial top window (if any) falls back to the extended Jacobian kernel.

  # Setup
  # -----
  const (numBuckets, queueLen) = c.deriveSchedulerConstants()
  const excess = bits mod c
  const top = bits - excess

  let buckets = allocHeap(Buckets[numBuckets, F, G])
  buckets[].init()
  let sched = allocHeap(Scheduler[numBuckets, queueLen, F, G])
  sched[].init(points, buckets, 0, numBuckets.int32)

  # Algorithm
  # ---------
  r.setInf()
  var bitIndex = top

  # Prologue
  if excess != 0 and bitIndex != 0:
    # The top might use only a few bits, the affine scheduler would likely have significant collisions
    zeroMem(sched.buckets.ptJacExt.addr, buckets.ptJacExt.sizeof())
    miniMSM_jacext(r, sched.buckets.ptJacExt.asUnchecked(), bitIndex, kTopWindow, c, coefs, points, N)
    bitIndex -= c

  # Steady state
  while bitIndex != 0:
    miniMSM_affine(r, sched[], bitIndex, kFullWindow, c, coefs, N)
    bitIndex -= c

  # Epilogue: `bitIndex` is 0 here
  miniMSM_affine(r, sched[], bitIndex, kBottomWindow, c, coefs, N)

  # Cleanup
  # -------
  freeHeap(sched)
  freeHeap(buckets)
func multiScalarMul_dispatch_vartime[bits: static int, F, G](
       r: var ECP_ShortW[F, G], coefs: ptr UncheckedArray[BigInt[bits]],
       points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], N: int) =
  ## Multiscalar multiplication:
  ##   r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
  ##
  ## Picks the bucket bit length `c` with `bestBucketBitSize`
  ## (signed buckets, manual tuning) and dispatches to the kernel
  ## instantiated for it: extended Jacobian buckets for c in 2..8,
  ## affine scheduler for c in 9..18.
  let c = bestBucketBitSize(N, bits, useSignedBuckets = true, useManualTuning = true)

  template viaJacExt(bucketBits: static int) =
    multiScalarMulJacExt_vartime(r, coefs, points, N, bucketBits)

  template viaAffine(bucketBits: static int) =
    multiScalarMulAffine_vartime(r, coefs, points, N, bucketBits)

  case c
  of  2: viaJacExt(2)
  of  3: viaJacExt(3)
  of  4: viaJacExt(4)
  of  5: viaJacExt(5)
  of  6: viaJacExt(6)
  of  7: viaJacExt(7)
  of  8: viaJacExt(8)
  of  9: viaAffine(9)
  of 10: viaAffine(10)
  of 11: viaAffine(11)
  of 12: viaAffine(12)
  of 13: viaAffine(13)
  of 14: viaAffine(14)
  of 15: viaAffine(15)
  of 16: viaAffine(16)
  of 17: viaAffine(17)
  of 18: viaAffine(18)
  else:
    unreachable()
func multiScalarMul_vartime*[bits: static int, F, G](
       r: var ECP_ShortW[F, G],
       coefs: openArray[BigInt[bits]],
       points: openArray[ECP_ShortW_Aff[F, G]]) {.tags:[VarTime, Alloca, HeapAlloc], meter.} =
  ## Multiscalar multiplication:
  ##   r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ]Pₙ
  ##
  ## `coefs` and `points` MUST have the same length.
  ## With empty inputs, `r` is set to the point at infinity.
  ##
  ## When the curve has endomorphism acceleration and the scalars fit
  ## in the curve order bitwidth, each scalar is split into M mini-scalars
  ## (M = 2 over Fp, M = 4 over Fp2) paired with the point and its endomorphism
  ## images, and the MSM runs on M*N shorter pairs.
  ## Otherwise the inputs are dispatched as-is.
  debug: doAssert coefs.len == points.len
  let N = points.len

  if N == 0:
    # Empty linear combination is the neutral element.
    # This must be handled here: the mini-MSM kernels unconditionally read
    # `coefs[0]` and `points[N-1]`, which is out-of-bounds on empty buffers.
    r.setInf()
    return

  when bits <= F.C.getCurveOrderBitwidth() and
       F.C.hasEndomorphismAcceleration():
    # TODO, min amount of bits for endomorphisms?

    const M = when F is Fp:  2
              elif F is Fp2: 4
              else: {.error: "Unconfigured".}

    # Bit length of the mini-scalars
    const L = (bits + M - 1) div M + 1
    let splitCoefs = allocHeapArray(array[M, BigInt[L]], N)
    let endoBasis  = allocHeapArray(array[M, ECP_ShortW_Aff[F, G]], N)

    for i in 0 ..< N:
      # The decomposition yields mini-scalars plus, per mini-scalar,
      # a flag telling whether the matching point must be negated.
      var negatePoints {.noinit.}: array[M, SecretBool]
      splitCoefs[i].decomposeEndo(negatePoints, coefs[i], F)
      if negatePoints[0].bool:
        endoBasis[i][0].neg(points[i])
      else:
        endoBasis[i][0] = points[i]

      when F is Fp:
        # Endomorphism image: x scaled by a cubic root of unity, y kept up to sign
        endoBasis[i][1].x.prod(points[i].x, F.C.getCubicRootOfUnity_mod_p())
        if negatePoints[1].bool:
          endoBasis[i][1].y.neg(points[i].y)
        else:
          endoBasis[i][1].y = points[i].y
      else:
        # Endomorphism images: powers of the Frobenius-psi map
        staticFor m, 1, M:
          endoBasis[i][m].frobenius_psi(points[i], m)
          if negatePoints[m].bool:
            endoBasis[i][m].neg()

    # Reinterpret the N arrays of M elements as flat arrays of M*N elements
    let endoCoefs = cast[ptr UncheckedArray[BigInt[L]]](splitCoefs)
    let endoPoints = cast[ptr UncheckedArray[ECP_ShortW_Aff[F, G]]](endoBasis)
    multiScalarMul_dispatch_vartime(r, endoCoefs, endoPoints, M*N)

    endoBasis.freeHeap()
    splitCoefs.freeHeap()
  else:
    multiScalarMul_dispatch_vartime(r, coefs.asUnchecked(), points.asUnchecked(), N)

View File

@ -0,0 +1,611 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/abstractions,
../arithmetic,
../ec_shortweierstrass,
./ec_shortweierstrass_jacobian_extended,
./ec_shortweierstrass_batch_ops
export abstractions, arithmetic,
ec_shortweierstrass, ec_shortweierstrass_jacobian_extended
# No exceptions allowed in core cryptographic operations
{.push raises: [].}
{.push checks: off.}
# ########################################################### #
# #
# Multi Scalar Multiplication - Scheduling #
# #
# ########################################################### #
# This file implements a bucketing acceleration structure.
#
# See the following for the baseline algorithm:
# - Faster batch forgery identification
# Daniel J. Bernstein, Jeroen Doumen, Tanja Lange, and Jan-Jaap Oosterwijk, 2012
# https://eprint.iacr.org/2012/549.pdf
# - Simple guide to fast linear combinations (aka multiexponentiations)
# Vitalik Buterin, 2020
# https://ethresear.ch/t/simple-guide-to-fast-linear-combinations-aka-multiexponentiations/7238
# https://github.com/ethereum/research/blob/5c6fec6/fast_linear_combinations/multicombs.py
# - zkStudyClub: Multi-scalar multiplication: state of the art & new ideas
# Gus Gutoski, 2020
# https://www.youtube.com/watch?v=Bl5mQA7UL2I
#
# And for the scheduling technique and collision probability analysis
# - FPGA Acceleration of Multi-Scalar Multiplication: CycloneMSM
# Kaveh Aasaraai, Don Beaver, Emanuele Cesena, Rahul Maganti, Nicolas Stalder and Javier Varela, 2022
# https://eprint.iacr.org/2022/1396.pdf
#
# Challenges:
# - For the popular BLS12-377 and BLS12-381, an affine elliptic point takes 96 bytes
# an extended jacobian point takes 192 bytes.
# - We want to deal with a large number of points, for example the Zprize competition used 2²⁶ ~= 67M points
# in particular, memory usage is a concern as those input already require ~6.7GB for a BLS12 prime,
# so we can't use much scratchspace, especially on GPUs.
# - Any bit-twiddling algorithm must scale at most linearly with the number of points
# Algorithms that, for example, find the most common pair of points for an optimized addition chain
# are O(n²) and would need to select from a subsample.
# - The scalars are random, so the bucket accessed is random, which needs sorting or prefetching
# to avoid bottlenecking on memory bandwidth. But sorting requires copies ...
# - While copies improve locality, our types are huge, 96~192 bytes
# and we have millions of them.
# - We want our algorithm to be scalable to a large number of threads at minimum, or even better on GPUs.
# Hence it should naturally offer data parallelism, which is tricky due to collisions when accumulating
# 1M points into 32~64K buckets.
# - The asymptotically fastest addition formulae are affine addition with individual cost 3M + 1I
# and asymptotic cost for N points N*3M + N*3M+1I using batch inversion.
# Vartime inversion cost 70-100M depending on the number of bits in the prime
# (multiplication cost scale quadratically while inversion via Euclid linearly)
# - The second fastest general coordinate system is Extended Jacobian with cost 10M,
# so the threshold for N is:
# N*3M+N*3M+100M < N*10M <=> 100M < N * 4M <=> 25 < N
# Hence we want to maximize the chance of doing 25 additions (so we need 50 points).
# Given that there is a low probability for consecutive random points to be assigned to the same bucket,
# we can't keep a queue per bucket for batch accumulation.
# However we can do a vector addition as there is a high probability that consecutive random points
# are assigned to different buckets.
#
# Strategy:
# - Each bucket is associated with (EC Affine, EC ExtJac, set[Empty, AffineSet, ExtJacSet]), in SoA storage
# - Each thread is assigned a range of buckets and keeps a scheduler
# start, stop: int32
# curQueue, curRescheduled: int32
# bucketMap: BigInt[NumNZBuckets]
# queue: array[MaxCapacity, (Target Bucket, PointID)]
# rescheduled: array[32, (Target Bucket, PointID)]
# - when the queue reaches max capacity, we compute a vector affine addition with the target buckets
# we interleave with prefetching to reduce cache misses.
# - when the rescheduled array reaches max capacity, we check if there are at least 32 items in the queue
# and if so schedule a vector addition; otherwise we flush the queue into the EC ExtJac.
# i.e. in the worst case, when all points are the same, we fallback to the JacExt MSM.
# - As a stretch optimization, if many points in rescheduled queue target the same bucket
# we can use sum_reduce_vartime, but are there workloads like that?
#
# Queue size is given by formula `4*c² - 16*c - 128` to handle various concerns: amortization of batch affine, memory usage, collision probability
# `c` is chosen to minimize the number of EC operations but does not take into account memory bandwidth and cache misses cost.
#
# Collision probability for `QueueSize` consecutive *uniformly random* points
# is derived from a Poisson distribution.
# NumCollisions = N*QueueSize/NumNZBuckets is the number of collisions
# NumCollisions / N is the probability of collision
# -------inputs------- c ----buckets---- queue length collision map bytes num collisions collision %
# 2^0 1 2 2^1 2 -144 8 -72 -7200.0%
# 2^1 2 2 2^1 2 -144 8 -144 -7200.0%
# 2^2 4 3 2^2 4 -140 8 -140 -3500.0%
# 2^3 8 3 2^2 4 -140 8 -280 -3500.0%
# 2^4 16 4 2^3 8 -128 8 -256 -1600.0%
# 2^5 32 5 2^4 16 -108 8 -216 -675.0%
# 2^6 64 5 2^4 16 -108 8 -432 -675.0%
# 2^7 128 6 2^5 32 -80 8 -320 -250.0%
# 2^8 256 7 2^6 64 -44 8 -176 -68.8%
# 2^9 512 8 2^7 128 0 16 0 0.0%
# 2^10 1024 9 2^8 256 52 32 208 20.3% <- At half the queue length, we can still amortize batch inversion
# 2^11 2048 9 2^8 256 52 32 416 20.3%
# 2^12 4096 10 2^9 512 112 64 896 21.9%
# 2^13 8192 11 2^10 1024 180 128 1440 17.6%
# 2^14 16384 12 2^11 2048 256 256 2048 12.5%
# 2^15 32768 13 2^12 4096 340 512 2720 8.3%
# 2^16 65536 14 2^13 8192 432 1024 3456 5.3%
# 2^17 131072 15 2^14 16384 532 2048 4256 3.2% <- 100/32 = 3.125, a collision queue of size 32 is highly unlikely to reach full capacity
# 2^18 262144 16 2^15 32768 640 4096 5120 2.0% <- ~10MB of buckets
# 2^19 524288 17 2^16 65536 756 8192 6048 1.2% <- for BLS12-381, the queue size reaches 64K aliasing conflict threshold
# 2^20 1048576 17 2^16 65536 756 8192 12096 1.2%
# 2^21 2097152 18 2^17 131072 880 16384 14080 0.7%
# 2^22 4194304 19 2^18 262144 1012 32768 16192 0.4%
# 2^23 8388608 20 2^19 524288 1152 65536 18432 0.2%
# 2^24 16777216 21 2^20 1048576 1300 131072 20800 0.1%
# 2^25 33554432 22 2^21 2097152 1456 262144 23296 0.1%
# 2^26 67108864 23 2^22 4194304 1620 524288 25920 0.0%
# 2^27 134217728 24 2^23 8388608 1792 1048576 28672 0.0%
# 2^28 268435456 25 2^24 16777216 1972 2097152 31552 0.0%
# 2^29 536870912 26 2^25 33554432 2160 4194304 34560 0.0%
# 2^30 1073741824 27 2^26 67108864 2356 8388608 37696 0.0%
# 2^31 2147483648 28 2^27 134217728 2560 16777216 40960 0.0%
# 2^32 4294967296 29 2^28 268435456 2772 33554432 44352 0.0%
# 2^33 8589934592 30 2^29 536870912 2992 67108864 47872 0.0%
# 2^34 17179869184 31 2^30 1073741824 3220 134217728 51520 0.0%
# 2^35 34359738368 32 2^31 2147483648 3456 268435456 55296 0.0%
#
# The code to reproduce this table is at the bottom
# Sizes for BLS12-381 with c = 16
#
# Buckets: 32768
# - Status: 1 32768
# - Affine: 96 3145728
# - ExtJac: 192 6291456
# ----------------------------------
# Total 289 9469952 ~= 10MB
#
# Scheduler: 1 per thread
# - start, stop: 8
# - queue cursors: 8
# - bucketMap: 4096
# - rescheduled: 256
# -----------------------------------
# Total 4368 ~= 4KB per thread
# ########################################################### #
# #
# General utilities #
# #
# ########################################################### #
func bestBucketBitSize*(inputSize: int, scalarBitwidth: static int, useSignedBuckets, useManualTuning: static bool): int {.inline.} =
  ## Returns the bucket bit-size `c` (searched in 2 .. 21) with the lowest
  ## estimated count of group operations for `inputSize` points
  ## and scalars of `scalarBitwidth` bits.
  ##
  ## The estimate ignores cache effects. Computation can become memory-bound,
  ## especially with large buckets that don't fit in L1 cache, trigger
  ## the 64K aliasing conflict or worse (overflowing L2 cache or TLB).
  ## Scalars are expected to be indistinguishable from random, so buckets
  ## are accessed in a random pattern during accumulation, triggering cache misses.
  ##
  ## With `useManualTuning`, the result is lowered by up to 3 for large `c`
  ## to compensate for those memory effects.

  # Raw operation cost is approximately
  # 1. Bucket accumulation
  #      n - (2ᶜ-1) additions for b/c windows    or n - (2ᶜ⁻¹-1) if using signed buckets
  # 2. Bucket reduction
  #      2x(2ᶜ-2) additions for b/c windows      or 2x(2ᶜ⁻¹-2)
  # 3. Final reduction
  #      (b/c - 1) x (c doublings + 1 addition)
  # Total
  #   b/c (n + 2ᶜ - 2) A + (b/c - 1) x (c*D + A)
  # https://www.youtube.com/watch?v=Bl5mQA7UL2I

  # A doubling costs 50% of an addition with jacobian coordinates
  # and between 60% (BLS12-381 G1) to 66% (BN254-Snarks G1)
  const
    addCost = 10'f32                  # Addition cost
    dblCost = 6'f32                   # Doubling cost
    signedBit = int useSignedBuckets  # signed buckets halve the bucket count

  let scalarBits = float32(scalarBitwidth)
  var bestCost = float32(Inf)

  for width in 2 .. 21:
    let numWindows = scalarBits / width.float32
    let accumulateAndReduce = numWindows * float32(inputSize + (1 shl (width - signedBit)) - 2) * addCost
    let finalReduction = (numWindows - 1'f32) * (width.float32 * dblCost + addCost)
    let total = accumulateAndReduce + finalReduction
    if total < bestCost:
      bestCost = total
      result = width

  # Manual tuning: memory bandwidth / cache boundaries of
  # L1, L2 caches, TLB and the 64K aliasing conflict
  # are not taken into account in the previous formula.
  # Each increase in c doubles memory used.
  when useManualTuning:
    # Each threshold is tested against the already-lowered result.
    for threshold in [14, 15, 16]:
      if threshold <= result:
        result -= 1
# Extended Jacobian generic bindings
# ----------------------------------
# All vartime procedures MUST be tagged vartime
# Hence we do not expose `sum` or `+=` for extended jacobian operation to prevent `vartime` mistakes
# we create a local `sum` or `+=` for this module only
func `+=`*[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_JacExt[F, G]) {.inline.}=
  ## In-place variable-time addition of two extended Jacobian points: P <- P + Q
  sum_vartime(P, P, Q)

func `+=`*[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_Aff[F, G]) {.inline.}=
  ## In-place variable-time mixed addition with an affine point: P <- P + Q
  madd_vartime(P, P, Q)

func `-=`*[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_Aff[F, G]) {.inline.}=
  ## In-place variable-time mixed subtraction of an affine point: P <- P - Q
  msub_vartime(P, P, Q)
# ########################################################### #
# #
# Scheduler #
# #
# ########################################################### #
#
# "磨刀不误砍柴功"
# "Sharpening the axe will not delay cutting the wood" - Chinese proverb
type
  BucketStatus = enum
    ## Flags telling which accumulator(s) of a bucket currently hold data.
    kAffine, kJacExt

  Buckets*[N: static int, F; G: static Subgroup] = object
    ## Struct-of-arrays storage for `N` buckets.
    ## Each bucket has an affine accumulator, an extended Jacobian accumulator
    ## and a status set recording which of the two is in use.
    status: array[N, set[BucketStatus]]
    ptAff: array[N, ECP_ShortW_Aff[F, G]]
    ptJacExt*: array[N, ECP_ShortW_JacExt[F, G]] # Public for the top window

  ScheduledPoint* = object
    ## Packed descriptor (26 + 1 + 37 = 64 bits) of one point to accumulate.
    bucket {.bitsize:26.}: int64 # Supports up to 2²⁵ = 33 554 432 buckets and -1 for the skipped bucket 0
    # `sign` is only ever compared against 0 in this file (0: add the point, non-zero: subtract it).
    sign {.bitsize: 1.}: int64
    # NOTE(review): the field is a signed 37-bit integer, so the non-negative range is
    # presumably [0, 2³⁶) rather than the 2³⁷ stated below - confirm.
    pointID {.bitsize:37.}: int64 # Supports up to 2³⁷ = 137 438 953 472 points

  Scheduler*[NumNZBuckets, QueueLen: static int, F; G: static Subgroup] = object
    ## Batches point-to-bucket additions for the bucket range [start, stopEx).
    points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]]   # input points, indexed by `ScheduledPoint.pointID`
    buckets*: ptr Buckets[NumNZBuckets, F, G]
    start, stopEx: int32 # Bucket range
    numScheduled, numCollisions: int32                 # number of live entries in `queue` / `collisions`
    collisionsMap: BigInt[NumNZBuckets] # We use a BigInt as a bitmap, when all you have is an axe ...
    queue: array[QueueLen, ScheduledPoint]             # pending additions, one per distinct bucket
    collisions: array[32, ScheduledPoint]              # points whose bucket was already targeted by `queue`

# Minimum number of queued points for which `flushPendingAndReset` still uses
# the batched affine addition; also the lower bound of the queue length
# computed by `deriveSchedulerConstants`.
const MinVectorAddThreshold = 32
func init*(buckets: var Buckets) {.inline.} =
  ## Marks every bucket as empty by zeroing the whole status array.
  ## The point arrays are left untouched.
  zeroMem(addr buckets.status, sizeof(buckets.status))
func reset(buckets: var Buckets, index: int) {.inline.} =
  ## Marks the bucket at `index` as empty (neither affine nor extended Jacobian data).
  buckets.status[index] = {}
func deriveSchedulerConstants*(c: int): tuple[numNZBuckets, queueLen: int] {.compileTime.} =
  ## Compile-time derivation, for a bucket bit-size `c`, of
  ## - the number of non-zero buckets: 2ᶜ⁻¹
  ## - the scheduler queue length: `4c² - 16c - 128`,
  ##   raised to `MinVectorAddThreshold` when the formula yields less.
  let formulaLen = 4*c*c - 16*c - 128
  (numNZBuckets: 1 shl (c-1), queueLen: max(MinVectorAddThreshold, formulaLen))
func init*[NumNZBuckets, QueueLen: static int, F; G: static Subgroup](
       sched: var Scheduler[NumNZBuckets, QueueLen, F, G], points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       buckets: ptr Buckets[NumNZBuckets, F, G], start, stopEx: int32) {.inline.} =
  ## init a scheduler overseeing buckets [start, stopEx)
  ## within the indices [0, NumNZBuckets). Bucket for value 0 is considered at index -1.
  ##
  ## - `points`: input points, indexed by `ScheduledPoint.pointID`
  ## - `buckets`: bucket storage the scheduler accumulates into
  ##
  ## Both queues start empty and the collision bitmap starts cleared.
  sched.points = points
  sched.buckets = buckets
  sched.start = start
  sched.stopEx = stopEx
  sched.numScheduled = 0
  sched.numCollisions = 0
  # `schedule` consults the bitmap before anything else clears it:
  # without this, a scheduler living in storage that is not zero-initialized
  # would report spurious collisions from stale bits.
  sched.collisionsMap.setZero()
func scheduledPointDescriptor*(pointIndex: int, pointDesc: tuple[val: SecretWord, neg: SecretBool]): ScheduledPoint {.inline.} =
  ## Packs a recoded scalar digit `(val, neg)` and the index of its point
  ## into a `ScheduledPoint`.
  ## The bucket index is `val - 1` since bucket 0 is skipped:
  ## a digit of 0 maps to bucket -1.
  result.bucket = cast[int64](pointDesc.val) - 1
  result.sign = cast[int64](pointDesc.neg)
  result.pointID = cast[int64](pointIndex)
func enqueuePoint(sched: var Scheduler, sp: ScheduledPoint) {.inline.} =
  ## Appends `sp` to the pending queue and flags its bucket
  ## as targeted in the collision bitmap.
  ## No capacity check is done here.
  let slot = sched.numScheduled
  sched.collisionsMap.setBit(sp.bucket.int)
  sched.queue[slot] = sp
  sched.numScheduled = slot + 1
# Forward declarations: `schedule` and `flushPendingAndReset` use the
# procedures below before their implementation appears in this file.

# Stores a colliding point for later, or accumulates it right away
# in the extended Jacobian bucket when the collision buffer is full.
func handleCollision(sched: var Scheduler, sp: ScheduledPoint)

# Moves buffered collisions whose bucket is no longer targeted back into the queue.
func rescheduleCollisions(sched: var Scheduler)

# Batched affine addition: buckets += scheduledPoints
func sparseVectorAddition[F, G](
       buckets: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       bucketStatuses: ptr UncheckedArray[set[BucketStatus]],
       points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       scheduledPoints: ptr UncheckedArray[ScheduledPoint],
       numScheduled: int32) {.noInline, tags:[VarTime, Alloca].}
func prefetch*(sched: Scheduler, sp: ScheduledPoint) =
  ## Prefetches, for writing, the status and both accumulators
  ## of the bucket targeted by `sp`.
  ## Nothing is prefetched for the skipped bucket (index -1).
  let target = sp.bucket
  if target != -1:
    prefetch(sched.buckets.status[target].addr, Write, HighTemporalLocality)
    prefetchLarge(sched.buckets.ptAff[target].addr, Write, HighTemporalLocality, maxCacheLines = 1)
    prefetchLarge(sched.buckets.ptJacExt[target].addr, Write, HighTemporalLocality, maxCacheLines = 1)
func schedule*(sched: var Scheduler, sp: ScheduledPoint) =
  ## Schedule a point for accumulation in its bucket.
  ##
  ## - A point whose bucket is outside [start, stopEx) is ignored.
  ## - If the bucket has no affine point yet, the (possibly negated) point
  ##   is stored there directly.
  ## - If a queued point already targets the bucket, the point
  ##   goes through `handleCollision`.
  ## - Otherwise the point is queued. A full queue triggers a batched
  ##   affine addition, after which the collision bitmap is cleared
  ##   and buffered collisions are rescheduled.
  let target = int sp.bucket
  if target < sched.start or sched.stopEx <= target:
    return

  # Random access, prefetch to avoid cache-misses
  if kAffine notin sched.buckets.status[target]:
    if sp.sign != 0:
      sched.buckets.ptAff[target].neg(sched.points[sp.pointID])
    else:
      sched.buckets.ptAff[target] = sched.points[sp.pointID]
    sched.buckets.status[target].incl(kAffine)
    return

  if sched.collisionsMap.bit(target).bool:
    sched.handleCollision(sp)
    return

  sched.enqueuePoint(sp)
  if sched.numScheduled != sched.queue.len:
    return

  # The queue is full: process it, then give buffered collisions another chance.
  sparseVectorAddition(
    sched.buckets.ptAff.asUnchecked(), sched.buckets.status.asUnchecked(),
    sched.points, sched.queue.asUnchecked(), sched.numScheduled)
  sched.numScheduled = 0
  sched.collisionsMap.setZero()
  sched.rescheduleCollisions()
func handleCollision(sched: var Scheduler, sp: ScheduledPoint) =
  ## Deals with a point whose bucket is already targeted by a queued point.
  ##
  ## While the collision buffer has room, the point is stored there.
  ## Once it is full, the point is accumulated immediately into
  ## the extended Jacobian accumulator of its bucket.
  if sched.numCollisions < sched.collisions.len:
    sched.collisions[sched.numCollisions] = sp
    inc sched.numCollisions
  # If we want to optimize for a workload where many multipliers are the same, it's here
  elif kJacExt notin sched.buckets.status[sp.bucket]:
    # First use of the extended Jacobian accumulator: initialize it with ±point
    sched.buckets.ptJacExt[sp.bucket].fromAffine(sched.points[sp.pointID])
    if sp.sign != 0:
      sched.buckets.ptJacExt[sp.bucket].neg()
    sched.buckets.status[sp.bucket].incl(kJacExt)
  elif sp.sign == 0:
    sched.buckets.ptJacExt[sp.bucket] += sched.points[sp.pointID]
  else:
    sched.buckets.ptJacExt[sp.bucket] -= sched.points[sp.pointID]
func rescheduleCollisions(sched: var Scheduler) =
  ## Walks the collision buffer from its last entry to its first and moves
  ## into the queue every entry whose bucket is not targeted by the queue.
  ## A moved entry is removed by overwriting it with the current last entry
  ## (swap-remove); entries that still collide stay buffered.
  for idx in countdown(sched.numCollisions - 1, 0'i32):
    let pending = sched.collisions[idx]
    if sched.collisionsMap.bit(pending.bucket.int).bool:
      continue # Still colliding with a queued point

    sched.enqueuePoint(pending)
    let tail = sched.numCollisions - 1
    if idx != tail:
      sched.collisions[idx] = sched.collisions[tail]
    sched.numCollisions = tail
func flushBuffer(sched: var Scheduler, buf: ptr UncheckedArray[ScheduledPoint], count: var int32) =
  ## Accumulates the first `count` entries of `buf` one by one into the
  ## extended Jacobian accumulators of their buckets, then sets `count` to 0.
  for i in 0 ..< count:
    let sp = buf[i]
    if kJacExt notin sched.buckets.status[sp.bucket]:
      # First use of the extended Jacobian accumulator: initialize it with ±point
      sched.buckets.ptJacExt[sp.bucket].fromAffine(sched.points[sp.pointID])
      if sp.sign != 0:
        sched.buckets.ptJacExt[sp.bucket].neg()
      sched.buckets.status[sp.bucket].incl(kJacExt)
    elif sp.sign == 0:
      sched.buckets.ptJacExt[sp.bucket] += sched.points[sp.pointID]
    else:
      sched.buckets.ptJacExt[sp.bucket] -= sched.points[sp.pointID]
  count = 0
func flushPendingAndReset*(sched: var Scheduler) =
  ## Processes everything still pending, then clears the collision bitmap.
  ##
  ## The queue goes through the batched affine addition when it holds
  ## at least `MinVectorAddThreshold` points, otherwise its points are
  ## accumulated one by one in the extended Jacobian accumulators.
  ## Buffered collisions are always accumulated one by one.
  if sched.numScheduled >= MinVectorAddThreshold:
    sparseVectorAddition(
      sched.buckets.ptAff.asUnchecked(), sched.buckets.status.asUnchecked(),
      sched.points, sched.queue.asUnchecked(), sched.numScheduled)
    sched.numScheduled = 0
  elif sched.numScheduled > 0:
    sched.flushBuffer(sched.queue.asUnchecked(), sched.numScheduled)

  if sched.numCollisions > 0:
    sched.flushBuffer(sched.collisions.asUnchecked(), sched.numCollisions)

  sched.collisionsMap.setZero()
# ########################################################### #
# #
# Computation #
# #
# ########################################################### #
func sparseVectorAddition[F, G](
       buckets: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       bucketStatuses: ptr UncheckedArray[set[BucketStatus]],
       points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
       scheduledPoints: ptr UncheckedArray[ScheduledPoint],
       numScheduled: int32
     ) {.noInline, tags:[VarTime, Alloca].} =
  ## Does a sparse vector addition: buckets += scheduledPoints
  ## This implementation is optimized using batch affine inversion
  ## with an asymptotic cost for N points of N*6M + I
  ## where M is field multiplication and I the field inversion.
  ##
  ## Inversion usually costs between 66M to 120M depending on implementation:
  ## - scaling linearly with bits (Euclid, Lehmer, Stein, Bernstein-Yang, Pornin algorithm)
  ## - scaling quadratically with bits if using Fermat's Little Theorem a⁻¹ ≡ aᵖ⁻² (mod p) with addition chains
  ## - constant-time or variable time
  ##
  ## `scheduledPoints` must all target a different bucket.
  ##
  ## Parameters:
  ## - `buckets`, `bucketStatuses`: affine accumulators and their status, indexed by bucket
  ## - `points`: input points, indexed by `ScheduledPoint.pointID`
  ## - `scheduledPoints`: the `numScheduled` additions (or subtractions if `sign != 0`) to do
  ##
  ## `numScheduled` must be at least 1: entry `numScheduled-1` of the
  ## accumulated denominators and entry 0 of the special cases are read unconditionally.
  ## Three scratch arrays of `numScheduled` items are taken from the stack.

  template sps: untyped = scheduledPoints

  # How each scheduled addition is resolved:
  # - kRegular:  through a slope λ (chord or tangent)
  # - kInfLhs:   the bucket is empty or at infinity, it becomes ±point
  # - kInfRhs:   the point is at infinity, the bucket is unchanged
  # - kOpposite: the operands cancel out, the bucket becomes infinity
  type SpecialCase = enum
    kRegular, kInfLhs, kInfRhs, kOpposite

  let lambdas = allocStackArray(tuple[num, den: F], numScheduled)
  let accumDen = allocStackArray(F, numScheduled)
  let specialCases = allocStackArray(SpecialCase, numScheduled)

  # Step 1: Compute numerators and denominators of λᵢ = λᵢ_num / λᵢ_den
  for i in 0 ..< numScheduled:

    # A special case contributes no denominator: carry the running product over
    # (or start it at 1) and move on to the next scheduled point.
    template skipSpecialCase {.dirty.} =
      if i == 0: accumDen[i].setOne()
      else: accumDen[i] = accumDen[i-1]
      continue

    if i != numScheduled - 1:
      prefetchLarge(points[sps[i+1].pointID].addr, Read, HighTemporalLocality, maxCacheLines = 4)
      prefetch(bucketStatuses[sps[i+1].bucket].addr, Read, HighTemporalLocality)
      prefetchLarge(buckets[sps[i+1].bucket].addr, Read, HighTemporalLocality, maxCacheLines = 4)

    # Special cases 1: infinity points have affine coordinates (0, 0) by convention
    #                  it doesn't match the y²=x³+ax+b equation so slope formula need special handling
    if (kAffine notin bucketStatuses[sps[i].bucket]) or buckets[sps[i].bucket].isInf().bool:
      specialCases[i] = kInfLhs
      skipSpecialCase()
    elif points[sps[i].pointID].isInf().bool:
      specialCases[i] = kInfRhs
      skipSpecialCase()

    # Special case 2: λ = (Qy-Py)/(Qx-Px) which is undefined when Px == Qx
    #                 This happens when P == Q or P == -Q
    if bool(buckets[sps[i].bucket].x == points[sps[i].pointID].x):
      if sps[i].sign == 0:
        # Adding Q: equal y means doubling, otherwise Q == -P
        if bool(buckets[sps[i].bucket].y == points[sps[i].pointID].y):
          lambdaDouble(lambdas[i].num, lambdas[i].den, buckets[sps[i].bucket])
        else:
          specialCases[i] = kOpposite
          skipSpecialCase()
      else:
        # Subtracting Q: equal y means P - P, otherwise -Q == P which is a doubling
        if bool(buckets[sps[i].bucket].y == points[sps[i].pointID].y):
          specialCases[i] = kOpposite
          skipSpecialCase()
        else:
          lambdaDouble(lambdas[i].num, lambdas[i].den, buckets[sps[i].bucket])
    else:
      if sps[i].sign == 0:
        lambdaAdd(lambdas[i].num, lambdas[i].den, buckets[sps[i].bucket], points[sps[i].pointID])
      else:
        lambdaSub(lambdas[i].num, lambdas[i].den, buckets[sps[i].bucket], points[sps[i].pointID])

    # Step 2: Accumulate denominators.
    # accumDen[i] is the product of the denominators of all regular cases up to i.
    # The final subtraction is only done for the last entry, when it is a regular case.
    specialCases[i] = kRegular
    if i == 0:
      accumDen[i] = lambdas[i].den
    elif i == numScheduled-1:
      accumDen[i].prod(accumDen[i-1], lambdas[i].den)
    else:
      accumDen[i].prod(accumDen[i-1], lambdas[i].den, skipFinalSub = true)

  # Step 3: Batch invert
  var accInv {.noInit.}: F
  accInv.inv_vartime(accumDen[numScheduled-1])

  # Step 4: Output the sums
  # Walk backward: at iteration i, accInv is the inverse of the product
  # of the denominators of the regular cases up to and including i.
  for i in countdown(numScheduled-1, 1):
    prefetchLarge(points[sps[i-1].pointID].addr, Read, HighTemporalLocality, maxCacheLines = 4)
    prefetchLarge(buckets[sps[i-1].bucket].addr, Write, HighTemporalLocality, maxCacheLines = 4)

    if specialCases[i] == kInfLhs:
      if sps[i]. sign == 0:
        buckets[sps[i].bucket] = points[sps[i].pointID]
      else:
        buckets[sps[i].bucket].neg(points[sps[i].pointID])
      bucketStatuses[sps[i].bucket].incl(kAffine)
      continue
    elif specialCases[i] == kInfRhs:
      continue
    elif specialCases[i] == kOpposite:
      buckets[sps[i].bucket].setInf()
      bucketStatuses[sps[i].bucket].excl(kAffine)
      continue

    # Compute lambda - destroys accumDen[i]
    accumDen[i].prod(accInv, accumDen[i-1], skipFinalSub = true)
    accumDen[i].prod(accumDen[i], lambdas[i].num, skipFinalSub = true)

    # Compute EC addition
    var r{.noInit.}: ECP_ShortW_Aff[F, G]
    r.affineAdd(lambda = accumDen[i], buckets[sps[i].bucket], points[sps[i].pointID]) # points[sps[i].pointID].y unused even if sign is negative

    # Store result
    buckets[sps[i].bucket] = r

    # Next iteration
    # Remove denominator i from the running inverse
    accInv.prod(accInv, lambdas[i].den, skipFinalSub = true)

  block: # tail
    # Index 0 is handled separately: there is no previous accumulated denominator,
    # accInv is directly the inverse of denominator 0 in the regular case.
    if specialCases[0] == kInfLhs:
      if sps[0].sign == 0:
        buckets[sps[0].bucket] = points[sps[0].pointID]
      else:
        buckets[sps[0].bucket].neg(points[sps[0].pointID])
      bucketStatuses[sps[0].bucket].incl(kAffine)
    elif specialCases[0] == kInfRhs:
      discard
    elif specialCases[0] == kOpposite:
      buckets[sps[0].bucket].setInf()
      bucketStatuses[sps[0].bucket].excl(kAffine)
    else:
      # Compute lambda
      accumDen[0].prod(lambdas[0].num, accInv, skipFinalSub = true)

      # Compute EC addition
      var r{.noInit.}: ECP_ShortW_Aff[F, G]
      r.affineAdd(lambda = accumDen[0], buckets[sps[0].bucket], points[sps[0].pointID])

      # Store result
      buckets[sps[0].bucket] = r
func bucketReduce*[N, F, G](
       r: var ECP_ShortW_JacExt[F, G],
       buckets: var Buckets[N, F, G]) =
  ## Reduces the buckets with a running sum, from the last bucket to the first:
  ## each bucket content is added to the running sum,
  ## and the running sum is added to `r` after each bucket.
  ## A bucket's content is the sum of whichever of its affine and
  ## extended Jacobian accumulators are in use.
  ##
  ## `r` is overwritten. Every bucket is marked empty once consumed.
  var runningSum {.noinit.}: ECP_ShortW_JacExt[F, G]

  # Seed the running sum with the last bucket
  block:
    let hasAff = kAffine in buckets.status[N-1]
    let hasJacExt = kJacExt in buckets.status[N-1]
    if hasAff and hasJacExt:
      runningSum.madd_vartime(buckets.ptJacExt[N-1], buckets.ptAff[N-1])
    elif hasAff:
      runningSum.fromAffine(buckets.ptAff[N-1])
    elif hasJacExt:
      runningSum = buckets.ptJacExt[N-1]
    else:
      runningSum.setInf()
  r = runningSum
  buckets.reset(N-1)

  for k in countdown(N-2, 0):
    let hasAff = kAffine in buckets.status[k]
    let hasJacExt = kJacExt in buckets.status[k]
    if hasAff and hasJacExt:
      # Merge both accumulators before adding them to the running sum
      var merged {.noInit.}: ECP_ShortW_JacExt[F, G]
      merged.madd_vartime(buckets.ptJacExt[k], buckets.ptAff[k])
      runningSum += merged
    elif hasAff:
      runningSum += buckets.ptAff[k]
    elif hasJacExt:
      runningSum += buckets.ptJacExt[k]

    buckets.reset(k)
    r += runningSum
# ########################################################### #
# #
# Statistics generation #
# #
# ########################################################### #
# Only when this file is compiled as the main program:
# prints one row of scheduling statistics per input size 2⁰ .. 2³⁵.
when isMainModule:
  import strformat

  proc echoSchedulingParameter(logInputSize: int, echoHeader = false) {.raises:[ValueError].} =
    ## Prints the scheduling statistics for 2^`logInputSize` input points,
    ## or only the column titles when `echoHeader` is true.

    const titles = ["-------inputs-------", "c", "----buckets----", "queue length", "collision map bytes", "num collisions", "collision %"]
    const header = &"{titles[0]:>16} {titles[1]:>3} {titles[2]:>19} {titles[3]:>13} {titles[4]:>16} {titles[5]:>14} {titles[6]:>12}"

    if echoHeader:
      echo header
      return

    let inputSize = 1 shl logInputSize
    # 255-bit scalars, signed buckets, no manual tuning
    let c = inputSize.bestBucketBitSize(255, useSignedBuckets = true, useManualTuning = false)
    let twoPow = "2^"
    let numNZBuckets = 1 shl (c-1)
    let collisionMapSize = ((1 shl (c-1))+63) div 64 * 8 # Stored in BigInt[1 shl (c-1)]
    # Raw formula: unlike `deriveSchedulerConstants`, there is no lower bound here,
    # so the value is negative for small `c`.
    let queueSize = 4*c*c - 16*c - 128
    let numCollisions = float(inputSize*queueSize) / float(numNZBuckets)
    let collisionPercentage = numCollisions / float(inputSize) * 100

    echo &"{twoPow & $logInputSize:>4} {inputSize:>14} {c:>3} {twoPow & $(c-1):>4} {numNZBuckets:>11} {queueSize:>13} {collisionMapSize:>19} {numCollisions:>14} {collisionPercentage:>11.1f}%"

  echoSchedulingParameter(0, echoHeader = true)
  for n in 0 ..< 36:
    echoSchedulingParameter(n)

View File

@ -242,6 +242,7 @@ func scalarMul*[EC](
## Those will be assumed to maintain constant-time property
when BigInt.bits <= EC.F.C.getCurveOrderBitwidth() and
EC.F.C.hasEndomorphismAcceleration():
# TODO, min amount of bits for endomorphisms?
when EC.F is Fp:
P.scalarMulGLV_m2w2(scalar)
elif EC.F is Fp2:

View File

@ -0,0 +1,128 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../arithmetic,
../ec_shortweierstrass,
../io/io_bigints,
../../platforms/abstractions
{.push raises: [].} # No exceptions allowed in core cryptographic operations
{.push checks: off.} # No defects due to array bound checking or signed integer overflow allowed
# Support files for testing Elliptic Curve arithmetic
# ------------------------------------------------------------------------------
iterator unpack(scalarByte: byte): bool =
  ## Yields the 8 bits of `scalarByte`,
  ## from the most significant to the least significant.
  for shift in countdown(7, 0):
    yield bool((scalarByte shr shift) and 1)
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
  ## **Variable-time** Elliptic Curve Scalar Multiplication
  ##
  ##   P <- [k] P
  ##
  ## Left-to-right double-and-add: the scalar is serialized in big-endian
  ## bytes, then for each bit, most significant first, the accumulator
  ## is doubled and the original point is added when the bit is set.
  ##
  ## This MUST NOT be used with secret data.
  ##
  ## This is highly VULNERABLE to timing attacks and power analysis attacks.
  var basePoint {.noinit.}: affine(EC)
  basePoint.affine(P)

  var scalarBytes: array[(scalar.bits+7) div 8, byte]
  scalarBytes.marshal(scalar, bigEndian)

  P.setInf()
  for currentByte in scalarBytes:
    for isSet in unpack(currentByte):
      P.double()
      if isSet:
        P += basePoint
func scalarMul_minHammingWeight_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
  ## **Variable-time** Elliptic Curve Scalar Multiplication
  ##
  ##   P <- [k] P
  ##
  ## The scalar is recoded on the fly, left to right, into signed digits
  ## with minimum Hamming Weight
  ## (which is not NAF, NAF is least-significant bit to most).
  ## For each digit the accumulator is doubled, then the original point
  ## is added for +1 or subtracted for -1.
  ##
  ## This MUST NOT be used with secret data.
  ##
  ## This is highly VULNERABLE to timing attacks and power analysis attacks
  var basePoint {.noinit.}: affine(EC)
  basePoint.affine(P)

  P.setInf()
  for digit in recoding_l2r_signed_vartime(scalar):
    P.double()
    if digit == -1:
      P -= basePoint
    elif digit == 1:
      P += basePoint
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca].} =
  ## **Variable-time** Elliptic Curve Scalar Multiplication
  ##
  ##   P <- [k] P
  ##
  ## This uses windowed-NAF (wNAF) with a table of the odd multiples
  ## P, 3P, 5P, ... of the input point, converted to affine coordinates
  ## in one batch.
  ##
  ## This MUST NOT be used with secret data.
  ##
  ## This is highly VULNERABLE to timing attacks and power analysis attacks

  # Signed digits divides precomputation table size by 2
  # Odd-only divides precomputation table size by another 2
  const precompSize = 1 shl (window - 2)

  # Smallest signed integer type able to hold a digit
  when window <= 8:
    type I = int8
  elif window <= 16:
    type I = int16
  elif window <= 32:
    type I = int32
  else:
    type I = int64

  var naf {.noInit.}: array[BigInt.bits+1, I]
  let nafLen = naf.recode_r2l_signed_window_vartime(scalar, window)

  # Odd multiples: each table entry is the previous one plus 2P
  var twoP {.noInit.}: EC
  twoP.double(P)

  var oddMultiples {.noinit.}: array[precompSize, EC]
  oddMultiples[0] = P
  for i in 1 ..< oddMultiples.len:
    oddMultiples[i].sum(oddMultiples[i-1], twoP)

  var tab {.noinit.}: array[precompSize, affine(EC)]
  tab.batchAffine(oddMultiples)

  # init
  # The most significant digit seeds the accumulator; digit d maps to table index |d| shr 1
  let topDigit = naf[nafLen-1]
  if topDigit > 0:
    P.fromAffine(tab[topDigit shr 1])
  elif topDigit < 0:
    P.fromAffine(tab[(-topDigit) shr 1])
    P.neg()
  else:
    P.setInf()

  # steady state
  for i in 1 ..< nafLen:
    P.double()
    let digit = naf[nafLen-1-i]
    if digit > 0:
      P += tab[digit shr 1]
    elif digit < 0:
      P -= tab[(-digit) shr 1]

View File

@ -7,12 +7,13 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/[abstractions, allocs],
../../platforms/abstractions,
../arithmetic,
../extension_fields,
./ec_shortweierstrass_affine,
./ec_shortweierstrass_jacobian,
./ec_shortweierstrass_projective
./ec_shortweierstrass_projective,
./ec_shortweierstrass_jacobian_extended
# No exceptions allowed, or array bound checks or integer overflow
{.push raises: [], checks:off.}
@ -27,7 +28,7 @@ import
func batchAffine*[F, G](
affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
projs: ptr UncheckedArray[ECP_ShortW_Prj[F, G]],
N: int) =
N: int) {.noInline, tags:[Alloca].} =
# Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1
@ -87,7 +88,7 @@ func batchAffine*[N: static int, F, G](
func batchAffine*[F, G](
affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
jacs: ptr UncheckedArray[ECP_ShortW_Jac[F, G]],
N: int) =
N: int) {.noInline, tags:[Alloca].} =
# Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1
@ -153,7 +154,7 @@ func batchAffine*[N: static int, F, G](
# ############################################################
#
# Elliptic Curve in Short Weierstrass form
# Batch addition
# Sum Reduction
#
# ############################################################
@ -192,7 +193,7 @@ func batchAffine*[N: static int, F, G](
# However, n inversions can use Montgomery's batch inversion
# at the cost of 3(n-1)M + 1I
#
# Hence batch addition can have an asymptotic cost of
# Hence sum reduction can have an asymptotic cost of
# 5M + 1S
# Compared to
# Jacobian addition: 12M + 4S
@ -200,12 +201,18 @@ func batchAffine*[N: static int, F, G](
# Projective addition: 12M (for curves in the form y² = x³ + b)
# Projective mixed addition: 11M (for curves in the form y² = x³ + b)
func lambdaAdd[F; G: static Subgroup](lambda_num, lambda_den: var F, P, Q: ECP_ShortW_Aff[F, G]) =
func lambdaAdd*[F; G: static Subgroup](lambda_num, lambda_den: var F, P, Q: ECP_ShortW_Aff[F, G]) {.inline.} =
  ## Slope of the line through P and Q, as a fraction:
  ##   lambda_num = Qy - Py
  ##   lambda_den = Qx - Px
  diff(lambda_num, Q.y, P.y)
  diff(lambda_den, Q.x, P.x)
func lambdaDouble[F; G: static Subgroup](lambda_num, lambda_den: var F, P: ECP_ShortW_Aff[F, G]) =
func lambdaSub*[F; G: static Subgroup](lambda_num, lambda_den: var F, P, Q: ECP_ShortW_Aff[F, G]) {.inline.} =
  ## Slope of the line through P and -Q, as a fraction,
  ## i.e. the slope used to compute P - Q:
  ##   lambda_num = -Qy - Py
  ##   lambda_den = Qx - Px
  neg(lambda_num, Q.y)
  lambda_num -= P.y
  diff(lambda_den, Q.x, P.x)
func lambdaDouble*[F; G: static Subgroup](lambda_num, lambda_den: var F, P: ECP_ShortW_Aff[F, G]) {.inline.} =
## Compute the tangent at P
lambda_num.square(P.x)
lambda_num *= 3
@ -214,11 +221,11 @@ func lambdaDouble[F; G: static Subgroup](lambda_num, lambda_den: var F, P: ECP_S
lambda_den.double(P.y)
func affineAdd[F; G: static Subgroup](
r: var ECP_ShortW_Aff[F, G],
lambda: var F,
func affineAdd*[F; G: static Subgroup](
r{.noAlias.}: var ECP_ShortW_Aff[F, G],
lambda: F,
P, Q: ECP_ShortW_Aff[F, G]) =
## `r` MUST NOT alias P or Q
r.x.square(lambda)
r.x -= P.x
r.x -= Q.x
@ -229,8 +236,7 @@ func affineAdd[F; G: static Subgroup](
func accum_half_vartime[F; G: static Subgroup](
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
lambdas: ptr UncheckedArray[tuple[num, den: F]],
len: uint) {.noinline.} =
len: uint) {.noInline, tags:[VarTime, Alloca].} =
## Affine accumulation of half the points into the other half
## Warning ⚠️ : variable-time
##
@ -241,17 +247,13 @@ func accum_half_vartime[F; G: static Subgroup](
## Partial sums are stored in [0, len/2)
## [len/2, len) data has been destroyed
##
## Scratchspace:
## - Lambdas
##
## Output:
## - r
##
## Warning ⚠️ : cannot be inlined if used in loop due to the use of alloca
debug: doAssert len and 1 == 0, "There must be an even number of points"
let N = len div 2
let N = int(len div 2)
let lambdas = allocStackArray(tuple[num, den: F], N)
# Step 1: Compute numerators and denominators of λᵢ = λᵢ_num / λᵢ_den
for i in 0 ..< N:
@ -293,25 +295,25 @@ func accum_half_vartime[F; G: static Subgroup](
# Step 2: Accumulate denominators in Qy, which is not used anymore.
if i == 0:
points[q].y = lambdas[i].den
elif i == N-1:
points[q].y.prod(points[q_prev].y, lambdas[i].den)
else:
points[q].y.prod(points[q_prev].y, lambdas[i].den, skipFinalSub = true)
# Step 3: batch invert
var accInv {.noInit.}: F
accInv.setZero()
points[len-1].y += accInv # Undo skipFinalSub, ensure that the last accum is in canonical form, before inversion
accInv.inv(points[len-1].y)
accInv.inv_vartime(points[len-1].y)
# Step 4: Compute the partial sums
template recallSpecialCase(i, p, q): untyped {.dirty.} =
# As Qy is used as an accumulator, we saved Qy in λᵢ_num
# For special caseshandling, restore it.
# For special cases handling, restore it.
points[q].y = lambdas[i].num
if points[p].isInf().bool():
points[i] = points[q]
elif points[q].x.isZero().bool() and lambdas[i].num.isZero().bool():
discard "points[i] = points[p]" # i == p
discard "points[q] is infinity => point[p] unchanged"
else:
points[i].setInf()
@ -356,17 +358,23 @@ func accum_half_vartime[F; G: static Subgroup](
# Store result
points[0] = r
# Batch addition: jacobian
# Batch addition - High-level
# ------------------------------------------------------------
template `+=`[F; G: static Subgroup](P: var ECP_ShortW_JacExt[F, G], Q: ECP_ShortW_Aff[F, G]) =
# Module-local (not exported) in-place mixed addition P <- P + Q
# for extended Jacobian accumulators.
#
# All vartime procedures MUST be tagged vartime
# Hence we do not expose `+=` for extended jacobian operation to prevent `vartime` mistakes
# The following algorithms are all tagged vartime, hence for genericity
# we create a local `+=` for this module only
madd_vartime(P, P, Q)
func accumSum_chunk_vartime[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
lambdas: ptr UncheckedArray[tuple[num, den: F]],
len: uint) =
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G] or ECP_ShortW_JacExt[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], len: uint) =
## Accumulate `points` into r.
## `r` is NOT overwritten
## r += ∑ points
## `points` are destroyed
const minNumPointsSerial = 16
var n = len
@ -378,7 +386,7 @@ func accumSum_chunk_vartime[F; G: static Subgroup](
n -= 1
# Compute [0, n/2) += [n/2, n)
accum_half_vartime(points, lambdas, n)
accum_half_vartime(points, n)
# Next chunk
n = n div 2
@ -387,11 +395,11 @@ func accumSum_chunk_vartime[F; G: static Subgroup](
for i in 0'u ..< n:
r += points[i]
func sum_batch_vartime*[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], pointsLen: int) =
## Batch addition of `points` into `r`
## `r` is overwritten
func accum_batch_vartime[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G] or ECP_ShortW_JacExt[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], pointsLen: int) {.noInline, tags:[VarTime, Alloca].} =
## Batch accumulation of `points` into `r`
## `r` is accumulated into
# We chunk the addition to limit memory usage
# especially as we allocate on the stack.
@ -412,27 +420,81 @@ func sum_batch_vartime*[F; G: static Subgroup](
# After one chunk is processed we are well within all 64-bit CPU L2 cache bounds
# as we halve after each chunk.
r.setInf()
const maxTempMem = 262144 # 2¹⁸ = 262144
const maxStride = maxTempMem div sizeof(ECP_ShortW_Aff[F, G])
let n = min(maxStride, pointsLen)
let accumulators = allocStackArray(ECP_ShortW_Aff[F, G], n)
let lambdas = allocStackArray(tuple[num, den: F], n)
for i in countup(0, pointsLen-1, maxStride):
let n = min(maxStride, pointsLen - i)
let size = n * sizeof(ECP_ShortW_Aff[F, G])
copyMem(accumulators[0].addr, points[i].unsafeAddr, size)
r.accumSum_chunk_vartime(accumulators, lambdas, uint n)
r.accumSum_chunk_vartime(accumulators, uint n)
func sum_batch_vartime*[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: openArray[ECP_ShortW_Aff[F, G]]) {.inline.} =
func sum_reduce_vartime*[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G] or ECP_ShortW_JacExt[F, G]),
points: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], pointsLen: int) {.inline, tags:[VarTime, Alloca].} =
## Batch addition of `points` into `r`
## `r` is overwritten
if points.len == 0:
r.setInf()
if pointsLen == 0:
return
r.sum_batch_vartime(points.asUnchecked(), points.len)
r.accum_batch_vartime(points, pointsLen)
func sum_reduce_vartime*[F; G: static Subgroup](
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G] or ECP_ShortW_JacExt[F, G]),
points: openArray[ECP_ShortW_Aff[F, G]]) {.inline, tags:[VarTime, Alloca].} =
## Batch addition of `points` into `r`
## `r` is overwritten
r.sum_reduce_vartime(points.asUnchecked(), points.len)
# ############################################################
#
# EC Addition Accumulator
#
# ############################################################
# Accumulators store partial additions.
# They allow supporting EC additions in a streaming fashion.
type EcAddAccumulator_vartime*[EC, F; G: static Subgroup; AccumMax: static int] = object
## Elliptic curve addition accumulator
## **Variable-Time**
##
## Affine points are queued in `buffer` and folded into `accum`
## whenever the buffer is full or the accumulation is finished.
# The `cur` is dereferenced first so better locality if at the beginning
# Do we want alignment guarantees?
cur: uint32                                    # number of points currently queued in `buffer`
accum: EC                                      # running sum of the points consumed so far
buffer: array[AccumMax, ECP_ShortW_Aff[F, G]]  # affine points waiting to be added
func init*(ctx: var EcAddAccumulator_vartime) =
  ## Put the accumulator in its empty state:
  ## no buffered point and a running sum equal to the point at infinity.
  ##
  ## Buffers smaller than 16 points are rejected at compile time.
  static:
    doAssert EcAddAccumulator_vartime.AccumMax >= 16, "There is no point in a EcAddBatchAccumulator if the batch size is too small"

  ctx.cur = 0
  ctx.accum.setInf()
func consumeBuffer[EC, F; G: static Subgroup; AccumMax: static int](
       ctx: var EcAddAccumulator_vartime[EC, F, G, AccumMax]) {.noInline, tags: [VarTime, Alloca].} =
  ## Fold every point queued in `ctx.buffer` into `ctx.accum`,
  ## then mark the buffer as empty.
  ##
  ## The buffered points are destroyed by the accumulation.
  ## Does nothing when the buffer is empty.
  if ctx.cur == 0:
    return

  # `accumSum_chunk_vartime` takes (r, points, len) only:
  # the λ numerator/denominator scratch space is allocated
  # by `accum_half_vartime` itself, so nothing is allocated here.
  ctx.accum.accumSum_chunk_vartime(ctx.buffer.asUnchecked(), ctx.cur.uint)
  ctx.cur = 0
func update*[EC, F, G; AccumMax: static int](
       ctx: var EcAddAccumulator_vartime[EC, F, G, AccumMax],
       P: ECP_ShortW_Aff[F, G]) =
  ## Queue the affine point `P` for addition.
  ## When the buffer is already full it is first folded into the running sum.

  # Make room before storing the new point
  if ctx.cur == AccumMax: ctx.consumeBuffer()

  ctx.buffer[ctx.cur] = P
  ctx.cur += 1
# TODO: `merge` for parallel recursive divide-and-conquer processing
func finish*[EC, F, G; AccumMax: static int](
       ctx: var EcAddAccumulator_vartime[EC, F, G, AccumMax],
       accumulatedResult: var EC) =
  ## Fold the points still queued in the buffer into the running sum
  ## and copy that sum to `accumulatedResult`.
  consumeBuffer(ctx)
  accumulatedResult = ctx.accum

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/[abstractions, allocs],
../../platforms/abstractions,
../../platforms/threadpool/threadpool,
./ec_shortweierstrass_affine,
./ec_shortweierstrass_jacobian,
@ -65,10 +65,10 @@ iterator items(c: ChunkDescriptor): tuple[chunkID, start, stopEx: int] =
let chunkSize = c.baseChunkSize
yield (chunkID, offset, min(offset+chunkSize, c.totalIters))
proc sum_batch_vartime_parallel*[F; G: static Subgroup](
proc sum_reduce_vartime_parallel*[F; G: static Subgroup](
tp: Threadpool,
r: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]),
points: openArray[ECP_ShortW_Aff[F, G]]) =
points: openArray[ECP_ShortW_Aff[F, G]]) {.noInline.} =
## Batch addition of `points` into `r`
## `r` is overwritten
## Compute is parallelized, if beneficial.
@ -92,7 +92,7 @@ proc sum_batch_vartime_parallel*[F; G: static Subgroup](
static: doAssert minNumPointsParallel <= maxNumPoints, "The curve " & $r.typeof & " requires large size and needs to be tuned."
if points.len < minNumPointsParallel:
r.sum_batch_vartime(points)
r.sum_reduce_vartime(points)
return
let chunkDesc = computeBalancedChunks(
@ -103,12 +103,12 @@ proc sum_batch_vartime_parallel*[F; G: static Subgroup](
let partialResults = allocStackArray(r.typeof(), chunkDesc.numChunks)
for iter in items(chunkDesc):
proc sum_batch_vartime_wrapper(res: ptr, p: ptr, pLen: int) {.nimcall.} =
proc sum_reduce_vartime_wrapper(res: ptr, p: ptr, pLen: int) {.nimcall.} =
# The borrow checker prevents capturing `var` and `openArray`
# so we capture pointers instead.
res[].sum_batch_vartime(p, pLen)
res[].sum_reduce_vartime(p, pLen)
tp.spawn partialResults[iter.chunkID].addr.sum_batch_vartime_wrapper(
tp.spawn partialResults[iter.chunkID].addr.sum_reduce_vartime_wrapper(
points.asUnchecked() +% iter.start,
iter.stopEx - iter.start)
@ -122,7 +122,7 @@ proc sum_batch_vartime_parallel*[F; G: static Subgroup](
else:
let partialResultsAffine = allocStackArray(ECP_ShortW_Aff[F, G], chunkDesc.numChunks)
partialResultsAffine.batchAffine(partialResults, chunkDesc.numChunks)
r.sum_batch_vartime(partialResultsAffine, chunkDesc.numChunks)
r.sum_reduce_vartime(partialResultsAffine, chunkDesc.numChunks)
# Sanity checks
# ---------------------------------------

View File

@ -37,11 +37,23 @@ type ECP_ShortW_Jac*[F; G: static Subgroup] = object
## Note that jacobian coordinates are not unique
x*, y*, z*: F
template affine*[F, G](_: type ECP_ShortW_Jac[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func isInf*(P: ECP_ShortW_Jac): SecretBool {.inline.} =
## Returns true if P is an infinity point
## and false otherwise
##
## Note: the jacobian coordinates equation is
## Y² = X³ + aXZ⁴ + bZ⁶
## A "zero" point is any point with coordinates X and Z = 0
## Y can be anything
result = P.z.isZero()
func `==`*(P, Q: ECP_ShortW_Jac): SecretBool =
func setInf*(P: var ECP_ShortW_Jac) {.inline.} =
## Set ``P`` to infinity
P.x.setOne()
P.y.setOne()
P.z.setZero()
func `==`*(P, Q: ECP_ShortW_Jac): SecretBool {.meter.} =
## Constant-time equality check
## This is a costly operation
# Reminder: the representation is not unique
@ -63,21 +75,8 @@ func `==`*(P, Q: ECP_ShortW_Jac): SecretBool =
b *= z1z1
result = result and a == b
func isInf*(P: ECP_ShortW_Jac): SecretBool {.inline.} =
## Returns true if P is an infinity point
## and false otherwise
##
## Note: the jacobian coordinates equation is
## Y² = X³ + aXZ⁴ + bZ⁶
## A "zero" point is any point with coordinates X and Z = 0
## Y can be anything
result = P.z.isZero()
func setInf*(P: var ECP_ShortW_Jac) {.inline.} =
## Set ``P`` to infinity
P.x.setOne()
P.y.setOne()
P.z.setZero()
# Ensure a zero-init point doesn't propagate 0s and match any
result = result and not(P.isInf() xor Q.isInf())
func ccopy*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac, ctl: SecretBool) {.inline.} =
## Constant-time conditional copy
@ -337,7 +336,7 @@ func sum*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
P, Q: ECP_ShortW_Jac[F, G],
CoefA: static F
) =
) {.meter.} =
## Elliptic curve point addition for Short Weierstrass curves in Jacobian coordinates
## with the curve ``a`` being a parameter for summing on isogenous curves.
##
@ -361,7 +360,7 @@ func sum*[F; G: static Subgroup](
func sum*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
P, Q: ECP_ShortW_Jac[F, G]
) =
) {.meter.} =
## Elliptic curve point addition for Short Weierstrass curves in Jacobian coordinates
##
## R = P + Q
@ -383,7 +382,7 @@ func madd*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
P: ECP_ShortW_Jac[F, G],
Q: ECP_ShortW_Aff[F, G]
) =
) {.meter.} =
## Elliptic curve mixed addition for Short Weierstrass curves in Jacobian coordinates
## with the curve ``a`` being a parameter for summing on isogenous curves.
##
@ -555,10 +554,7 @@ func madd*[F; G: static Subgroup](
r = o
func double*[F; G: static Subgroup](
r: var ECP_ShortW_Jac[F, G],
P: ECP_ShortW_Jac[F, G]
) =
func double*[F; G: static Subgroup](r: var ECP_ShortW_Jac[F, G], P: ECP_ShortW_Jac[F, G]) {.meter.} =
## Elliptic curve point doubling for Short Weierstrass curves in projective coordinate
##
## R = [2] P
@ -642,9 +638,19 @@ func `-=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Jac) {.inline.} =
nQ.neg(Q)
P.sum(P, nQ)
func `-=`*(P: var ECP_ShortW_Jac, Q: ECP_ShortW_Aff) {.inline.} =
## In-place point substraction
var nQ {.noInit.}: typeof(Q)
nQ.neg(Q)
P.madd(P, nQ)
template affine*[F, G](_: type ECP_ShortW_Jac[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func affine*[F; G](
aff: var ECP_ShortW_Aff[F, G],
jac: ECP_ShortW_Jac[F, G]) =
jac: ECP_ShortW_Jac[F, G]) {.meter.} =
var invZ {.noInit.}, invZ2{.noInit.}: F
invZ.inv(jac.z)
invZ2.square(invZ, skipFinalSub = true)
@ -659,3 +665,4 @@ func fromAffine*[F; G](
jac.x = aff.x
jac.y = aff.y
jac.z.setOne()
jac.z.csetZero(aff.isInf())

View File

@ -0,0 +1,371 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/abstractions,
../config/curves,
../arithmetic,
../extension_fields,
./ec_shortweierstrass_affine,
./ec_shortweierstrass_projective,
./ec_shortweierstrass_jacobian
export Subgroup
# No exceptions allowed
{.push raises: [].}
# ############################################################
#
# Elliptic Curve in Short Weierstrass form
# with Extended Jacobian Coordinates
#
# ############################################################
type ECP_ShortW_JacExt*[F; G: static Subgroup] = object
## Elliptic curve point for a curve in Short Weierstrass form
## y² = x³ + a x + b
##
## over a field F
##
## in Extended Jacobian coordinates (X, Y, ZZ, ZZZ)
## corresponding to (x, y) with X = xZ² and Y = yZ³,
## ZZ = Z² and ZZZ = Z³
##
## The point at infinity is encoded with ZZ = 0 (see `isInf`/`setInf`).
##
## Note that extended jacobian coordinates are not unique
x*, y*, zz*, zzz*: F
# Forward declaration: the body is in the "Conversions" section below,
# but `madd_vartime` needs it earlier in the file.
func fromAffine*[F; G](jacext: var ECP_ShortW_JacExt[F, G], aff: ECP_ShortW_Aff[F, G]) {.inline.}
func isInf*(P: ECP_ShortW_JacExt): SecretBool {.inline, meter.} =
## Returns true if P is an infinity point
## and false otherwise
##
## Only the `zz` coordinate is inspected: P is at infinity iff zz == 0.
result = P.zz.isZero()
func setInf*(P: var ECP_ShortW_JacExt) {.inline.} =
  ## Overwrite ``P`` with the point at infinity,
  ## encoded as (X, Y, ZZ, ZZZ) = (1, 1, 0, 0)
  P.zz.setZero()
  P.zzz.setZero()
  P.x.setOne()
  P.y.setOne()
func `==`*(P, Q: ECP_ShortW_JacExt): SecretBool {.meter.} =
  ## Constant-time equality check
  ## This is a costly operation
  # The representation is not unique, so coordinates are compared
  # after cross-multiplying by the other point's ZZ / ZZZ.
  type F = ECP_ShortW_JacExt.F

  var lhs{.noInit.}, rhs{.noInit.}: F

  # X₁·ZZ₂ == X₂·ZZ₁
  lhs.prod(P.x, Q.zz)
  rhs.prod(Q.x, P.zz)
  let sameX = lhs == rhs

  # Y₁·ZZZ₂ == Y₂·ZZZ₁
  lhs.prod(P.y, Q.zzz)
  rhs.prod(Q.y, P.zzz)
  let sameY = lhs == rhs

  # Ensure a zero-init point doesn't propagate 0s and match any
  result = sameX and sameY and not(P.isInf() xor Q.isInf())
func trySetFromCoordsXandZ*[F; G](
P: var ECP_ShortW_JacExt[F, G],
x, z: F): SecretBool =
## Try to create a point on the elliptic curve
## Y² = X³ + aXZ⁴ + bZ⁶ (Jacobian coordinates)
## y² = x³ + a x + b (affine coordinate)
## return true and update `P` if `x` leads to a valid point
## return false otherwise, in that case `P` is undefined.
##
## `x` is the affine x-coordinate; the result is scaled by `z`:
## (X, Y, ZZ, ZZZ) = (x·z², y·z³, z², z³)
##
## Note: Dedicated robust procedures for hashing-to-curve
## will be provided, this is intended for testing purposes.
##
## For **test case generation only**,
## this is preferred to generating random point
## via random scalar multiplication of the curve generator
## as the latter assumes:
## - point addition, doubling work
## - scalar multiplication works
## - a generator point is defined
## i.e. you can't test unless everything is already working
# Solve the affine curve equation for y
P.y.curve_eq_rhs(x, G)
result = sqrt_if_square(P.y)
# Scale the affine solution by the powers of z
P.zz.square(z)
P.x.prod(x, P.zz)
P.zzz.prod(P.zz, z)
P.y.prod(P.y, P.zzz)
func trySetFromCoordX*[F; G](
P: var ECP_ShortW_JacExt[F, G],
x: F): SecretBool =
## Try to create a point on the elliptic curve
## y² = x³ + a x + b (affine coordinate)
##
## The `ZZ` and `ZZZ` coordinates are set to 1
##
## return true and update `P` if `x` leads to a valid point
## return false otherwise, in that case `P` is undefined.
##
## Note: Dedicated robust procedures for hashing-to-curve
## will be provided, this is intended for testing purposes.
##
## For **test case generation only**,
## this is preferred to generating random point
## via random scalar multiplication of the curve generator
## as the latter assumes:
## - point addition, doubling work
## - scalar multiplication works
## - a generator point is defined
## i.e. you can't test unless everything is already working
# Solve the affine curve equation for y
P.y.curve_eq_rhs(x, G)
result = sqrt_if_square(P.y)
P.x = x
P.zz.setOne()
P.zzz.setOne()
func neg*(P: var ECP_ShortW_JacExt, Q: ECP_ShortW_JacExt) {.inline.} =
  ## Set ``P`` to ``-Q``:
  ## the Y coordinate is negated, X, ZZ and ZZZ are copied as-is.
  P.y.neg(Q.y)
  P.x = Q.x
  P.zz = Q.zz
  P.zzz = Q.zzz
func neg*(P: var ECP_ShortW_JacExt) {.inline.} =
## Negate ``P`` in-place (only the Y coordinate changes)
P.y.neg()
func double*[F; G: static Subgroup](r: var ECP_ShortW_JacExt[F, G], P: ECP_ShortW_JacExt[F, G]) {.meter.} =
## Elliptic curve point doubling in Extended Jacobian coordinates
##
##   r = [2] P
##
## Only curves with coefficient a == 0 are supported,
## anything else is rejected at compile-time.
# http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1
var U{.noInit.}, V{.noInit.}, W{.noinit.}, S{.noInit.}, M{.noInit.}: F
U.double(P.y)         # U = 2Y₁
V.square(U)           # V = U²
W.prod(U, V)          # W = U·V
S.prod(P.x, V)        # S = X₁·V
M.square(P.x)
M *= 3                # M = 3X₁² (the a·ZZ₁² term is dropped, a == 0)
when F.C.getCoefA() != 0:
{.error: "Not implemented.".}
# aliasing, we don't use P.x and U anymore
r.x.square(M)
U.double(S)
r.x -= U              # X₃ = M² - 2S
S -= r.x              # S  = S - X₃
r.y.prod(W, P.y)
M *= S
r.y.diff(M, r.y)      # Y₃ = M(S - X₃) - W·Y₁
r.zz.prod(P.zz, V)    # ZZ₃  = ZZ₁·V
r.zzz.prod(P.zzz, W)  # ZZZ₃ = ZZZ₁·W
func sum_vartime*[F; G: static Subgroup](
r: var ECP_ShortW_JacExt[F, G],
p, q: ECP_ShortW_JacExt[F, G])
{.tags:[VarTime], meter.} =
## **Variable-time** Extended Jacobian addition
##
##   r = p + q
##
## Infinity inputs, p == q and p == -q are handled
## through explicit (data-dependent) branches.
##
## This MUST NOT be used with secret data.
##
## This is highly VULNERABLE to timing attacks and power analysis attacks.
# https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s
# Neutral element: ∞ + q = q and p + ∞ = p
if p.isInf().bool:
r = q
return
if q.isInf().bool:
r = p
return
var U{.noInit.}, S{.noInit.}, P{.noInit.}, R{.noInit.}: F
U.prod(p.x, q.zz)     # U = U₁ = X₁·ZZ₂
P.prod(q.x, p.zz)     #     U₂ = X₂·ZZ₁
S.prod(p.y, q.zzz)    # S = S₁ = Y₁·ZZZ₂
R.prod(q.y, p.zzz)    #     S₂ = Y₂·ZZZ₁
P -= U                # P = U₂ - U₁
R -= S                # R = S₂ - S₁
if P.isZero().bool: # Same x coordinate
if R.isZero().bool: # case P == Q
r.double(q)
return
else: # case P = -Q
r.setInf()
return
var PP{.noInit.}, PPP{.noInit.}, Q{.noInit.}: F
PP.square(P)          # PP  = P²
PPP.prod(PP, P)       # PPP = P³
Q.prod(U, PP)         # Q   = U₁·PP
r.x.square(R)
P.double(Q)           # P is reused as scratch space for 2Q
r.x -= PPP
r.x -= P              # X₃ = R² - PPP - 2Q
Q -= r.x
r.y.prod(S, PPP)
R *= Q
r.y.diff(R, r.y)      # Y₃ = R(Q - X₃) - S₁·PPP
r.zz.prod(p.zz, q.zz)
r.zz *= PP            # ZZ₃  = ZZ₁·ZZ₂·PP
r.zzz.prod(p.zzz, q.zzz)
r.zzz *= PPP          # ZZZ₃ = ZZZ₁·ZZZ₂·PPP
func mdouble*[F; G: static Subgroup](r: var ECP_ShortW_JacExt[F, G], P: ECP_ShortW_Aff[F, G]) {.meter.} =
## Mixed EC point double
##
##   r = [2] P, with P in affine coordinates
##   and r in Extended Jacobian coordinates
##
## Only curves with coefficient a == 0 are supported,
## anything else is rejected at compile-time.
# http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-mdbl-2008-s-1
var U{.noInit.}, V{.noInit.}, W{.noinit.}, S{.noInit.}, M{.noInit.}: F
U.double(P.y)         # U = 2Y₁
V.square(U)           # V = U²
W.prod(U, V)          # W = U·V
S.prod(P.x, V)        # S = X₁·V
M.square(P.x)
M *= 3                # M = 3X₁² (the `a` term is dropped, a == 0)
when F.C.getCoefA() != 0:
{.error: "Not implemented.".}
# aliasing, we don't use P.x and U anymore
r.x.square(M)
U.double(S)
r.x -= U              # X₃ = M² - 2S
S -= r.x              # S  = S - X₃
r.y.prod(W, P.y)
M *= S
r.y.diff(M, r.y)      # Y₃ = M(S - X₃) - W·Y₁
r.zz = V              # ZZ₃  = V
r.zzz = W             # ZZZ₃ = W
func madd_vartime*[F; G: static Subgroup](
r: var ECP_ShortW_JacExt[F, G],
p: ECP_ShortW_JacExt[F, G],
q: ECP_ShortW_Aff[F, G])
{.tags:[VarTime], meter.} =
## **Variable-time** Extended Jacobian mixed addition
##
##   r = p + q, with q in affine coordinates
##
## Infinity inputs, p == q and p == -q are handled
## through explicit (data-dependent) branches.
##
## This MUST NOT be used with secret data.
##
## This is highly VULNERABLE to timing attacks and power analysis attacks.
# https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s
# Neutral element: ∞ + q = q and p + ∞ = p
if p.isInf().bool:
r.fromAffine(q)
return
if q.isInf().bool:
r = p
return
var P{.noInit.}, R{.noInit.}: F
P.prod(q.x, p.zz)     # U₂ = X₂·ZZ₁
R.prod(q.y, p.zzz)    # S₂ = Y₂·ZZZ₁
P -= p.x              # P = U₂ - X₁
R -= p.y              # R = S₂ - Y₁
if P.isZero().bool: # Same x coordinate
if R.isZero().bool: # case P == Q
r.mdouble(q)
return
else: # case P = -Q
r.setInf()
return
var PP{.noInit.}, PPP{.noInit.}, Q{.noInit.}: F
PP.square(P)          # PP  = P²
PPP.prod(PP, P)       # PPP = P³
Q.prod(p.x, PP)       # Q   = X₁·PP
r.x.square(R)
P.double(Q)           # P is reused as scratch space for 2Q
r.x -= PPP
r.x -= P              # X₃ = R² - PPP - 2Q
Q -= r.x
r.y.prod(p.y, PPP)
R *= Q
r.y.diff(R, r.y)      # Y₃ = R(Q - X₃) - Y₁·PPP
r.zz.prod(p.zz, PP)   # ZZ₃  = ZZ₁·PP
r.zzz.prod(p.zzz, PPP) # ZZZ₃ = ZZZ₁·PPP
func msub_vartime*[F; G: static Subgroup](
       r: var ECP_ShortW_JacExt[F, G],
       p: ECP_ShortW_JacExt[F, G],
       q: ECP_ShortW_Aff[F, G]) {.tags:[VarTime], inline.} =
  ## **Variable-time** Extended Jacobian mixed subtraction
  ##
  ##   r = p - q, computed as p + (-q)
  var negQ {.noInit.}: ECP_ShortW_Aff[F, G]
  negQ.neg(q)
  madd_vartime(r, p, negQ)
# Conversions
# -----------
template affine*[F, G](_: type ECP_ShortW_JacExt[F, G]): typedesc =
## Returns the affine type that corresponds to the Extended Jacobian type input
## (same field `F` and subgroup `G`)
ECP_ShortW_Aff[F, G]
template jacobianExtended*[EC](_: typedesc[EC]): typedesc =
## Returns the Extended Jacobian type that corresponds to the
## elliptic curve point type input (same field `EC.F` and subgroup `EC.G`)
ECP_ShortW_JacExt[EC.F, EC.G]
func affine*[F; G](
aff: var ECP_ShortW_Aff[F, G],
jacext: ECP_ShortW_JacExt[F, G]) {.meter.} =
## Convert an Extended Jacobian point to affine coordinates
##
##   x = X / ZZ
##   y = Y / ZZZ
##
## A single field inversion (of ZZZ) is used.
var invZZ {.noInit.}, invZZZ{.noInit.}: F
invZZZ.inv(jacext.zzz)                               # 1/ZZZ
invZZ.prod(jacext.zz, invZZZ, skipFinalSub = true)   # ZZ/ZZZ, i.e. 1/Z when ZZ = Z² and ZZZ = Z³
invZZ.square(skipFinalSub = true)                    # (ZZ/ZZZ)² = 1/ZZ
aff.x.prod(jacext.x, invZZ)
aff.y.prod(jacext.y, invZZZ)
func fromAffine*[F; G](
       jacext: var ECP_ShortW_JacExt[F, G],
       aff: ECP_ShortW_Aff[F, G]) {.inline, meter.} =
  ## Convert an affine point to Extended Jacobian coordinates.
  ##
  ## X and Y are copied, ZZ and ZZZ are set to 1,
  ## or to 0 (constant-time select) when `aff` is the point at infinity.
  let isNeutral = aff.isInf()

  jacext.x = aff.x
  jacext.y = aff.y

  jacext.zz.setOne()
  jacext.zz.csetZero(isNeutral)

  jacext.zzz.setOne()
  jacext.zzz.csetZero(isNeutral)
func fromJacobianExtended_vartime*[F; G](
       prj: var ECP_ShortW_Prj[F, G],
       jacext: ECP_ShortW_JacExt[F, G]) {.inline, meter, tags:[VarTime].} =
  ## **Variable-time** conversion from Extended Jacobian
  ## to homogeneous projective coordinates.
  ##
  ## The point at infinity is special-cased with a branch.
  # Affine             (x, y)
  # Jacobian extended  (xZ², yZ³, Z², Z³)
  # Projective         (xZ', yZ', Z')
  # We can choose Z' = Z⁵ = ZZ·ZZZ
  if jacext.isInf().bool:
    prj.setInf()
  else:
    prj.x.prod(jacext.x, jacext.zzz)   # xZ²·Z³ = xZ⁵
    prj.y.prod(jacext.y, jacext.zz)    # yZ³·Z² = yZ⁵
    prj.z.prod(jacext.zz, jacext.zzz)  # Z²·Z³  = Z⁵
func fromJacobianExtended_vartime*[F; G](
       jac: var ECP_ShortW_Jac[F, G],
       jacext: ECP_ShortW_JacExt[F, G]) {.inline, meter, tags:[VarTime].} =
  ## **Variable-time** conversion from Extended Jacobian
  ## to Jacobian coordinates.
  ##
  ## The point at infinity is special-cased with a branch.
  # Affine             (x, y)
  # Jacobian extended  (xZ², yZ³, Z², Z³)
  # Jacobian           (xZ'², yZ'³, Z')
  # We can choose Z' = Z² = ZZ
  if jacext.isInf().bool:
    jac.setInf()
  else:
    jac.z = jacext.zz
    jac.x.prod(jacext.x, jacext.zz)    # xZ²·Z² = xZ'²
    jac.y.prod(jacext.y, jacext.zzz)   # yZ³·Z³ = yZ'³

View File

@ -37,9 +37,21 @@ type ECP_ShortW_Prj*[F; G: static Subgroup] = object
## Note that projective coordinates are not unique
x*, y*, z*: F
template affine*[F, G](_: type ECP_ShortW_Prj[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func isInf*(P: ECP_ShortW_Prj): SecretBool {.inline.} =
## Returns true if P is an infinity point
## and false otherwise
##
## Note: the projective coordinates equation is
## Y²Z = X³ + aXZ² + bZ³
## A "zero" point is any point with coordinates X and Z = 0
## Y can be anything
result = P.x.isZero() and P.z.isZero()
func setInf*(P: var ECP_ShortW_Prj) {.inline.} =
## Set ``P`` to infinity
P.x.setZero()
P.y.setOne()
P.z.setZero()
func `==`*(P, Q: ECP_ShortW_Prj): SecretBool =
## Constant-time equality check
@ -57,21 +69,8 @@ func `==`*(P, Q: ECP_ShortW_Prj): SecretBool =
b.prod(Q.y, P.z)
result = result and a == b
func isInf*(P: ECP_ShortW_Prj): SecretBool {.inline.} =
## Returns true if P is an infinity point
## and false otherwise
##
## Note: the projective coordinates equation is
## Y²Z = X³ + aXZ² + bZ³
## A "zero" point is any point with coordinates X and Z = 0
## Y can be anything
result = P.x.isZero() and P.z.isZero()
func setInf*(P: var ECP_ShortW_Prj) {.inline.} =
## Set ``P`` to infinity
P.x.setZero()
P.y.setOne()
P.z.setZero()
# Ensure a zero-init point doesn't propagate 0s and match any
result = result and not(P.isInf() xor Q.isInf())
func ccopy*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Prj, ctl: SecretBool) {.inline.} =
## Constant-time conditional copy
@ -153,7 +152,7 @@ func cneg*(P: var ECP_ShortW_Prj, ctl: CTBool) {.inline.} =
func sum*[F; G: static Subgroup](
r: var ECP_ShortW_Prj[F, G],
P, Q: ECP_ShortW_Prj[F, G]
) =
) {.meter.} =
## Elliptic curve point addition for Short Weierstrass curves in projective coordinates
##
## R = P + Q
@ -253,7 +252,7 @@ func madd*[F; G: static Subgroup](
r: var ECP_ShortW_Prj[F, G],
P: ECP_ShortW_Prj[F, G],
Q: ECP_ShortW_Aff[F, G]
) =
) {.meter.} =
## Elliptic curve mixed addition for Short Weierstrass curves
## with p in Projective coordinates and Q in affine coordinates
##
@ -331,7 +330,7 @@ func madd*[F; G: static Subgroup](
func double*[F; G: static Subgroup](
r: var ECP_ShortW_Prj[F, G],
P: ECP_ShortW_Prj[F, G]
) =
) {.meter.} =
## Elliptic curve point doubling for Short Weierstrass curves in projective coordinate
##
## R = [2] P
@ -430,9 +429,19 @@ func `-=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Prj) {.inline.} =
nQ.neg(Q)
P.sum(P, nQ)
func `-=`*(P: var ECP_ShortW_Prj, Q: ECP_ShortW_Aff) {.inline.} =
## In-place point substraction
var nQ {.noInit.}: typeof(Q)
nQ.neg(Q)
P.madd(P, nQ)
template affine*[F, G](_: type ECP_ShortW_Prj[F, G]): typedesc =
## Returns the affine type that corresponds to the Jacobian type input
ECP_ShortW_Aff[F, G]
func affine*[F, G](
aff: var ECP_ShortW_Aff[F, G],
proj: ECP_ShortW_Prj[F, G]) =
proj: ECP_ShortW_Prj[F, G]) {.meter.} =
var invZ {.noInit.}: F
invZ.inv(proj.z)
@ -445,3 +454,7 @@ func fromAffine*[F, G](
proj.x = aff.x
proj.y = aff.y
proj.z.setOne()
let inf = aff.isInf()
proj.x.csetZero(inf)
proj.z.csetZero(inf)

View File

@ -707,9 +707,12 @@ func prefer_3sqr_over_2mul(F: type ExtensionField): bool {.compileTime.} =
let a = default(F)
# No shortcut in the VM
when a.c0 is ExtensionField:
when a.c0.c0 is ExtensionField:
when a.c0 is Fp12:
# Benchmarked on BLS12-381
when a.c0.c0 is Fp6:
return true
elif a.c0.c0 is Fp4:
return false
else: return false
else: return false
@ -1233,7 +1236,7 @@ func mul2x_sparse_by_0y*[Fdbl, F](
# Inversion
# -------------------------------------------------------------------
func invImpl(r: var QuadraticExt, a: QuadraticExt) =
func invImpl(r: var QuadraticExt, a: QuadraticExt, useVartime: static bool = false) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
@ -1257,6 +1260,9 @@ func invImpl(r: var QuadraticExt, a: QuadraticExt) =
v0 -= v1 # v0 = a0² - β a1² (the norm / squared magnitude of a)
# [1 Inv, 2 Sqr, 1 Add]
when useVartime:
v1.inv_vartime(v0)
else:
v1.inv(v0) # v1 = 1 / (a0² - β a1²)
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
@ -1264,7 +1270,7 @@ func invImpl(r: var QuadraticExt, a: QuadraticExt) =
v0.neg(v1) # v0 = -1 / (a0² - β a1²)
r.c1.prod(a.c1, v0) # r1 = -a1 / (a0² - β a1²)
func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
func inv2xImpl(r: var QuadraticExt, a: QuadraticExt, useVartime: static bool = false) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
@ -1284,6 +1290,9 @@ func inv2xImpl(r: var QuadraticExt, a: QuadraticExt) =
# [1 Inv, 2 Sqr, 1 Add]
t.redc2x(V0)
when useVartime:
t.inv_vartime()
else:
t.inv() # v1 = 1 / (a0² - β a1²)
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
@ -1983,7 +1992,7 @@ func mul2x_sparse_by_0yz*[Fpkdiv3](r: var CubicExt2x, a: CubicExt, y, z: Fpkdiv3
# Inversion
# ----------------------------------------------------------------------
func invImpl(r: var CubicExt, a: CubicExt) =
func invImpl(r: var CubicExt, a: CubicExt, useVartime: static bool = false) =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
@ -2031,6 +2040,9 @@ func invImpl(r: var CubicExt, a: CubicExt) =
r.c0.prod(a.c0, A) # aliasing: last use of a₀, destroy r₀
t += r.c0
when useVartime:
t.inv_vartime()
else:
t.inv()
# (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
@ -2038,7 +2050,7 @@ func invImpl(r: var CubicExt, a: CubicExt) =
r.c1.prod(B, t)
r.c2.prod(C, t)
func inv2xImpl(r: var CubicExt, a: CubicExt) =
func inv2xImpl(r: var CubicExt, a: CubicExt, useVartime: static bool = false) =
## Compute the multiplicative inverse of ``a``
## via lazy reduction
##
@ -2083,6 +2095,9 @@ func inv2xImpl(r: var CubicExt, a: CubicExt) =
t.sum2xUnr(t, t2)
f.redc2x(t)
when useVartime:
f.inv_vartime()
else:
f.inv()
# (a0 + a1 v + a2 v²)^-1 = (A + B v + C v²) / F
@ -2142,7 +2157,7 @@ func inv*(r: var CubicExt, a: CubicExt) =
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
when true:
when CubicExt.C.has_large_field_elem() or r is Fp12:
r.invImpl(a)
else:
r.inv2xImpl(a)
@ -2180,5 +2195,44 @@ template prod*(r: var ExtensionField, a, b: ExtensionField, skipFinalSub: static
# the final substraction on Fp
r.prod(a, b)
# ############################################################
# #
# Variable-time #
# #
# ############################################################
func inv_vartime*(r: var QuadraticExt, a: QuadraticExt) {.tags:[VarTime].} =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
when true:
r.invImpl(a, useVartime = true)
else: # Lazy reduction, doesn't seem to gain speed.
r.inv2xImpl(a, useVartime = true)
func inv_vartime*(r: var CubicExt, a: CubicExt) {.tags:[VarTime].} =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
when CubicExt.C.has_large_field_elem() or r is Fp12:
r.invImpl(a, useVartime = true)
else:
r.inv2xImpl(a, useVartime = true)
func inv_vartime*(a: var ExtensionField) {.tags:[VarTime].} =
## Compute the multiplicative inverse of ``a``
##
## The inverse of 0 is 0.
## Incidentally this avoids extra check
## to convert Jacobian and Projective coordinates
## to affine for elliptic curve
a.invImpl(a, useVartime = true)
{.pop.} # inline
{.pop.} # raises no exceptions

View File

@ -14,7 +14,8 @@ import
../elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended
]
# No exceptions allowed
@ -27,7 +28,7 @@ import
#
# ############################################################
func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff](P: EC, indent: static int = 0): string =
func toHex*[EC: ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_Aff or ECP_ShortW_JacExt](P: EC, indent: static int = 0): string =
## Stringify an elliptic curve point to Hex
## Note. Leading zeros are not removed.
## Result is prefixed with 0x

View File

@ -392,7 +392,7 @@ func cyclotomic_exp*[FT](r: var FT, a: FT, exponent: static BigInt, invert: bool
r.setOne()
var init = false
for bit in recoding_l2r_vartime(exponent):
for bit in recoding_l2r_signed_vartime(exponent):
if init:
r.cyclotomic_square()
if bit == 1:

View File

@ -7,15 +7,17 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../config/curves,
../elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_projective
],
../arithmetic,
../isogenies/frobenius,
./lines_eval
# No exceptions allowed
{.push raises: [].}
{.push raises: [], checks: off.}
# ############################################################
# #
@ -23,60 +25,82 @@ import
# #
# ############################################################
func recodeNafForPairing(ate: BigInt): seq[int8] {.compileTime.} =
## We need a NAF recoding and we need to skip the MSB for pairings
var recoded: array[ate.bits+1, int8]
let recodedLen = recoded.recode_r2l_signed_vartime(ate)
var msbPos = recodedLen-1
while true:
if recoded[msbPos] != 0:
break
else:
msbPos -= 1
doAssert msbPos >= 0
result = recoded[0 ..< msbPos]
func basicMillerLoop*[FT, F1, F2](
f: var FT,
line: var Line[F2],
T: var ECP_ShortW_Prj[F2, G2],
P: ECP_ShortW_Aff[F1, G1],
Q: ECP_ShortW_Aff[F2, G2],
ate_param: auto,
ate_param_isNeg: static bool
) =
ate_param: static BigInt) =
## Basic Miller loop iterations
##
## Multiplications by constants in the Miller loop is eliminated by final exponentiation
## aka cofactor clearing in the pairing group.
##
## This means that there is no need to inverse/conjugate when `ate_param_isNeg` is false
## in the general case.
## If further processing is required, `ate_param_isNeg` must be taken into account by the caller.
static:
doAssert FT.C == F1.C
doAssert FT.C == F2.C
f.setOne()
const naf = ate_param.recodeNafForPairing()
var line0 {.noInit.}, line1 {.noInit.}: Line[F2]
var nQ {.noInit.}: ECP_ShortW_Aff[F2, G2]
f.setOne()
nQ.neg(Q)
template u: untyped = ate_param
var u3 = ate_param
u3 *= 3
for i in countdown(u3.bits - 2, 1):
if i != u3.bits - 2:
block: # naf.len - 1
line0.line_double(T, P)
let bit = naf[naf.len-1]
if bit == 1:
line1.line_add(T, Q, P)
f.prod_from_2_lines(line0, line1)
elif bit == -1:
line1.line_add(T, nQ, P)
f.prod_from_2_lines(line0, line1)
else:
f.mul_by_line(line0)
for i in countdown(naf.len-2, 0):
let bit = naf[i]
f.square()
line.line_double(T, P)
f.mul_by_line(line)
line0.line_double(T, P)
let naf = u3.bit(i).int8 - u.bit(i).int8 # This can throw exception
if naf == 1:
line.line_add(T, Q, P)
f.mul_by_line(line)
elif naf == -1:
line.line_add(T, nQ, P)
f.mul_by_line(line)
when ate_param_isNeg:
# In GT, x^-1 == conjugate(x)
# Remark 7.1, chapter 7.1.1 of Guide to Pairing-Based Cryptography, El Mrabet, 2017
f.conj()
if bit == 1:
line1.line_add(T, Q, P)
f.mul_by_2_lines(line0, line1)
elif bit == -1:
line1.line_add(T, nQ, P)
f.mul_by_2_lines(line0, line1)
else:
f.mul_by_line(line0)
func millerCorrectionBN*[FT, F1, F2](
f: var FT,
T: var ECP_ShortW_Prj[F2, G2],
Q: ECP_ShortW_Aff[F2, G2],
P: ECP_ShortW_Aff[F1, G1],
ate_param_isNeg: static bool
) =
P: ECP_ShortW_Aff[F1, G1]) =
## Ate pairing for BN curves need adjustment after basic Miller loop
## If `ate_param_isNeg` f must be cyclotomic inverted/conjugated
## and T must be negated by the caller.
static:
doAssert FT.C == F1.C
doAssert FT.C == F2.C
doAssert FT.C.family() == BarretoNaehrig
when ate_param_isNeg:
T.neg()
var V {.noInit.}: typeof(Q)
var line1 {.noInit.}, line2 {.noInit.}: Line[F2]
@ -122,92 +146,67 @@ func miller_init_double_then_add*[FT, F1, F2](
T: var ECP_ShortW_Prj[F2, G2],
Q: ECP_ShortW_Aff[F2, G2],
P: ECP_ShortW_Aff[F1, G1],
numDoublings: static int
) =
numDoublings: static int) =
## Start a Miller Loop with
## - `numDoubling` doublings
## - 1 add
##
## f is overwritten
## T is overwritten by Q
static:
doAssert FT.C == F1.C
doAssert FT.C == F2.C
doAssert numDoublings >= 1
{.push checks: off.} # No OverflowError or IndexError allowed
var line {.noInit.}: Line[F2]
# First step: 0b10, T <- Q, f = 1 (mod p¹²), f *= line
# ----------------------------------------------------
var line0 {.noInit.}, line1 {.noInit.}: Line[F2]
T.fromAffine(Q)
# f.square() -> square(1)
line.line_double(T, P)
# First step: 0b1..., T <- Q, f = 1 (mod p¹²), f *= line
line0.line_double(T, P)
# Doubling steps: 0b10...00
# ----------------------------------------------------
# Process all doublings, the second is special cased
# as:
# - The first line is squared (sparse * sparse)
# - The second is (somewhat-sparse * sparse)
# Second step: 0b10 or 0b11
# If we have more than 1 doubling, we square the line instead of squaring f
when numDoublings >= 2:
f.prod_from_2_lines(line, line)
line.line_double(T, P)
f.mul_by_line(line)
f.prod_from_2_lines(line0, line0)
line0.line_double(T, P)
# Doublings step: 0b10...0
for _ in 2 ..< numDoublings:
# Apply previous line0
f.mul_by_line(line0)
f.square()
line.line_double(T, P)
f.mul_by_line(line)
line0.line_double(T, P)
# Addition step: 0b10...01
# ------------------------------------------------
# If there was only a single doubling needed,
# we special case the addition as
# - The first line and second are sparse (sparse * sparse)
line1.line_add(T, Q, P)
when numDoublings == 1:
# f *= line <=> f = line for the first iteration
var line2 {.noInit.}: Line[F2]
line2.line_add(T, Q, P)
f.prod_from_2_lines(line, line2)
f.prod_from_2_lines(line0, line1)
else:
line.line_add(T, Q, P)
f.mul_by_line(line)
{.pop.} # No OverflowError or IndexError allowed
f.mul_by_2_lines(line0, line1)
func miller_accum_double_then_add*[FT, F1, F2](
f: var FT,
T: var ECP_ShortW_Prj[F2, G2],
Q: ECP_ShortW_Aff[F2, G2],
P: ECP_ShortW_Aff[F1, G1],
numDoublings: int,
add = true
) =
numDoublings: int, add = true) =
## Continue a Miller Loop with
## - `numDoubling` doublings
## - 1 add
##
## f and T are updated
#
# `numDoublings` and `add` can be hardcoded at compile-time
# to prevent fault attacks.
# But fault attacks only happen on embedded
# and embedded is likely to want to minimize codesize.
# What to do?
{.push checks: off.} # No OverflowError or IndexError allowed
var line {.noInit.}: Line[F2]
for _ in 0 ..< numDoublings:
var line0 {.noInit.}, line1 {.noInit.}: Line[F2]
f.square()
line.line_double(T, P)
f.mul_by_line(line)
line0.line_double(T, P)
for _ in 1 ..< numDoublings:
f.mul_by_line(line0)
f.square()
line0.line_double(T, P)
if add:
line.line_add(T, Q, P)
f.mul_by_line(line)
line1.line_add(T, Q, P)
f.mul_by_2_lines(line0, line1)
else:
f.mul_by_line(line0)
# Miller Loop - multi-pairing
# ----------------------------------------------------------------------------
@ -217,61 +216,52 @@ func miller_accum_double_then_add*[FT, F1, F2](
# See `multi_pairing.md`
# We implement Aranha approach
func isOdd(n: int): bool {.inline.} =
  ## True when the lowest bit of `n` is set.
  (n and 1) != 0
func double_jToN[FT, F1, F2](
f: var FT,
j: static int,
line0, line1: var Line[F2],
lineOddRemainder: var Line[F2],
Ts: ptr UncheckedArray[ECP_ShortW_Prj[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
N: int) =
## Doubling steps for pairings j to N
## if N is odd, lineOddRemainder must be applied to `f`
{.push checks: off.} # No OverflowError or IndexError allowed
var line0{.noInit.}, line1{.noInit.}: Line[F2]
# Sparse merge 2 by 2, starting from j
for i in countup(j, N-1, 2):
if i+1 >= N:
break
for i in countup(j, N-2, 2):
line0.line_double(Ts[i], Ps[i])
line1.line_double(Ts[i+1], Ps[i+1])
f.mul_by_2_lines(line0, line1)
if (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
line0.line_double(Ts[N-1], Ps[N-1])
f.mul_by_line(line0)
{.pop.}
if N.isOdd(): # N >= 2 and N is odd, there is a leftover
lineOddRemainder.line_double(Ts[N-1], Ps[N-1])
func add_jToN[FT, F1, F2](
f: var FT,
j: static int,
line0, line1: var Line[F2],
lineOddRemainder: var Line[F2],
Ts: ptr UncheckedArray[ECP_ShortW_Prj[F2, G2]],
Qs: ptr UncheckedArray[ECP_ShortW_Aff[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
N: int)=
## Addition steps for pairings 0 to N
{.push checks: off.} # No OverflowError or IndexError allowed
var line0{.noInit.}, line1{.noInit.}: Line[F2]
# Sparse merge 2 by 2, starting from 0
for i in countup(j, N-1, 2):
if i+1 >= N:
break
for i in countup(j, N-2, 2):
line0.line_add(Ts[i], Qs[i], Ps[i])
line1.line_add(Ts[i+1], Qs[i+1], Ps[i+1])
f.mul_by_2_lines(line0, line1)
if (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
line0.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
f.mul_by_line(line0)
{.pop.}
if N.isOdd(): # N >= 2 and N is odd, there is a leftover
lineOddRemainder.line_add(Ts[N-1], Qs[N-1], Ps[N-1])
func add_jToN_negateQ[FT, F1, F2](
f: var FT,
j: static int,
line0, line1: var Line[F2],
lineOddRemainder: var Line[F2],
Ts: ptr UncheckedArray[ECP_ShortW_Prj[F2, G2]],
Qs: ptr UncheckedArray[ECP_ShortW_Aff[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
@ -279,62 +269,55 @@ func add_jToN_negateQ[FT, F1, F2](
## Addition steps for pairings 0 to N
var nQ{.noInit.}: ECP_ShortW_Aff[F2, G2]
{.push checks: off.} # No OverflowError or IndexError allowed
var line0{.noInit.}, line1{.noInit.}: Line[F2]
# Sparse merge 2 by 2, starting from 0
for i in countup(j, N-1, 2):
if i+1 >= N:
break
for i in countup(j, N-2, 2):
nQ.neg(Qs[i])
line0.line_add(Ts[i], nQ, Ps[i])
nQ.neg(Qs[i+1])
line1.line_add(Ts[i+1], nQ, Ps[i+1])
f.mul_by_2_lines(line0, line1)
if (N and 1) == 1: # N >= 2 and N is odd, there is a leftover
if N.isOdd(): # N >= 2 and N is odd, there is a leftover
nQ.neg(Qs[N-1])
line0.line_add(Ts[N-1], nQ, Ps[N-1])
f.mul_by_line(line0)
{.pop.}
lineOddRemainder.line_add(Ts[N-1], nQ, Ps[N-1])
func basicMillerLoop*[FT, F1, F2](
f: var FT,
line0, line1: var Line[F2],
Ts: ptr UncheckedArray[ECP_ShortW_Prj[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
Qs: ptr UncheckedArray[ECP_ShortW_Aff[F2, G2]],
N: int,
ate_param: auto,
ate_param_isNeg: static bool
) =
ate_param: static Bigint) =
## Basic Miller loop iterations
##
## Multiplications by constants in the Miller loop is eliminated by final exponentiation
## aka cofactor clearing in the pairing group.
##
## This means that there is no need to inverse/conjugate when `ate_param_isNeg` is false
## in the general case.
## If further processing is required, `ate_param_isNeg` must be taken into account by the caller.
static:
doAssert FT.C == F1.C
doAssert FT.C == F2.C
const naf = ate_param.recodeNafForPairing()
var lineOddRemainder0{.noInit.}, lineOddRemainder1{.noinit.}: Line[F2]
f.setOne()
template u: untyped = ate_param
var u3 = ate_param
u3 *= 3
for i in countdown(u3.bits - 2, 1):
if i != u3.bits - 2:
for i in countdown(naf.len-1, 0):
let bit = naf[i]
if i != naf.len-1:
f.square()
f.double_jToN(j=0, line0, line1, Ts, Ps, N)
f.double_jToN(j=0, lineOddRemainder0, Ts, Ps, N)
let naf = u3.bit(i).int8 - u.bit(i).int8 # This can throw exception
if naf == 1:
f.add_jToN(j=0, line0, line1, Ts, Qs, Ps, N)
elif naf == -1:
f.add_jToN_negateQ(j=0, line0, line1, Ts, Qs, Ps, N)
if bit == 1:
f.add_jToN(j=0, lineOddRemainder1, Ts, Qs, Ps, N)
elif bit == -1:
f.add_jToN_negateQ(j=0, lineOddRemainder1, Ts, Qs, Ps, N)
when ate_param_isNeg:
# In GT, x^-1 == conjugate(x)
# Remark 7.1, chapter 7.1.1 of Guide to Pairing-Based Cryptography, El Mrabet, 2017
f.conj()
if N.isOdd():
if bit == 0:
f.mul_by_line(lineOddRemainder0)
else:
f.mul_by_2_lines(lineOddRemainder0, lineOddRemainder1)
func miller_init_double_then_add*[FT, F1, F2](
f: var FT,
@ -342,8 +325,7 @@ func miller_init_double_then_add*[FT, F1, F2](
Qs: ptr UncheckedArray[ECP_ShortW_Aff[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
N: int,
numDoublings: static int
) =
numDoublings: static int) =
## Start a Miller Loop
## This means
## - 1 doubling
@ -351,52 +333,32 @@ func miller_init_double_then_add*[FT, F1, F2](
##
## f is overwritten
## Ts are overwritten by Qs
static:
doAssert FT.C == F1.C
doAssert FT.C == F2.C
{.push checks: off.} # No OverflowError or IndexError allowed
var line0 {.noInit.}, line1 {.noInit.}: Line[F2]
if N == 1:
f.miller_init_double_then_add(Ts[0], Qs[0], Ps[0], numDoublings)
return
# First step: T <- Q, f = 1 (mod p¹²), f *= line
# ----------------------------------------------
var lineOddRemainder0 {.noInit.}, lineOddRemainder1 {.noInit.}: Line[F2]
for i in 0 ..< N:
Ts[i].fromAffine(Qs[i])
line0.line_double(Ts[0], Ps[0])
if N >= 2:
line1.line_double(Ts[1], Ps[1])
f.prod_from_2_lines(line0, line1)
f.double_jToN(j=2, line0, line1, Ts, Ps, N)
# First step: T <- Q, f = 1 (mod p¹²), f *= line
lineOddRemainder0.line_double(Ts[0], Ps[0])
lineOddRemainder1.line_double(Ts[1], Ps[1])
f.prod_from_2_lines(lineOddRemainder0, lineOddRemainder1)
f.double_jToN(j=2, lineOddRemainder0, Ts, Ps, N)
# Doubling steps: 0b10...00
# ------------------------------------------------
when numDoublings > 1: # Already did the MSB doubling
if N == 1: # f = line0
f.prod_from_2_lines(line0, line0) # f.square()
line0.line_double(Ts[0], Ps[0])
f.mul_by_line(line0)
for _ in 2 ..< numDoublings:
# Doublings step: 0b10...0
for _ in 1 ..< numDoublings:
if N.isOdd():
f.mul_by_line(lineOddRemainder0)
f.square()
f.double_jtoN(j=0, line0, line1, Ts, Ps, N)
else:
for _ in 0 ..< numDoublings:
f.square()
f.double_jtoN(j=0, line0, line1, Ts, Ps, N)
f.double_jToN(j=0, lineOddRemainder0, Ts, Ps, N)
# Addition step: 0b10...01
# ------------------------------------------------
when numDoublings == 1:
if N == 1: # f = line0
line1.line_add(Ts[0], Qs[0], Ps[0])
f.prod_from_2_lines(line0, line1)
else:
f.add_jToN(j=0,line0, line1, Ts, Qs, Ps, N)
else:
f.add_jToN(j=0,line0, line1, Ts, Qs, Ps, N)
{.pop.} # No OverflowError or IndexError allowed
f.add_jToN(j=0, lineOddRemainder1, Ts, Qs, Ps, N)
if N.isOdd():
f.mul_by_2_lines(lineOddRemainder0, lineOddRemainder1)
func miller_accum_double_then_add*[FT, F1, F2](
f: var FT,
@ -404,18 +366,31 @@ func miller_accum_double_then_add*[FT, F1, F2](
Qs: ptr UncheckedArray[ECP_ShortW_Aff[F2, G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[F1, G1]],
N: int,
numDoublings: int,
add = true
) =
numDoublings: int, add = true) =
## Continue a Miller Loop with
## - `numDoubling` doublings
## - 1 add
##
## f and T are updated
var line0{.noInit.}, line1{.noinit.}: Line[F2]
for _ in 0 ..< numDoublings:
if N == 1:
f.miller_accum_double_then_add(Ts[0], Qs[0], Ps[0], numDoublings, add)
return
var lineOddRemainder0 {.noInit.}, lineOddRemainder1 {.noInit.}: Line[F2]
f.square()
f.double_jtoN(j=0, line0, line1, Ts, Ps, N)
f.double_jtoN(j=0, lineOddRemainder0, Ts, Ps, N)
for _ in 1 ..< numDoublings:
if N.isOdd():
f.mul_by_line(lineOddRemainder0)
f.square()
f.double_jtoN(j=0, lineOddRemainder0, Ts, Ps, N)
if add:
f.add_jToN(j=0, line0, line1, Ts, Qs, Ps, N)
f.add_jToN(j=0, lineOddRemainder1, Ts, Qs, Ps, N)
if N.isOdd():
f.mul_by_2_lines(lineOddRemainder0, lineOddRemainder1)
else:
if N.isOdd():
f.mul_by_line(lineOddRemainder0)

View File

@ -18,7 +18,6 @@ import
../constants/zoo_pairings,
../arithmetic,
./cyclotomic_subgroups,
./lines_eval,
./miller_loops
export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11225
@ -54,23 +53,29 @@ export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11
func millerLoopGenericBLS12*[C](
f: var Fp12[C],
P: ECP_ShortW_Aff[Fp[C], G1],
Q: ECP_ShortW_Aff[Fp2[C], G2]
Q: ECP_ShortW_Aff[Fp2[C], G2],
P: ECP_ShortW_Aff[Fp[C], G1]
) {.meter.} =
## Generic Miller Loop for BLS12 curve
## Computes f{u,Q}(P) with u the BLS curve parameter
var
T {.noInit.}: ECP_ShortW_Prj[Fp2[C], G2]
line {.noInit.}: Line[Fp2[C]]
var T {.noInit.}: ECP_ShortW_Prj[Fp2[C], G2]
T.fromAffine(Q)
basicMillerLoop(
f, line, T,
P, Q,
pairing(C, ate_param), pairing(C, ate_param_isNeg)
)
basicMillerLoop(f, T, P, Q, pairing(C, ate_param))
func millerLoopGenericBLS12*[C](
       f: var Fp12[C],
       Qs: ptr UncheckedArray[ECP_ShortW_Aff[Fp2[C], G2]],
       Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[C], G1]],
       N: int
     ) {.noinline, tags:[Alloca], meter.} =
  ## Generic Miller loop for BLS12 curves over `N` pairs (Qs[i], Ps[i]).
  ## Computes f{u,Q}(P) with u the BLS curve parameter.
  ##
  ## A scratch array of `N` projective points is obtained from
  ## `allocStackArray` (hence the `Alloca` tag) and initialised from `Qs`
  ## before being handed to `basicMillerLoop`, which writes into `f`.
  var Ts = allocStackArray(ECP_ShortW_Prj[Fp2[C], G2], N)
  for pairIdx in 0 ..< N:
    fromAffine(Ts[pairIdx], Qs[pairIdx])

  basicMillerLoop(
    f, Ts, Ps, Qs, N,
    pairing(C, ate_param))
func finalExpGeneric[C: static Curve](f: var Fp12[C]) =
## A generic and slow implementation of final exponentiation
@ -86,7 +91,7 @@ func pairing_bls12_reference*[C](
## Output: e(P, Q) ∈ Gt
##
## Reference implementation
gt.millerLoopGenericBLS12(P, Q)
gt.millerLoopGenericBLS12(Q, P)
gt.finalExpGeneric()
# Optimized pairing implementation

View File

@ -7,7 +7,7 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
../../platforms/[abstractions, allocs],
../../platforms/abstractions,
../config/curves,
../extension_fields,
../elliptic/[
@ -16,7 +16,6 @@ import
],
../isogenies/frobenius,
../constants/zoo_pairings,
./lines_eval,
./cyclotomic_subgroups,
./miller_loops
@ -50,53 +49,45 @@ export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11
func millerLoopGenericBN*[C](
f: var Fp12[C],
Q: ECP_ShortW_Aff[Fp2[C], G2],
P: ECP_ShortW_Aff[Fp[C], G1],
Q: ECP_ShortW_Aff[Fp2[C], G2]
) {.meter.} =
## Generic Miller Loop for BN curves
## Computes f{6u+2,Q}(P) with u the BN curve parameter
var
T {.noInit.}: ECP_ShortW_Prj[Fp2[C], G2]
line {.noInit.}: Line[Fp2[C]]
var T {.noInit.}: ECP_ShortW_Prj[Fp2[C], G2]
T.fromAffine(Q)
basicMillerLoop(
f, line, T,
P, Q,
pairing(C, ate_param), pairing(C, ate_param_isNeg)
)
basicMillerLoop(f, T, P, Q, pairing(C, ate_param))
when pairing(C, ate_param_is_neg):
f.conj()
T.neg()
# Ate pairing for BN curves needs adjustment after basic Miller loop
f.millerCorrectionBN(
T, Q, P,
pairing(C, ate_param_isNeg)
)
f.millerCorrectionBN(T, Q, P)
func millerLoopGenericBN*[C](
f: var Fp12[C],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[C], G1]],
Qs: ptr UncheckedArray[ECP_ShortW_Aff[Fp2[C], G2]],
Ps: ptr UncheckedArray[ECP_ShortW_Aff[Fp[C], G1]],
N: int
) {.meter.} =
) {.noinline, tags:[Alloca], meter.} =
## Generic Miller Loop for BN curves
## Computes f{6u+2,Q}(P) with u the BN curve parameter
var
Ts = allocStackArray(ECP_ShortW_Prj[Fp2[C], G2], N)
line0 {.noInit.}, line1 {.noInit.}: Line[Fp2[C]]
var Ts = allocStackArray(ECP_ShortW_Prj[Fp2[C], G2], N)
for i in 0 ..< N:
Ts[i].fromAffine(Qs[i])
basicMillerLoop(
f, line0, line1, Ts,
Ps, Qs, N,
pairing(C, ate_param), pairing(C, ate_param_isNeg)
)
basicMillerLoop(f, Ts, Ps, Qs, N, pairing(C, ate_param))
when pairing(C, ate_param_is_neg):
f.conj()
for i in 0 ..< N:
Ts[i].neg()
# Ate pairing for BN curves needs adjustment after basic Miller loop
for i in 0 ..< N:
f.millerCorrectionBN(Ts[i], Qs[i], Ps[i], pairing(C, ate_param_isNeg))
f.millerCorrectionBN(Ts[i], Qs[i], Ps[i])
func finalExpGeneric[C: static Curve](f: var Fp12[C]) =
## A generic and slow implementation of final exponentiation
@ -180,7 +171,7 @@ func pairing_bn*[C](
when C == BN254_Nogami:
gt.millerLoopAddChain(Q, P)
else:
gt.millerLoopGenericBN(P, Q)
gt.millerLoopGenericBN(Q, P)
gt.finalExpEasy()
gt.finalExpHard_BN()
@ -196,6 +187,6 @@ func pairing_bn*[N: static int, C](
when C == BN254_Nogami:
gt.millerLoopAddChain(Qs.asUnchecked(), Ps.asUnchecked(), N)
else:
gt.millerLoopGenericBN(Ps.asUnchecked(), Qs.asUnchecked(), N)
gt.millerLoopGenericBN(Qs.asUnchecked(), Ps.asUnchecked(), N)
gt.finalExpEasy()
gt.finalExpHard_BN()

View File

@ -33,20 +33,16 @@ export zoo_pairings # generic sandwich https://github.com/nim-lang/Nim/issues/11
func millerLoopBW6_761_naive[C](
f: var Fp6[C],
P: ECP_ShortW_Aff[Fp[C], G1],
Q: ECP_ShortW_Aff[Fp[C], G2]
Q: ECP_ShortW_Aff[Fp[C], G2],
P: ECP_ShortW_Aff[Fp[C], G1]
) =
## Miller Loop for BW6_761 curve
## Computes f_{u+1,Q}(P)*Frobenius(f_{u*(u^2-u-1),Q}(P))
var
T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2]
line {.noInit.}: Line[Fp[C]]
var T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2]
T.fromAffine(Q)
basicMillerLoop(
f, line, T,
f, T,
P, Q,
pairing(C, ate_param_1_unopt), pairing(C, ate_param_1_unopt_isNeg)
)
@ -55,7 +51,7 @@ func millerLoopBW6_761_naive[C](
T.fromAffine(Q)
basicMillerLoop(
f2, line, T,
f2, T,
P, Q,
pairing(C, ate_param_2_unopt), pairing(C, ate_param_2_unopt_isNeg)
)
@ -79,16 +75,15 @@ func finalExpHard_BW6_761*[C: static Curve](f: var Fp6[C]) =
func millerLoopBW6_761_opt_to_debug[C](
f: var Fp6[C],
P: ECP_ShortW_Aff[Fp[C], G1],
Q: ECP_ShortW_Aff[Fp[C], G2]
Q: ECP_ShortW_Aff[Fp[C], G2],
P: ECP_ShortW_Aff[Fp[C], G1]
) {.used.} =
## Miller Loop Optimized for BW6_761 curve
# 1st part: f_{u,Q}(P)
# ------------------------------
var
T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2]
line {.noInit.}: Line[Fp[C]]
var T {.noInit.}: ECP_ShortW_Prj[Fp[C], G2]
var line {.noInit.}: Line[Fp[C]]
T.fromAffine(Q)
f.setOne()
@ -161,6 +156,6 @@ func pairing_bw6_761_reference*[C](
##
## Reference implementation
{.error: "BW6_761 Miller loop is not working yet".}
gt.millerLoopBW6_761_naive(P, Q)
gt.millerLoopBW6_761_naive(Q, P)
gt.finalExpEasy()
gt.finalExpHard_BW6_761()

View File

@ -21,11 +21,11 @@ func pairing*[C](gt: var Fp12[C], P, Q: auto) {.inline.} =
else:
{.error: "Pairing not implemented for " & $C.}
func millerLoop*[C](gt: var Fp12[C], P, Q: auto, n: int) {.inline.} =
func millerLoop*[C](gt: var Fp12[C], Q, P: auto, n: int) {.inline.} =
when C == BN254_Snarks:
gt.millerLoopGenericBN(P, Q, n)
gt.millerLoopGenericBN(Q, P, n)
else:
gt.millerLoopAddchain(P, Q, n)
gt.millerLoopAddchain(Q, P, n)
func finalExp*[C](gt: var Fp12[C]){.inline.} =
gt.finalExpEasy()

View File

@ -55,3 +55,180 @@ const
const CttASM {.booldefine.} = true
const UseASM_X86_32* = CttASM and X86 and GCC_Compatible
const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32
# We use Nim effect system to track vartime subroutines
# `VarTime` is an empty marker object meant to be used as an effect tag.
type VarTime* = object
# ############################################################
#
# Signed Secret Words
#
# ############################################################
# Distinct wrapper over `SecretWord`: same storage, but none of the
# `SecretWord` operations apply unless re-exposed below.
type SignedSecretWord* = distinct SecretWord
# Native signed integer used when a `SignedSecretWord` has to be reinterpreted
# as a Nim integer: 64-bit when `int` is 8 bytes and `Constantine32` is not
# defined, 32-bit otherwise.
when sizeof(int) == 8 and not defined(Constantine32):
type
SignedBaseType* = int64
else:
type
SignedBaseType* = int32
template fmap(x: SignedSecretWord, op: untyped, y: SignedSecretWord): SignedSecretWord =
  ## Lift a binary `SecretWord` operation to `SignedSecretWord`:
  ## both operands are converted to `SecretWord`, `op` is applied,
  ## and the outcome is converted back.
  SignedSecretWord(op(x.SecretWord, y.SecretWord))
template fmapAsgn(x: var SignedSecretWord, op: untyped, y: SignedSecretWord) =
## Unwrap x and y from their distinct type
## Apply assignment op, and rewrap them
# `x` is reinterpreted in place as a `var SecretWord` through its address,
# so `op` mutates the caller's storage directly; `y` is converted by value.
op(cast[var SecretWord](x.addr), SecretWord(y))
# Bitwise operators, forwarded to the underlying word implementations.
template `and`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `and`, y)
template `or`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `or`, y)
# NOTE(review): unlike its siblings, `xor` converts down to `BaseType` instead of
# going through `fmap`; the reason is not visible from this file.
template `xor`*(x, y: SignedSecretWord): SignedSecretWord = SignedSecretWord(BaseType(x) xor BaseType(y))
template `not`*(x: SignedSecretWord): SignedSecretWord = SignedSecretWord(not SecretWord(x))
# Addition and subtraction, forwarded to the `SecretWord` operators
# (plain and in-place variants).
template `+`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `+`, y)
template `+=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `+=`, y)
template `-`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `-`, y)
template `-=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `-=`, y)
# Unary negation, computed with the `SecretWord` negation.
template `-`*(x: SignedSecretWord): SignedSecretWord =
# We don't use Nim signed integers to avoid range checks
SignedSecretWord(-SecretWord(x))
# Multiplication, forwarded to the `SecretWord` `*`.
template `*`*(x, y: SignedSecretWord): SignedSecretWord =
# Warning ⚠️ : We assume that hardware multiplication is constant time
# but this is not always true. See https://www.bearssl.org/ctmul.html
fmap(x, `*`, y)
# shifts
template ashr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
  ## Arithmetic (sign-propagating) right shift of `x` by `y` bits.
  # The word is reinterpreted as a native signed integer with `cast`,
  # which bypasses Nim's conversion checks, shifted, then reinterpreted back.
  cast[SignedSecretWord](cast[SignedBaseType](x).ashr(y))

template lshr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
  ## Logical right shift of `x` by `y` bits, done on the `SecretWord` view.
  SignedSecretWord(x.SecretWord shr y)

template lshl*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
  ## Logical left shift of `x` by `y` bits, done on the `SecretWord` view.
  SignedSecretWord(x.SecretWord shl y)
# Hardened Boolean primitives
# ---------------------------
template `==`*(x, y: SignedSecretWord): SecretBool =
  ## Equality of two signed secret words, delegated to the
  ## `SecretWord` comparison so the result is a `SecretBool`.
  x.SecretWord == y.SecretWord
# Conditional arithmetic
# ----------------------
func isNeg*(a: SignedSecretWord): SignedSecretWord {.inline.} =
  ## 1 when `a` is negative, 0 otherwise.
  # A logical shift moves the top (sign) bit down to bit 0.
  lshr(a, WordBitWidth - 1)
func isOdd*(a: SignedSecretWord): SignedSecretWord {.inline.} =
  ## 1 when `a` is odd, 0 otherwise.
  let lowBit = SignedSecretWord(1)
  a and lowBit
func isZeroMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
  ## All-ones mask (-1) when `a` is 0, all-zeros mask otherwise.
  # In x86 assembly, we can use "neg" + "sbb"
  let zeroFlag = SignedSecretWord(isZero(SecretWord(a)))
  -zeroFlag
func isNegMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
  ## All-ones mask (-1) when `a` is negative, all-zeros mask otherwise.
  # An arithmetic shift replicates the sign bit over the whole word.
  ashr(a, WordBitWidth - 1)
func isOddMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
  ## All-ones mask (-1) when `a` is odd, all-zeros mask otherwise.
  let lowBit = a and SignedSecretWord(1)
  -lowBit
func isInRangeMask*(val, lo, hi: SignedSecretWord): SignedSecretWord {.inline.} =
  ## All-ones mask when lo <= val <= hi (both bounds included),
  ## all-zeros mask otherwise.
  let belowLo = isNegMask(val - lo) # val-lo < 0  <=>  val < lo
  let aboveHi = isNegMask(hi - val) # hi-val < 0  <=>  val > hi
  # In range exactly when neither out-of-range mask is set.
  not(belowLo or aboveHi)
func csetZero*(a: var SignedSecretWord, mask: SignedSecretWord) {.inline.} =
## Conditionally set `a` to 0 by AND-ing it with `mask`
## mask must be 0 (0x00000...0000) (`a` is zeroed)
## or -1 (0xFFFF...FFFF) (`a` is kept as is)
# NOTE(review): this comment previously stated the opposite polarity
# (0 keeps, -1 zeroes), which is how `cneg`/`cadd`/`csub` read their mask
# but not what `a and mask` computes. Confirm which polarity callers expect.
a = a and mask
func cneg*(
       a: SignedSecretWord,
       mask: SignedSecretWord): SignedSecretWord {.inline.} =
  ## Return `a` negated when `mask` is -1 (0xFFFF...FFFF),
  ## and `a` unchanged when `mask` is 0 (0x00000...0000).
  # With mask = -1: (a xor -1) - (-1) = not(a) + 1 = -a
  # With mask =  0: (a xor 0) - 0 = a
  let flipped = a xor mask
  flipped - mask
func cadd*(
       a: var SignedSecretWord,
       b: SignedSecretWord,
       mask: SignedSecretWord) {.inline.} =
  ## Add `b` to `a` when `mask` is -1 (0xFFFF...FFFF);
  ## leave `a` untouched when `mask` is 0 (0x00000...0000).
  let addend = b and mask # b or 0
  a = a + addend
func csub*(
       a: var SignedSecretWord,
       b: SignedSecretWord,
       mask: SignedSecretWord) {.inline.} =
  ## Subtract `b` from `a` when `mask` is -1 (0xFFFF...FFFF);
  ## leave `a` untouched when `mask` is 0 (0x00000...0000).
  let subtrahend = b and mask # b or 0
  a = a - subtrahend
# Double-Width signed arithmetic
# ------------------------------
# Pair of signed secret words; the routines below treat it as a single
# double-width value with `hi` as the upper word and `lo` as the lower word.
type DSWord* = object
lo*, hi*: SignedSecretWord
func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
  ## Signed multiply-accumulate on a double-width accumulator:
  ##   (_, r.hi, r.lo) += a*b
  ## The carry out of the upper word is dropped, so the caller
  ## must guarantee that the sum does not overflow.
  var prodHi, prodLo: SecretWord
  var carry: Carry

  smul(prodHi, prodLo, SecretWord a, SecretWord b)

  # Fold the previous accumulator into the product, low word first.
  addC(carry, prodLo, prodLo, SecretWord r.lo, Carry(0))
  addC(carry, prodHi, prodHi, SecretWord r.hi, carry)

  r.lo = SignedSecretWord prodLo
  r.hi = SignedSecretWord prodHi
func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
  ## Signed sum-of-products accumulation on a double-width accumulator:
  ##   (_, r.hi, r.lo) += a*u + b*v
  ## Carries out of the upper word are dropped, so the caller
  ## must guarantee that the sum does not overflow.
  var accHi, accLo: SecretWord
  var prodHi, prodLo: SecretWord
  var carry: Carry

  # acc = a*u + r
  smul(accHi, accLo, SecretWord a, SecretWord u)
  addC(carry, accLo, accLo, SecretWord r.lo, Carry(0))
  addC(carry, accHi, accHi, SecretWord r.hi, carry)

  # acc += b*v
  smul(prodHi, prodLo, SecretWord b, SecretWord v)
  addC(carry, accLo, accLo, prodLo, Carry(0))
  addC(carry, accHi, accHi, prodHi, carry)

  r.lo = SignedSecretWord accLo
  r.hi = SignedSecretWord accHi
func ashr*(
       r: var DSWord,
       k: SomeInteger) {.inline.} =
  ## In-place arithmetic right shift of the double word `r` by `k` bits.
  ## This does not normalize the excess bits.
  ##
  ## The low word receives the bits shifted out of the high word via a
  ## left shift by `WordBitWidth - k`; `k = 0` is not special-cased.
  let keptLow = r.lo.lshr(k)
  let fromHigh = r.hi.lshl(WordBitWidth - k)
  r.lo = keptLow or fromHigh
  r.hi = r.hi.ashr(k)

View File

@ -23,32 +23,37 @@
#
# stack allocation is strongly preferred where necessary.
# We use Nim effect system to track allocating subroutines
# Both are empty marker objects used in `tags: [...]` annotations:
# `Alloca` on the `alloca` binding, `HeapAlloc` on the malloc/free family.
type
Alloca* = object
HeapAlloc* = object
# Bindings
# ----------------------------------------------------------------------------------
# We wrap them with int instead of size_t / csize_t
when defined(windows):
proc alloca(size: int): pointer {.header: "<malloc.h>".}
proc alloca(size: int): pointer {.tags:[Alloca], header: "<malloc.h>".}
else:
proc alloca(size: int): pointer {.header: "<alloca.h>".}
proc alloca(size: int): pointer {.tags:[Alloca], header: "<alloca.h>".}
proc malloc(size: int): pointer {.sideeffect, header: "<stdlib.h>".}
proc free(p: pointer) {.sideeffect, header: "<stdlib.h>".}
proc malloc(size: int): pointer {.tags:[HeapAlloc], header: "<stdlib.h>".}
proc free(p: pointer) {.tags:[HeapAlloc], header: "<stdlib.h>".}
when defined(windows):
proc aligned_alloc_windows(size, alignment: int): pointer {.sideeffect,importc:"_aligned_malloc", header:"<malloc.h>".}
proc aligned_alloc_windows(size, alignment: int): pointer {.tags:[HeapAlloc],importc:"_aligned_malloc", header:"<malloc.h>".}
# Beware of the arg order!
proc aligned_alloc(alignment, size: int): pointer {.inline.} =
aligned_alloc_windows(size, alignment)
proc aligned_free(p: pointer){.sideeffect,importc:"_aligned_free", header:"<malloc.h>".}
proc aligned_free(p: pointer){.tags:[HeapAlloc],importc:"_aligned_free", header:"<malloc.h>".}
elif defined(osx):
proc posix_memalign(mem: var pointer, alignment, size: int){.sideeffect,importc, header:"<stdlib.h>".}
proc posix_memalign(mem: var pointer, alignment, size: int){.tags:[HeapAlloc],importc, header:"<stdlib.h>".}
proc aligned_alloc(alignment, size: int): pointer {.inline.} =
posix_memalign(result, alignment, size)
proc aligned_free(p: pointer) {.sideeffect, importc: "free", header: "<stdlib.h>".}
proc aligned_free(p: pointer) {.tags:[HeapAlloc], importc: "free", header: "<stdlib.h>".}
else:
proc aligned_alloc(alignment, size: int): pointer {.sideeffect,importc, header:"<stdlib.h>".}
proc aligned_free(p: pointer) {.sideeffect, importc: "free", header: "<stdlib.h>".}
proc aligned_alloc(alignment, size: int): pointer {.tags:[HeapAlloc],importc, header:"<stdlib.h>".}
proc aligned_free(p: pointer) {.tags:[HeapAlloc], importc: "free", header: "<stdlib.h>".}
# Helpers
# ----------------------------------------------------------------------------------

View File

@ -134,7 +134,7 @@ func ctz_impl_vartime(n: uint64): uint64 =
let isolateLSB = n xor (n-1)
uint64 lookup[(isolateLSB * 0x03f79d71b4cb0a89'u64) shr 58]
func countTrailingZeroBits*[T: SomeUnsignedInt](n: T): T {.inline.} =
func countTrailingZeroBits_vartime*[T: SomeUnsignedInt](n: T): T {.inline.} =
## Count the number of trailing zero bits of an integer
when nimvm:
if n == 0:
@ -151,7 +151,7 @@ func isPowerOf2_vartime*(n: SomeUnsignedInt): bool {.inline.} =
## Returns true if n is a power of 2
## ⚠️ Result is bool instead of Secretbool,
## for compile-time or explicit vartime proc only.
(n and (n - 1)) == 0
(n and (n - 1)) == 0 and n > 0
func nextPowerOfTwo_vartime*(n: uint32): uint32 {.inline.} =
## Returns x if x is a power of 2

View File

@ -6,7 +6,7 @@
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import ./abstractions, ./signed_secret_words
import ./abstractions
# ############################################################
#

View File

@ -0,0 +1,151 @@
# Laser & Arraymancer
# Copyright (c) 2017-2018 Mamy André-Ratsimbazafy
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.
const LASER_MEM_ALIGN*{.intdefine.} = 64
  ## Alignment, in bytes, used by the alignment hints of this module.
  ## Can be overridden at build time with `-d:LASER_MEM_ALIGN=<n>`;
  ## the value must be a strictly positive power of 2.

static:
  # `doAssert` so the validation is not compiled out when assertions are disabled.
  # Strictly positive: zero and negative values are not valid alignments.
  doAssert LASER_MEM_ALIGN > 0, "Alignment " & $LASER_MEM_ALIGN & " must be a power of 2"
  # A power of 2 has a single bit set, so clearing its lowest set bit yields 0.
  doAssert (LASER_MEM_ALIGN and (LASER_MEM_ALIGN - 1)) == 0, "Alignment " & $LASER_MEM_ALIGN & " must be a power of 2"
template withCompilerOptimHints*() =
  ## Declares two user pragmas for variables, spelled for the C compiler in use:
  ##
  ## - `align_variable`: the variable is created aligned on LASER_MEM_ALIGN.
  ##   This helps an object to be loaded in a minimal number of cache lines;
  ##   for example the stack part of tensors is 128 bytes and fits in 2 cache
  ##   lines, but needs 3 loads when misaligned.
  ## - `restrict`: the pointer does not alias any existing valid pointer.
  ##
  ## See https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
  ## and https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#Common-Variable-Attributes
  when defined(vcc):
    # MSVC spelling
    {.pragma: align_variable, codegenDecl: "__declspec(align(" & $LASER_MEM_ALIGN & ")) $# $#".}
    {.pragma: restrict, codegenDecl: "$# __restrict $#".}
  else:
    # GCC-style spelling
    {.pragma: align_variable, codegenDecl: "$# $# __attribute__((aligned(" & $LASER_MEM_ALIGN & ")))".}
    {.pragma: restrict, codegenDecl: "$# __restrict__ $#".}
# True when compiling with gcc, clang or icc; gates the `__builtin_*` imports below.
const withBuiltins = defined(gcc) or defined(clang) or defined(icc)
type
# Second argument of `__builtin_prefetch`; sized like a C int.
PrefetchRW* {.size: cint.sizeof.} = enum
Read = 0
Write = 1
# Third argument of `__builtin_prefetch`; sized like a C int.
PrefetchLocality* {.size: cint.sizeof.} = enum
NoTemporalLocality = 0 # Data can be discarded from CPU cache after access
LowTemporalLocality = 1
ModerateTemporalLocality = 2
HighTemporalLocality = 3 # Data should be left in all levels of cache possible
# Translation
# 0 - use no cache eviction level
# 1 - L1 cache eviction level
# 2 - L2 cache eviction level
# 3 - L1 and L2 cache eviction level
# Direct imports of the compiler builtins; `noDecl` because they have no C prototype to emit.
when withBuiltins:
proc builtin_assume_aligned(data: pointer, alignment: csize_t): pointer {.importc: "__builtin_assume_aligned", noDecl.}
proc builtin_prefetch(data: pointer, rw: PrefetchRW, locality: PrefetchLocality) {.importc: "__builtin_prefetch", noDecl.}
# C++ backend only: typed wrapper around `static_cast`, used to convert an untyped pointer to `T`.
when defined(cpp):
proc static_cast[T: ptr](input: pointer): T
{.importcpp: "static_cast<'0>(@)".}
template assume_aligned*[T](data: ptr T, alignment: static int = LASER_MEM_ALIGN): ptr T =
  ## Returns `data`, passed through `__builtin_assume_aligned` with the given
  ## `alignment` when the compiler provides that builtin.
  ## Without the builtin, `data` is returned as is.
  when not withBuiltins:
    data
  elif defined(cpp):
    # builtin_assume_aligned returns void pointers, this does not compile in C++, they must all be typed
    static_cast[ptr T](builtin_assume_aligned(data, alignment))
  else:
    cast[ptr T](builtin_assume_aligned(data, alignment))
template prefetch*(
            data: ptr or pointer,
            rw: static PrefetchRW = Read,
            locality: static PrefetchLocality = HighTemporalLocality) =
  ## Issues `__builtin_prefetch(data, rw, locality)` when the compiler
  ## provides that builtin; expands to nothing otherwise.
  ##
  ## Prefetch examples:
  ## - https://scripts.mit.edu/~birge/blog/accelerating-code-using-gccs-prefetch-extension/
  ## - https://stackoverflow.com/questions/7327994/prefetching-examples
  ## - https://lemire.me/blog/2018/04/30/is-software-prefetching-__builtin_prefetch-useful-for-performance/
  ## - https://www.naftaliharris.com/blog/2x-speedup-with-one-line-of-code/
  when not withBuiltins:
    discard
  else:
    builtin_prefetch(data, rw, locality)
template pragma_ivdep() {.used.}=
  ## Tell the compiler to ignore unproven loop dependencies
  ## such as "a[i] = a[i + k] * c;" if k is unknown, as it introduces a loop
  ## dependency if it's negative
  ## https://software.intel.com/en-us/node/524501
  ##
  ## Placeholder
  # We don't expose that as it only works on C for loop. Nim only generates while loop
  # except when using OpenMP. But the OpenMP "simd" already achieves the same as ivdep.
  when defined(gcc):
    {.emit: "#pragma GCC ivdep".}
  else: # Supported on ICC and Cray
    # The emitted text is pasted verbatim into the generated C code, so the
    # leading '#' is required for it to be a preprocessor directive.
    {.emit: "#pragma ivdep".}
template withCompilerFunctionHints() {.used.}=
## Not exposed, Nim codegen will declare them as normal C function.
## This messes up with N_NIMCALL, N_LIB_PRIVATE, N_INLINE and also
## creates duplicate symbols when one function called by a hot or pure function
## is public and inline (because hot and pure cascade to all functions called)
## and they cannot be stacked easily: (hot, pure) will only apply the last
# Function. Returned pointer is aligned to LASER_MEM_ALIGN
{.pragma: aligned_ptr_result, codegenDecl: "__attribute__((assume_aligned(" & $LASER_MEM_ALIGN & ")) $# $#$#".}
# Function. Returned pointer cannot alias any other valid pointer and no pointers to valid object occur in any
# storage pointed to.
{.pragma: malloc, codegenDecl: "__attribute__((malloc)) $# $#$#".}
# Function. Creates one or more function versions that can process multiple arguments using SIMD.
# Ignored when -fopenmp is used and within an OpenMP simd loop
{.pragma: simd, codegenDecl: "__attribute__((simd)) $# $#$#".}
# Function. Indicates hot and cold path. Ignored when using profile guided optimization.
{.pragma: hot, codegenDecl: "__attribute__((hot)) $# $#$#".}
{.pragma: cold, codegenDecl: "__attribute__((cold)) $# $#$#".}
# ## pure and const
# ## Affect Common Sub-expression Elimination, Dead Code Elimination and loop optimization.
# See
# - https://lwn.net/Articles/285332/
# - http://benyossef.com/helping-the-compiler-help-you/
#
# Function. The function only accesses its input params and global variables state.
# It does not modify any global, calling it multiple times with the same params
# and global variables will produce the same result.
{.pragma: gcc_pure, codegenDecl: "__attribute__((pure)) $# $#$#".}
#
# Function. The function only accesses its input params and calling it multiple times
# with the same params will produce the same result.
# Warning ⚠:
# Pointer inputs must not be dereferenced to read the memory pointed to.
# In Nim stack arrays are passed by pointers and big stack data structures
# are passed by reference as well. I.e. Result unknown.
{.pragma: gcc_const, codegenDecl: "__attribute__((const)) $# $#$#".}
# We don't define per-function fast-math, GCC attribute optimize is broken:
# --> https://gcc.gnu.org/ml/gcc/2009-10/msg00402.html
#
# Workaround floating point latency for algorithms like sum
# should be done manually.
#
# See : https://stackoverflow.com/questions/39095993/does-each-floating-point-operation-take-the-same-time
# and https://www.agner.org/optimize/vectorclass.pdf "Using multiple accumulators"
#
# FP addition has a latency of 3~5 clock cycles, i.e. the result cannot be reused for that much time.
# But the throughput is 1 FP add per clock cycle (and even 2 per clock cycle for Skylake)
# So we need to use extra accumulators to fully utilize the FP throughput despite FP latency.
# On Skylake, all FP latencies are 4: https://www.agner.org/optimize/blog/read.php?i=415
#
# Note that this is per CPU cores, each core needs its own "global CPU accumulator" to combat
# false sharing when multithreading.
#
# This wouldn't be needed with fast-math because compiler would consider FP addition associative
# and create intermediate variables as needed to exploit this throughput.

View File

@ -112,13 +112,7 @@ template `*=`*[T: Ct](x, y: T) =
template `-`*[T: Ct](x: T): T =
## Unary minus returns the two-complement representation
## of an unsigned integer
# We could use "not(x) + 1" but the codegen is not optimal
when nimvm:
not(x) + T(1)
else: # Use C so that compiler uses the "neg" instructions
var neg: T
{.emit:[neg, " = -", x, ";"].}
neg
T(0) - x
# ############################################################
#
@ -175,19 +169,6 @@ template cneg*[T: Ct](x: T, ctl: CTBool[T]): T =
# Conditional negate if ctl is true
(x xor -T(ctl)) + T(ctl)
# ############################################################
#
# Workaround system.nim `!=` template
#
# ############################################################
# system.nim defines `!=` as a catchall template
# in terms of `==` while we define `==` in terms of `!=`
# So we would have not(not(noteq(x,y)))
template trmFixSystemNotEq*{x != y}[T: Ct](x, y: T): CTBool[T] =
noteq(x, y)
# ############################################################
#
# Table lookups
@ -217,15 +198,3 @@ template isNonZero*[T: Ct](x: T): CTBool[T] =
template isZero*[T: Ct](x: T): CTBool[T] =
# In x86 assembly, we can use "neg" + "adc"
not isNonZero(x)
# ############################################################
#
# Transform x == 0 and x != 0
# into their optimized version
#
# ############################################################
template trmIsZero*{x == 0}[T: Ct](x: T): CTBool[T] = x.isZero
template trmIsZero*{0 == x}[T: Ct](x: T): CTBool[T] = x.isZero
template trmIsNonZero*{x != 0}[T: Ct](x: T): CTBool[T] = x.isNonZero
template trmIsNonZero*{0 != x}[T: Ct](x: T): CTBool[T] = x.isNonZero

View File

@ -15,10 +15,12 @@ import
],
compilers/[
addcarry_subborrow,
extended_precision
extended_precision,
compiler_optim_hints
],
./bithacks,
./static_for
./static_for,
./allocs
export
ct_types,
@ -28,7 +30,9 @@ export
extended_precision,
ct_division,
bithacks,
staticFor
staticFor,
allocs,
compiler_optim_hints
when X86 and GCC_Compatible:
import isa/[cpuinfo_x86, macro_assembler_x86]
@ -48,6 +52,9 @@ template debug*(body: untyped): untyped =
when defined(debugConstantine):
body
func unreachable*() {.noReturn.} =
## Marker for code paths that must never execute.
## Unconditionally fails a `doAssert` with the message "Unreachable";
## the `noReturn` pragma tells the compiler that control does not come back.
doAssert false, "Unreachable"
# ############################################################
#
# Buffers
@ -111,10 +118,22 @@ template asUnchecked*[T](a: openArray[T]): ptr UncheckedArray[T] =
# to a function as `var` are passed by hidden pointers in Nim and the wrong
# pointer will be modified. Templates are fine.
func `+%`*(p: ptr, offset: SomeInteger): type(p) {.inline, noInit.}=
func `+%`*(p: ptr or pointer, offset: SomeInteger): type(p) {.inline, noInit.}=
## Pointer increment
{.emit: [result, " = ", p, " + ", offset, ";"].}
func `+%=`*(p: var ptr, offset: SomeInteger){.inline.}=
func `+%=`*(p: var (ptr or pointer), offset: SomeInteger){.inline.}=
## Pointer increment
p = p +% offset
func prefetchLarge*[T](
       data: ptr T,
       rw: static PrefetchRW = Read,
       locality: static PrefetchLocality = HighTemporalLocality,
       maxCacheLines: static int = 0) {.inline.} =
  ## Prefetch a large value, one 64-byte chunk at a time.
  ##
  ## The number of prefetches is `sizeof(T) div 64` (whole chunks only),
  ## capped to `maxCacheLines` when that parameter is non-zero.
  ## `rw` and `locality` are forwarded unchanged to `prefetch`.
  const
    cacheLineBytes = 64 # 64 byte cache line
    wholeLines = sizeof(T) div cacheLineBytes
    numLines =
      if maxCacheLines == 0: wholeLines
      else: min(wholeLines, maxCacheLines)

  # Work on an untyped pointer so that `+%` receives a byte offset.
  let base = pointer(data)
  for lineIdx in 0 ..< numLines:
    prefetch(base +% (lineIdx * cacheLineBytes), rw, locality)

View File

@ -1,195 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import ./abstractions
type SignedSecretWord* = distinct SecretWord
when sizeof(int) == 8 and not defined(Constantine32):
type
SignedBaseType* = int64
else:
type
SignedBaseType* = int32
# ############################################################
#
# Arithmetic
#
# ############################################################
template fmap(x: SignedSecretWord, op: untyped, y: SignedSecretWord): SignedSecretWord =
## Unwrap x and y from their distinct type
## Apply op, and rewrap them
SignedSecretWord(op(SecretWord(x), SecretWord(y)))
template fmapAsgn(x: SignedSecretWord, op: untyped, y: SignedSecretWord) =
## Unwrap x and y from their distinct type
## Apply assignment op, and rewrap them
op(SecretWord(x), SecretWord(y))
template `and`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `and`, y)
template `or`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `or`, y)
template `xor`*(x, y: SignedSecretWord): SignedSecretWord = SignedSecretWord(BaseType(x) xor BaseType(y))
template `not`*(x: SignedSecretWord): SignedSecretWord = SignedSecretWord(not SecretWord(x))
template `+`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `+`, y)
template `+=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `+=`, y)
template `-`*(x, y: SignedSecretWord): SignedSecretWord = fmap(x, `-`, y)
template `-=`*(x: var SignedSecretWord, y: SignedSecretWord) = fmapAsgn(x, `-=`, y)
template `-`*(x: SignedSecretWord): SignedSecretWord =
# We don't use Nim signed integers to avoid range checks
SignedSecretWord(-SecretWord(x))
template `*`*(x, y: SignedSecretWord): SignedSecretWord =
# Warning ⚠️ : We assume that hardware multiplication is constant time
# but this is not always true. See https://www.bearssl.org/ctmul.html
fmap(x, `*`, y)
# shifts
template ashr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Arithmetic right shift
# We need to cast to Nim ints without Nim checks
cast[SignedSecretWord](cast[SignedBaseType](x).ashr(y))
template lshr*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Logical right shift
SignedSecretWord(SecretWord(x) shr y)
template lshl*(x: SignedSecretWord, y: SomeNumber): SignedSecretWord =
## Logical left shift
SignedSecretWord(SecretWord(x) shl y)
# ############################################################
#
# Hardened Boolean primitives
#
# ############################################################
template `==`*(x, y: SignedSecretWord): SecretBool =
SecretWord(x) == SecretWord(y)
# ############################################################
#
# Conditional arithmetic
#
# ############################################################
# SignedSecretWord
# ----------------
func isNeg*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Returns 1 if a is negative
## and 0 otherwise
a.lshr(WordBitWidth-1)
func isOdd*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Returns 1 if a is odd
## and 0 otherwise
a and SignedSecretWord(1)
func isZeroMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is 0
## and 0 otherwise
# In x86 assembly, we can use "neg" + "sbb"
-SignedSecretWord(a.SecretWord().isZero())
func isNegMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is negative
## and 0 otherwise
a.ashr(WordBitWidth-1)
func isOddMask*(a: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce the -1 mask if a is odd
## and 0 otherwise
-(a and SignedSecretWord(1))
func isInRangeMask*(val, lo, hi: SignedSecretWord): SignedSecretWord {.inline.} =
## Produce 0b11111111 mask if lo <= val <= hi (inclusive range)
## and 0b00000000 otherwise
let loInvMask = isNegMask(val-lo) # if val-lo < 0 => val < lo
let hiInvMask = isNegMask(hi-val) # if hi-val < 0 => val > hi
return not(loInvMask or hiInvMask)
func csetZero*(a: var SignedSecretWord, mask: SignedSecretWord) {.inline.} =
## Conditionally set `a` to 0
## mask must be 0 (0x00000...0000) (`a` is zeroed)
## or -1 (0xFFFF...FFFF) (`a` is kept as is)
# NOTE(review): the earlier wording of this comment described the opposite
# polarity (0 => kept, -1 => zeroed). The statement below is a plain bitwise
# AND, so an all-zero mask clears `a` and an all-one mask preserves it.
# Confirm that callers rely on the behaviour of the code, not the old comment.
a = a and mask
func cneg*(
a: SignedSecretWord,
mask: SignedSecretWord): SignedSecretWord {.inline.} =
## Conditionally negate `a`
## mask must be 0 (0x00000...0000) (no negation)
## or -1 (0xFFFF...FFFF) (negation)
(a xor mask) - mask
func cadd*(
a: var SignedSecretWord,
b: SignedSecretWord,
mask: SignedSecretWord) {.inline.} =
## Conditionally add `b` to `a`
## mask must be 0 (0x00000...0000) (no addition)
## or -1 (0xFFFF...FFFF) (addition)
a = a + (b and mask)
func csub*(
a: var SignedSecretWord,
b: SignedSecretWord,
mask: SignedSecretWord) {.inline.} =
## Conditionally subtract `b` from `a`
## mask must be 0 (0x00000...0000) (no subtraction)
## or -1 (0xFFFF...FFFF) (subtraction)
# `b and mask` is either 0 or `b`, so the subtraction is branch-free.
a = a - (b and mask)
# ############################################################
#
# Double-Width signed arithmetic
#
# ############################################################
type DSWord* = object
lo*, hi*: SignedSecretWord
func smulAccNoCarry*(r: var DSWord, a, b: SignedSecretWord) {.inline.}=
## Signed accumulated multiplication
## (_, hi, lo) += a*b
## This assumes no overflowing
var UV: array[2, SecretWord]
var carry: Carry
smul(UV[1], UV[0], SecretWord a, SecretWord b)
addC(carry, UV[0], UV[0], SecretWord r.lo, Carry(0))
addC(carry, UV[1], UV[1], SecretWord r.hi, carry)
r.lo = SignedSecretWord UV[0]
r.hi = SignedSecretWord UV[1]
func ssumprodAccNoCarry*(r: var DSWord, a, u, b, v: SignedSecretWord) {.inline.}=
## Accumulated sum of products
## (_, hi, lo) += a*u + b*v
## This assumes no overflowing
var carry: Carry
var x1, x0, y1, y0: SecretWord
smul(x1, x0, SecretWord a, SecretWord u)
addC(carry, x0, x0, SecretWord r.lo, Carry(0))
addC(carry, x1, x1, SecretWord r.hi, carry)
smul(y1, y0, SecretWord b, SecretWord v)
addC(carry, x0, x0, y0, Carry(0))
addC(carry, x1, x1, y1, carry)
r.lo = SignedSecretWord x0
r.hi = SignedSecretWord x1
func ashr*(
r: var DSWord,
k: SomeInteger) {.inline.} =
## Arithmetic right-shift of a double-word
## This does not normalize the excess bits
# NOTE(review): the left shift amount is `WordBitWidth - k`, which equals the
# full word width when k == 0; presumably callers always pass
# 0 < k < WordBitWidth - confirm against call sites.
# Low word: logical right shift, OR-ed with the high word shifted left
# by the complementary amount.
r.lo = r.lo.lshr(k) or r.hi.lshl(WordBitWidth - k)
# High word: arithmetic right shift.
r.hi = r.hi.ashr(k)

View File

@ -9,7 +9,7 @@
import
../math/[ec_shortweierstrass, extension_fields],
../math/io/io_bigints,
../math/elliptic/ec_shortweierstrass_batch_ops,
../math/elliptic/ec_scalar_mul_vartime,
../math/pairings/[pairings_generic, miller_accumulators],
../math/constants/zoo_generators,
../math/config/curves,
@ -366,32 +366,6 @@ func init*[T0, T1: char|byte](
H.hash(ctx.secureBlinding, secureRandomBytes, accumSepTag)
func scalarMul_minHammingWeight_vartime[EC](
P: var EC,
scalar: BigInt,
) =
## **Variable-time** Elliptic Curve Scalar Multiplication
##
## P <- [k] P
##
## This uses an online recoding with minimum Hamming Weight
## (which is not NAF, NAF is least-significant bit to most)
## Due to those scalars being 64-bit, window-method or endomorphism acceleration are slower
## than double-and-add.
##
## This is highly VULNERABLE to timing attacks and power analysis attacks.
## For our usecase, scaling with a random number not in attacker control,
## leaking the scalar bits is not an issue.
var t0{.noInit.}: typeof(P)
t0.setInf()
for bit in recoding_l2r_vartime(scalar):
t0.double()
if bit == 1:
t0 += P
elif bit == -1:
t0 -= P
P = t0
func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff](
ctx: var BLSBatchSigAccumulator,
pubkey: Pubkey,
@ -456,8 +430,8 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff](
var randFactor{.noInit.}: BigInt[64]
randFactor.unmarshal(ctx.secureBlinding.toOpenArray(0, 7), bigEndian)
pkG1_jac.scalarMul_minHammingWeight_vartime(randFactor)
sigG2_jac.scalarMul_minHammingWeight_vartime(randFactor)
pkG1_jac.scalarMul_minHammingWeight_windowed_vartime(randFactor, window = 3)
sigG2_jac.scalarMul_minHammingWeight_windowed_vartime(randFactor, window = 3)
if ctx.aggSigOnce == false:
ctx.aggSig = sigG2_jac
@ -492,8 +466,8 @@ func update*[T: char|byte, Pubkey, Sig: ECP_ShortW_Aff](
var randFactor{.noInit.}: BigInt[64]
randFactor.unmarshal(ctx.secureBlinding.toOpenArray(0, 7), bigEndian)
hmsgG1_jac.scalarMul_minHammingWeight_vartime(randFactor)
sigG1_jac.scalarMul_minHammingWeight_vartime(randFactor)
hmsgG1_jac.scalarMul_minHammingWeight_windowed_vartime(randFactor, window = 3)
sigG1_jac.scalarMul_minHammingWeight_windowed_vartime(randFactor, window = 3)
if ctx.aggSigOnce == false:
ctx.aggSig = sigG1_jac
@ -571,7 +545,7 @@ func finalVerify*(ctx: var BLSBatchSigAccumulator): bool =
func aggregate*[T: ECP_ShortW_Aff](r: var T, points: openarray[T]) =
## Aggregate pubkeys or signatures
var accum {.noinit.}: ECP_ShortW_Jac[T.F, T.G]
accum.sum_batch_vartime(points)
accum.sum_reduce_vartime(points)
r.affine(accum)
func fastAggregateVerify*[B1, B2: byte|char, Pubkey, Sig](

View File

@ -15,6 +15,7 @@ import
ec_shortweierstrass_affine,
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended,
ec_twistededwards_affine,
ec_twistededwards_projective],
../constantine/math/io/io_bigints,
@ -282,19 +283,19 @@ func random_long01Seq(rng: var RngState, a: var ExtensionField) =
# Elliptic curves
# ------------------------------------------------------------
type ECP = ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac or
type ECP = ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_JacExt or
ECP_TwEdwards_Aff or ECP_TwEdwards_Prj
type ECP_ext = ECP_ShortW_Prj or ECP_ShortW_Jac or
type ECP_ext = ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_JacExt or
ECP_TwEdwards_Prj
template trySetFromCoord[F](a: ECP, fieldElem: F): SecretBool =
when a is (ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac):
when a is (ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_JacExt):
trySetFromCoordX(a, fieldElem)
else:
trySetFromCoordY(a, fieldElem)
template trySetFromCoords[F](a: ECP, fieldElem, scale: F): SecretBool =
when a is (ECP_ShortW_Prj or ECP_ShortW_Jac):
when a is (ECP_ShortW_Prj or ECP_ShortW_Jac or ECP_ShortW_JacExt):
trySetFromCoordsXandZ(a, fieldElem, scale)
else:
trySetFromCoordsYandZ(a, fieldElem, scale)

44
metering/m_msm.nim Normal file
View File

@ -0,0 +1,44 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
std/times,
./reports, ./tracer,
../constantine/math/config/curves,
../constantine/math/[arithmetic, extension_fields, ec_shortweierstrass],
../constantine/math/constants/zoo_subgroups,
../constantine/math/elliptic/ec_multi_scalar_mul,
../constantine/platforms/abstractions,
# Helpers
../helpers/prng_unsafe
var rng*: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "bench xoshiro512** seed: ", seed
proc msmMeter*(EC: typedesc, numPoints: int) =
  ## Run one variable-time multi-scalar multiplication over `numPoints`
  ## random (scalar, point) pairs, with `resetMetering()` invoked on both
  ## sides of the call.
  ##
  ## Points are drawn as `EC`, cofactor-cleared and stored in affine form;
  ## scalars are random `BigInt`s sized to the curve order bitwidth.
  const bits = EC.F.C.getCurveOrderBitwidth()
  var
    points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
    scalars = newSeq[BigInt[bits]](numPoints)

  # The point is drawn before the scalar on every iteration, so the RNG
  # consumption order is fixed.
  for idx in 0 ..< numPoints:
    var sample = rng.random_unsafe(EC)
    sample.clearCofactor()
    points[idx].affine(sample)
    scalars[idx] = rng.random_unsafe(BigInt[bits])

  var acc {.noInit.}: EC
  acc.setinf()

  resetMetering()
  acc.multiScalarMul_vartime(scalars, points)
  resetMetering()
msmMeter(ECP_ShortW_Jac[Fp[BLS12_381], G1], 10000)
const flags = if UseASM_X86_64 or UseASM_X86_32: "UseAssembly" else: "NoAssembly"
reportCli(Metrics, flags)

View File

@ -9,11 +9,11 @@
import
std/times,
./reports, ./tracer,
../constantine/math/config/[common, curves],
../constantine/math/[arithmetic, extension_fields],
../constantine/math/elliptic/ec_shortweierstrass_projective,
../constantine/math/config/curves,
../constantine/math/[arithmetic, extension_fields, ec_shortweierstrass],
../constantine/math/constants/zoo_subgroups,
../constantine/math/pairings/pairings_bls12,
../constantine/math/pairings/pairings_generic,
../constantine/platforms/abstractions,
# Helpers
../helpers/prng_unsafe
@ -22,19 +22,20 @@ let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "bench xoshiro512** seed: ", seed
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
result = rng.random_unsafe(EC)
result.clearCofactor()
func random_point*(rng: var RngState, EC: typedesc[ECP_ShortW_Aff]): EC {.noInit.} =
var jac = rng.random_unsafe(ECP_ShortW_Jac[EC.F, EC.G])
jac.clearCofactor()
result.affine(jac)
proc pairingBLS12Meter*(C: static Curve) =
let
P = rng.random_point(ECP_ShortW_Prj[Fp[C], G1])
Q = rng.random_point(ECP_ShortW_Prj[Fp2[C], G2])
P = rng.random_point(ECP_ShortW_Aff[Fp[C], G1])
Q = rng.random_point(ECP_ShortW_Aff[Fp2[C], G2])
var f: Fp12[C]
resetMetering()
f.pairing_bls12(P, Q)
f.pairing(P, Q)
resetMetering()
pairingBLS12Meter(BLS12_381)

View File

@ -24,11 +24,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
# https://www.agner.org/optimize/blog/read.php?i=838
echo "The CPU Cycle Count is indicative only. It cannot be used to compare across systems, works at your CPU nominal frequency and is sensitive to overclocking, throttling and frequency scaling (powersaving and Turbo Boost)."
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
const lineSep = &"""|{'-'.repeat(150)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|{'-'.repeat(26)}|{'-'.repeat(26)}|"""
echo "\n"
echo lineSep
echo &"""|{"Procedures":^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|{"CPU cycles (in billions)":^26}|{"Avg cycles (in billions)":^26}|"""
echo &"""|{flags:^150}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|{"indicative only":^26}|{"indicative only":^26}|"""
echo lineSep
for m in metrics:
if m.numCalls == 0:
@ -40,15 +40,15 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
let throughput = 1e6 / avgTimeUs
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep
else:
# Separator for the table without CPU cycle columns. Column widths must match
# the header and row format strings of this branch (150 for the procedure name).
const lineSep = &"""|{'-'.repeat(150)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
echo "\n"
echo lineSep
echo &"""|{"Procedures":^50}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
echo &"""|{flags:^50}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|"""
echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
echo &"""|{flags:^150}|{' '.repeat(14)}|{' '.repeat(20)}|{' '.repeat(15)}|{' '.repeat(17)}|"""
echo lineSep
for m in metrics:
if m.numCalls == 0:
@ -58,5 +58,5 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
let avgTimeUs = cumulTimeUs / m.numCalls.float64
let throughput = 1e6 / avgTimeUs
echo &"""|{m.procName:<50}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep

View File

@ -57,7 +57,7 @@ var ctMetrics{.compileTime.}: seq[Metadata]
## Unfortunately the "seq" is emptied when passing the compileTime/runtime boundaries
## due to Nim bugs
when CttTrace:
when CttMeter or CttTrace:
# strformat doesn't work in templates.
from strutils import alignLeft, formatFloat
@ -111,7 +111,7 @@ macro meterAnnotate(procAst: untyped): untyped =
procAst.expectKind({nnkProcDef, nnkFuncDef})
let id = ctMetrics.len
let name = procAst[0].repr
let name = procAst[0].repr & procAst[3].repr
# TODO, get the module and the package the proc is coming from
# and the tag "Fp", "ec", "polynomial" ...
@ -123,6 +123,24 @@ macro meterAnnotate(procAst: untyped): untyped =
newbody.add nnkDefer.newTree(getAst(fnExit(name, id, startTime, startCycle)))
newBody.add procAst.body
if procAst[4].kind != nnkEmpty:
  # Timing procedures add the TimeEffect tag, which interferes with {.tags:[VarTime].}
  # as TimeEffect is not listed. We drop the `tags` pragma for metering
  # and keep every other pragma untouched.
  var pragmas: NimNode
  if procAst[4].len == 1:
    if procAst[4][0].kind == nnkExprColonExpr and procAst[4][0][0].eqIdent"tags":
      # The only pragma was `tags`: nothing is left.
      pragmas = newEmptyNode()
    else:
      pragmas = procAst[4]
  else:
    pragmas = nnkPragma.newTree()
    for i in 0 ..< procAst[4].len:
      # Inspect the i-th pragma; always looking at index 0 would either
      # duplicate the first pragma or drop all of them.
      let prag = procAst[4][i]
      if prag.kind == nnkExprColonExpr and prag[0].eqIdent"tags":
        continue
      pragmas.add prag
  procAst[4] = pragmas
procAst.body = newBody
result = procAst

View File

@ -63,10 +63,7 @@ def genAteParam(curve_name, curve_config):
buf += ate_comment
ate_bits = int(ate_param).bit_length()
naf_bits = int(3*ate_param).bit_length() - ate_bits
buf += f' # +{naf_bits} to bitlength so that we can mul by 3 for NAF encoding\n'
buf += f' BigInt[{ate_bits}+{naf_bits}].fromHex"0x{Integer(abs(ate_param)).hex()}"\n\n'
buf += f' BigInt[{ate_bits}].fromHex"0x{Integer(abs(ate_param)).hex()}"\n\n'
buf += f'const {curve_name}_pairing_ate_param_isNeg* = {"true" if ate_param < 0 else "false"}'
@ -198,7 +195,7 @@ def genFinalExp(curve_name, curve_config):
scale = 3*(u^3-u^2+1)
scaleDesc = ' * 3*(u^3-u^2+1)'
fexp = (p - 1)//r
fexp = (p^k - 1)//r
fexp *= scale
buf = f'const {curve_name}_pairing_finalexponent* = block:\n'

View File

@ -81,7 +81,7 @@ def serialize_EC_Fp2(P):
# Generator
# ---------------------------------------------------------
def genScalarMulG1(curve_name, curve_config, count, seed):
def genScalarMulG1(curve_name, curve_config, count, seed, scalarBits = None):
p = curve_config[curve_name]['field']['modulus']
r = curve_config[curve_name]['field']['order']
form = curve_config[curve_name]['curve']['form']
@ -109,13 +109,14 @@ def genScalarMulG1(curve_name, curve_config, count, seed):
for i in progressbar(range(count)):
v = {}
P = G1.random_point()
scalar = randrange(r)
scalar = randrange(1 << scalarBits) if scalarBits else randrange(r)
P *= cofactor # clear cofactor
Q = scalar * P
v['id'] = i
v['P'] = serialize_EC_Fp(P)
v['scalarBits'] = scalarBits if scalarBits else r.bit_length()
v['scalar'] = serialize_bigint(scalar)
v['Q'] = serialize_EC_Fp(Q)
vectors.append(v)
@ -123,7 +124,7 @@ def genScalarMulG1(curve_name, curve_config, count, seed):
out['vectors'] = vectors
return out
def genScalarMulG2(curve_name, curve_config, count, seed):
def genScalarMulG2(curve_name, curve_config, count, seed, scalarBits = None):
p = curve_config[curve_name]['field']['modulus']
r = curve_config[curve_name]['field']['order']
form = curve_config[curve_name]['curve']['form']
@ -197,7 +198,7 @@ def genScalarMulG2(curve_name, curve_config, count, seed):
for i in progressbar(range(count)):
v = {}
P = G2.random_point()
scalar = randrange(r)
scalar = randrange(1 << scalarBits) if scalarBits else randrange(r)
P *= cofactor # clear cofactor
Q = scalar * P
@ -205,10 +206,12 @@ def genScalarMulG2(curve_name, curve_config, count, seed):
v['id'] = i
if G2_field == 'Fp2':
v['P'] = serialize_EC_Fp2(P)
v['scalarBits'] = scalarBits if scalarBits else r.bit_length()
v['scalar'] = serialize_bigint(scalar)
v['Q'] = serialize_EC_Fp2(Q)
elif G2_field == 'Fp':
v['P'] = serialize_EC_Fp(P)
v['scalarBits'] = scalarBits if scalarBits else r.bit_length()
v['scalar'] = serialize_bigint(scalar)
v['Q'] = serialize_EC_Fp(Q)
vectors.append(v)
@ -222,7 +225,7 @@ def genScalarMulG2(curve_name, curve_config, count, seed):
if __name__ == "__main__":
# Usage
# BLS12-381
# sage sage/derive_pairing.sage BLS12_381 G1
# sage sage/testgen_scalar_mul.sage BLS12_381 G1 {scalarBits: optional int}
from argparse import ArgumentParser
@ -232,6 +235,9 @@ if __name__ == "__main__":
curve = args.curve[0]
group = args.curve[1]
scalarBits = None
if len(args.curve) > 2:
scalarBits = int(args.curve[2])
if curve not in Curves:
raise ValueError(
@ -245,16 +251,17 @@ if __name__ == "__main__":
' is not a valid group, expected G1 or G2 instead'
)
else:
print(f'\nGenerating test vectors tv_{curve}_scalar_mul_{group}.json')
bits = scalarBits if scalarBits else Curves[curve]['field']['order'].bit_length()
# Announce the exact name of the file this script writes (suffix "bits.json").
print(f'\nGenerating test vectors tv_{curve}_scalar_mul_{group}_{bits}bits.json')
print('----------------------------------------------------\n')
count = 40
seed = 1337
if group == 'G1':
out = genScalarMulG1(curve, Curves, count, seed)
out = genScalarMulG1(curve, Curves, count, seed, scalarBits)
elif group == 'G2':
out = genScalarMulG2(curve, Curves, count, seed)
out = genScalarMulG2(curve, Curves, count, seed, scalarBits)
with open(f'tv_{curve}_scalar_mul_{group}.json', 'w') as f:
with open(f'tv_{curve}_scalar_mul_{group}_{bits}bits.json', 'w') as f:
json.dump(out, f, indent=2)

View File

@ -1,71 +0,0 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../../constantine/math/arithmetic,
../../../constantine/math/io/io_bigints
# Support files for testing Elliptic Curve arithmetic
# ------------------------------------------------------------------------------
iterator unpack(scalarByte: byte): bool =
yield bool((scalarByte and 0b10000000) shr 7)
yield bool((scalarByte and 0b01000000) shr 6)
yield bool((scalarByte and 0b00100000) shr 5)
yield bool((scalarByte and 0b00010000) shr 4)
yield bool((scalarByte and 0b00001000) shr 3)
yield bool((scalarByte and 0b00000100) shr 2)
yield bool((scalarByte and 0b00000010) shr 1)
yield bool( scalarByte and 0b00000001)
func unsafe_ECmul_double_add*[EC](
P: var EC,
scalar: BigInt,
) =
## **Unsafe** Elliptic Curve Scalar Multiplication
##
## P <- [k] P
##
## This uses the double-and-add algorithm to verify the constant-time production implementation
## This is UNSAFE to use in production and only intended for testing purposes.
##
## This is highly VULNERABLE to timing attacks and power analysis attacks
var scalarCanonical: array[(scalar.bits+7) div 8, byte]
scalarCanonical.marshal(scalar, bigEndian)
var t0: typeof(P)
t0.setInf()
for scalarByte in scalarCanonical:
for bit in unpack(scalarByte):
t0.double()
if bit:
t0 += P
P = t0
func unsafe_ECmul_minHammingWeight*[EC](
P: var EC,
scalar: BigInt) =
## **Unsafe** Elliptic Curve Scalar Multiplication
##
## P <- [k] P
##
## This uses an online recoding with minimum Hamming Weight
## (which is not NAF, NAF is least-significant bit to most)
## This is UNSAFE to use in production and only intended for testing purposes.
##
## This is highly VULNERABLE to timing attacks and power analysis attacks
var t0{.noInit.}: typeof(P)
t0.setInf()
for bit in recoding_l2r_vartime(scalar):
t0.double()
if bit == 1:
t0 += P
elif bit == -1:
t0 -= P
P = t0

View File

@ -19,7 +19,7 @@ import
echo "\n------------------------------------------------------\n"
proc mainArith() =
suite "isZero" & " [" & $WordBitWidth & "-bit mode]":
suite "isZero" & " [" & $WordBitWidth & "-bit words]":
test "isZero for zero":
var x: BigInt[128]
check: x.isZero().bool
@ -49,7 +49,7 @@ proc mainArith() =
check: static(not x.isZero().bool)
suite "Arithmetic operations - Addition" & " [" & $WordBitWidth & "-bit mode]":
suite "Arithmetic operations - Addition" & " [" & $WordBitWidth & "-bit words]":
test "Adding 2 zeros":
var a = fromHex(BigInt[128], "0x00000000000000000000000000000000")
let b = fromHex(BigInt[128], "0x00000000000000000000000000000000")
@ -149,7 +149,7 @@ proc mainArith() =
bool(a == c)
not bool(carry)
suite "BigInt + SecretWord" & " [" & $WordBitWidth & "-bit mode]":
suite "BigInt + SecretWord" & " [" & $WordBitWidth & "-bit words]":
test "Addition limbs carry":
block: # P256 / 2
var a = BigInt[256].fromhex"0x7fffffff800000008000000000000000000000007fffffffffffffffffffffff"
@ -160,7 +160,7 @@ proc mainArith() =
check: bool(a == expected)
proc mainMul() =
suite "Multi-precision multiplication" & " [" & $WordBitWidth & "-bit mode]":
suite "Multi-precision multiplication" & " [" & $WordBitWidth & "-bit words]":
test "Same size operand into double size result":
block:
var r = canary(BigInt[256])
@ -201,7 +201,7 @@ proc mainMul() =
check: bool(r == expected)
proc mainMulHigh() =
suite "Multi-precision multiplication keeping only high words" & " [" & $WordBitWidth & "-bit mode]":
suite "Multi-precision multiplication keeping only high words" & " [" & $WordBitWidth & "-bit words]":
test "Same size operand into double size result - discard first word":
block:
var r = canary(BigInt[256])
@ -287,7 +287,7 @@ proc mainMulHigh() =
check: bool(r == expected)
proc mainSquare() =
suite "Multi-precision multiplication" & " [" & $WordBitWidth & "-bit mode]":
suite "Multi-precision multiplication" & " [" & $WordBitWidth & "-bit words]":
test "Squaring is consistent with multiplication (rBits = 2*aBits)":
block:
let a = BigInt[200].fromHex"0xDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDEADBEEFDE"
@ -309,7 +309,7 @@ proc mainSquare() =
check: bool(rmul == rsqr)
proc mainModular() =
suite "Modular operations - small modulus" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular operations - small modulus" & " [" & $WordBitWidth & "-bit words]":
# Vectors taken from Stint - https://github.com/status-im/nim-stint
test "100 mod 13":
# Test 1 word and more than 1 word
@ -368,7 +368,7 @@ proc mainModular() =
"\n r (low-level repr): " & $r &
"\n expected (ll repr): " & $expected
suite "Modular operations - small modulus - Stint specific failures highlighted by property-based testing" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular operations - small modulus - Stint specific failures highlighted by property-based testing" & " [" & $WordBitWidth & "-bit words]":
# Vectors taken from Stint - https://github.com/status-im/nim-stint
test "Modulo: 65696211516342324 mod 174261910798982":
let u = 65696211516342324'u64
@ -401,7 +401,7 @@ proc mainModular() =
"\n expected (ll repr): " & $expected
proc mainNeg() =
suite "Conditional negation" & " [" & $WordBitWidth & "-bit mode]":
suite "Conditional negation" & " [" & $WordBitWidth & "-bit words]":
test "Conditional negation":
block:
var a = fromHex(BigInt[128], "0x12345678FF11FFAA00321321CAFECAFE")
@ -499,7 +499,7 @@ proc mainNeg() =
bool(b == b2)
proc mainCopySwap() =
suite "Copy and Swap" & " [" & $WordBitWidth & "-bit mode]":
suite "Copy and Swap" & " [" & $WordBitWidth & "-bit words]":
test "Conditional copy":
block:
var a = fromHex(BigInt[128], "0x12345678FF11FFAA00321321CAFECAFE")
@ -545,7 +545,7 @@ proc mainCopySwap() =
bool(eB == b)
proc mainModularInverse() =
suite "Modular Inverse (with odd modulus)" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular Inverse (with odd modulus)" & " [" & $WordBitWidth & "-bit words]":
# Note: We don't define multi-precision multiplication
# because who needs it when you have Montgomery?
# ¯\(ツ)/¯
@ -556,10 +556,14 @@ proc mainModularInverse() =
let expected = BigInt[16].fromUint(1969'u16)
var r = canary(BigInt[16])
var r2 = canary(BigInt[16])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
block: # huge int
let a = BigInt[381].fromUint(42'u16)
@ -567,10 +571,14 @@ proc mainModularInverse() =
let expected = BigInt[381].fromUint(1969'u16)
var r = canary(BigInt[381])
var r2 = canary(BigInt[381])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
test "271^-1 (mod 383) = 106":
block: # small int
@ -579,10 +587,14 @@ proc mainModularInverse() =
let expected = BigInt[16].fromUint(106'u16)
var r = canary(BigInt[16])
var r2 = canary(BigInt[16])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
block: # huge int
let a = BigInt[381].fromUint(271'u16)
@ -590,10 +602,14 @@ proc mainModularInverse() =
let expected = BigInt[381].fromUint(106'u16)
var r = canary(BigInt[381])
var r2 = canary(BigInt[381])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
test "BN254_Modulus^-1 (mod BLS12_381)":
let a = BigInt[381].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
@ -602,9 +618,14 @@ proc mainModularInverse() =
let expected = BigInt[381].fromHex("0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8")
var r = canary(BigInt[381])
r.invmod(a, M)
var r2 = canary(BigInt[381])
check: bool(r == expected)
r.invmod(a, M)
r2.invmod_vartime(a, M)
check:
bool(r == expected)
bool(r2 == expected)
test "0^-1 (mod any) = 0 (need for tower of extension fields)":
block:
@ -613,10 +634,14 @@ proc mainModularInverse() =
let expected = BigInt[16].fromUint(0'u16)
var r = canary(BigInt[16])
var r2 = canary(BigInt[16])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
block:
let a = BigInt[381].fromUint(0'u16)
@ -624,10 +649,14 @@ proc mainModularInverse() =
let expected = BigInt[381].fromUint(0'u16)
var r = canary(BigInt[381])
var r2 = canary(BigInt[381])
r.invmod(a, M)
r2.invmod_vartime(a, M)
check: bool(r == expected)
check:
bool(r == expected)
bool(r2 == expected)
mainArith()
mainMul()

View File

@ -17,7 +17,7 @@ import
echo "\n------------------------------------------------------\n"
proc main() =
suite "Bigints - Multiprecision modulo" & " [" & $WordBitWidth & "-bit mode]":
suite "Bigints - Multiprecision modulo" & " [" & $WordBitWidth & "-bit words]":
test "bitsize 237 mod bitsize 192":
let a = BigInt[237].fromHex("0x123456789012345678901234567890123456789012345678901234567890")
let m = BigInt[192].fromHex("0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB")

View File

@ -49,7 +49,7 @@ proc test(
R.frobenius_psi(P)
doAssert: bool(R == Q)
suite "ψ (Psi) - Untwist-Frobenius-Twist Endomorphism on G2 vs SageMath" & " [" & $WordBitWidth & "-bit mode]":
suite "ψ (Psi) - Untwist-Frobenius-Twist Endomorphism on G2 vs SageMath" & " [" & $WordBitWidth & "-bit words]":
# Generated via
# - sage sage/frobenius_bn254_snarks.sage
# - sage sage/frobenius_bls12_377.sage
@ -214,7 +214,7 @@ suite "ψ (Psi) - Untwist-Frobenius-Twist Endomorphism on G2 vs SageMath" & " ["
Qy1 = "77ef6850d4a8f181a10196398cd344011a44c50dce00e18578f3526301263492086d44c7c3d1db5b12499b4033116e1"
)
suite "ψ - psi(psi(P)) == psi2(P) - (Untwist-Frobenius-Twist Endomorphism)" & " [" & $WordBitWidth & "-bit mode]":
suite "ψ - psi(psi(P)) == psi2(P) - (Untwist-Frobenius-Twist Endomorphism)" & " [" & $WordBitWidth & "-bit words]":
const Iters = 8
proc test(EC: typedesc, randZ: static bool, gen: static RandomGen) =
for i in 0 ..< Iters:
@ -247,7 +247,7 @@ suite "ψ - psi(psi(P)) == psi2(P) - (Untwist-Frobenius-Twist Endomorphism)" & "
testAll(ECP_ShortW_Prj[Fp2[BLS12_381], G2])
testAll(ECP_ShortW_Prj[Fp[BW6_761], G2])
suite "ψ²(P) - [t]ψ(P) + [p]P = Inf" & " [" & $WordBitWidth & "-bit mode]":
suite "ψ²(P) - [t]ψ(P) + [p]P = Inf" & " [" & $WordBitWidth & "-bit words]":
const Iters = 10
proc trace(C: static Curve): auto =
# Returns (abs(trace), isNegativeSign)
@ -314,7 +314,7 @@ suite "ψ²(P) - [t]ψ(P) + [p]P = Inf" & " [" & $WordBitWidth & "-bit mode]":
testAll(ECP_ShortW_Prj[Fp2[BLS12_381], G2])
testAll(ECP_ShortW_Prj[Fp[BW6_761], G2])
suite "ψ⁴(P) - ψ²(P) + P = Inf (k-th cyclotomic polynomial with embedding degree k=12)" & " [" & $WordBitWidth & "-bit mode]":
suite "ψ⁴(P) - ψ²(P) + P = Inf (k-th cyclotomic polynomial with embedding degree k=12)" & " [" & $WordBitWidth & "-bit words]":
const Iters = 10
proc test(EC: typedesc, randZ: static bool, gen: static RandomGen) =
@ -344,7 +344,7 @@ suite "ψ⁴(P) - ψ²(P) + P = Inf (k-th cyclotomic polynomial with embedding d
testAll(ECP_ShortW_Prj[Fp2[BLS12_377], G2])
testAll(ECP_ShortW_Prj[Fp2[BLS12_381], G2])
suite "ψ²(P) - ψ(P) + P = Inf (k-th cyclotomic polynomial with embedding degree k=6)" & " [" & $WordBitWidth & "-bit mode]":
suite "ψ²(P) - ψ(P) + P = Inf (k-th cyclotomic polynomial with embedding degree k=6)" & " [" & $WordBitWidth & "-bit words]":
const Iters = 10
proc test(EC: typedesc, randZ: static bool, gen: static RandomGen) =

View File

@ -15,22 +15,23 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BLS12_377], G1],
staticFor(bits, [32, 64, 128, BLS12_377.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BLS12_377], G1], bits,
"t_ec_sage_bls12_377_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BLS12_377], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BLS12_377], G1], bits,
"t_ec_sage_bls12_377_g1_jacobian"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BLS12_377], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BLS12_377], G2], bits,
"t_ec_sage_bls12_377_g2_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BLS12_377], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BLS12_377], G2], bits,
"t_ec_sage_bls12_377_g2_jacobian"
)
)

View File

@ -15,22 +15,23 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BLS12_381], G1],
staticFor(bits, [32, 64, 128, BLS12_381.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BLS12_381], G1], bits,
"t_ec_sage_bls12_381_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BLS12_381], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BLS12_381], G1], bits,
"t_ec_sage_bls12_381_g1_jacobian"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BLS12_381], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BLS12_381], G2], bits,
"t_ec_sage_bls12_381_g2_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BLS12_381], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BLS12_381], G2], bits,
"t_ec_sage_bls12_381_g2_jacobian"
)
)

View File

@ -15,22 +15,23 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BN254_Nogami], G1],
staticFor(bits, [BN254_Nogami.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BN254_Nogami], G1], bits,
"t_ec_sage_bn254_nogami_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BN254_Nogami], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BN254_Nogami], G1], bits,
"t_ec_sage_bn254_nogami_g1_jacobian"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BN254_Nogami], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BN254_Nogami], G2], bits,
"t_ec_sage_bn254_nogami_g2_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BN254_Nogami], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BN254_Nogami], G2], bits,
"t_ec_sage_bn254_nogami_g2_jacobian"
)
)

View File

@ -15,22 +15,23 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BN254_Snarks], G1],
staticFor(bits, [32, 64, 128, BN254_Snarks.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BN254_Snarks], G1], bits,
"t_ec_sage_bn254_snarks_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BN254_Snarks], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BN254_Snarks], G1], bits,
"t_ec_sage_bn254_snarks_g1_jacobian"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BN254_Snarks], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp2[BN254_Snarks], G2], bits,
"t_ec_sage_bn254_snarks_g2_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BN254_Snarks], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp2[BN254_Snarks], G2], bits,
"t_ec_sage_bn254_snarks_g2_jacobian"
)
)

View File

@ -20,22 +20,23 @@ import
# this creates bad codegen: in the C code, the `value` parameter gets the wrong type
# TODO: upstream
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BW6_761], G1],
staticFor(bits, [BW6_761.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BW6_761], G1], bits,
"t_ec_sage_bw6_761_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BW6_761], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BW6_761], G1], bits,
"t_ec_sage_bw6_761_g1_jacobian"
)
)
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Prj[Fp[BW6_761], G2],
# "t_ec_sage_bw6_761_g2_projective"
# )
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Prj[Fp[BW6_761], G2], bits,
# "t_ec_sage_bw6_761_g2_projective"
# )
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Jac[Fp[BW6_761], G2],
# "t_ec_sage_bw6_761_g2_jacobian"
# )
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Jac[Fp[BW6_761], G2], bits,
# "t_ec_sage_bw6_761_g2_jacobian"
# )

View File

@ -20,22 +20,23 @@ import
# this creates bad codegen: in the C code, the `value` parameter gets the wrong type
# TODO: upstream
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Prj[Fp[BW6_761], G1],
# "t_ec_sage_bw6_761_g1_projective"
# )
staticFor(bits, [BW6_761.getCurveOrderBitwidth()]):
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Prj[Fp[BW6_761], G1], bits,
# "t_ec_sage_bw6_761_g1_projective"
# )
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Jac[Fp[BW6_761], G1],
# "t_ec_sage_bw6_761_g1_jacobian"
# )
# run_scalar_mul_test_vs_sage(
# ECP_ShortW_Jac[Fp[BW6_761], G1], bits,
# "t_ec_sage_bw6_761_g1_jacobian"
# )
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BW6_761], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[BW6_761], G2], bits,
"t_ec_sage_bw6_761_g2_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BW6_761], G2],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[BW6_761], G2], bits,
"t_ec_sage_bw6_761_g2_jacobian"
)
)

View File

@ -15,12 +15,13 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[Pallas], G1],
staticFor(bits, [Pallas.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[Pallas], G1], bits,
"t_ec_sage_pallas_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[Pallas], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[Pallas], G1], bits,
"t_ec_sage_pallas_g1_jacobian"
)
)

View File

@ -13,7 +13,6 @@ import
pkg/jsony,
# Internals
../../constantine/platforms/abstractions,
../../constantine/math/config/curves,
../../constantine/math/[arithmetic, extension_fields],
../../constantine/math/io/[io_bigints, io_ec],
../../constantine/math/elliptic/[
@ -22,72 +21,21 @@ import
ec_shortweierstrass_jacobian,
ec_scalar_mul,
ec_endomorphism_accel],
../../constantine/math/constants/zoo_endomorphisms,
# Test utilities
./support/ec_reference_scalar_mult
../../constantine/math/elliptic/ec_scalar_mul_vartime
export unittest, abstractions, arithmetic # Generic sandwich
# Serialization
# --------------------------------------------------------------------------
macro matchingScalar*(EC: type ECP_ShortW_Aff): untyped =
  ## Builds the type `BigInt[N]` where N is the curve order bitwidth
  ## of the curve underlying the affine point type `EC`.
  ##
  ## This is a workaround for type system limitations:
  ## 1. Higher-kinded type
  ## 2. Computation in type section needs template or macro indirection
  ## 3. Converting NimNode to typedesc
  ##    https://github.com/nim-lang/Nim/issues/6785
  #
  # What we would like to write:
  #   BigInt[EC.F.C.getCurveOrderBitwidth()]
  #
  # Expected shape of the type implementation (`echo impl.treerepr`):
  #   BracketExpr
  #     Sym "typeDesc"
  #     BracketExpr
  #       Sym "ECP_ShortW_Aff"
  #       BracketExpr
  #         Sym "Fp"
  #         IntLit 12
  #       IntLit 0
  let impl = EC.getTypeImpl()

  # Validate the outer layers before digging into the field type.
  doAssert impl[0].eqIdent"typedesc"
  doAssert impl[1][0].eqIdent"ECP_ShortW_Aff"

  let field = impl[1][1]
  field.expectkind(nnkBracketExpr)
  doAssert ($field[0]).startsWith"Fp"

  # The curve is stored as an integer literal in the field type.
  let curve = Curve(field[1].intVal)
  let bitwidth = getAST(getCurveOrderBitwidth(curve))

  result = nnkBracketExpr.newTree(bindSym"BigInt", bitwidth)
macro matchingNonResidueType*(EC: type ECP_ShortW_Aff): untyped =
  ## Returns the type used to describe the twist non-residue
  ## for the field underlying the affine point type `EC`:
  ## - `int` when the coordinates are in `Fp`
  ## - `array[2, int]` when the coordinates are in `Fp2`
  ##
  ## Any other `Fp*` field is rejected with a compile-time error.
  ##
  ## Workaround the annoying type system
  ## 1. Higher-kinded type
  ## 2. Computation in type section needs template or macro indirection
  ## 3. Converting NimNode to typedesc
  ##    https://github.com/nim-lang/Nim/issues/6785
  let ec = EC.getTypeImpl()
  doAssert ec[0].eqIdent"typedesc"
  doAssert ec[1][0].eqIdent"ECP_ShortW_Aff"
  ec[1][1].expectkind(nnkBracketExpr)
  doAssert ($ec[1][1][0]).startsWith"Fp"

  # int or array[2, int]
  if ec[1][1][0].eqIdent"Fp":
    result = bindSym"int"
  elif ec[1][1][0].eqIdent"Fp2":
    result = nnkBracketExpr.newTree(
      bindSym"array",
      newLit 2,
      bindSym"int"
    )
  else:
    # The `startsWith"Fp"` guard above lets other names through;
    # without this branch `result` would stay nil and fail later with an obscure error.
    error("matchingNonResidueType: unsupported field '" & $ec[1][1][0] &
          "', expected Fp or Fp2", EC)
type
TestVector*[EC: ECP_ShortW_Aff] = object
TestVector*[EC: ECP_ShortW_Aff, bits: static int] = object
id: int
P: EC
scalar: matchingScalar(EC)
scalarBits: int
scalar: BigInt[bits]
Q: EC
EC_G1_hex = object
@ -102,7 +50,7 @@ type
x: Fp2_hex
y: Fp2_hex
ScalarMulTestG1[EC: ECP_ShortW_Aff] = object
ScalarMulTestG1[EC: ECP_ShortW_Aff, bits: static int] = object
curve: string
group: string
modulus: string
@ -112,9 +60,9 @@ type
a: string
b: string
# vectors ------------------
vectors: seq[TestVector[EC]]
vectors: seq[TestVector[EC, bits]]
ScalarMulTestG2[EC: ECP_ShortW_Aff] = object
ScalarMulTestG2[EC: ECP_ShortW_Aff, bits: static int] = object
curve: string
group: string
modulus: string
@ -128,9 +76,12 @@ type
twist: string
non_residue_fp: int
G2_field: string
non_residue_twist: matchingNonResidueType(EC) # int or array[2, int]
when EC.F is Fp:
non_residue_twist: int
else:
non_residue_twist: array[2, int]
# vectors ------------------
vectors: seq[TestVector[EC]]
vectors: seq[TestVector[EC, bits]]
const
TestVectorsDir* =
@ -170,7 +121,8 @@ proc parseHook*(src: string, pos: var int, value: var ECP_ShortW_Aff) =
proc loadVectors(TestType: typedesc): TestType =
const group = when TestType.EC.G == G1: "G1"
else: "G2"
const filename = "tv_" & $TestType.EC.F.C & "_scalar_mul_" & group & ".json"
const filename = "tv_" & $TestType.EC.F.C & "_scalar_mul_" & group & "_" & $TestType.bits & "bit.json"
echo "Loading: ", filename
let content = readFile(TestVectorsDir/filename)
result = content.fromJson(TestType)
@ -178,7 +130,7 @@ proc loadVectors(TestType: typedesc): TestType =
# ------------------------------------------------------------------------
proc run_scalar_mul_test_vs_sage*(
EC: typedesc,
EC: typedesc, bits: static int,
moduleName: string
) =
echo "\n------------------------------------------------------\n"
@ -186,38 +138,48 @@ proc run_scalar_mul_test_vs_sage*(
when EC.G == G1:
const G1_or_G2 = "G1"
let vec = loadVectors(ScalarMulTestG1[ECP_ShortW_Aff[EC.F, EC.G]])
let vec = loadVectors(ScalarMulTestG1[ECP_ShortW_Aff[EC.F, EC.G], bits])
else:
const G1_or_G2 = "G2"
let vec = loadVectors(ScalarMulTestG2[ECP_ShortW_Aff[EC.F, EC.G]])
let vec = loadVectors(ScalarMulTestG2[ECP_ShortW_Aff[EC.F, EC.G], bits])
const coord = when EC is ECP_ShortW_Prj: " Projective coordinates "
elif EC is ECP_ShortW_Jac: " Jacobian coordinates "
const testSuiteDesc = "Scalar Multiplication " & $EC.F.C & " " & G1_or_G2 & " vs SageMath"
const testSuiteDesc = "Scalar Multiplication " & $EC.F.C & " " & G1_or_G2 & " vs SageMath - " & $bits & "-bit scalar"
suite testSuiteDesc & " [" & $WordBitWidth & "-bit mode]":
suite testSuiteDesc & " [" & $WordBitWidth & "-bit words]":
for i in 0 ..< vec.vectors.len:
test "test " & $vec.vectors[i].id & " - " & $EC:
test "test " & $vec.vectors[i].id & " - " & $EC & " - " & $bits & "-bit scalar":
var
P{.noInit.}: EC
Q {.noInit.}: EC
impl {.noInit.}: EC
reference {.noInit.}: EC
endo {.noInit.}: EC
refMinWeight {.noInit.}: EC
P.fromAffine(vec.vectors[i].P)
Q.fromAffine(vec.vectors[i].Q)
impl = P
reference = P
endo = P
refMinWeight = P
impl.scalarMulGeneric(vec.vectors[i].scalar)
reference.unsafe_ECmul_double_add(vec.vectors[i].scalar)
endo.scalarMulEndo(vec.vectors[i].scalar)
reference.scalarMul_doubleAdd_vartime(vec.vectors[i].scalar)
refMinWeight.scalarMul_minHammingWeight_vartime(vec.vectors[i].scalar)
doAssert: bool(Q == reference)
doAssert: bool(Q == impl)
doAssert: bool(Q == refMinWeight)
staticFor w, 2, 14:
var refWNAF = P
refWNAF.scalarMul_minHammingWeight_windowed_vartime(vec.vectors[i].scalar, window = w)
check: bool(impl == refWNAF)
when bits >= EndomorphismThreshold: # All endomorphisms constants are below this threshold
var endo = P
endo.scalarMulEndo(vec.vectors[i].scalar)
doAssert: bool(Q == endo)
when EC.F is Fp: # Test windowed endomorphism acceleration

View File

@ -15,12 +15,13 @@ import
# Test utilities
./t_ec_sage_template
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[Vesta], G1],
staticFor(bits, [Vesta.getCurveOrderBitwidth()]):
run_scalar_mul_test_vs_sage(
ECP_ShortW_Prj[Fp[Vesta], G1], bits,
"t_ec_sage_vesta_g1_projective"
)
)
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[Vesta], G1],
run_scalar_mul_test_vs_sage(
ECP_ShortW_Jac[Fp[Vesta], G1], bits,
"t_ec_sage_vesta_g1_jacobian"
)
)

View File

@ -0,0 +1,29 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_jacobian,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
# Input sizes handed to each multi-scalar-multiplication test run below.
# The larger sizes at the end of the line are left commented out.
const numPoints = [1, 2, 8, 16, 32, 64, 128, 1024, 2048, 16384] # 32768, 262144, 1048576]
# Multi-scalar-multiplication tests: G1 points in Jacobian coordinates on BN254_Snarks.
# `moduleName` is a label built from a fixed prefix and the curve name.
run_EC_multi_scalar_mul_impl(
ec = ECP_ShortW_Jac[Fp[BN254_Snarks], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_multi_scalar_mul_" & $BN254_Snarks
)
# Same tests on BLS12_381.
run_EC_multi_scalar_mul_impl(
ec = ECP_ShortW_Jac[Fp[BLS12_381], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_multi_scalar_mul_" & $BLS12_381
)

View File

@ -16,7 +16,7 @@ import
../../constantine/math/elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_jacobian, ec_scalar_mul],
# Test utilities
../../helpers/prng_unsafe,
./support/ec_reference_scalar_mult,
../../constantine/math/elliptic/ec_scalar_mul_vartime,
./t_ec_template
const
@ -50,7 +50,7 @@ suite "Order checks on BN254_Snarks":
reference = a
impl.scalarMulGeneric(exponent)
reference.unsafe_ECmul_double_add(exponent)
reference.scalarMul_doubleAdd_vartime(exponent)
check:
bool(impl.isInf())

View File

@ -0,0 +1,53 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/[type_ff, curves],
../../constantine/math/elliptic/ec_shortweierstrass_jacobian_extended,
# Test utilities
./t_ec_template
# Iteration count passed as `Iters` to every addition test run below.
const
Iters = 6
# Addition/doubling tests for G1 points in extended Jacobian coordinates,
# instantiated once per curve. `moduleName` is a label built from a fixed
# prefix and the curve name.
# BN254_Snarks
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[BN254_Snarks], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $BN254_Snarks
)
# BLS12_381
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[BLS12_381], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $BLS12_381
)
# BLS12_377
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[BLS12_377], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $BLS12_377
)
# BW6_761
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[BW6_761], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $BW6_761
)
# Pallas
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[Pallas], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $Pallas
)
# Vesta
run_EC_addition_tests(
ec = ECP_ShortW_JacExt[Fp[Vesta], G1],
Iters = Iters,
moduleName = "test_ec_shortweierstrass_jacobian_extended_g1_add_double_" & $Vesta
)

View File

@ -0,0 +1,54 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_jacobian_extended,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
# Iteration count passed as `Iters` to every mixed-addition test run below.
const
  Iters = 6

# Mixed addition tests for G1 points in extended Jacobian coordinates,
# instantiated once per curve.
# `moduleName` is a label built from a fixed prefix and the curve name;
# the prefix follows the `jacobian_extended_<operation>_` pattern used by
# the sibling extended Jacobian test modules.

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[BN254_Snarks], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $BN254_Snarks
)

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[BLS12_381], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $BLS12_381
)

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[BLS12_377], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $BLS12_377
)

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[BW6_761], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $BW6_761
)

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[Pallas], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $Pallas
)

run_EC_mixed_add_impl(
  ec = ECP_ShortW_JacExt[Fp[Vesta], G1],
  Iters = Iters,
  moduleName = "test_ec_shortweierstrass_jacobian_extended_mixed_add_" & $Vesta
)

View File

@ -0,0 +1,29 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_jacobian_extended,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
# Input sizes handed to each batch addition test run below.
# The larger sizes at the end of the line are left commented out.
const numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384, 32768] # 262144, 1048576]
# Batch addition tests: G1 points in extended Jacobian coordinates on BN254_Snarks.
# `moduleName` is a label built from a fixed prefix and the curve name.
run_EC_batch_add_impl(
ec = ECP_ShortW_JacExt[Fp[BN254_Snarks], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_extended_batch_add_" & $BN254_Snarks
)
# Same tests on BLS12_381.
run_EC_batch_add_impl(
ec = ECP_ShortW_JacExt[Fp[BLS12_381], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_jacobian_extended_batch_add_" & $BLS12_381
)

View File

@ -0,0 +1,29 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
# Internals
../../constantine/math/config/curves,
../../constantine/math/elliptic/ec_shortweierstrass_projective,
../../constantine/math/arithmetic,
# Test utilities
./t_ec_template
# Input sizes handed to each multi-scalar-multiplication test run below.
# The larger sizes at the end of the line are left commented out.
const numPoints = [1, 2, 8, 16, 128, 1024, 2048, 16384] # 32768, 262144, 1048576]
# Multi-scalar-multiplication tests: G1 points in projective coordinates on BN254_Snarks.
# `moduleName` is a label built from a fixed prefix and the curve name.
run_EC_multi_scalar_mul_impl(
ec = ECP_ShortW_Prj[Fp[BN254_Snarks], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_projective_multi_scalar_mul_" & $BN254_Snarks
)
# Same tests on BLS12_381.
run_EC_multi_scalar_mul_impl(
ec = ECP_ShortW_Prj[Fp[BLS12_381], G1],
numPoints = numPoints,
moduleName = "test_ec_shortweierstrass_projective_multi_scalar_mul_" & $BLS12_381
)

View File

@ -15,7 +15,7 @@ import
../../constantine/math/elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective, ec_scalar_mul],
# Test utilities
../../helpers/prng_unsafe,
./support/ec_reference_scalar_mult,
../../constantine/math/elliptic/ec_scalar_mul_vartime,
./t_ec_template
const
@ -49,7 +49,7 @@ suite "Order checks on BN254_Snarks":
reference = a
impl.scalarMulGeneric(exponent)
reference.unsafe_ECmul_double_add(exponent)
reference.scalarMul_doubleAdd_vartime(exponent)
check:
bool(impl.isInf())

View File

@ -22,19 +22,36 @@ import
../../constantine/math/elliptic/[
ec_shortweierstrass_affine,
ec_shortweierstrass_jacobian,
ec_shortweierstrass_jacobian_extended,
ec_shortweierstrass_projective,
ec_shortweierstrass_batch_ops,
ec_twistededwards_affine,
ec_twistededwards_projective,
ec_scalar_mul],
ec_scalar_mul,
ec_multi_scalar_mul],
../../constantine/math/io/[io_bigints, io_fields, io_ec],
../../constantine/math/constants/zoo_subgroups,
# Test utilities
../../helpers/prng_unsafe,
./support/ec_reference_scalar_mult
../../constantine/math/elliptic/ec_scalar_mul_vartime
export unittest, abstractions, arithmetic # Generic sandwich
# Extended Jacobian generic bindings
# ----------------------------------
# All vartime procedures MUST be tagged vartime
# Hence we do not expose `sum` or `+=` for extended jacobian operation to prevent `vartime` mistakes
# we create a local `sum` or `+=` for this module only
func sum[F; G: static Subgroup](
       r: var ECP_ShortW_JacExt[F, G],
       P, Q: ECP_ShortW_JacExt[F, G]) =
  ## Module-local `sum` for extended Jacobian points,
  ## so that generic test code can call `sum`.
  ## Forwards to the variable-time `sum_vartime`.
  sum_vartime(r, P, Q)
func `+=`[F; G: static Subgroup](
       P: var ECP_ShortW_JacExt[F, G],
       Q: ECP_ShortW_JacExt[F, G]) =
  ## Module-local in-place addition for extended Jacobian points.
  ## Forwards to the variable-time `sum_vartime`, with `P` as both output and input.
  sum_vartime(P, P, Q)
func madd[F; G: static Subgroup](
       r: var ECP_ShortW_JacExt[F, G],
       P: ECP_ShortW_JacExt[F, G],
       Q: ECP_ShortW_Aff[F, G]) =
  ## Module-local mixed addition (extended Jacobian + affine),
  ## so that generic test code can call `madd`.
  ## Forwards to the variable-time `madd_vartime`.
  madd_vartime(r, P, Q)
func `+=`[F; G: static Subgroup](
       P: var ECP_ShortW_JacExt[F, G],
       Q: ECP_ShortW_Aff[F, G]) =
  ## Module-local in-place mixed addition (extended Jacobian += affine).
  ## Forwards to the variable-time `madd_vartime`, with `P` as both output and input.
  madd_vartime(P, P, Q)
type
RandomGen* = enum
Uniform
@ -65,15 +82,6 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen)
else:
result = rng.random_long01Seq_with_randZ(EC)
template pairingGroup(EC: typedesc): string =
when EC is (ECP_ShortW_Aff or ECP_ShortW_Prj or ECP_ShortW_Jac):
when EC.G == G1:
"G1"
else:
"G2"
else:
""
proc run_EC_addition_tests*(
ec: typedesc,
Iters: static int,
@ -87,12 +95,10 @@ proc run_EC_addition_tests*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form with projective coordinates"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "The infinity point is the neutral element w.r.t. to EC " & G1_or_G2 & " addition":
test "The infinity point is the neutral element w.r.t. to EC " & $ec.G & " addition":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
var inf {.noInit.}: EC
inf.setInf()
@ -124,6 +130,40 @@ proc run_EC_addition_tests*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
# Checks that an infinity point obtained by converting the affine infinity point
# is recognized as infinity and acts as the neutral element of addition.
# NOTE(review): `rng`, `Iters` and `ec` come from the enclosing scope, not shown in this hunk.
test "Infinity point from affine conversion gives proper result":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
# Build the infinity point in affine form, then convert it to the tested coordinate system.
var affInf {.noInit.}: affine(EC)
var inf {.noInit.}: EC
affInf.setInf()
inf.fromAffine(affInf)
check: bool inf.isInf()
for _ in 0 ..< Iters:
var r{.noInit.}: EC
let P = rng.random_point(EC, randZ, gen)
# P + inf == P and inf + P == P, with a separate output variable
r.sum(P, inf)
check: bool(r == P)
r.sum(inf, P)
check: bool(r == P)
# Aliasing tests
# Same identities with the in-place `+=`, where the output is also an input
r = P
r += inf
check: bool(r == P)
r = inf
r += P
check: bool(r == P)
# Run for every random generator, with and without `randZ`.
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "Adding opposites gives an infinity point":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
@ -145,7 +185,7 @@ proc run_EC_addition_tests*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " add is commutative":
test "EC " & $ec.G & " add is commutative":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
var r0{.noInit.}, r1{.noInit.}: EC
@ -163,7 +203,7 @@ proc run_EC_addition_tests*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " add is associative":
test "EC " & $ec.G & " add is associative":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
@ -212,7 +252,7 @@ proc run_EC_addition_tests*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " double and EC " & G1_or_G2 & " add are consistent":
test "EC " & $ec.G & " double and EC " & $ec.G & " add are consistent":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
@ -244,12 +284,10 @@ proc run_EC_mul_sanity_tests*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "EC " & G1_or_G2 & " mul [0]P == Inf":
test "EC " & $ec.G & " mul [0]P == Inf":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let a = rng.random_point(EC, randZ, gen)
@ -259,7 +297,7 @@ proc run_EC_mul_sanity_tests*(
reference = a
impl.scalarMulGeneric(BigInt[bits]())
reference.unsafe_ECmul_double_add(BigInt[bits]())
reference.scalarMul_doubleAdd_vartime(BigInt[bits]())
check:
bool(impl.isInf())
@ -272,7 +310,7 @@ proc run_EC_mul_sanity_tests*(
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mul [1]P == P":
test "EC " & $ec.G & " mul [1]P == P":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let a = rng.random_point(EC, randZ, gen)
@ -285,7 +323,7 @@ proc run_EC_mul_sanity_tests*(
reference = a
impl.scalarMulGeneric(exponent)
reference.unsafe_ECmul_double_add(exponent)
reference.scalarMul_doubleAdd_vartime(exponent)
check:
bool(impl == a)
@ -298,7 +336,7 @@ proc run_EC_mul_sanity_tests*(
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Long01Sequence)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mul [2]P == P.double()":
test "EC " & $ec.G & " mul [2]P == P.double()":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let a = rng.random_point(EC, randZ, gen)
@ -313,7 +351,7 @@ proc run_EC_mul_sanity_tests*(
reference = a
impl.scalarMulGeneric(exponent)
reference.unsafe_ECmul_double_add(exponent)
reference.scalarMul_doubleAdd_vartime(exponent)
check:
bool(impl == doubleA)
@ -339,13 +377,11 @@ proc run_EC_mul_distributive_tests*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "EC " & G1_or_G2 & " mul is distributive over EC add":
test "EC " & $ec.G & " mul is distributive over EC add":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let a = rng.random_point(EC, randZ, gen)
@ -362,20 +398,20 @@ proc run_EC_mul_distributive_tests*(
fReference.sum(a, b)
fImpl.scalarMulGeneric(exponent)
fReference.unsafe_ECmul_double_add(exponent)
fReference.scalarMul_doubleAdd_vartime(exponent)
# [k]a + [k]b - Distributed
var kaImpl = a
var kaRef = a
kaImpl.scalarMulGeneric(exponent)
kaRef.unsafe_ECmul_double_add(exponent)
kaRef.scalarMul_doubleAdd_vartime(exponent)
var kbImpl = b
var kbRef = b
kbImpl.scalarMulGeneric(exponent)
kbRef.unsafe_ECmul_double_add(exponent)
kbRef.scalarMul_doubleAdd_vartime(exponent)
var kakbImpl{.noInit.}, kakbRef{.noInit.}: EC
kakbImpl.sum(kaImpl, kbImpl)
@ -406,17 +442,16 @@ proc run_EC_mul_vs_ref_impl*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "EC " & G1_or_G2 & " mul constant-time is equivalent to a simple double-and-add algorithm":
test "EC " & $ec.G & " mul constant-time is equivalent to a simple double-and-add and recoded algorithms":
proc test(EC: typedesc, bits: static int, randZ: bool, gen: RandomGen) =
for _ in 0 ..< ItersMul:
let a = rng.random_point(EC, randZ, gen)
let exponent = rng.random_unsafe(BigInt[bits])
# We want to test how window methods handle unbalanced 0/1
let exponent = rng.random_long01Seq(BigInt[bits])
var
impl = a
@ -424,13 +459,24 @@ proc run_EC_mul_vs_ref_impl*(
refMinWeight = a
impl.scalarMulGeneric(exponent)
reference.unsafe_ECmul_double_add(exponent)
refMinWeight.unsafe_ECmul_minHammingWeight(exponent)
reference.scalarMul_doubleAdd_vartime(exponent)
refMinWeight.scalarMul_minHammingWeight_vartime(exponent)
check:
bool(impl == reference)
bool(impl == refMinWeight)
proc refWNaf(w: static int) = # workaround staticFor symbol visibility
var refWNAF = a
refWNAF.scalarMul_minHammingWeight_windowed_vartime(exponent, window = w)
check: bool(impl == refWNAF)
refWNaf(2)
refWNaf(3)
refWNaf(5)
refWNaf(8)
refWNaf(13)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = true, gen = Uniform)
test(ec, bits = ec.F.C.getCurveOrderBitwidth(), randZ = false, gen = HighHammingWeight)
@ -451,15 +497,10 @@ proc run_EC_mixed_add_impl*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
when ec.G == G1:
const G1_or_G2 = "G1"
else:
const G1_or_G2 = "G2"
const testSuiteDesc = "Elliptic curve mixed addition for Short Weierstrass form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "EC " & G1_or_G2 & " mixed addition is consistent with general addition":
test "EC " & $ec.G & " mixed addition is consistent with general addition":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
@ -481,7 +522,7 @@ proc run_EC_mixed_add_impl*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - doubling":
test "EC " & $ec.G & " mixed addition - doubling":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
@ -506,7 +547,7 @@ proc run_EC_mixed_add_impl*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - adding infinity LHS":
test "EC " & $ec.G & " mixed addition - adding infinity LHS":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
var a{.noInit.}: EC
@ -529,7 +570,31 @@ proc run_EC_mixed_add_impl*(
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test "EC " & G1_or_G2 & " mixed addition - adding infinity RHS":
test "EC " & $ec.G & " mixed addition - adding infinity RHS":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
var naAff{.noInit.}: ECP_ShortW_Aff[EC.F, EC.G]
naAff.affine(a)
naAff.neg()
var r{.noInit.}: EC
r.madd(a, naAff)
check: r.isInf().bool
r = a
r += naAff
check: r.isInf().bool
test(ec, randZ = false, gen = Uniform)
test(ec, randZ = true, gen = Uniform)
test(ec, randZ = false, gen = HighHammingWeight)
test(ec, randZ = true, gen = HighHammingWeight)
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
test "EC " & $ec.G & " mixed addition - adding opposites":
proc test(EC: typedesc, randZ: bool, gen: RandomGen) =
for _ in 0 ..< Iters:
let a = rng.random_point(EC, randZ, gen)
@ -564,11 +629,6 @@ proc run_EC_subgroups_cofactors_impl*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
when ec.G == G1:
const G1_or_G2 = "G1"
else:
const G1_or_G2 = "G2"
const testSuiteDesc = "Elliptic curve subgroup check and cofactor clearing"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
@ -626,7 +686,7 @@ proc run_EC_subgroups_cofactors_impl*(
test(ec, randZ = false, gen = Long01Sequence)
test(ec, randZ = true, gen = Long01Sequence)
echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", G1_or_G2, " subgroup and ",
echo " [SUCCESS] Test finished with ", inSubgroup, " points in ", $ec.G, " subgroup and ",
offSubgroup, " points on curve but not in subgroup (before cofactor clearing)"
proc run_EC_affine_conversion*(
@ -642,12 +702,10 @@ proc run_EC_affine_conversion*(
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
const G1_or_G2 = pairingGroup(ec)
const testSuiteDesc = "Elliptic curve in " & $ec.F.C.getEquationForm() & " form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
test "EC " & G1_or_G2 & " batchAffine is consistent with single affine conversion":
test "EC " & $ec.G & " batchAffine is consistent with single affine conversion":
proc test(EC: typedesc, gen: RandomGen) =
const batchSize = 10
for _ in 0 ..< Iters:
@ -807,16 +865,11 @@ proc run_EC_batch_add_impl*[N: static int](
echo "\n------------------------------------------------------\n"
echo moduleName, " xoshiro512** seed: ", seed
when ec.G == G1:
const G1_or_G2 = "G1"
else:
const G1_or_G2 = "G2"
const testSuiteDesc = "Elliptic curve batch addition for Short Weierstrass form"
const testSuiteDesc = "Elliptic curve sum reduction for Short Weierstrass form"
suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
for n in numPoints:
test $ec & " batch addition (N=" & $n & ")":
test $ec & " sum reduction (N=" & $n & ")":
proc test(EC: typedesc, gen: RandomGen) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
@ -829,7 +882,7 @@ proc run_EC_batch_add_impl*[N: static int](
for i in 0 ..< n:
r_ref += points[i]
r_batch.sum_batch_vartime(points)
r_batch.sum_reduce_vartime(points)
check: bool(r_batch == r_ref)
@ -838,7 +891,7 @@ proc run_EC_batch_add_impl*[N: static int](
test(ec, gen = HighHammingWeight)
test(ec, gen = Long01Sequence)
test "EC " & G1_or_G2 & " batch addition (N=" & $n & ") - special cases":
test "EC " & $ec.G & " sum reduction (N=" & $n & ") - special cases":
proc test(EC: typedesc, gen: RandomGen) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
@ -864,10 +917,57 @@ proc run_EC_batch_add_impl*[N: static int](
for i in 0 ..< n:
r_ref += points[i]
r_batch.sum_batch_vartime(points)
r_batch.sum_reduce_vartime(points)
check: bool(r_batch == r_ref)
test(ec, gen = Uniform)
test(ec, gen = HighHammingWeight)
test(ec, gen = Long01Sequence)
proc run_EC_multi_scalar_mul_impl*[N: static int](
       ec: typedesc,
       numPoints: array[N, int],
       moduleName: string
     ) =
  ## Test suite for multi-scalar-multiplication on the curve point type `ec`.
  ##
  ## For every batch size `n` in `numPoints`, `n` points and `n` scalars are
  ## drawn, and both `multiScalarMul_reference_vartime` and
  ## `multiScalarMul_vartime` are compared against a naive accumulation
  ## of `scalarMulGeneric` results.
  ##
  ## - `ec`: projective/Jacobian point type under test
  ## - `numPoints`: batch sizes to exercise
  ## - `moduleName`: name echoed next to the RNG seed for reproducibility

  # Random seed for reproducibility
  var rng: RngState
  let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
  rng.seed(seed)
  echo "\n------------------------------------------------------\n"
  echo moduleName, " xoshiro512** seed: ", seed

  const testSuiteDesc = "Elliptic curve multi-scalar-multiplication for Short Weierstrass form"

  suite testSuiteDesc & " - " & $ec & " - [" & $WordBitWidth & "-bit mode]":
    for n in numPoints:
      # Only used to label the test case.
      let bucketBits = bestBucketBitSize(n, ec.F.C.getCurveOrderBitwidth(), useSignedBuckets = false, useManualTuning = false)
      test $ec & " Multi-scalar-mul (N=" & $n & ", bucket bits: " & $bucketBits & ")":
        proc test(EC: typedesc, gen: RandomGen) =
          const scalarBits = EC.F.C.getCurveOrderBitwidth()
          var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](n)
          var coefs = newSeq[BigInt[scalarBits]](n)

          for i in 0 ..< n:
            # Honor the requested generator: previously `gen` was ignored
            # and every invocation drew uniformly random inputs.
            var tmp = rng.random_point(EC, randZ = false, gen)
            tmp.clearCofactor()
            points[i].affine(tmp)
            coefs[i] =
              if gen == Uniform:
                rng.random_unsafe(BigInt[scalarBits])
              elif gen == HighHammingWeight:
                rng.random_highHammingWeight(BigInt[scalarBits])
              else:
                rng.random_long01Seq(BigInt[scalarBits])

          # Naive reference: sum of the individual scalar multiplications
          var naive, naive_tmp: EC
          naive.setInf()
          for i in 0 ..< n:
            naive_tmp.fromAffine(points[i])
            naive_tmp.scalarMulGeneric(coefs[i])
            naive += naive_tmp

          var msm_ref, msm: EC
          msm_ref.multiScalarMul_reference_vartime(coefs, points)
          msm.multiScalarMul_vartime(coefs, points)

          # `check` (rather than `doAssert`) reports the failure through
          # unittest, like the other suites of this file.
          check:
            bool(naive == msm_ref)
            bool(naive == msm)

        test(ec, gen = Uniform)
        test(ec, gen = HighHammingWeight)
        test(ec, gen = Long01Sequence)

View File

@ -125,7 +125,7 @@ sqrTest(random_unsafe)
sqrTest(randomHighHammingWeight)
sqrTest(random_long01Seq)
suite "Field Addition/Substraction/Negation via double-precision field elements" & " [" & $WordBitWidth & "-bit mode]":
suite "Field Addition/Substraction/Negation via double-precision field elements" & " [" & $WordBitWidth & "-bit words]":
test "With P-224 field modulus":
for _ in 0 ..< Iters:
addsubneg_random_unsafe(P224)
@ -197,7 +197,7 @@ suite "Field Addition/Substraction/Negation via double-precision field elements"
check: bool r.isZero()
suite "Field Multiplication via double-precision field elements is consistent with single-width." & " [" & $WordBitWidth & "-bit mode]":
suite "Field Multiplication via double-precision field elements is consistent with single-width." & " [" & $WordBitWidth & "-bit words]":
test "With P-224 field modulus":
for _ in 0 ..< Iters:
mul_random_unsafe(P224)
@ -262,7 +262,7 @@ suite "Field Multiplication via double-precision field elements is consistent wi
for _ in 0 ..< Iters:
mul_random_long01Seq(Vesta)
suite "Field Squaring via double-precision field elements is consistent with single-width." & " [" & $WordBitWidth & "-bit mode]":
suite "Field Squaring via double-precision field elements is consistent with single-width." & " [" & $WordBitWidth & "-bit words]":
test "With P-224 field modulus":
for _ in 0 ..< Iters:
sqr_random_unsafe(P224)

View File

@ -78,7 +78,7 @@ proc sanity(C: static Curve) =
bool(n == expected)
proc mainSanity() =
suite "Modular squaring is consistent with multiplication on special elements" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular squaring is consistent with multiplication on special elements" & " [" & $WordBitWidth & "-bit words]":
sanity Fake101
sanity Mersenne61
sanity Mersenne127
@ -94,7 +94,7 @@ proc mainSanity() =
mainSanity()
proc mainSelectCases() =
suite "Modular Squaring: selected tricky cases" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular Squaring: selected tricky cases" & " [" & $WordBitWidth & "-bit words]":
test "P-256 [FastSquaring = " & $(Fp[P256].getSpareBits() >= 2) & "]":
block:
# Triggered an issue in the (t[N+1], t[N]) = t[N] + (A1, A0)
@ -141,7 +141,7 @@ proc random_long01Seq(C: static Curve) =
doAssert bool(r_mul == r_sqr)
suite "Random Modular Squaring is consistent with Modular Multiplication" & " [" & $WordBitWidth & "-bit mode]":
suite "Random Modular Squaring is consistent with Modular Multiplication" & " [" & $WordBitWidth & "-bit words]":
test "Random squaring mod P-224 [FastSquaring = " & $(Fp[P224].getSpareBits() >= 2) & "]":
for _ in 0 ..< Iters:
randomCurve(P224)
@ -358,7 +358,7 @@ proc random_sumprod(C: static Curve, N: static int) =
sumprod_test(random_long01Seq)
sumProdMax()
suite "Random sum products is consistent with naive " & " [" & $WordBitWidth & "-bit mode]":
suite "Random sum products is consistent with naive " & " [" & $WordBitWidth & "-bit words]":
const MaxLength = 8
test "Random sum products mod P-224]":

View File

@ -29,7 +29,7 @@ echo "\n------------------------------------------------------\n"
echo "test_finite_fields_powinv xoshiro512** seed: ", seed
proc main() =
suite "Modular exponentiation over finite fields" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular exponentiation over finite fields" & " [" & $WordBitWidth & "-bit words]":
test "n² mod 101":
let exponent = BigInt[64].fromUint(2'u64)
@ -202,7 +202,7 @@ proc main() =
testRandomDiv2 Pallas
testRandomDiv2 Vesta
suite "Modular inversion over prime fields" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular inversion over prime fields" & " [" & $WordBitWidth & "-bit words]":
test "Specific tests on Fp[BLS12_381]":
block: # No inverse exist for 0 --> should return 0 for projective/jacobian to affine coordinate conversion
var r, x: Fp[BLS12_381]
@ -210,12 +210,20 @@ proc main() =
r.inv(x)
check: bool r.isZero()
var r2: Fp[BLS12_381]
r2.inv_vartime(x)
check: bool r2.isZero()
block:
var r, x: Fp[BLS12_381]
x.setOne()
r.inv(x)
check: bool r.isOne()
var r2: Fp[BLS12_381]
r2.inv_vartime(x)
check: bool r2.isOne()
block:
var r, x: Fp[BLS12_381]
@ -229,6 +237,10 @@ proc main() =
check:
computed == expected
var r2: Fp[BLS12_381]
r2.inv_vartime(x)
let computed2 = r2.toHex()
# The variable-time inversion must match the same expected encoding as the
# constant-time result; without this check `computed2` was never verified.
check: computed2 == expected
test "Specific tests on Fp[BN254_Snarks]":
block:
var r, x: Fp[BN254_Snarks]
@ -244,6 +256,10 @@ proc main() =
r.inv(x)
check: bool(r == expected)
var r2: Fp[BN254_Snarks]
r2.inv_vartime(x)
check: bool(r2 == expected)
block:
var r, x, expected: Fp[BN254_Snarks]
x.fromHex"0x0d2007d8aaface1b8501bfbe792974166e8f9ad6106e5b563604f0aea9ab06f6"
@ -252,6 +268,10 @@ proc main() =
r.inv(x)
check: bool(r == expected)
var r2: Fp[BN254_Snarks]
r2.inv_vartime(x)
check: bool(r2 == expected)
proc testRandomInv(curve: static Curve) =
test "Random inversion testing on " & $Curve(curve):
var aInv, r: Fp[curve]
@ -264,6 +284,12 @@ proc main() =
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
aInv.inv_vartime(a)
r.prod(a, aInv)
check: bool r.isOne() or (a.isZero() and r.isZero())
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
for _ in 0 ..< Iters:
let a = rng.randomHighHammingWeight(Fp[curve])
aInv.inv(a)
@ -272,6 +298,11 @@ proc main() =
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
aInv.inv_vartime(a)
r.prod(a, aInv)
check: bool r.isOne() or (a.isZero() and r.isZero())
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
for _ in 0 ..< Iters:
let a = rng.random_long01Seq(Fp[curve])
aInv.inv(a)
@ -280,6 +311,12 @@ proc main() =
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
aInv.inv_vartime(a)
r.prod(a, aInv)
check: bool r.isOne() or (a.isZero() and r.isZero())
r.prod(aInv, a)
check: bool r.isOne() or (a.isZero() and r.isZero())
testRandomInv P224
testRandomInv BN254_Nogami
testRandomInv BN254_Snarks
@ -295,7 +332,7 @@ proc main() =
main()
proc main_anti_regression =
suite "Bug highlighted by property-based testing" & " [" & $WordBitWidth & "-bit mode]":
suite "Bug highlighted by property-based testing" & " [" & $WordBitWidth & "-bit words]":
# test "#30 - Euler's Criterion should be 1 for square on FKM12_447":
# var a: Fp[FKM12_447]
# # square of "0x406e5e74ee09c84fa0c59f2db3ac814a4937e2f57ecd3c0af4265e04598d643c5b772a6549a2d9b825445c34b8ba100fe8d912e61cfda43d"

View File

@ -146,7 +146,7 @@ proc randomSqrtRatioCheck(C: static Curve) =
testSqrtRatioImpl(u, v)
proc main() =
suite "Modular square root" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular square root" & " [" & $WordBitWidth & "-bit words]":
exhaustiveCheck Fake103, 103
# exhaustiveCheck Fake10007, 10007
# exhaustiveCheck Fake65519, 65519
@ -161,14 +161,14 @@ proc main() =
randomSqrtCheck Pallas
randomSqrtCheck Vesta
suite "Modular sqrt(u/v)" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular sqrt(u/v)" & " [" & $WordBitWidth & "-bit words]":
randomSqrtRatioCheck Edwards25519
randomSqrtRatioCheck Jubjub
randomSqrtRatioCheck Bandersnatch
randomSqrtRatioCheck Pallas
randomSqrtRatioCheck Vesta
suite "Modular square root - 32-bit bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit mode]":
suite "Modular square root - 32-bit bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit words]":
# test "FKM12_447 - #30": - Deactivated, we don't support the curve as no one uses it.
# var a: Fp[FKM12_447]
# a.fromHex"0x406e5e74ee09c84fa0c59f2db3ac814a4937e2f57ecd3c0af4265e04598d643c5b772a6549a2d9b825445c34b8ba100fe8d912e61cfda43d"

View File

@ -175,7 +175,7 @@ proc test_invpow(C: static Curve, gen: RandomGen) =
doAssert: bool(xa == xqya)
suite "Exponentiation in 𝔽p12" & " [" & $WordBitWidth & "-bit mode]":
suite "Exponentiation in 𝔽p12" & " [" & $WordBitWidth & "-bit words]":
staticFor(curve, TestCurves):
test "xᴬ xᴮ = xᴬ⁺ᴮ on " & $curve:
test_sameBaseProduct(curve, gen = Uniform)

View File

@ -70,14 +70,14 @@ proc randomSqrtCheck(C: static Curve, gen: RandomGen) =
bool(s == a or s == na)
proc main() =
suite "Modular square root" & " [" & $WordBitWidth & "-bit mode]":
suite "Modular square root" & " [" & $WordBitWidth & "-bit words]":
staticFor(curve, TestCurves):
test "[𝔽p2] Random square root check for " & $curve:
randomSqrtCheck(curve, gen = Uniform)
randomSqrtCheck(curve, gen = HighHammingWeight)
randomSqrtCheck(curve, gen = Long01Sequence)
suite "Modular square root - 32-bit bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit mode]":
suite "Modular square root - 32-bit bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit words]":
test "sqrt_if_square invalid square BLS12_381 - #64":
var a: Fp2[BLS12_381]
a.fromHex(
@ -98,7 +98,7 @@ proc main() =
bool not a.isSquare()
bool not a.sqrt_if_square()
suite "Modular square root - Assembly bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit mode]":
suite "Modular square root - Assembly bugs highlighted by property-based testing " & " [" & $WordBitWidth & "-bit words]":
test "Don't set Neg(Zero) fields to modulus (non-unique Montgomery repr) - #136":
# https://github.com/mratsim/constantine/issues/136
# and https://github.com/mratsim/constantine/issues/114

View File

@ -23,7 +23,7 @@ proc checkCubeRootOfUnity(curve: static Curve) =
check: bool cru.isOne()
proc main() =
suite "Sanity checks on precomputed values" & " [" & $WordBitWidth & "-bit mode]":
suite "Sanity checks on precomputed values" & " [" & $WordBitWidth & "-bit words]":
checkCubeRootOfUnity(BN254_Snarks)
checkCubeRootOfUnity(BLS12_377)
checkCubeRootOfUnity(BLS12_381)

View File

@ -68,7 +68,7 @@ proc runFrobeniusTowerTests*[N](
rng.seed(seed)
echo moduleName, " xoshiro512** seed: ", seed
suite testSuiteDesc & " [" & $WordBitWidth & "-bit mode]":
suite testSuiteDesc & " [" & $WordBitWidth & "-bit words]":
test "Frobenius(a) = a^p (mod p^" & $ExtDegree & ")":
proc test(Field: typedesc, Iters: static int, gen: RandomGen) =
for _ in 0 ..< Iters:

Some files were not shown because too many files have changed in this diff Show More