mirror of
https://github.com/codex-storage/constantine.git
synced 2025-01-12 12:04:07 +00:00
Perf quick wins - 10% Fp12 mul (#235)
* improve FP12_mul perf by 10% * update README [skip ci]
This commit is contained in:
parent
33c3a2e8c4
commit
1c5341fd7e
14
README.md
14
README.md
@ -37,7 +37,9 @@ The implementations are accompanied with SAGE code used as reference implementat
|
|||||||
- [In blockchain](#in-blockchain)
|
- [In blockchain](#in-blockchain)
|
||||||
- [In zero-knowledge proofs](#in-zero-knowledge-proofs)
|
- [In zero-knowledge proofs](#in-zero-knowledge-proofs)
|
||||||
- [Measuring performance](#measuring-performance)
|
- [Measuring performance](#measuring-performance)
|
||||||
- [BLS12_381 Clang + inline Assembly](#bls12_381-clang--inline-assembly)
|
- [Ethereum BLS signatures over BLS12-381 G2](#ethereum-bls-signatures-over-bls12-381-g2)
|
||||||
|
- [BLS12-381 detailed benchmarks](#bls12-381-detailed-benchmarks)
|
||||||
|
- [BN254-Snarks Multi-Scalar-Multiplication benchmarks](#bn254-snarks-multi-scalar-multiplication-benchmarks)
|
||||||
- [Parallelism](#parallelism)
|
- [Parallelism](#parallelism)
|
||||||
- [Why Nim](#why-nim)
|
- [Why Nim](#why-nim)
|
||||||
- [Compiler caveats](#compiler-caveats)
|
- [Compiler caveats](#compiler-caveats)
|
||||||
@ -304,7 +306,11 @@ The full list of benchmarks is available in the [`benchmarks`](./benchmarks) fol
|
|||||||
|
|
||||||
As mentioned in the [Compiler caveats](#compiler-caveats) section, GCC is up to 2x slower than Clang due to mishandling of carries and register usage.
|
As mentioned in the [Compiler caveats](#compiler-caveats) section, GCC is up to 2x slower than Clang due to mishandling of carries and register usage.
|
||||||
|
|
||||||
#### BLS12_381 (Clang + inline Assembly)
|
#### Ethereum BLS signatures (over BLS12-381 G2)
|
||||||
|
|
||||||
|
![Bench Ethereum BLS signature](./media/ethereum_bls_signatures.png)
|
||||||
|
|
||||||
|
#### BLS12-381 detailed benchmarks
|
||||||
|
|
||||||
On my machine i9-11980HK (8 cores 2.6GHz, turbo 5GHz), for Clang + Assembly, **all being constant-time** (including scalar multiplication, square root and inversion).
|
On my machine i9-11980HK (8 cores 2.6GHz, turbo 5GHz), for Clang + Assembly, **all being constant-time** (including scalar multiplication, square root and inversion).
|
||||||
|
|
||||||
@ -314,9 +320,11 @@ On my machine i9-11980HK (8 cores 2.6GHz, turbo 5GHz), for Clang + Assembly, **a
|
|||||||
![BLS12-381 Multi-Scalar multiplication 2](./media/bls12_381_msm_i9-11980HK-8cores_2.png)
|
![BLS12-381 Multi-Scalar multiplication 2](./media/bls12_381_msm_i9-11980HK-8cores_2.png)
|
||||||
![BLS12-381 Multi-Scalar multiplication 3](./media/bls12_381_msm_i9-11980HK-8cores_3.png)
|
![BLS12-381 Multi-Scalar multiplication 3](./media/bls12_381_msm_i9-11980HK-8cores_3.png)
|
||||||
|
|
||||||
|
#### BN254-Snarks Multi-Scalar-Multiplication benchmarks
|
||||||
|
|
||||||
On an i9-9980XE (18 cores, watercooled, overclocked, 4.1GHz all-core turbo)
|
On an i9-9980XE (18 cores, watercooled, overclocked, 4.1GHz all-core turbo)
|
||||||
|
|
||||||
![BN254-Snarks multi-sclar multiplication](./media/bn254_snarks_msm-i9-9980XE-18cores.png)
|
![BN254-Snarks multi-scalar multiplication](./media/bn254_snarks_msm-i9-9980XE-18cores.png)
|
||||||
|
|
||||||
#### Parallelism
|
#### Parallelism
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ macro fixEllipticDisplay(EC: typedesc): untyped =
|
|||||||
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
|
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
|
||||||
let fieldName = $instantiated[1][1][0]
|
let fieldName = $instantiated[1][1][0]
|
||||||
let curveName = $Curve(instantiated[1][1][1].intVal)
|
let curveName = $Curve(instantiated[1][1][1].intVal)
|
||||||
name.add "[" & fieldName & "[" & curveName & "]]"
|
name.add "[" & fieldName & "[" & curveName & ']'
|
||||||
result = newLit name
|
result = newLit name
|
||||||
|
|
||||||
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||||
|
@ -76,12 +76,11 @@ func sqrx_complex_sparebit_asm_adx*(
|
|||||||
|
|
||||||
static: doAssert Fp.has1extraBit()
|
static: doAssert Fp.has1extraBit()
|
||||||
|
|
||||||
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
|
var v0 {.noInit.}, v1 {.noInit.}, v2{.noInit.}: typeof(r.c0)
|
||||||
|
v2.double(a.c1)
|
||||||
v0.diff(a.c0, a.c1)
|
v0.diff(a.c0, a.c1)
|
||||||
v1.sum(a.c0, a.c1)
|
v1.sum(a.c0, a.c1)
|
||||||
r.c1.mres.limbs.mulMont_CIOS_sparebit_asm_adx(a.c0.mres.limbs, a.c1.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
r.c1.mres.limbs.mulMont_CIOS_sparebit_asm_adx(a.c0.mres.limbs, v2.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
||||||
# aliasing: a unneeded now
|
|
||||||
r.c1.double()
|
|
||||||
r.c0.mres.limbs.mulMont_CIOS_sparebit_asm_adx(v0.mres.limbs, v1.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
r.c0.mres.limbs.mulMont_CIOS_sparebit_asm_adx(v0.mres.limbs, v1.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
||||||
|
|
||||||
# 𝔽p2 multiplication
|
# 𝔽p2 multiplication
|
||||||
|
@ -57,8 +57,8 @@ type
|
|||||||
CubicExt[Fp2[C]]
|
CubicExt[Fp2[C]]
|
||||||
|
|
||||||
Fp12*[C: static Curve] =
|
Fp12*[C: static Curve] =
|
||||||
# CubicExt[Fp4[C]]
|
CubicExt[Fp4[C]]
|
||||||
QuadraticExt[Fp6[C]]
|
# QuadraticExt[Fp6[C]]
|
||||||
|
|
||||||
template c0*(a: ExtensionField): auto =
|
template c0*(a: ExtensionField): auto =
|
||||||
a.coords[0]
|
a.coords[0]
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 188 KiB |
BIN
media/ethereum_bls_signatures.png
Normal file
BIN
media/ethereum_bls_signatures.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 78 KiB |
Loading…
x
Reference in New Issue
Block a user