Perf quick wins - 10% Fp12 mul (#235)
* improve FP12_mul perf by 10% * update README [skip ci]
This commit is contained in:
parent
33c3a2e8c4
commit
1c5341fd7e
14
README.md
14
README.md
|
@ -37,7 +37,9 @@ The implementations are accompanied with SAGE code used as reference implementat
|
|||
- [In blockchain](#in-blockchain)
|
||||
- [In zero-knowledge proofs](#in-zero-knowledge-proofs)
|
||||
- [Measuring performance](#measuring-performance)
|
||||
- [BLS12_381 Clang + inline Assembly](#bls12_381-clang--inline-assembly)
|
||||
- [Ethereum BLS signatures over BLS12-381 G2](#ethereum-bls-signatures-over-bls12-381-g2)
|
||||
- [BLS12-381 detailed benchmarks](#bls12-381-detailed-benchmarks)
|
||||
- [BN254-Snarks Multi-Scalar-Multiplication benchmarks](#bn254-snarks-multi-scalar-multiplication-benchmarks)
|
||||
- [Parallelism](#parallelism)
|
||||
- [Why Nim](#why-nim)
|
||||
- [Compiler caveats](#compiler-caveats)
|
||||
|
@ -304,7 +306,11 @@ The full list of benchmarks is available in the [`benchmarks`](./benchmarks) fol
|
|||
|
||||
As mentioned in the [Compiler caveats](#compiler-caveats) section, GCC is up to 2x slower than Clang due to mishandling of carries and register usage.
|
||||
|
||||
#### BLS12_381 (Clang + inline Assembly)
|
||||
#### Ethereum BLS signatures (over BLS12-381 G2)
|
||||
|
||||
![Bench Ethereum BLS signature](./media/ethereum_bls_signatures.png)
|
||||
|
||||
#### BLS12-381 detailed benchmarks
|
||||
|
||||
On my machine, an i9-11980HK (8 cores 2.6GHz, turbo 5GHz), with Clang + Assembly, **all operations being constant-time** (including scalar multiplication, square root and inversion).
|
||||
|
||||
|
@ -314,9 +320,11 @@ On my machine i9-11980HK (8 cores 2.6GHz, turbo 5GHz), for Clang + Assembly, **a
|
|||
![BLS12-381 Multi-Scalar multiplication 2](./media/bls12_381_msm_i9-11980HK-8cores_2.png)
|
||||
![BLS12-381 Multi-Scalar multiplication 3](./media/bls12_381_msm_i9-11980HK-8cores_3.png)
|
||||
|
||||
#### BN254-Snarks Multi-Scalar-Multiplication benchmarks
|
||||
|
||||
On an i9-9980XE (18 cores, watercooled, overclocked, 4.1GHz all-core turbo)
|
||||
|
||||
![BN254-Snarks multi-sclar multiplication](./media/bn254_snarks_msm-i9-9980XE-18cores.png)
|
||||
![BN254-Snarks multi-scalar multiplication](./media/bn254_snarks_msm-i9-9980XE-18cores.png)
|
||||
|
||||
#### Parallelism
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ macro fixEllipticDisplay(EC: typedesc): untyped =
|
|||
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
|
||||
let fieldName = $instantiated[1][1][0]
|
||||
let curveName = $Curve(instantiated[1][1][1].intVal)
|
||||
name.add "[" & fieldName & "[" & curveName & "]]"
|
||||
name.add "[" & fieldName & "[" & curveName & ']'
|
||||
result = newLit name
|
||||
|
||||
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
|
|
|
@ -76,12 +76,11 @@ func sqrx_complex_sparebit_asm_adx*(
|
|||
|
||||
static: doAssert Fp.has1extraBit()
|
||||
|
||||
var v0 {.noInit.}, v1 {.noInit.}: typeof(r.c0)
|
||||
var v0 {.noInit.}, v1 {.noInit.}, v2{.noInit.}: typeof(r.c0)
|
||||
v2.double(a.c1)
|
||||
v0.diff(a.c0, a.c1)
|
||||
v1.sum(a.c0, a.c1)
|
||||
r.c1.mres.limbs.mulMont_CIOS_sparebit_asm_adx(a.c0.mres.limbs, a.c1.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
||||
# aliasing: a unneeded now
|
||||
r.c1.double()
|
||||
r.c1.mres.limbs.mulMont_CIOS_sparebit_asm_adx(a.c0.mres.limbs, v2.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
||||
r.c0.mres.limbs.mulMont_CIOS_sparebit_asm_adx(v0.mres.limbs, v1.mres.limbs, Fp.fieldMod().limbs, Fp.getNegInvModWord())
|
||||
|
||||
# 𝔽p2 multiplication
|
||||
|
|
|
@ -57,8 +57,8 @@ type
|
|||
CubicExt[Fp2[C]]
|
||||
|
||||
Fp12*[C: static Curve] =
|
||||
# CubicExt[Fp4[C]]
|
||||
QuadraticExt[Fp6[C]]
|
||||
CubicExt[Fp4[C]]
|
||||
# QuadraticExt[Fp6[C]]
|
||||
|
||||
template c0*(a: ExtensionField): auto =
|
||||
a.coords[0]
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 150 KiB After Width: | Height: | Size: 188 KiB |
Binary file not shown.
After Width: | Height: | Size: 78 KiB |
Loading…
Reference in New Issue