From c925b0c13c9cb6cf4852ca3e91dd2d833312c6c6 Mon Sep 17 00:00:00 2001
From: Mamy Ratsimbazafy <mamy_github@numforge.co>
Date: Sat, 5 Dec 2020 14:20:18 +0100
Subject: [PATCH] Benchmarks rework (#97)

* revive Miracl primitives benchmark

* Revive BLST benchmarks

* Bench hash-to-curve

* Add benchmark of BLS sign, verify and fastAggregateVerify

* Bench all + add benchmarks to CI

* don't bench on 32-bit, inline ASM issue with low-level calls (but high level calls are fine)

* Actually it's the SHA256 tests on 32-bit that cause the ASM issue, due to inlined headers

* don't bench at all on 32-bit for now

* fix: don't test SHA256 on PowerPC
---
 benchmarks/bench_all.nim       |  21 ++-
 benchmarks/bench_templates.nim |  12 +-
 benchmarks/bls12381_curve.nim  | 256 +++++++++++++++++++++++++--------
 benchmarks/bls_signature.nim   |  73 ++++++++++
 benchmarks/hash_to_curve.nim   |  38 +++--
 benchmarks/keygen.nim          |  35 -----
 blscurve.nimble                |  29 ++--
 blscurve/bls_backend.nim       |   3 +-
 8 files changed, 333 insertions(+), 134 deletions(-)
 delete mode 100644 benchmarks/keygen.nim

diff --git a/benchmarks/bench_all.nim b/benchmarks/bench_all.nim
index 0b186b5..3003d0b 100644
--- a/benchmarks/bench_all.nim
+++ b/benchmarks/bench_all.nim
@@ -1,15 +1,26 @@
 import
+  ../blscurve,
   ./bls12381_curve,
-  ./hash_to_curve
+  ./hash_to_curve,
+  ./bls_signature
 
+# Curve operations
 benchScalarMultG1(1000)
 benchScalarMultG2(1000)
 benchEcAddG1(1000)
 benchEcAddG2(1000)
 
-benchPairingViaDoublePairing(1000)
-benchPairingViaMultiPairing(1000)
+# Pairings
+when BLS_BACKEND == BLST:
+  benchBLSTPairing(1000)
+else:
+  benchMiraclPairingViaDoublePairing(1000)
+  benchMiraclPairingViaMultiPairing(1000)
 
-echo "\n⚠️ Warning: using draft v5 of IETF Hash-To-Curve (HKDF-based)."
-echo "           This is an outdated draft.\n"
+# Hash-to-curve implementation
 benchHashToG2(1000)
+
+# High-level BLS signature scheme
+benchSign(1000)
+benchVerify(1000)
+benchFastAggregateVerify(numKeys = 128, iters = 10)
diff --git a/benchmarks/bench_templates.nim b/benchmarks/bench_templates.nim
index 0fe07f4..2d4c218 100644
--- a/benchmarks/bench_templates.nim
+++ b/benchmarks/bench_templates.nim
@@ -13,6 +13,8 @@ import
   # Standard library
   std/[monotimes, times, strformat, strutils, macros]
 
+from ../blscurve import BLS_BACKEND
+
 # warmup
 proc warmup*() =
   # Warmup - make sure cpu is on max perf
@@ -41,19 +43,15 @@ else:
 
 echo "Optimization level => no optimization: ", not defined(release), " | release: ", defined(release), " | danger: ", defined(danger)
 
-when (sizeof(int) == 4) or defined(use32):
-  echo "⚠️ Warning: using Milagro with 32-bit limbs"
-else:
-  echo "Using Milagro with 64-bit limbs"
-
 when SupportsCPUName:
-  echo "Running on ", cpuName(), "\n\n"
+  echo "Running on ", cpuName(), "\n"
 
 when SupportsGetTicks:
   echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
   echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
 
-echo "\n=================================================================================================================\n"
+echo "\nBackend: ", $BLS_BACKEND, ", mode: ", if defined(use32): $32 else: $(sizeof(int) * 8), "-bit"
+echo "=================================================================================================================\n"
 
 proc report(op: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
   let ns = inNanoseconds((stop-start) div iters)
diff --git a/benchmarks/bls12381_curve.nim b/benchmarks/bls12381_curve.nim
index 7a0d645..d515cec 100644
--- a/benchmarks/bls12381_curve.nim
+++ b/benchmarks/bls12381_curve.nim
@@ -8,13 +8,17 @@
 # those terms.
 
 import
-  # Internals
-  ../blscurve/common,
-  ../blscurve/milagro,
-  ../blscurve/hash_to_curve,
-  # Bench
-  ./bench_templates,
-  ./keygen
+  std/random,
+  ../blscurve,
+  ./bench_templates
+
+when BLS_BACKEND == BLST:
+  import
+    ../blscurve/blst/blst_abi
+else:
+  import
+    ../blscurve/miracl/[common, milagro],
+    ../blscurve/miracl/hash_to_curve
 
 # ############################################################
 #
@@ -23,79 +27,202 @@ import
 #
 # ############################################################
 
+var benchRNG = initRand(0xFACADE)
 
 proc benchScalarMultG1*(iters: int) =
-  var x = generator1()
-  var scal: BIG_384
-  random(scal)
+  when BLS_BACKEND == BLST:
+    var x{.noInit.}: blst_p1
+    x.blst_p1_from_affine(BLS12_381_G1) # init from generator
 
-  bench("Scalar multiplication G1", iters):
-    x.mul(scal)
+    var scal{.noInit.}: array[32, byte]
+    for val in scal.mitems:
+      val = byte benchRNG.rand(0xFF)
+
+    var scalar{.noInit.}: blst_scalar
+    scalar.blst_scalar_from_bendian(scal)
+
+    bench("Scalar multiplication G1 (255-bit, constant-time)", iters):
+      x.blst_p1_mult(x, scalar, 255)
+  else:
+    var x = generator1()
+    var scal: BIG_384
+    random(scal)
+    scal.BIG_384_mod(CURVE_Order)
+
+    bench("Scalar multiplication G1 (255-bit, constant-time)", iters):
+      x.mul(scal)
 
 proc benchScalarMultG2*(iters: int) =
-  var x = generator2()
-  var scal: BIG_384
-  random(scal)
+  when BLS_BACKEND == BLST:
+    var x{.noInit.}: blst_p2
+    x.blst_p2_from_affine(BLS12_381_G2) # init from generator
 
-  bench("Scalar multiplication G2", iters):
-    x.mul(scal)
+    var scal{.noInit.}: array[32, byte]
+    for val in scal.mitems:
+      val = byte benchRNG.rand(0xFF)
+
+    var scalar{.noInit.}: blst_scalar
+    scalar.blst_scalar_from_bendian(scal)
+
+    bench("Scalar multiplication G2 (255-bit, constant-time)", iters):
+      x.blst_p2_mult(x, scalar, 255)
+  else:
+    var x = generator2()
+    var scal: BIG_384
+    random(scal)
+    scal.BIG_384_mod(CURVE_Order)
+
+    bench("Scalar multiplication G2 (255-bit, constant-time)", iters):
+      x.mul(scal)
 
 proc benchECAddG1*(iters: int) =
-  var x = generator1()
-  var y = generator1()
+  when BLS_BACKEND == BLST:
+    var x{.noInit.}, y{.noInit.}: blst_p1
+    x.blst_p1_from_affine(BLS12_381_G1) # init from generator
+    y = x
 
-  bench("EC add G1", iters):
-    x.add(y)
+    bench("EC add G1 (constant-time)", iters):
+      x.blst_p1_add_or_double(x, y)
+  else:
+    var x = generator1()
+    var y = generator1()
+
+    bench("EC add G1 (constant-time)", iters):
+      x.add(y)
 
 proc benchECAddG2*(iters: int) =
-  var x = generator2()
-  var y = generator2()
+  when BLS_BACKEND == BLST:
+    var x{.noInit.}, y{.noInit.}: blst_p2
+    x.blst_p2_from_affine(BLS12_381_G2) # init from generator
+    y = x
 
-  bench("EC add G2", iters):
-    x.add(y)
+    bench("EC add G2 (constant-time)", iters):
+      x.blst_p2_add_or_double(x, y)
+  else:
+    var x = generator2()
+    var y = generator2()
 
-proc benchPairingViaDoublePairing*(iters: int) =
-  ## Builtin Milagro Double-Pairing implementation
-  # Ideally we don't depend on the bls_signature_scheme but it's much simpler
-  let (pubkey, seckey) = newKeyPair()
-  let msg = "msg"
-  const domainSepTag = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
+    bench("EC add G2 (constant-time)", iters):
+      x.add(y)
 
-  # Signing
-  var sig = hashToG2(msg, domainSepTag)
-  sig.mul(seckey)
+when BLS_BACKEND == BLST:
 
-  # Verification
-  let generator = generator1()
-  let Q = hashToG2(msg, domainSepTag)
-  # Pairing: e(Q, xP) == e(R, P)
-  bench("Pairing (Milagro builtin double pairing)", iters):
-    let valid = doublePairing(
-      Q, pubkey,
-      sig, generator
+  proc benchBLSTPairing*(iters: int) = # Benchmarks BLST commit + finalVerify on one (pubkey, msg, sig) triple
+    let (pubkey, seckey) = block:
+      var pk: PublicKey
+      var sk: SecretKey
+      var ikm: array[32, byte] # fixed input keying material: same key pair on every run
+      ikm[0] = 0x12
+      doAssert ikm.keyGen(pk, sk), "Key generation failure"
+      (cast[blst_p1_affine](pk), cast[blst_scalar](sk)) # cast to the backend types used by the low-level calls below
+    let msg = "Mr F was here"
+    const domainSepTag = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
+
+    # Signing
+    var sig = block:
+      var sig {.noInit.}: blst_p2_affine
+      var s {.noInit.}: blst_p2
+      s.blst_hash_to_g2(
+        msg,
+        domainSepTag,
+        aug = ""
+      )
+      s.blst_sign_pk_in_g1(s, seckey)
+      sig.blst_p2_to_affine(s)
+      sig
+
+    # Verification
+    let ctx = createU(blst_pairing) # Heap to avoid stack smashing
+    ctx[].blst_pairing_init(
+      hash_or_encode = kHash,
+      domainSepTag
+    )
+    doAssert BLST_SUCCESS == ctx[].blst_pairing_aggregate_pk_in_g1(
+      PK = pubkey.unsafeAddr,
+      signature = nil,
+      msg,
+      aug = ""
+    )
+    doAssert BLST_SUCCESS == ctx[].blst_pairing_aggregate_pk_in_g1(
+      PK = nil,
+      signature = sig.unsafeAddr,
+      msg = "",
+      aug = ""
     )
 
-proc benchPairingViaMultiPairing*(iters: int) =
-  ## MultiPairing implementation
-  ## Using deferred Miller loop + Final Exponentiation
-  # Ideally we don't depend on the bls_signature_scheme but it's much simpler
-  let (pubkey, seckey) = newKeyPair()
-  let msg = "msg"
-  const domainSepTag = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
+    # Cache the benchmarking context, there will be a ~8MB copy overhead (context size)
+    let ctxSave = createU(blst_pairing) # NOTE(review): ctx and ctxSave are never deallocated in this proc
+    ctxSave[] = ctx[]
 
-  # Signing
-  var sig = hashToG2(msg, domainSepTag)
-  sig.mul(seckey)
+    ctx[].blst_pairing_commit()                     # Miller loop
+    let valid = ctx[].blst_pairing_finalVerify(nil) # Final Exponentiation
+    doAssert bool valid # check once, outside the timed body, that the signature verifies
 
-  # Verification
-  let generator = generator1()
-  let Q = hashToG2(msg, domainSepTag)
-  # Pairing: e(Q, xP) == e(R, P)
-  bench("Pairing (Multi-Pairing with delayed Miller and Exp)", iters):
-    let valid = multiPairing(
-      Q, pubkey,
-      sig, generator
-    )
+    # Pairing: e(Q, xP) == e(R, P)
+    bench("Pairing (Miller loop + Final Exponentiation)", iters):
+      ctx[] = ctxSave[] # restore the saved pre-commit state; this copy is part of the timed body
+      ctx[].blst_pairing_commit()                     # Miller loop
+      let valid = ctx[].blst_pairing_finalVerify(nil) # Final Exponentiation
+      # doAssert bool valid
+
+else:
+
+  proc benchMiraclPairingViaDoublePairing*(iters: int) =
+    ## Builtin Miracl Double-Pairing implementation
+    # Ideally we don't depend on the bls_signature_scheme but it's much simpler
+    let (pubkey, seckey) = block:
+      var pk: PublicKey
+      var sk: SecretKey
+      var ikm: array[32, byte] # fixed input keying material: same key pair on every run
+      ikm[0] = 0x12
+      doAssert ikm.keyGen(pk, sk), "Key generation failure"
+      (cast[ECP_BLS12381](pk), cast[BIG_384](sk)) # cast to the backend types used by the low-level calls below
+    let msg = "Mr F was here"
+    const domainSepTag = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
+
+    # Signing
+    var sig = hashToG2(msg, domainSepTag)
+    sig.mul(seckey)
+
+    # Verification
+    let generator = generator1()
+    let Q = hashToG2(msg, domainSepTag) # hashed again separately from the signing step above
+    # Pairing: e(Q, xP) == e(R, P)
+    bench("Pairing (Milagro builtin double pairing)", iters):
+      let valid = doublePairing(
+        Q, pubkey,
+        sig, generator
+      )
+      # doAssert valid
+
+  proc benchMiraclPairingViaMultiPairing*(iters: int) =
+    ## MultiPairing implementation
+    ## Using deferred Miller loop + Final Exponentiation
+    # Ideally we don't depend on the bls_signature_scheme but it's much simpler
+    let (pubkey, seckey) = block:
+      var pk: PublicKey
+      var sk: SecretKey
+      var ikm: array[32, byte] # fixed input keying material: same key pair on every run
+      ikm[0] = 0x12
+      doAssert ikm.keyGen(pk, sk), "Key generation failure"
+      (cast[ECP_BLS12381](pk), cast[BIG_384](sk)) # cast to the backend types used by the low-level calls below
+    let msg = "Mr F was here"
+    const domainSepTag = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
+
+    # Signing
+    var sig = hashToG2(msg, domainSepTag)
+    sig.mul(seckey)
+
+    # Verification
+    let generator = generator1()
+    let Q = hashToG2(msg, domainSepTag) # hashed again separately from the signing step above
+    # Pairing: e(Q, xP) == e(R, P)
+    bench("Pairing (Multi-Pairing with delayed Miller and Exp)", iters):
+      let valid = multiPairing(
+        Q, pubkey,
+        sig, generator
+      )
+      # doAssert valid
 
 when isMainModule:
   benchScalarMultG1(1000)
@@ -103,5 +230,8 @@ when isMainModule:
   benchEcAddG1(1000)
   benchEcAddG2(1000)
 
-  benchPairingViaDoublePairing(1000)
-  benchPairingViaMultiPairing(1000)
+  when BLS_BACKEND == BLST:
+    benchBLSTPairing(1000)
+  else:
+    benchMiraclPairingViaDoublePairing(1000)
+    benchMiraclPairingViaMultiPairing(1000)
diff --git a/benchmarks/bls_signature.nim b/benchmarks/bls_signature.nim
index a555d20..6a66e2f 100644
--- a/benchmarks/bls_signature.nim
+++ b/benchmarks/bls_signature.nim
@@ -7,6 +7,10 @@
 # This file may not be copied, modified, or distributed except according to
 # those terms.
 
+import
+  std/random,
+  ../blscurve,
+  ./bench_templates
 
 # ############################################################
 #
@@ -14,3 +18,72 @@
 #                   (Boneh-Lynn-Schacham)
 #
 # ############################################################
+
+var benchRNG = initRand(0xFACADE)
+
+proc benchSign*(iters: int) = # Benchmarks `sk.sign(msg)`; `iters` is forwarded to `bench`
+  let msg = "Mr F was here"
+
+  var pk: PublicKey # filled by keyGen; not used by the benchmarked body
+  var sk: SecretKey
+  var ikm: array[32, byte] # input keying material fed to keyGen
+
+  for b in ikm.mitems:
+    b = byte benchRNG.rand(0xFF) # bytes come from the module-level benchRNG (constant seed)
+  doAssert ikm.keyGen(pk, sk) # stop here if key derivation reports failure
+
+  bench("BLS signature", iters):
+    let sig = sk.sign(msg) # result is unused; only the call matters here
+
+proc benchVerify*(iters: int) = # Benchmarks `pk.verify(msg, sig)`; `iters` is forwarded to `bench`
+  let msg = "Mr F was here"
+
+  var pk: PublicKey
+  var sk: SecretKey
+  var ikm: array[32, byte] # input keying material fed to keyGen
+
+  for b in ikm.mitems:
+    b = byte benchRNG.rand(0xFF) # bytes come from the module-level benchRNG (constant seed)
+  doAssert ikm.keyGen(pk, sk) # stop here if key derivation reports failure
+
+  let sig = sk.sign(msg) # signed once, outside the benchmarked body
+
+  bench("BLS verification", iters):
+    let valid = pk.verify(msg, sig)
+    # doAssert valid
+
+proc benchFastAggregateVerify*(numKeys, iters: int) = # Benchmarks fastAggregateVerify of one message signed by `numKeys` keys
+  let msg = "Mr F was here"
+
+  var validators = newSeq[PublicKey](numKeys) # one public key per signer
+  var aggSig: AggregateSignature # NOTE(review): never initialised when numKeys == 0 (loop body does not run)
+
+  for i in 0 ..< numKeys:
+    var pk: PublicKey
+    var sk: SecretKey
+    var ikm: array[32, byte]
+
+    for b in ikm.mitems:
+      b = byte benchRNG.rand(0xFF) # new keying material drawn for each signer
+    doAssert ikm.keyGen(pk, sk)
+
+    validators[i] = pk
+
+    let sig = sk.sign(msg) # every signer signs the same message
+
+    if i == 0:
+      aggSig.init(sig) # the first signature initialises the aggregate
+    else:
+      aggSig.aggregate(sig) # later signatures are added to it
+
+  var finalSig: Signature
+  finalSig.finish(aggSig) # produce the Signature that is passed to the verifier
+
+  bench("BLS agg verif of 1 msg by " & $numKeys & " pubkeys", iters):
+    let valid = validators.fastAggregateVerify(msg, finalSig)
+    doAssert valid # evaluated inside the body handed to `bench`
+
+when isMainModule:
+  benchSign(1000)
+  benchVerify(1000)
+  benchFastAggregateVerify(numKeys = 128, iters = 10)
diff --git a/benchmarks/hash_to_curve.nim b/benchmarks/hash_to_curve.nim
index 4077b1b..2df2fbf 100644
--- a/benchmarks/hash_to_curve.nim
+++ b/benchmarks/hash_to_curve.nim
@@ -8,30 +8,46 @@
 # those terms.
 
 import
-  # Internals
-  ../blscurve/[common, milagro, hash_to_curve],
-  # Bench
+  std/random,
+  ../blscurve,
   ./bench_templates
 
+when BLS_BACKEND == BLST:
+  import
+    ../blscurve/blst/blst_abi
+else:
+  import
+    ../blscurve/miracl/[common, milagro],
+    ../blscurve/miracl/hash_to_curve
+
 # ############################################################
 #
 #             Benchmark of Hash to G2 of BLS12-381
-#           Using Draft #5 of IETF spec (HKDF-based)
+#                  Using Draft #9 of IETF spec
 #
 # ############################################################
-# https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-05#appendix-C.3
+# https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-09#appendix-H.10
 
 proc benchHashToG2*(iters: int) =
   const dst = "BLS_SIG_BLS12381G2-SHA256-SSWU-RO_POP_"
-  let msg = "msg"
+  let msg = "Mr F was here"
 
-  var point: ECP2_BLS12381
+  when BLS_BACKEND == BLST:
+    var P: blst_p2
+    var Paff: blst_p2_affine
 
-  bench("Hash to G2 (Draft #5)", iters):
-    point = hashToG2(msg, dst)
+    bench("Hash to G2 (Draft #9) + affine conversion", iters):
+      P.blst_hash_to_g2(
+        msg,
+        dst,
+        aug = ""
+      )
+      Paff.blst_p2_to_affine(P)
+  else:
+    var point: ECP2_BLS12381
 
+    bench("Hash to G2 (Draft #9)", iters):
+      point = hashToG2(msg, dst)
 
 when isMainModule:
-  echo "⚠️ Warning: using draft v5 of IETF Hash-To-Curve (HKDF-based)."
-  echo "            This is an outdated draft.\n\n"
   benchHashToG2(1000)
diff --git a/benchmarks/keygen.nim b/benchmarks/keygen.nim
deleted file mode 100644
index f4044b9..0000000
--- a/benchmarks/keygen.nim
+++ /dev/null
@@ -1,35 +0,0 @@
-# Nim-BLSCurve
-# Copyright (c) 2018 Status Research & Development GmbH
-# Licensed under either of
-#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
-#  * MIT license ([LICENSE-MIT](LICENSE-MIT))
-# at your option.
-# This file may not be copied, modified, or distributed except according to
-# those terms.
-
-import
-  # Status libraries
-  nimcrypto/sysrand,
-  # Internals
-  ../blscurve/bls_signature_scheme,
-  ../blscurve/milagro
-
-proc newKeyPair*(): tuple[pubkey: ECP_BLS12381, seckey: BIG_384] {.noInit.}=
-  ## Generates a new public-private keypair
-  ## This requires entropy on the system
-  # The input-keying-material requires 32 bytes at least for security
-  # The generation is deterministic and the input-keying-material
-  # must be protected against side-channel attacks
-
-  var ikm: array[32, byte]
-  let written = randomBytes(ikm)
-  doAssert written >= 32, "Key generation failure"
-
-  var pk: PublicKey
-  var sk: SecretKey
-
-  doAssert keyGen(ikm, pk, sk), "Key generation failure"
-
-  # We cast because the fields are normally private to the signature module
-  result.pubkey = cast[ECP_BLS12381](pk)
-  result.seckey = cast[BIG_384](sk)
diff --git a/blscurve.nimble b/blscurve.nimble
index 98136fe..2f7905d 100644
--- a/blscurve.nimble
+++ b/blscurve.nimble
@@ -47,18 +47,23 @@ task test, "Run all tests":
     # Internal SHA256
     test "-d:BLS_FORCE_BACKEND=blst", "tests/blst_sha256.nim"
 
-  # # Ensure benchmarks stay relevant. Ignore Windows 32-bit at the moment
-  # if not defined(windows) or not existsEnv"PLATFORM" or getEnv"PLATFORM" == "x64":
-  #   exec "nim c -d:danger --outdir:build -r" &
-  #         " --verbosity:0 --hints:off --warnings:off" &
-  #         " benchmarks/bench_all.nim"
+  # Ensure benchmarks stay relevant.
+  # TODO, solve "inconsistent operand constraints"
+  # on 32-bit for asm volatile, this might be due to
+  # incorrect RDTSC call in benchmark
+  when defined(arm64) or defined(amd64):
+    exec "nim c -d:BLS_FORCE_BACKEND=miracl -d:danger --outdir:build -r" &
+          " --verbosity:0 --hints:off --warnings:off" &
+          " benchmarks/bench_all.nim"
 
-# TODO: update benchmarks
+    exec "nim c -d:BLS_FORCE_BACKEND=blst -d:danger --outdir:build -r" &
+          " --verbosity:0 --hints:off --warnings:off" &
+          " benchmarks/bench_all.nim"
 
-# task bench, "Run benchmarks":
-#   if not dirExists "build":
-#     mkDir "build"
+task bench, "Run benchmarks":
+  if not dirExists "build":
+    mkDir "build"
 
-#   exec "nim c -d:danger --outdir:build -r" &
-#          " --verbosity:0 --hints:off --warnings:off" &
-#          " benchmarks/bench_all.nim"
+  exec "nim c -d:danger --outdir:build -r" &
+         " --verbosity:0 --hints:off --warnings:off" &
+         " benchmarks/bench_all.nim"
diff --git a/blscurve/bls_backend.nim b/blscurve/bls_backend.nim
index 3926176..dedd04a 100644
--- a/blscurve/bls_backend.nim
+++ b/blscurve/bls_backend.nim
@@ -36,7 +36,8 @@ when (BLS_FORCE_BACKEND == "blst" or AutoSelectBLST) and (
   const BLS_BACKEND* = BLST
 elif BLS_FORCE_BACKEND == "blst" or AutoSelectBLST:
   # CPU doesn't support SSE3 which is used in optimized SHA256
-  # BLST_PORTABLE is a no-op on ARM
+  # On ARM, BLST_PORTABLE prevents use of the builtin SHA256,
+  # which is unsupported on Raspberry Pi (detected via __ARM_FEATURE_CRYPTO)
   const BLS_BACKEND* = BLST
   {.passC: "-D__BLST_PORTABLE__".}
 else: