modexp: 2.5x accel on small exponent (#268)

* add metering to modexp

* modexp: accel exponent = 1

* modexp: improve runtime Montgomery constants compute. 2.49x faster on DOS vectors
This commit is contained in:
Mamy Ratsimbazafy 2023-09-09 09:21:05 +02:00 committed by GitHub
parent f3a5f352b8
commit 15757557b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 315 additions and 51 deletions

View File

@ -3,16 +3,16 @@ import
../constantine/math/arithmetic,
../constantine/math/io/io_bigints,
../constantine/platforms/abstractions,
./platforms, ./bench_blueprint
./bench_blueprint
proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) =
let ns = elapsedNs div iters
let cycles = elapsedCycles div iters
let throughput = 1e9 / float64(ns)
when SupportsGetTicks:
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
else:
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op"
echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op"
template bench(fnCall: untyped, ticks, ns: var int64): untyped =
block:
@ -148,11 +148,119 @@ proc dos1() =
(let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds)
report("EVM Modexp - 32,32,32", nanoseconds, ticks, execsEIP2565)
report("EVM Modexp - 32,32,32 - even base & power-of-2 modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos2() =
  ## Benchmark `eth_evm_modexp` on a crafted input:
  ## 1-byte base (0x33), 1-byte exponent equal to 1, 121-byte odd modulus.
  ##
  ## The call is repeated as many times as the EIP-2565 gas fee fits into
  ## the `blockGasBudget` constant below, then the accumulated timings
  ## are reported.
  let input = [
    # Length of base (1)
    uint8 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of exponent (1)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of modulus (121)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

    # Base
    0x33,

    # Exponent
    0x01,

    # Modulus
    0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
    0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
    0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
    0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
    0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
    0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
  ]

  # Output buffer sized to the modulus length, plus the timing accumulators.
  var output = newSeq[byte](121)
  var cycles, elapsedNs: int64

  # How many calls fit in the budget under each pricing scheme.
  const blockGasBudget = 30000000
  let (feeEIP198, feeEIP2565) = computeGasFee(input)
  let runsEIP198 = blockGasBudget div feeEIP198
  let runsEIP2565 = blockGasBudget div feeEIP2565

  echo "Gas cost: ", feeEIP198, " gas (EIP-198) - ", runsEIP198, " executions per block"
  echo "Gas cost: ", feeEIP2565, " gas (EIP-2565) - ", runsEIP2565, " executions per block"

  for _ in 0 ..< runsEIP2565:
    bench((let _ = eth_evm_modexp(output, input)), cycles, elapsedNs)

  report("EVM Modexp - 1,1,121 - exponent=1 and odd modulus", elapsedNs, cycles, runsEIP2565)
  echo "Total time: ", elapsedNs.float64 / 1e6, " ms for ", runsEIP2565, " iterations"
proc dos2a() =
  ## Benchmark `eth_evm_modexp` on the "shortcuttable" variation:
  ## 1-byte base (0x33), 1-byte exponent equal to 1, 121-byte even modulus
  ## (same bytes as the odd-modulus case except for the last byte, 0x76).
  ##
  ## The call is repeated as many times as the EIP-2565 gas fee fits into
  ## the `blockGasBudget` constant below, then the accumulated timings
  ## are reported.
  let input = [
    # Length of base (1)
    uint8 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of exponent (1)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of modulus (121)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

    # Base
    0x33,

    # Exponent
    0x01,

    # Modulus
    0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
    0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
    0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
    0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
    0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
    0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x76,
  ]

  # Output buffer sized to the modulus length, plus the timing accumulators.
  var output = newSeq[byte](121)
  var cycles, elapsedNs: int64

  # How many calls fit in the budget under each pricing scheme.
  const blockGasBudget = 30000000
  let (feeEIP198, feeEIP2565) = computeGasFee(input)
  let runsEIP198 = blockGasBudget div feeEIP198
  let runsEIP2565 = blockGasBudget div feeEIP2565

  echo "Gas cost: ", feeEIP198, " gas (EIP-198) - ", runsEIP198, " executions per block"
  echo "Gas cost: ", feeEIP2565, " gas (EIP-2565) - ", runsEIP2565, " executions per block"

  for _ in 0 ..< runsEIP2565:
    bench((let _ = eth_evm_modexp(output, input)), cycles, elapsedNs)

  report("EVM Modexp - 1,1,121 - exponent=1 and even modulus", elapsedNs, cycles, runsEIP2565)
  echo "Total time: ", elapsedNs.float64 / 1e6, " ms for ", runsEIP2565, " iterations"
proc dos2b() =
# even variation with no shortcut
let input = [
# Length of base (1)
uint8 0x00,
@ -201,10 +309,11 @@ proc dos2() =
(let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds)
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565)
report("EVM Modexp - 1,1,121 - exponent=16 and odd modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos3() =
proc dos2c() =
# odd variation with no shortcut
let input = [
# Length of base (1)
@ -254,11 +363,71 @@ proc dos3() =
(let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds)
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565)
report("EVM Modexp - 1,1,121 - exponent=7 and odd modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos2d() =
  ## Benchmark `eth_evm_modexp` on an odd-exponent variation with no
  ## shortcut and a power-of-2 modulus:
  ## 1-byte base (0x33), 1-byte exponent equal to 7, and a 121-byte modulus
  ## made of 0x04 followed by 120 zero bytes.
  ##
  ## The call is repeated as many times as the EIP-2565 gas fee fits into
  ## the `blockGasBudget` constant below, then the accumulated timings
  ## are reported.
  let input = [
    # Length of base (1)
    uint8 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of exponent (1)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Length of modulus (121)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

    # Base
    0x33,

    # Exponent
    0x07,

    # Modulus
    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  ]

  # Output buffer sized to the modulus length, plus the timing accumulators.
  var output = newSeq[byte](121)
  var cycles, elapsedNs: int64

  # How many calls fit in the budget under each pricing scheme.
  const blockGasBudget = 30000000
  let (feeEIP198, feeEIP2565) = computeGasFee(input)
  let runsEIP198 = blockGasBudget div feeEIP198
  let runsEIP2565 = blockGasBudget div feeEIP2565

  echo "Gas cost: ", feeEIP198, " gas (EIP-198) - ", runsEIP198, " executions per block"
  echo "Gas cost: ", feeEIP2565, " gas (EIP-2565) - ", runsEIP2565, " executions per block"

  for _ in 0 ..< runsEIP2565:
    bench((let _ = eth_evm_modexp(output, input)), cycles, elapsedNs)

  report("EVM Modexp - 1,1,121 - exponent=7 and power-of-2 modulus", elapsedNs, cycles, runsEIP2565)
  echo "Total time: ", elapsedNs.float64 / 1e6, " ms for ", runsEIP2565, " iterations"
dos1()
echo "\n"
dos2()
echo "\n"
dos3()
dos2a()
echo "\n"
dos2b()
echo "\n"
dos2c()
echo "\n"
dos2d()

View File

@ -370,7 +370,7 @@ func eth_evm_ecpairing*(
r[r.len-1] = byte 1
return cttEVM_Success
func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime].} =
func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime], meter.} =
## Modular exponentiation
##
## Name: MODEXP

View File

@ -38,7 +38,7 @@ template `+=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_
template `-=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]), Q: ECP_ShortW_Aff[F, G]) =
P.msub_vartime(P, Q)
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication
##
## P <- [k] P
@ -67,7 +67,7 @@ func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime
else:
P += Paff
func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication
## This can only handle small scalars, up to 2⁴ = 16 (excluded)
let s = uint scalar.limbs[0]
@ -206,7 +206,7 @@ func accumNAF[precompSize, NafMax: static int, EC, ECaff](
elif digit < 0:
P -= tab[-digit shr 1]
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca].} =
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication
##
## P <- [k] P
@ -246,7 +246,7 @@ func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt,
func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC](
P: var EC,
scalar: BigInt[scalBits],
window: static int) {.tags:[VarTime, Alloca].} =
window: static int) {.tags:[VarTime, Alloca], meter.} =
## Endomorphism-accelerated windowed vartime scalar multiplication
##
## P <- [k] P

View File

@ -88,7 +88,7 @@ func batchAffine*[N: static int, F, G](
func batchAffine*[F, G](
affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
jacs: ptr UncheckedArray[ECP_ShortW_Jac[F, G]],
N: int) {.noInline, tags:[Alloca].} =
N: int) {.noInline, tags:[Alloca], meter.} =
# Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1

View File

@ -46,7 +46,7 @@ func powOddMod_vartime*(
a: openArray[SecretWord],
exponent: openArray[byte],
M: openArray[SecretWord],
window: int) {.noInline, tags:[Alloca, VarTime].} =
window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
## r <- a^exponent (mod M) with M odd
## assumes a < M
##
@ -57,6 +57,12 @@ func powOddMod_vartime*(
let aBits = a.getBits_LE_vartime()
let mBits = M.getBits_LE_vartime()
let eBits = exponent.getBits_BE_vartime()
if eBits == 1:
r.view().reduce(a.view(), aBits, M.view(), mBits)
return
let L = wordsRequired(mBits)
let m0ninv = M[0].negInvModWord()
var rMont = allocStackArray(SecretWord, L)
@ -97,7 +103,7 @@ func powMod_vartime*(
a: openArray[SecretWord],
exponent: openArray[byte],
M: openArray[SecretWord],
window: int) {.noInline, tags:[Alloca, VarTime].} =
window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
## r <- a^exponent (mod M) with M odd
## assumes a < M
##

View File

@ -142,7 +142,7 @@ func shlAddMod(a: LimbsViewMut, aLen: int,
func reduce*(r: LimbsViewMut,
a: LimbsViewAny, aBits: int,
M: LimbsViewConst, mBits: int) =
M: LimbsViewConst, mBits: int) {.meter.} =
## Reduce `a` modulo `M` and store the result in `r`
##
## The modulus `M` most-significant bit at `mBits` MUST be set.

View File

@ -39,6 +39,6 @@ func prod_comba(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.noI
for i in stopEx ..< r.len:
r[i] = Zero
func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline.}=
func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline, meter.}=
## Extended precision multiplication
r.prod_comba(a, b)

View File

@ -30,7 +30,7 @@ import
# Comparison
# ------------------------------------------------------------
func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool =
func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool {.meter.} =
## Returns true if a < b
## Comparison is constant-time
var diff: SecretWord
@ -43,7 +43,7 @@ func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool =
# Type-erased add-sub
# ------------------------------------------------------------
func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry =
func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry {.meter.} =
## Type-erased conditional addition
## Returns the carry
##
@ -58,7 +58,7 @@ func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry =
addC(result, sum, a[i], b[i], result)
ctl.ccopy(a[i], sum)
func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow =
func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow {.meter.} =
## Type-erased conditional subtraction
## Returns the borrow
##

View File

@ -17,7 +17,7 @@ import
#
# ############################################################
func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) =
func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) {.meter.} =
## r <- a+b (mod M)
## assumes a and b are in the range [0, M)
@ -43,6 +43,6 @@ func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord
for i in 0 ..< r.len:
r[i] = t[i]
func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline.} =
func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline, meter.} =
## r <- 2a (mod M)
r.addmod_vartime(a, a, M)

View File

@ -17,7 +17,7 @@ import
# No exceptions allowed
{.push raises: [], checks: off.}
func mod2k_vartime*(a: var openArray[SecretWord], k: uint) =
func mod2k_vartime*(a: var openArray[SecretWord], k: uint) {.meter.} =
## a <- a (mod 2ᵏ)
const SlotShift = log2_vartime(WordBitWidth.uint32)
const SelectMask = WordBitWidth - 1
@ -38,7 +38,7 @@ func mod2k_vartime*(a: var openArray[SecretWord], k: uint) =
for i in hiIndex+1 ..< a.len:
a[i] = Zero
func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) =
func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.meter.} =
## r <- a - b (mod 2ᵏ)
debug:
const SlotShift = log2_vartime(WordBitWidth.uint32)
@ -63,7 +63,7 @@ func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[
r.mod2k_vartime(k)
func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline.} =
func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline, meter.} =
## r <- a*b (mod 2ᵏ)
r.prod(a, b)
r.mod2k_vartime(k)
@ -75,7 +75,7 @@ iterator unpackLE(scalarByte: byte): bool =
func powMod2k_vartime*(
r{.noAlias.}: var openArray[SecretWord],
a{.noAlias.}: openArray[SecretWord],
exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca].} =
exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca], meter.} =
## r <- a^exponent (mod 2ᵏ)
##
## Requires:
@ -115,6 +115,13 @@ func powMod2k_vartime*(
r[0] = One # x⁰ = 1, even for 0⁰
return
if msb == 0: # exponent is 1
for i in 0 ..< min(r.len, a.len):
# range [r.len, a.len) will be truncated (mod 2ᵏ)
r[i] = a[i]
r.mod2k_vartime(k)
return
if a.isEven().bool:
let aTrailingZeroes = block:
var i = 0
@ -155,7 +162,7 @@ func powMod2k_vartime*(
func invModBitwidth(a: SecretWord): SecretWord {.borrow.}
## Inversion a⁻¹ (mod 2³²) or a⁻¹ (mod 2⁶⁴)
func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca].} =
func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca], meter.} =
## Inversion a⁻¹ (mod 2ᵏ)
## with 2ᵏ a multi-precision integer.
#

View File

@ -11,7 +11,8 @@ import
../../platforms/[abstractions, allocs, bithacks],
./limbs_views,
./limbs_mod,
./limbs_fixedprec
./limbs_fixedprec,
./limbs_division
# No exceptions allowed
{.push raises: [], checks: off.}
@ -66,18 +67,37 @@ func r_powmod_vartime(r: var openArray[SecretWord], M: openArray[SecretWord], n:
for i in start ..< stop:
r.doublemod_vartime(r, M)
func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) =
func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
## Returns 1 in Montgomery domain:
r.r_powmod_vartime(M, 1)
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) =
# r.r_powmod_vartime(M, 1)
let mBits = getBits_LE_vartime(M)
let t = allocStackArray(SecretWord, M.len + 1)
zeroMem(t, M.len*sizeof(SecretWord))
t[M.len] = One
r.view().reduce(LimbsViewMut t, M.len*WordBitWidth+1, M.view(), mBits)
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
## Returns the Montgomery domain magic constant for the input modulus:
##
## R² ≡ R² (mod M) with R = (2^WordBitWidth)^numWords
##
## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
## R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
r.r_powmod_vartime(M, 2)
# r.r_powmod_vartime(M, 2)
let mBits = getBits_LE_vartime(M)
let t = allocStackArray(SecretWord, 2*M.len + 1)
zeroMem(t, 2*M.len*sizeof(SecretWord))
t[2*M.len] = One
r.view().reduce(LimbsViewMut t, 2*M.len*WordBitWidth+1, M.view(), mBits)
# Montgomery multiplication
# ------------------------------------------
@ -88,7 +108,7 @@ func mulMont_FIPS*(
M: LimbsViewConst,
m0ninv: SecretWord,
mBits: int,
skipFinalSub: static bool = false) {.noInline, tags:[Alloca].} =
skipFinalSub: static bool = false) {.noInline, tags:[Alloca], meter.} =
## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS)
##
## This maps
@ -138,7 +158,7 @@ func mulMont_FIPS*(
# ------------------------------------------
func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca].} =
m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca], meter.} =
## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N)
## to the regular natural representation (mod N)
##
@ -166,7 +186,7 @@ func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
r.copyWords(0, t, 0, N)
func getMont*(r: LimbsViewMut, a: LimbsViewAny, M, r2modM: LimbsViewConst,
m0ninv: SecretWord, mBits: int) {.inline.} =
m0ninv: SecretWord, mBits: int) {.inline, meter.} =
## Transform a bigint ``a`` from its natural representation (mod N)
## to the Montgomery n-residue representation
##
@ -233,7 +253,7 @@ func powMontPrologue(
m0ninv: SecretWord,
scratchspace: LimbsViewMut,
scratchLen: int,
mBits: int): uint {.tags:[Alloca].} =
mBits: int): uint {.tags:[Alloca], meter.} =
## Setup the scratchspace
## Returns the fixed-window size for exponentiation with window optimization.
# Precompute window content, special case for window = 1
@ -263,7 +283,7 @@ func powMontSquarings(
tmp: LimbsViewMut,
window: uint,
acc, acc_len: var uint,
e: var int): tuple[k, bits: uint] {.inline.}=
e: var int): tuple[k, bits: uint] {.inline, meter.}=
## Squaring step of exponentiation by squaring
## Get the next k bits in range [1, window)
## Square k times
@ -309,7 +329,7 @@ func powMont*(
m0ninv: SecretWord,
scratchspace: LimbsViewMut,
scratchLen: int,
mBits: int) =
mBits: int) {.meter.} =
## Modular exponentiation r = a^exponent mod M
## in the Montgomery domain
##
@ -379,7 +399,7 @@ func powMont_vartime*(
m0ninv: SecretWord,
scratchspace: LimbsViewMut,
scratchLen: int,
mBits: int) {.tags:[VarTime, Alloca].} =
mBits: int) {.tags:[VarTime, Alloca], meter.} =
## Modular exponentiation a <- a^exponent (mod M)
## in the Montgomery domain
##

View File

@ -61,7 +61,7 @@ func shrWords(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord]
for i in a.len-w ..< r.len:
r[i] = Zero
func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) =
func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) {.meter.} =
## Shift `a` right by k bits and store in `r`
if k == 0:
let min = min(a.len, r.len)
@ -87,7 +87,7 @@ func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[
# Arithmetic
# --------------------------------------------------------
func neg*(a: var openArray[SecretWord]) =
func neg*(a: var openArray[SecretWord]) {.meter.} =
## Computes the additive inverse -a
## in 2-complement representation
@ -97,7 +97,7 @@ func neg*(a: var openArray[SecretWord]) =
for i in 1 ..< a.len:
addC(carry, a[i], not(a[i]), Zero, carry)
func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool =
func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
## r <- a + b
## and
## returns the carry
@ -130,7 +130,7 @@ func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord
else:
return bool carry
func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool =
func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
## r <- a - b
## and
## returns false if a >= b

View File

@ -90,7 +90,7 @@ func setOne*(a: var openArray[SomeNumber]){.inline.} =
a[0] = 1
for i in 1 ..< a.len:
a[i] = 0
func asBytes*(s: static string): auto =
## Reinterpret a compile-time string as an array of bytes
const N = s.len
@ -104,8 +104,7 @@ func rawCopy*(
dStart: SomeInteger,
src: openArray[byte],
sStart: SomeInteger,
len: SomeInteger
) {.inline.} =
len: SomeInteger) {.inline.} =
## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
## Unlike the standard library, this cannot throw
## even a defect.

53
metering/m_modexp.nim Normal file
View File

@ -0,0 +1,53 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
./reports, ./tracer,
../constantine/ethereum_evm_precompiles,
../constantine/platforms/abstractions
# Metering driver: run `eth_evm_modexp` once on a fixed input
# (1-byte base 0x33, 1-byte exponent 0x07, 121-byte odd modulus)
# and print the collected per-procedure metrics.
let modexpInput = [
  # Length of base (1)
  uint8 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

  # Length of exponent (1)
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

  # Length of modulus (121)
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

  # Base
  0x33,

  # Exponent
  0x07,

  # Modulus
  0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
  0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
  0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
  0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
  0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
  0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
]

# Output buffer sized to the modulus length.
var modexpOutput = newSeq[byte](121)

# Clear any previously recorded metrics, then run the metered call once.
resetMetering()
let modexpStatus = eth_evm_modexp(modexpOutput, modexpInput)
doAssert modexpStatus == cttEVM_Success

# Label the report with whether the x86 assembly flags are enabled.
const asmLabel =
  if UseASM_X86_64 or UseASM_X86_32:
    "UseAssembly"
  else:
    "NoAssembly"

reportCli(Metrics, asmLabel)

View File

@ -33,6 +33,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
for m in metrics:
if m.numCalls == 0:
continue
let shortname = block:
if m.procName.len <= 150: m.procName.replace('\n', ' ')
else: m.procName[0..145].replace('\n', ' ') & " ..."
# TODO: running variance / standard deviation but the Welford method is quite costly.
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
@ -40,11 +45,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
let throughput = 1e6 / avgTimeUs
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep
else:
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
const lineSep = &"""|{'-'.repeat(150)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
echo "\n"
echo lineSep
echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
@ -53,10 +58,15 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
for m in metrics:
if m.numCalls == 0:
continue
let shortname = block:
if m.procName.len <= 150: m.procName.replace('\n', ' ')
else: m.procName[0..145].replace('\n', ' ') & " ..."
# TODO: running variance / standard deviation but the Welford method is quite costly.
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
let avgTimeUs = cumulTimeUs / m.numCalls.float64
let throughput = 1e6 / avgTimeUs
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep

View File

@ -88,7 +88,7 @@ when CTT_METER or CTT_TRACE:
let stopTime = getMonoTime()
when SupportsGetTicks:
let elapsedCycles = stopCycle - startCycle
let elapsedTime = inMicroseconds(stopTime - startTime)
let elapsedTime = inNanoseconds(stopTime - startTime)
discard Metrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
when SupportsGetTicks: