modexp: 2.5x accel on small exponent (#268)
* add metering to modexp * modexp: accel exponent = 1 * modexp: improve runtime Montgomery constants compute. 2.49x faster on DOS vectors
This commit is contained in:
parent
f3a5f352b8
commit
15757557b4
|
@ -3,16 +3,16 @@ import
|
|||
../constantine/math/arithmetic,
|
||||
../constantine/math/io/io_bigints,
|
||||
../constantine/platforms/abstractions,
|
||||
./platforms, ./bench_blueprint
|
||||
./bench_blueprint
|
||||
|
||||
proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) =
|
||||
let ns = elapsedNs div iters
|
||||
let cycles = elapsedCycles div iters
|
||||
let throughput = 1e9 / float64(ns)
|
||||
when SupportsGetTicks:
|
||||
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
|
||||
echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
|
||||
else:
|
||||
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op"
|
||||
echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op"
|
||||
|
||||
template bench(fnCall: untyped, ticks, ns: var int64): untyped =
|
||||
block:
|
||||
|
@ -148,11 +148,119 @@ proc dos1() =
|
|||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 32,32,32", nanoseconds, ticks, execsEIP2565)
|
||||
report("EVM Modexp - 32,32,32 - even base & power-of-2 modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
proc dos2() =
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
uint8 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of exponent (1)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of modulus (121)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
|
||||
|
||||
# Base
|
||||
0x33,
|
||||
|
||||
# Exponent
|
||||
0x01,
|
||||
|
||||
# Modulus
|
||||
0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
|
||||
0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
|
||||
0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
|
||||
0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
|
||||
0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
|
||||
0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
|
||||
]
|
||||
|
||||
var r = newSeq[byte](121)
|
||||
var ticks, nanoseconds: int64
|
||||
|
||||
let (gasFeeEIP198, gasFeeEIP2565) = computeGasFee(input)
|
||||
const blockSize = 30000000
|
||||
|
||||
let execsEIP198 = blockSize div gasFeeEIP198
|
||||
let execsEIP2565 = blockSize div gasFeeEIP2565
|
||||
|
||||
echo "Gas cost: ", gasFeeEIP198, " gas (EIP-198) - ", execsEIP198, " executions per block"
|
||||
echo "Gas cost: ", gasFeeEIP2565, " gas (EIP-2565) - ", execsEIP2565, " executions per block"
|
||||
|
||||
for i in 0 ..< execsEIP2565:
|
||||
bench(
|
||||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 1,1,121 - exponent=1 and odd modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
proc dos2a() =
|
||||
# shortcuttable variation with even modulus
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
uint8 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of exponent (1)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of modulus (121)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
|
||||
|
||||
# Base
|
||||
0x33,
|
||||
|
||||
# Exponent
|
||||
0x01,
|
||||
|
||||
# Modulus
|
||||
0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
|
||||
0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
|
||||
0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
|
||||
0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
|
||||
0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
|
||||
0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x76,
|
||||
]
|
||||
|
||||
var r = newSeq[byte](121)
|
||||
var ticks, nanoseconds: int64
|
||||
|
||||
let (gasFeeEIP198, gasFeeEIP2565) = computeGasFee(input)
|
||||
const blockSize = 30000000
|
||||
|
||||
let execsEIP198 = blockSize div gasFeeEIP198
|
||||
let execsEIP2565 = blockSize div gasFeeEIP2565
|
||||
|
||||
echo "Gas cost: ", gasFeeEIP198, " gas (EIP-198) - ", execsEIP198, " executions per block"
|
||||
echo "Gas cost: ", gasFeeEIP2565, " gas (EIP-2565) - ", execsEIP2565, " executions per block"
|
||||
|
||||
for i in 0 ..< execsEIP2565:
|
||||
bench(
|
||||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 1,1,121 - exponent=1 and even modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
proc dos2b() =
|
||||
# even variation with no shortcut
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
uint8 0x00,
|
||||
|
@ -201,10 +309,11 @@ proc dos2() =
|
|||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565)
|
||||
report("EVM Modexp - 1,1,121 - exponent=16 and odd modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
proc dos3() =
|
||||
proc dos2c() =
|
||||
# odd variation with no shortcut
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
|
@ -254,11 +363,71 @@ proc dos3() =
|
|||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565)
|
||||
report("EVM Modexp - 1,1,121 - exponent=7 and odd modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
proc dos2d() =
|
||||
# odd variation with no shortcut and power of 2 modulus
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
uint8 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of exponent (1)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of modulus (121)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
|
||||
|
||||
# Base
|
||||
0x33,
|
||||
|
||||
# Exponent
|
||||
0x07,
|
||||
|
||||
# Modulus
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
]
|
||||
|
||||
var r = newSeq[byte](121)
|
||||
var ticks, nanoseconds: int64
|
||||
|
||||
let (gasFeeEIP198, gasFeeEIP2565) = computeGasFee(input)
|
||||
const blockSize = 30000000
|
||||
|
||||
let execsEIP198 = blockSize div gasFeeEIP198
|
||||
let execsEIP2565 = blockSize div gasFeeEIP2565
|
||||
|
||||
echo "Gas cost: ", gasFeeEIP198, " gas (EIP-198) - ", execsEIP198, " executions per block"
|
||||
echo "Gas cost: ", gasFeeEIP2565, " gas (EIP-2565) - ", execsEIP2565, " executions per block"
|
||||
|
||||
for i in 0 ..< execsEIP2565:
|
||||
bench(
|
||||
(let _ = r.eth_evm_modexp(input)),
|
||||
ticks, nanoseconds)
|
||||
|
||||
report("EVM Modexp - 1,1,121 - exponent=7 and power-of-2 modulus", nanoseconds, ticks, execsEIP2565)
|
||||
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
|
||||
|
||||
dos1()
|
||||
echo "\n"
|
||||
dos2()
|
||||
echo "\n"
|
||||
dos3()
|
||||
dos2a()
|
||||
echo "\n"
|
||||
dos2b()
|
||||
echo "\n"
|
||||
dos2c()
|
||||
echo "\n"
|
||||
dos2d()
|
|
@ -370,7 +370,7 @@ func eth_evm_ecpairing*(
|
|||
r[r.len-1] = byte 1
|
||||
return cttEVM_Success
|
||||
|
||||
func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime].} =
|
||||
func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime], meter.} =
|
||||
## Modular exponentiation
|
||||
##
|
||||
## Name: MODEXP
|
||||
|
|
|
@ -38,7 +38,7 @@ template `+=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_
|
|||
template `-=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]), Q: ECP_ShortW_Aff[F, G]) =
|
||||
P.msub_vartime(P, Q)
|
||||
|
||||
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
|
||||
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
|
||||
## **Variable-time** Elliptic Curve Scalar Multiplication
|
||||
##
|
||||
## P <- [k] P
|
||||
|
@ -67,7 +67,7 @@ func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime
|
|||
else:
|
||||
P += Paff
|
||||
|
||||
func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} =
|
||||
func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
|
||||
## **Variable-time** Elliptic Curve Scalar Multiplication
|
||||
## This can only handle small scalars, up to 2⁴ = 16 excluded
|
||||
let s = uint scalar.limbs[0]
|
||||
|
@ -206,7 +206,7 @@ func accumNAF[precompSize, NafMax: static int, EC, ECaff](
|
|||
elif digit < 0:
|
||||
P -= tab[-digit shr 1]
|
||||
|
||||
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca].} =
|
||||
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca], meter.} =
|
||||
## **Variable-time** Elliptic Curve Scalar Multiplication
|
||||
##
|
||||
## P <- [k] P
|
||||
|
@ -246,7 +246,7 @@ func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt,
|
|||
func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC](
|
||||
P: var EC,
|
||||
scalar: BigInt[scalBits],
|
||||
window: static int) {.tags:[VarTime, Alloca].} =
|
||||
window: static int) {.tags:[VarTime, Alloca], meter.} =
|
||||
## Endomorphism-accelerated windowed vartime scalar multiplication
|
||||
##
|
||||
## P <- [k] P
|
||||
|
|
|
@ -88,7 +88,7 @@ func batchAffine*[N: static int, F, G](
|
|||
func batchAffine*[F, G](
|
||||
affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
|
||||
jacs: ptr UncheckedArray[ECP_ShortW_Jac[F, G]],
|
||||
N: int) {.noInline, tags:[Alloca].} =
|
||||
N: int) {.noInline, tags:[Alloca], meter.} =
|
||||
# Algorithm: Montgomery's batch inversion
|
||||
# - Speeding the Pollard and Elliptic Curve Methods of Factorization
|
||||
# Section 10.3.1
|
||||
|
|
|
@ -46,7 +46,7 @@ func powOddMod_vartime*(
|
|||
a: openArray[SecretWord],
|
||||
exponent: openArray[byte],
|
||||
M: openArray[SecretWord],
|
||||
window: int) {.noInline, tags:[Alloca, VarTime].} =
|
||||
window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
|
||||
## r <- a^exponent (mod M) with M odd
|
||||
## assumes a < M
|
||||
##
|
||||
|
@ -57,6 +57,12 @@ func powOddMod_vartime*(
|
|||
|
||||
let aBits = a.getBits_LE_vartime()
|
||||
let mBits = M.getBits_LE_vartime()
|
||||
let eBits = exponent.getBits_BE_vartime()
|
||||
|
||||
if eBits == 1:
|
||||
r.view().reduce(a.view(), aBits, M.view(), mBits)
|
||||
return
|
||||
|
||||
let L = wordsRequired(mBits)
|
||||
let m0ninv = M[0].negInvModWord()
|
||||
var rMont = allocStackArray(SecretWord, L)
|
||||
|
@ -97,7 +103,7 @@ func powMod_vartime*(
|
|||
a: openArray[SecretWord],
|
||||
exponent: openArray[byte],
|
||||
M: openArray[SecretWord],
|
||||
window: int) {.noInline, tags:[Alloca, VarTime].} =
|
||||
window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
|
||||
## r <- a^exponent (mod M) with M odd
|
||||
## assumes a < exponent
|
||||
##
|
||||
|
|
|
@ -142,7 +142,7 @@ func shlAddMod(a: LimbsViewMut, aLen: int,
|
|||
|
||||
func reduce*(r: LimbsViewMut,
|
||||
a: LimbsViewAny, aBits: int,
|
||||
M: LimbsViewConst, mBits: int) =
|
||||
M: LimbsViewConst, mBits: int) {.meter.} =
|
||||
## Reduce `a` modulo `M` and store the result in `r`
|
||||
##
|
||||
## The modulus `M` most-significant bit at `mBits` MUST be set.
|
||||
|
|
|
@ -39,6 +39,6 @@ func prod_comba(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.noI
|
|||
for i in stopEx ..< r.len:
|
||||
r[i] = Zero
|
||||
|
||||
func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline.}=
|
||||
func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline, meter.}=
|
||||
## Extended precision multiplication
|
||||
r.prod_comba(a, b)
|
|
@ -30,7 +30,7 @@ import
|
|||
# Comparison
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool =
|
||||
func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool {.meter.} =
|
||||
## Returns true if a < b
|
||||
## Comparison is constant-time
|
||||
var diff: SecretWord
|
||||
|
@ -43,7 +43,7 @@ func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool =
|
|||
# Type-erased add-sub
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry =
|
||||
func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry {.meter.} =
|
||||
## Type-erased conditional addition
|
||||
## Returns the carry
|
||||
##
|
||||
|
@ -58,7 +58,7 @@ func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry =
|
|||
addC(result, sum, a[i], b[i], result)
|
||||
ctl.ccopy(a[i], sum)
|
||||
|
||||
func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow =
|
||||
func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow {.meter.} =
|
||||
## Type-erased conditional subtraction
|
||||
## Returns the borrow
|
||||
##
|
||||
|
|
|
@ -17,7 +17,7 @@ import
|
|||
#
|
||||
# ############################################################
|
||||
|
||||
func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) =
|
||||
func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) {.meter.} =
|
||||
## r <- a+b (mod M)
|
||||
## assumes a and b are in the range [0, M)
|
||||
|
||||
|
@ -43,6 +43,6 @@ func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord
|
|||
for i in 0 ..< r.len:
|
||||
r[i] = t[i]
|
||||
|
||||
func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline.} =
|
||||
func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline, meter.} =
|
||||
## r <- 2a (mod M)
|
||||
r.addmod_vartime(a, a, M)
|
|
@ -17,7 +17,7 @@ import
|
|||
# No exceptions allowed
|
||||
{.push raises: [], checks: off.}
|
||||
|
||||
func mod2k_vartime*(a: var openArray[SecretWord], k: uint) =
|
||||
func mod2k_vartime*(a: var openArray[SecretWord], k: uint) {.meter.} =
|
||||
## a <- a (mod 2ᵏ)
|
||||
const SlotShift = log2_vartime(WordBitWidth.uint32)
|
||||
const SelectMask = WordBitWidth - 1
|
||||
|
@ -38,7 +38,7 @@ func mod2k_vartime*(a: var openArray[SecretWord], k: uint) =
|
|||
for i in hiIndex+1 ..< a.len:
|
||||
a[i] = Zero
|
||||
|
||||
func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) =
|
||||
func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.meter.} =
|
||||
## r <- a - b (mod 2ᵏ)
|
||||
debug:
|
||||
const SlotShift = log2_vartime(WordBitWidth.uint32)
|
||||
|
@ -63,7 +63,7 @@ func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[
|
|||
|
||||
r.mod2k_vartime(k)
|
||||
|
||||
func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline.} =
|
||||
func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline, meter.} =
|
||||
## r <- a*b (mod 2ᵏ)
|
||||
r.prod(a, b)
|
||||
r.mod2k_vartime(k)
|
||||
|
@ -75,7 +75,7 @@ iterator unpackLE(scalarByte: byte): bool =
|
|||
func powMod2k_vartime*(
|
||||
r{.noAlias.}: var openArray[SecretWord],
|
||||
a{.noAlias.}: openArray[SecretWord],
|
||||
exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca].} =
|
||||
exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca], meter.} =
|
||||
## r <- a^exponent (mod 2ᵏ)
|
||||
##
|
||||
## Requires:
|
||||
|
@ -115,6 +115,13 @@ func powMod2k_vartime*(
|
|||
r[0] = One # x⁰ = 1, even for 0⁰
|
||||
return
|
||||
|
||||
if msb == 0: # exponent is 1
|
||||
for i in 0 ..< min(r.len, a.len):
|
||||
# range [r.len, a.len) will be truncated (mod 2ᵏ)
|
||||
r[i] = a[i]
|
||||
r.mod2k_vartime(k)
|
||||
return
|
||||
|
||||
if a.isEven().bool:
|
||||
let aTrailingZeroes = block:
|
||||
var i = 0
|
||||
|
@ -155,7 +162,7 @@ func powMod2k_vartime*(
|
|||
func invModBitwidth(a: SecretWord): SecretWord {.borrow.}
|
||||
## Inversion a⁻¹ (mod 2³²) or a⁻¹ (mod 2⁶⁴)
|
||||
|
||||
func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca].} =
|
||||
func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca], meter.} =
|
||||
## Inversion a⁻¹ (mod 2ᵏ)
|
||||
## with 2ᵏ a multi-precision integer.
|
||||
#
|
||||
|
|
|
@ -11,7 +11,8 @@ import
|
|||
../../platforms/[abstractions, allocs, bithacks],
|
||||
./limbs_views,
|
||||
./limbs_mod,
|
||||
./limbs_fixedprec
|
||||
./limbs_fixedprec,
|
||||
./limbs_division
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [], checks: off.}
|
||||
|
@ -66,18 +67,37 @@ func r_powmod_vartime(r: var openArray[SecretWord], M: openArray[SecretWord], n:
|
|||
for i in start ..< stop:
|
||||
r.doublemod_vartime(r, M)
|
||||
|
||||
func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) =
|
||||
func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
|
||||
## Returns 1 in Montgomery domain:
|
||||
r.r_powmod_vartime(M, 1)
|
||||
|
||||
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) =
|
||||
# r.r_powmod_vartime(M, 1)
|
||||
|
||||
let mBits = getBits_LE_vartime(M)
|
||||
|
||||
let t = allocStackArray(SecretWord, M.len + 1)
|
||||
zeroMem(t, M.len*sizeof(SecretWord))
|
||||
t[M.len] = One
|
||||
|
||||
r.view().reduce(LimbsViewMut t, M.len*WordBitWidth+1, M.view(), mBits)
|
||||
|
||||
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
|
||||
## Returns the Montgomery domain magic constant for the input modulus:
|
||||
##
|
||||
## R² ≡ R² (mod M) with R = (2^WordBitWidth)^numWords
|
||||
##
|
||||
## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
|
||||
## R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
|
||||
r.r_powmod_vartime(M, 2)
|
||||
|
||||
# r.r_powmod_vartime(M, 2)
|
||||
|
||||
let mBits = getBits_LE_vartime(M)
|
||||
|
||||
let t = allocStackArray(SecretWord, 2*M.len + 1)
|
||||
zeroMem(t, 2*M.len*sizeof(SecretWord))
|
||||
t[2*M.len] = One
|
||||
|
||||
r.view().reduce(LimbsViewMut t, 2*M.len*WordBitWidth+1, M.view(), mBits)
|
||||
|
||||
|
||||
# Montgomery multiplication
|
||||
# ------------------------------------------
|
||||
|
@ -88,7 +108,7 @@ func mulMont_FIPS*(
|
|||
M: LimbsViewConst,
|
||||
m0ninv: SecretWord,
|
||||
mBits: int,
|
||||
skipFinalSub: static bool = false) {.noInline, tags:[Alloca].} =
|
||||
skipFinalSub: static bool = false) {.noInline, tags:[Alloca], meter.} =
|
||||
## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS)
|
||||
##
|
||||
## This maps
|
||||
|
@ -138,7 +158,7 @@ func mulMont_FIPS*(
|
|||
# ------------------------------------------
|
||||
|
||||
func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
|
||||
m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca].} =
|
||||
m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca], meter.} =
|
||||
## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N)
|
||||
## to the regular natural representation (mod N)
|
||||
##
|
||||
|
@ -166,7 +186,7 @@ func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
|
|||
r.copyWords(0, t, 0, N)
|
||||
|
||||
func getMont*(r: LimbsViewMut, a: LimbsViewAny, M, r2modM: LimbsViewConst,
|
||||
m0ninv: SecretWord, mBits: int) {.inline.} =
|
||||
m0ninv: SecretWord, mBits: int) {.inline, meter.} =
|
||||
## Transform a bigint ``a`` from its natural representation (mod N)
|
||||
## to the Montgomery n-residue representation
|
||||
##
|
||||
|
@ -233,7 +253,7 @@ func powMontPrologue(
|
|||
m0ninv: SecretWord,
|
||||
scratchspace: LimbsViewMut,
|
||||
scratchLen: int,
|
||||
mBits: int): uint {.tags:[Alloca].} =
|
||||
mBits: int): uint {.tags:[Alloca], meter.} =
|
||||
## Setup the scratchspace
|
||||
## Returns the fixed-window size for exponentiation with window optimization.
|
||||
# Precompute window content, special case for window = 1
|
||||
|
@ -263,7 +283,7 @@ func powMontSquarings(
|
|||
tmp: LimbsViewMut,
|
||||
window: uint,
|
||||
acc, acc_len: var uint,
|
||||
e: var int): tuple[k, bits: uint] {.inline.}=
|
||||
e: var int): tuple[k, bits: uint] {.inline, meter.}=
|
||||
## Squaring step of exponentiation by squaring
|
||||
## Get the next k bits in range [1, window)
|
||||
## Square k times
|
||||
|
@ -309,7 +329,7 @@ func powMont*(
|
|||
m0ninv: SecretWord,
|
||||
scratchspace: LimbsViewMut,
|
||||
scratchLen: int,
|
||||
mBits: int) =
|
||||
mBits: int) {.meter.} =
|
||||
## Modular exponentiation r = a^exponent mod M
|
||||
## in the Montgomery domain
|
||||
##
|
||||
|
@ -379,7 +399,7 @@ func powMont_vartime*(
|
|||
m0ninv: SecretWord,
|
||||
scratchspace: LimbsViewMut,
|
||||
scratchLen: int,
|
||||
mBits: int) {.tags:[VarTime, Alloca].} =
|
||||
mBits: int) {.tags:[VarTime, Alloca], meter.} =
|
||||
## Modular exponentiation a <- a^exponent (mod M)
|
||||
## in the Montgomery domain
|
||||
##
|
||||
|
|
|
@ -61,7 +61,7 @@ func shrWords(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord]
|
|||
for i in a.len-w ..< r.len:
|
||||
r[i] = Zero
|
||||
|
||||
func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) =
|
||||
func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) {.meter.} =
|
||||
## Shift `a` right by k bits and store in `r`
|
||||
if k == 0:
|
||||
let min = min(a.len, r.len)
|
||||
|
@ -87,7 +87,7 @@ func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[
|
|||
# Arithmetic
|
||||
# --------------------------------------------------------
|
||||
|
||||
func neg*(a: var openArray[SecretWord]) =
|
||||
func neg*(a: var openArray[SecretWord]) {.meter.} =
|
||||
## Computes the additive inverse -a
|
||||
## in 2-complement representation
|
||||
|
||||
|
@ -97,7 +97,7 @@ func neg*(a: var openArray[SecretWord]) =
|
|||
for i in 1 ..< a.len:
|
||||
addC(carry, a[i], not(a[i]), Zero, carry)
|
||||
|
||||
func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool =
|
||||
func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
|
||||
## r <- a + b
|
||||
## and
|
||||
## returns the carry
|
||||
|
@ -130,7 +130,7 @@ func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord
|
|||
else:
|
||||
return bool carry
|
||||
|
||||
func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool =
|
||||
func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
|
||||
## r <- a - b
|
||||
## and
|
||||
## returns false if a >= b
|
||||
|
|
|
@ -90,7 +90,7 @@ func setOne*(a: var openArray[SomeNumber]){.inline.} =
|
|||
a[0] = 1
|
||||
for i in 1 ..< a.len:
|
||||
a[i] = 0
|
||||
|
||||
|
||||
func asBytes*(s: static string): auto =
|
||||
## Reinterpret a compile-time string as an array of bytes
|
||||
const N = s.len
|
||||
|
@ -104,8 +104,7 @@ func rawCopy*(
|
|||
dStart: SomeInteger,
|
||||
src: openArray[byte],
|
||||
sStart: SomeInteger,
|
||||
len: SomeInteger
|
||||
) {.inline.} =
|
||||
len: SomeInteger) {.inline.} =
|
||||
## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
|
||||
## Unlike the standard library, this cannot throw
|
||||
## even a defect.
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
./reports, ./tracer,
|
||||
../constantine/ethereum_evm_precompiles,
|
||||
../constantine/platforms/abstractions
|
||||
|
||||
let input = [
|
||||
# Length of base (1)
|
||||
uint8 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of exponent (1)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||
|
||||
# Length of modulus (121)
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
|
||||
|
||||
# Base
|
||||
0x33,
|
||||
|
||||
# Exponent
|
||||
0x07,
|
||||
|
||||
# Modulus
|
||||
0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
|
||||
0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
|
||||
0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
|
||||
0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
|
||||
0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
|
||||
0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
|
||||
]
|
||||
|
||||
var r = newSeq[byte](121)
|
||||
|
||||
resetMetering()
|
||||
|
||||
let status = eth_evm_modexp(r, input)
|
||||
doAssert status == cttEVM_Success
|
||||
|
||||
const flags = if UseASM_X86_64 or UseASM_X86_32: "UseAssembly" else: "NoAssembly"
|
||||
reportCli(Metrics, flags)
|
|
@ -33,6 +33,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
|
|||
for m in metrics:
|
||||
if m.numCalls == 0:
|
||||
continue
|
||||
|
||||
let shortname = block:
|
||||
if m.procName.len <= 150: m.procName.replace('\n', ' ')
|
||||
else: m.procName[0..145].replace('\n', ' ') & " ..."
|
||||
|
||||
# TODO: running variance / standard deviation but the Welford method is quite costly.
|
||||
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
|
||||
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
|
||||
|
@ -40,11 +45,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
|
|||
let throughput = 1e6 / avgTimeUs
|
||||
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
|
||||
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
|
||||
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
|
||||
echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
|
||||
echo lineSep
|
||||
|
||||
else:
|
||||
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
|
||||
const lineSep = &"""|{'-'.repeat(150)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
|
||||
echo "\n"
|
||||
echo lineSep
|
||||
echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
|
||||
|
@ -53,10 +58,15 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
|
|||
for m in metrics:
|
||||
if m.numCalls == 0:
|
||||
continue
|
||||
|
||||
let shortname = block:
|
||||
if m.procName.len <= 150: m.procName.replace('\n', ' ')
|
||||
else: m.procName[0..145].replace('\n', ' ') & " ..."
|
||||
|
||||
# TODO: running variance / standard deviation but the Welford method is quite costly.
|
||||
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
|
||||
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
|
||||
let avgTimeUs = cumulTimeUs / m.numCalls.float64
|
||||
let throughput = 1e6 / avgTimeUs
|
||||
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
|
||||
echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
|
||||
echo lineSep
|
||||
|
|
|
@ -88,7 +88,7 @@ when CTT_METER or CTT_TRACE:
|
|||
let stopTime = getMonoTime()
|
||||
when SupportsGetTicks:
|
||||
let elapsedCycles = stopCycle - startCycle
|
||||
let elapsedTime = inMicroseconds(stopTime - startTime)
|
||||
let elapsedTime = inNanoseconds(stopTime - startTime)
|
||||
|
||||
discard Metrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
|
||||
when SupportsGetTicks:
|
||||
|
|
Loading…
Reference in New Issue