modexp: 2.5x accel on small exponent (#268)

* add metering to modexp

* modexp: accel exponent = 1

* modexp: improve runtime Montgomery constants compute. 2.49x faster on DOS vectors
This commit is contained in:
Mamy Ratsimbazafy 2023-09-09 09:21:05 +02:00 committed by GitHub
parent f3a5f352b8
commit 15757557b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 315 additions and 51 deletions

View File

@ -3,16 +3,16 @@ import
../constantine/math/arithmetic, ../constantine/math/arithmetic,
../constantine/math/io/io_bigints, ../constantine/math/io/io_bigints,
../constantine/platforms/abstractions, ../constantine/platforms/abstractions,
./platforms, ./bench_blueprint ./bench_blueprint
proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) = proc report(op: string, elapsedNs: int64, elapsedCycles: int64, iters: int) =
let ns = elapsedNs div iters let ns = elapsedNs div iters
let cycles = elapsedCycles div iters let cycles = elapsedCycles div iters
let throughput = 1e9 / float64(ns) let throughput = 1e9 / float64(ns)
when SupportsGetTicks: when SupportsGetTicks:
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)" echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op {cycles:>12} CPU cycles (approx)"
else: else:
echo &"{op:<45} {throughput:>15.3f} ops/s {ns:>16} ns/op" echo &"{op:<70} {throughput:>15.3f} ops/s {ns:>16} ns/op"
template bench(fnCall: untyped, ticks, ns: var int64): untyped = template bench(fnCall: untyped, ticks, ns: var int64): untyped =
block: block:
@ -148,11 +148,119 @@ proc dos1() =
(let _ = r.eth_evm_modexp(input)), (let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds) ticks, nanoseconds)
report("EVM Modexp - 32,32,32", nanoseconds, ticks, execsEIP2565) report("EVM Modexp - 32,32,32 - even base & power-of-2 modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations" echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos2() = proc dos2() =
let input = [
# Length of base (1)
uint8 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
# Length of exponent (1)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
# Length of modulus (121)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
# Base
0x33,
# Exponent
0x01,
# Modulus
0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
]
var r = newSeq[byte](121)
var ticks, nanoseconds: int64
let (gasFeeEIP198, gasFeeEIP2565) = computeGasFee(input)
const blockSize = 30000000
let execsEIP198 = blockSize div gasFeeEIP198
let execsEIP2565 = blockSize div gasFeeEIP2565
echo "Gas cost: ", gasFeeEIP198, " gas (EIP-198) - ", execsEIP198, " executions per block"
echo "Gas cost: ", gasFeeEIP2565, " gas (EIP-2565) - ", execsEIP2565, " executions per block"
for i in 0 ..< execsEIP2565:
bench(
(let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds)
report("EVM Modexp - 1,1,121 - exponent=1 and odd modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos2a() =
  ## Benchmarks `eth_evm_modexp` on the shortcuttable variation with an even
  ## modulus: base = 0x33, exponent = 1, 121-byte modulus ending in 0x76.
  ##
  ## The call is repeated as many times as fit in a 30,000,000 gas budget
  ## at the EIP-2565 price, then the accumulated timings are reported.
  let input = [
    # Base length: 1 byte (32-byte field, value in the last byte)
    uint8 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Exponent length: 1 byte
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

    # Modulus length: 121 bytes (0x79)
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

    # Base value
    0x33,

    # Exponent value
    0x01,

    # Modulus value (last byte 0x76, hence even)
    0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
    0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
    0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
    0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
    0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
    0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x76,
  ]

  # Gas pricing under both schemes, and how many calls the budget allows.
  const gasBudget = 30000000
  let (feeEIP198, feeEIP2565) = computeGasFee(input)
  let
    runsEIP198 = gasBudget div feeEIP198
    runsEIP2565 = gasBudget div feeEIP2565

  echo "Gas cost: ", feeEIP198, " gas (EIP-198) - ", runsEIP198, " executions per block"
  echo "Gas cost: ", feeEIP2565, " gas (EIP-2565) - ", runsEIP2565, " executions per block"

  # Output buffer sized like the modulus (121 bytes).
  var output = newSeq[byte](121)
  var cpuTicks, totalNs: int64

  # NOTE(review): `bench` is assumed to accumulate into its two `var`
  # arguments across calls; `report` divides both by the iteration count.
  for _ in 0 ..< runsEIP2565:
    bench(
      (let _ = output.eth_evm_modexp(input)),
      cpuTicks, totalNs)

  report("EVM Modexp - 1,1,121 - exponent=1 and even modulus", totalNs, cpuTicks, runsEIP2565)
  echo "Total time: ", totalNs.float64 / 1e6, " ms for ", runsEIP2565, " iterations"
proc dos2b() =
# even-exponent variation with no shortcut
let input = [ let input = [
# Length of base (1) # Length of base (1)
uint8 0x00, uint8 0x00,
@ -201,10 +309,11 @@ proc dos2() =
(let _ = r.eth_evm_modexp(input)), (let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds) ticks, nanoseconds)
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565) report("EVM Modexp - 1,1,121 - exponent=16 and odd modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations" echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos3() = proc dos2c() =
# odd-exponent variation with no shortcut
let input = [ let input = [
# Length of base (1) # Length of base (1)
@ -254,11 +363,71 @@ proc dos3() =
(let _ = r.eth_evm_modexp(input)), (let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds) ticks, nanoseconds)
report("EVM Modexp - 1,1,121", nanoseconds, ticks, execsEIP2565) report("EVM Modexp - 1,1,121 - exponent=7 and odd modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
proc dos2d() =
# odd-exponent variation with no shortcut and power-of-2 modulus
let input = [
# Length of base (1)
uint8 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
# Length of exponent (1)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
# Length of modulus (121)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
# Base
0x33,
# Exponent
0x07,
# Modulus
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
]
var r = newSeq[byte](121)
var ticks, nanoseconds: int64
let (gasFeeEIP198, gasFeeEIP2565) = computeGasFee(input)
const blockSize = 30000000
let execsEIP198 = blockSize div gasFeeEIP198
let execsEIP2565 = blockSize div gasFeeEIP2565
echo "Gas cost: ", gasFeeEIP198, " gas (EIP-198) - ", execsEIP198, " executions per block"
echo "Gas cost: ", gasFeeEIP2565, " gas (EIP-2565) - ", execsEIP2565, " executions per block"
for i in 0 ..< execsEIP2565:
bench(
(let _ = r.eth_evm_modexp(input)),
ticks, nanoseconds)
report("EVM Modexp - 1,1,121 - exponent=7 and power-of-2 modulus", nanoseconds, ticks, execsEIP2565)
echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations" echo "Total time: ", nanoseconds.float64 / 1e6, " ms for ", execsEIP2565, " iterations"
dos1() dos1()
echo "\n" echo "\n"
dos2() dos2()
echo "\n" echo "\n"
dos3() dos2a()
echo "\n"
dos2b()
echo "\n"
dos2c()
echo "\n"
dos2d()

View File

@ -370,7 +370,7 @@ func eth_evm_ecpairing*(
r[r.len-1] = byte 1 r[r.len-1] = byte 1
return cttEVM_Success return cttEVM_Success
func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime].} = func eth_evm_modexp*(r: var openArray[byte], inputs: openArray[byte]): CttEVMStatus {.noInline, tags:[Alloca, Vartime], meter.} =
## Modular exponentiation ## Modular exponentiation
## ##
## Name: MODEXP ## Name: MODEXP

View File

@ -38,7 +38,7 @@ template `+=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_
template `-=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]), Q: ECP_ShortW_Aff[F, G]) = template `-=`[F; G: static Subgroup](P: var (ECP_ShortW_Jac[F, G] or ECP_ShortW_Prj[F, G]), Q: ECP_ShortW_Aff[F, G]) =
P.msub_vartime(P, Q) P.msub_vartime(P, Q)
func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} = func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication ## **Variable-time** Elliptic Curve Scalar Multiplication
## ##
## P <- [k] P ## P <- [k] P
@ -67,7 +67,7 @@ func scalarMul_doubleAdd_vartime*[EC](P: var EC, scalar: BigInt) {.tags:[VarTime
else: else:
P += Paff P += Paff
func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime].} = func scalarMul_addchain_4bit_vartime[EC](P: var EC, scalar: BigInt) {.tags:[VarTime], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication ## **Variable-time** Elliptic Curve Scalar Multiplication
## This can only handle small scalars, up to 2⁴ = 16 excluded ## This can only handle small scalars, up to 2⁴ = 16 excluded
let s = uint scalar.limbs[0] let s = uint scalar.limbs[0]
@ -206,7 +206,7 @@ func accumNAF[precompSize, NafMax: static int, EC, ECaff](
elif digit < 0: elif digit < 0:
P -= tab[-digit shr 1] P -= tab[-digit shr 1]
func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca].} = func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt, window: static int) {.tags:[VarTime, Alloca], meter.} =
## **Variable-time** Elliptic Curve Scalar Multiplication ## **Variable-time** Elliptic Curve Scalar Multiplication
## ##
## P <- [k] P ## P <- [k] P
@ -246,7 +246,7 @@ func scalarMul_minHammingWeight_windowed_vartime*[EC](P: var EC, scalar: BigInt,
func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC]( func scalarMulEndo_minHammingWeight_windowed_vartime*[scalBits: static int; EC](
P: var EC, P: var EC,
scalar: BigInt[scalBits], scalar: BigInt[scalBits],
window: static int) {.tags:[VarTime, Alloca].} = window: static int) {.tags:[VarTime, Alloca], meter.} =
## Endomorphism-accelerated windowed vartime scalar multiplication ## Endomorphism-accelerated windowed vartime scalar multiplication
## ##
## P <- [k] P ## P <- [k] P

View File

@ -88,7 +88,7 @@ func batchAffine*[N: static int, F, G](
func batchAffine*[F, G]( func batchAffine*[F, G](
affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]], affs: ptr UncheckedArray[ECP_ShortW_Aff[F, G]],
jacs: ptr UncheckedArray[ECP_ShortW_Jac[F, G]], jacs: ptr UncheckedArray[ECP_ShortW_Jac[F, G]],
N: int) {.noInline, tags:[Alloca].} = N: int) {.noInline, tags:[Alloca], meter.} =
# Algorithm: Montgomery's batch inversion # Algorithm: Montgomery's batch inversion
# - Speeding the Pollard and Elliptic Curve Methods of Factorization # - Speeding the Pollard and Elliptic Curve Methods of Factorization
# Section 10.3.1 # Section 10.3.1

View File

@ -46,7 +46,7 @@ func powOddMod_vartime*(
a: openArray[SecretWord], a: openArray[SecretWord],
exponent: openArray[byte], exponent: openArray[byte],
M: openArray[SecretWord], M: openArray[SecretWord],
window: int) {.noInline, tags:[Alloca, VarTime].} = window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
## r <- a^exponent (mod M) with M odd ## r <- a^exponent (mod M) with M odd
## assumes a < M ## assumes a < M
## ##
@ -57,6 +57,12 @@ func powOddMod_vartime*(
let aBits = a.getBits_LE_vartime() let aBits = a.getBits_LE_vartime()
let mBits = M.getBits_LE_vartime() let mBits = M.getBits_LE_vartime()
let eBits = exponent.getBits_BE_vartime()
if eBits == 1:
r.view().reduce(a.view(), aBits, M.view(), mBits)
return
let L = wordsRequired(mBits) let L = wordsRequired(mBits)
let m0ninv = M[0].negInvModWord() let m0ninv = M[0].negInvModWord()
var rMont = allocStackArray(SecretWord, L) var rMont = allocStackArray(SecretWord, L)
@ -97,7 +103,7 @@ func powMod_vartime*(
a: openArray[SecretWord], a: openArray[SecretWord],
exponent: openArray[byte], exponent: openArray[byte],
M: openArray[SecretWord], M: openArray[SecretWord],
window: int) {.noInline, tags:[Alloca, VarTime].} = window: int) {.noInline, tags:[Alloca, VarTime], meter.} =
## r <- a^exponent (mod M) with M odd ## r <- a^exponent (mod M) with M odd
## assumes a < exponent ## assumes a < exponent
## ##

View File

@ -142,7 +142,7 @@ func shlAddMod(a: LimbsViewMut, aLen: int,
func reduce*(r: LimbsViewMut, func reduce*(r: LimbsViewMut,
a: LimbsViewAny, aBits: int, a: LimbsViewAny, aBits: int,
M: LimbsViewConst, mBits: int) = M: LimbsViewConst, mBits: int) {.meter.} =
## Reduce `a` modulo `M` and store the result in `r` ## Reduce `a` modulo `M` and store the result in `r`
## ##
## The modulus `M` most-significant bit at `mBits` MUST be set. ## The modulus `M` most-significant bit at `mBits` MUST be set.

View File

@ -39,6 +39,6 @@ func prod_comba(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.noI
for i in stopEx ..< r.len: for i in stopEx ..< r.len:
r[i] = Zero r[i] = Zero
func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline.}= func prod*(r: var openArray[SecretWord], a, b: openArray[SecretWord]) {.inline, meter.}=
## Extended precision multiplication ## Extended precision multiplication
r.prod_comba(a, b) r.prod_comba(a, b)

View File

@ -30,7 +30,7 @@ import
# Comparison # Comparison
# ------------------------------------------------------------ # ------------------------------------------------------------
func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool = func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool {.meter.} =
## Returns true if a < b ## Returns true if a < b
## Comparison is constant-time ## Comparison is constant-time
var diff: SecretWord var diff: SecretWord
@ -43,7 +43,7 @@ func lt*(a, b: distinct LimbsViewAny, len: int): SecretBool =
# Type-erased add-sub # Type-erased add-sub
# ------------------------------------------------------------ # ------------------------------------------------------------
func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry = func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry {.meter.} =
## Type-erased conditional addition ## Type-erased conditional addition
## Returns the carry ## Returns the carry
## ##
@ -58,7 +58,7 @@ func cadd*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Carry =
addC(result, sum, a[i], b[i], result) addC(result, sum, a[i], b[i], result)
ctl.ccopy(a[i], sum) ctl.ccopy(a[i], sum)
func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow = func csub*(a: LimbsViewMut, b: LimbsViewAny, ctl: SecretBool, len: int): Borrow {.meter.} =
## Type-erased conditional subtraction ## Type-erased conditional subtraction
## Returns the borrow ## Returns the borrow
## ##

View File

@ -17,7 +17,7 @@ import
# #
# ############################################################ # ############################################################
func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) = func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord]) {.meter.} =
## r <- a+b (mod M) ## r <- a+b (mod M)
## assumes a and b are in the range [0, M) ## assumes a and b are in the range [0, M)
@ -43,6 +43,6 @@ func addmod_vartime*(r: var openArray[SecretWord], a, b, M: openArray[SecretWord
for i in 0 ..< r.len: for i in 0 ..< r.len:
r[i] = t[i] r[i] = t[i]
func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline.} = func doublemod_vartime*(r: var openArray[SecretWord], a, M: openArray[SecretWord]) {.inline, meter.} =
## r <- 2a (mod M) ## r <- 2a (mod M)
r.addmod_vartime(a, a, M) r.addmod_vartime(a, a, M)

View File

@ -17,7 +17,7 @@ import
# No exceptions allowed # No exceptions allowed
{.push raises: [], checks: off.} {.push raises: [], checks: off.}
func mod2k_vartime*(a: var openArray[SecretWord], k: uint) = func mod2k_vartime*(a: var openArray[SecretWord], k: uint) {.meter.} =
## a <- a (mod 2ᵏ) ## a <- a (mod 2ᵏ)
const SlotShift = log2_vartime(WordBitWidth.uint32) const SlotShift = log2_vartime(WordBitWidth.uint32)
const SelectMask = WordBitWidth - 1 const SelectMask = WordBitWidth - 1
@ -38,7 +38,7 @@ func mod2k_vartime*(a: var openArray[SecretWord], k: uint) =
for i in hiIndex+1 ..< a.len: for i in hiIndex+1 ..< a.len:
a[i] = Zero a[i] = Zero
func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) = func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.meter.} =
## r <- a - b (mod 2ᵏ) ## r <- a - b (mod 2ᵏ)
debug: debug:
const SlotShift = log2_vartime(WordBitWidth.uint32) const SlotShift = log2_vartime(WordBitWidth.uint32)
@ -63,7 +63,7 @@ func submod2k_vartime*(r{.noAlias.}: var openArray[SecretWord], a, b: openArray[
r.mod2k_vartime(k) r.mod2k_vartime(k)
func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline.} = func mulmod2k_vartime*(r: var openArray[SecretWord], a, b: openArray[SecretWord], k: uint) {.inline, meter.} =
## r <- a*b (mod 2ᵏ) ## r <- a*b (mod 2ᵏ)
r.prod(a, b) r.prod(a, b)
r.mod2k_vartime(k) r.mod2k_vartime(k)
@ -75,7 +75,7 @@ iterator unpackLE(scalarByte: byte): bool =
func powMod2k_vartime*( func powMod2k_vartime*(
r{.noAlias.}: var openArray[SecretWord], r{.noAlias.}: var openArray[SecretWord],
a{.noAlias.}: openArray[SecretWord], a{.noAlias.}: openArray[SecretWord],
exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca].} = exponent: openArray[byte], k: uint) {.noInline, tags: [Alloca], meter.} =
## r <- a^exponent (mod 2ᵏ) ## r <- a^exponent (mod 2ᵏ)
## ##
## Requires: ## Requires:
@ -115,6 +115,13 @@ func powMod2k_vartime*(
r[0] = One # x⁰ = 1, even for 0⁰ r[0] = One # x⁰ = 1, even for 0⁰
return return
if msb == 0: # exponent is 1
for i in 0 ..< min(r.len, a.len):
# range [r.len, a.len) will be truncated (mod 2ᵏ)
r[i] = a[i]
r.mod2k_vartime(k)
return
if a.isEven().bool: if a.isEven().bool:
let aTrailingZeroes = block: let aTrailingZeroes = block:
var i = 0 var i = 0
@ -155,7 +162,7 @@ func powMod2k_vartime*(
func invModBitwidth(a: SecretWord): SecretWord {.borrow.} func invModBitwidth(a: SecretWord): SecretWord {.borrow.}
## Inversion a⁻¹ (mod 2³²) or a⁻¹ (mod 2⁶⁴) ## Inversion a⁻¹ (mod 2³²) or a⁻¹ (mod 2⁶⁴)
func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca].} = func invMod2k_vartime*(r: var openArray[SecretWord], a: openArray[SecretWord], k: uint) {.noInline, tags: [Alloca], meter.} =
## Inversion a⁻¹ (mod 2ᵏ) ## Inversion a⁻¹ (mod 2ᵏ)
## with 2ᵏ a multi-precision integer. ## with 2ᵏ a multi-precision integer.
# #

View File

@ -11,7 +11,8 @@ import
../../platforms/[abstractions, allocs, bithacks], ../../platforms/[abstractions, allocs, bithacks],
./limbs_views, ./limbs_views,
./limbs_mod, ./limbs_mod,
./limbs_fixedprec ./limbs_fixedprec,
./limbs_division
# No exceptions allowed # No exceptions allowed
{.push raises: [], checks: off.} {.push raises: [], checks: off.}
@ -66,18 +67,37 @@ func r_powmod_vartime(r: var openArray[SecretWord], M: openArray[SecretWord], n:
for i in start ..< stop: for i in start ..< stop:
r.doublemod_vartime(r, M) r.doublemod_vartime(r, M)
func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) = func oneMont_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
## Returns 1 in Montgomery domain: ## Returns 1 in Montgomery domain:
r.r_powmod_vartime(M, 1)
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) = # r.r_powmod_vartime(M, 1)
let mBits = getBits_LE_vartime(M)
let t = allocStackArray(SecretWord, M.len + 1)
zeroMem(t, M.len*sizeof(SecretWord))
t[M.len] = One
r.view().reduce(LimbsViewMut t, M.len*WordBitWidth+1, M.view(), mBits)
func r2_vartime*(r: var openArray[SecretWord], M: openArray[SecretWord]) {.meter.} =
## Returns the Montgomery domain magic constant for the input modulus: ## Returns the Montgomery domain magic constant for the input modulus:
## ##
## R² ≡ R² (mod M) with R = (2^WordBitWidth)^numWords ## R² ≡ R² (mod M) with R = (2^WordBitWidth)^numWords
## ##
## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words ## Assuming a field modulus of size 256-bit with 63-bit words, we require 5 words
## R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M) ## R² ≡ ((2^63)^5)^2 (mod M) = 2^630 (mod M)
r.r_powmod_vartime(M, 2)
# r.r_powmod_vartime(M, 2)
let mBits = getBits_LE_vartime(M)
let t = allocStackArray(SecretWord, 2*M.len + 1)
zeroMem(t, 2*M.len*sizeof(SecretWord))
t[2*M.len] = One
r.view().reduce(LimbsViewMut t, 2*M.len*WordBitWidth+1, M.view(), mBits)
# Montgomery multiplication # Montgomery multiplication
# ------------------------------------------ # ------------------------------------------
@ -88,7 +108,7 @@ func mulMont_FIPS*(
M: LimbsViewConst, M: LimbsViewConst,
m0ninv: SecretWord, m0ninv: SecretWord,
mBits: int, mBits: int,
skipFinalSub: static bool = false) {.noInline, tags:[Alloca].} = skipFinalSub: static bool = false) {.noInline, tags:[Alloca], meter.} =
## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS) ## Montgomery Multiplication using Finely Integrated Product Scanning (FIPS)
## ##
## This maps ## This maps
@ -138,7 +158,7 @@ func mulMont_FIPS*(
# ------------------------------------------ # ------------------------------------------
func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst, func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca].} = m0ninv: SecretWord, mBits: int) {.noInline, tags:[Alloca], meter.} =
## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N) ## Transform a bigint ``a`` from its Montgomery N-residue representation (mod N)
## to the regular natural representation (mod N) ## to the regular natural representation (mod N)
## ##
@ -166,7 +186,7 @@ func fromMont*(r: LimbsViewMut, a: LimbsViewAny, M: LimbsViewConst,
r.copyWords(0, t, 0, N) r.copyWords(0, t, 0, N)
func getMont*(r: LimbsViewMut, a: LimbsViewAny, M, r2modM: LimbsViewConst, func getMont*(r: LimbsViewMut, a: LimbsViewAny, M, r2modM: LimbsViewConst,
m0ninv: SecretWord, mBits: int) {.inline.} = m0ninv: SecretWord, mBits: int) {.inline, meter.} =
## Transform a bigint ``a`` from its natural representation (mod N) ## Transform a bigint ``a`` from its natural representation (mod N)
## to the Montgomery n-residue representation ## to the Montgomery n-residue representation
## ##
@ -233,7 +253,7 @@ func powMontPrologue(
m0ninv: SecretWord, m0ninv: SecretWord,
scratchspace: LimbsViewMut, scratchspace: LimbsViewMut,
scratchLen: int, scratchLen: int,
mBits: int): uint {.tags:[Alloca].} = mBits: int): uint {.tags:[Alloca], meter.} =
## Setup the scratchspace ## Setup the scratchspace
## Returns the fixed-window size for exponentiation with window optimization. ## Returns the fixed-window size for exponentiation with window optimization.
# Precompute window content, special case for window = 1 # Precompute window content, special case for window = 1
@ -263,7 +283,7 @@ func powMontSquarings(
tmp: LimbsViewMut, tmp: LimbsViewMut,
window: uint, window: uint,
acc, acc_len: var uint, acc, acc_len: var uint,
e: var int): tuple[k, bits: uint] {.inline.}= e: var int): tuple[k, bits: uint] {.inline, meter.}=
## Squaring step of exponentiation by squaring ## Squaring step of exponentiation by squaring
## Get the next k bits in range [1, window) ## Get the next k bits in range [1, window)
## Square k times ## Square k times
@ -309,7 +329,7 @@ func powMont*(
m0ninv: SecretWord, m0ninv: SecretWord,
scratchspace: LimbsViewMut, scratchspace: LimbsViewMut,
scratchLen: int, scratchLen: int,
mBits: int) = mBits: int) {.meter.} =
## Modular exponentiation r = a^exponent mod M ## Modular exponentiation r = a^exponent mod M
## in the Montgomery domain ## in the Montgomery domain
## ##
@ -379,7 +399,7 @@ func powMont_vartime*(
m0ninv: SecretWord, m0ninv: SecretWord,
scratchspace: LimbsViewMut, scratchspace: LimbsViewMut,
scratchLen: int, scratchLen: int,
mBits: int) {.tags:[VarTime, Alloca].} = mBits: int) {.tags:[VarTime, Alloca], meter.} =
## Modular exponentiation a <- a^exponent (mod M) ## Modular exponentiation a <- a^exponent (mod M)
## in the Montgomery domain ## in the Montgomery domain
## ##

View File

@ -61,7 +61,7 @@ func shrWords(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord]
for i in a.len-w ..< r.len: for i in a.len-w ..< r.len:
r[i] = Zero r[i] = Zero
func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) = func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[SecretWord], k: SomeInteger) {.meter.} =
## Shift `a` right by k bits and store in `r` ## Shift `a` right by k bits and store in `r`
if k == 0: if k == 0:
let min = min(a.len, r.len) let min = min(a.len, r.len)
@ -87,7 +87,7 @@ func shiftRight_vartime*(r {.noalias.}: var openArray[SecretWord], a: openArray[
# Arithmetic # Arithmetic
# -------------------------------------------------------- # --------------------------------------------------------
func neg*(a: var openArray[SecretWord]) = func neg*(a: var openArray[SecretWord]) {.meter.} =
## Computes the additive inverse -a ## Computes the additive inverse -a
## in 2-complement representation ## in 2-complement representation
@ -97,7 +97,7 @@ func neg*(a: var openArray[SecretWord]) =
for i in 1 ..< a.len: for i in 1 ..< a.len:
addC(carry, a[i], not(a[i]), Zero, carry) addC(carry, a[i], not(a[i]), Zero, carry)
func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool = func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
## r <- a + b ## r <- a + b
## and ## and
## returns the carry ## returns the carry
@ -130,7 +130,7 @@ func addMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord
else: else:
return bool carry return bool carry
func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool = func subMP*(r {.noAlias.}: var openArray[SecretWord], a, b: openArray[SecretWord]): bool {.meter.} =
## r <- a - b ## r <- a - b
## and ## and
## returns false if a >= b ## returns false if a >= b

View File

@ -104,8 +104,7 @@ func rawCopy*(
dStart: SomeInteger, dStart: SomeInteger,
src: openArray[byte], src: openArray[byte],
sStart: SomeInteger, sStart: SomeInteger,
len: SomeInteger len: SomeInteger) {.inline.} =
) {.inline.} =
## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len] ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
## Unlike the standard library, this cannot throw ## Unlike the standard library, this cannot throw
## even a defect. ## even a defect.

53
metering/m_modexp.nim Normal file
View File

@ -0,0 +1,53 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.
import
./reports, ./tracer,
../constantine/ethereum_evm_precompiles,
../constantine/platforms/abstractions
# MODEXP precompile input: three 32-byte length fields followed by the
# base, exponent and modulus bytes.
let modexpInput = [
  # Base length: 1 byte (value in the last byte of the 32-byte field)
  uint8 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

  # Exponent length: 1 byte
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,

  # Modulus length: 121 bytes (0x79)
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,

  # Base value
  0x33,

  # Exponent value
  0x07,

  # Modulus value (last byte 0x77, hence odd)
  0x04, 0xea, 0xbb, 0x12, 0x55, 0x88, 0xd7, 0x3c, 0xad, 0x22, 0xea, 0x2b, 0x4a, 0x77, 0x6e, 0x9d,
  0x4d, 0xfc, 0x13, 0xa8, 0x1b, 0xf9, 0x0c, 0x0d, 0x37, 0xe8, 0x4e, 0x8b, 0xeb, 0xb2, 0xa5, 0x48,
  0x8b, 0x2c, 0x87, 0x6d, 0x13, 0x51, 0x75, 0xeb, 0x97, 0xc6, 0x13, 0xd9, 0x06, 0xce, 0x8b, 0x53,
  0xd0, 0x02, 0x68, 0xb8, 0xd6, 0x12, 0xab, 0x8b, 0x15, 0x0c, 0xef, 0x0a, 0xd0, 0x3b, 0x73, 0xd2,
  0xdb, 0x9d, 0x2a, 0xa5, 0x23, 0x70, 0xdc, 0x26, 0x55, 0x80, 0xca, 0xf2, 0xc0, 0x18, 0xe3, 0xe3,
  0x1b, 0xad, 0xd5, 0x22, 0xdd, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x1c, 0x05, 0x71, 0x52, 0x7c, 0x3a, 0xb0, 0x77,
]

# Output buffer sized like the modulus (121 bytes).
var output = newSeq[byte](121)

# NOTE(review): `resetMetering` presumably clears counters accumulated so
# far, so the report below covers only this single call - confirm in tracer.
resetMetering()
let outcome = output.eth_evm_modexp(modexpInput)
doAssert outcome == cttEVM_Success

# Label the report with the backend selected at compile time.
const flags =
  when UseASM_X86_64 or UseASM_X86_32: "UseAssembly"
  else: "NoAssembly"
reportCli(Metrics, flags)

View File

@ -33,6 +33,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
for m in metrics: for m in metrics:
if m.numCalls == 0: if m.numCalls == 0:
continue continue
let shortname = block:
if m.procName.len <= 150: m.procName.replace('\n', ' ')
else: m.procName[0..145].replace('\n', ' ') & " ..."
# TODO: running variance / standard deviation but the Welford method is quite costly. # TODO: running variance / standard deviation but the Welford method is quite costly.
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm # https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3 let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
@ -40,11 +45,11 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
let throughput = 1e6 / avgTimeUs let throughput = 1e6 / avgTimeUs
let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9 let cumulCyclesBillions = m.cumulatedCycles.float64 * 1e-9
let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64 let avgCyclesBillions = cumulCyclesBillions / m.numCalls.float64
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|""" echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep echo lineSep
else: else:
const lineSep = &"""|{'-'.repeat(50)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|""" const lineSep = &"""|{'-'.repeat(150)}|{'-'.repeat(14)}|{'-'.repeat(20)}|{'-'.repeat(15)}|{'-'.repeat(17)}|"""
echo "\n" echo "\n"
echo lineSep echo lineSep
echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|""" echo &"""|{"Procedures":^150}|{"# of Calls":^14}|{"Throughput (ops/s)":^20}|{"Time (µs)":^15}|{"Avg Time (µs)":^17}|"""
@ -53,10 +58,15 @@ proc reportCli*(metrics: seq[Metadata], flags: string) =
for m in metrics: for m in metrics:
if m.numCalls == 0: if m.numCalls == 0:
continue continue
let shortname = block:
if m.procName.len <= 150: m.procName.replace('\n', ' ')
else: m.procName[0..145].replace('\n', ' ') & " ..."
# TODO: running variance / standard deviation but the Welford method is quite costly. # TODO: running variance / standard deviation but the Welford method is quite costly.
# https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm # https://nim-lang.org/docs/stats.html / https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3 let cumulTimeUs = m.cumulatedTimeNs.float64 * 1e-3
let avgTimeUs = cumulTimeUs / m.numCalls.float64 let avgTimeUs = cumulTimeUs / m.numCalls.float64
let throughput = 1e6 / avgTimeUs let throughput = 1e6 / avgTimeUs
echo &"""|{m.procName:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|""" echo &"""|{shortname:<150}|{m.numCalls:>14}|{throughput:>20.3f}|{cumulTimeUs:>15.3f}|{avgTimeUs:>17.3f}|"""
echo lineSep echo lineSep

View File

@ -88,7 +88,7 @@ when CTT_METER or CTT_TRACE:
let stopTime = getMonoTime() let stopTime = getMonoTime()
when SupportsGetTicks: when SupportsGetTicks:
let elapsedCycles = stopCycle - startCycle let elapsedCycles = stopCycle - startCycle
let elapsedTime = inMicroseconds(stopTime - startTime) let elapsedTime = inNanoseconds(stopTime - startTime)
discard Metrics[id].cumulatedTimeNs.atomicInc(elapsedTime) discard Metrics[id].cumulatedTimeNs.atomicInc(elapsedTime)
when SupportsGetTicks: when SupportsGetTicks: