mirror of
https://github.com/logos-storage/constantine.git
synced 2026-01-02 13:13:07 +00:00
30% faster constant-time inversion
This commit is contained in:
parent
1958356a09
commit
bde619155b
@ -1,6 +0,0 @@
|
||||
# Benchmark of Ethereum 1 and Ethereum 2 elliptic curves
|
||||
|
||||
import
|
||||
./secp256k1_fp,
|
||||
./bn254_fp,
|
||||
./bls12_381_fp
|
||||
157
benchmarks/bench_finite_fields.nim
Normal file
157
benchmarks/bench_finite_fields.nim
Normal file
@ -0,0 +1,157 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of finite fields
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
import
|
||||
# Internals
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic,
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
../constantine/primitives,
|
||||
# Helpers
|
||||
../helpers/[timers, prng, static_for],
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
const AvailableCurves = [
|
||||
P224,
|
||||
BN254,
|
||||
P256,
|
||||
Secp256k1,
|
||||
BLS12_381
|
||||
]
|
||||
|
||||
var rng: RngState
|
||||
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
|
||||
rng.seed(seed)
|
||||
echo "bench_finite_field xoshiro512** seed: ", seed
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
echo "All benchmarks are using constant-time implementations to protect against side-channel attacks."
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
echo "\nCompiled with Clang"
|
||||
elif defined(vcc):
|
||||
echo "\nCompiled with MSVC"
|
||||
elif defined(icc):
|
||||
echo "\nCompiled with ICC"
|
||||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
when defined(i386) or defined(amd64):
|
||||
import ../helpers/x86
|
||||
echo "Running on ", cpuName(), "\n\n"
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles"
|
||||
|
||||
macro fixFieldDisplay(T: typedesc): untyped =
|
||||
# At compile-time, enums are integers and their display is buggy
|
||||
# we get the Curve ID instead of the curve name.
|
||||
let instantiated = T.getTypeInst()
|
||||
var name = $instantiated[1][0] # Fp
|
||||
name.add "[" & $Curve(instantiated[1][1].intVal) & "]"
|
||||
result = newLit name
|
||||
|
||||
# Compilers are smart with dead code (but not with multiprecision arithmetic :/)
|
||||
var globalsAreNotOptimizedAway: Word
|
||||
|
||||
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< iters:
|
||||
body
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
|
||||
report(op, fixFieldDisplay(T), start, stop, startClk, stopClk, iters)
|
||||
|
||||
proc addBench(T: typedesc) =
|
||||
var x = rng.random(T)
|
||||
let y = rng.random(T)
|
||||
bench("Addition", T, Iters):
|
||||
x += y
|
||||
globalsAreNotOptimizedAway += x.mres.limbs[^1]
|
||||
|
||||
proc subBench(T: typedesc) =
|
||||
var x = rng.random(T)
|
||||
let y = rng.random(T)
|
||||
preventOptimAway(x)
|
||||
bench("Substraction", T, Iters):
|
||||
x -= y
|
||||
globalsAreNotOptimizedAway += x.mres.limbs[^1]
|
||||
|
||||
proc negBench(T: typedesc) =
|
||||
var r: T
|
||||
let x = rng.random(T)
|
||||
bench("Negation", T, Iters):
|
||||
r.neg(x)
|
||||
globalsAreNotOptimizedAway += r.mres.limbs[^1]
|
||||
|
||||
proc mulBench(T: typedesc) =
|
||||
var r: T
|
||||
let x = rng.random(T)
|
||||
let y = rng.random(T)
|
||||
preventOptimAway(r)
|
||||
bench("Multiplication", T, Iters):
|
||||
r.prod(x, y)
|
||||
|
||||
proc sqrBench(T: typedesc) =
|
||||
var r: T
|
||||
let x = rng.random(T)
|
||||
preventOptimAway(r)
|
||||
bench("Squaring", T, Iters):
|
||||
r.square(x)
|
||||
|
||||
proc invBench(T: typedesc) =
|
||||
var r: T
|
||||
let x = rng.random(T)
|
||||
preventOptimAway(r)
|
||||
bench("Inversion", T, InvIters):
|
||||
r.inv(x)
|
||||
|
||||
proc main() =
|
||||
echo "-".repeat(80)
|
||||
staticFor i, 0, AvailableCurves.len:
|
||||
const curve = AvailableCurves[i]
|
||||
addBench(Fp[curve])
|
||||
subBench(Fp[curve])
|
||||
negBench(Fp[curve])
|
||||
mulBench(Fp[curve])
|
||||
sqrBench(Fp[curve])
|
||||
invBench(Fp[curve])
|
||||
echo "-".repeat(80)
|
||||
|
||||
main()
|
||||
|
||||
echo "Notes:"
|
||||
echo " GCC is significantly slower than Clang on multiprecision arithmetic."
|
||||
@ -1,48 +0,0 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of the conversion from Big Int to Fq
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# 2 implementations are possible
|
||||
# - 1 based on Montgomery Multiplication
|
||||
# - 1 based on modular left shift which involves multiple divisions
|
||||
|
||||
import
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/[bigints_checked, finite_fields],
|
||||
random, std/monotimes, times, strformat
|
||||
|
||||
const Iters = 1_000_000
|
||||
|
||||
randomize(1234)
|
||||
|
||||
proc main() =
|
||||
var x: BigInt[381]
|
||||
x.setInternalBitLength()
|
||||
for i in 0 ..< x.limbs.len - 1:
|
||||
# Set x to a random value guaranteed below the prime
|
||||
x.limbs[i] = Word(rand(BaseType.high.int))
|
||||
|
||||
|
||||
let start = getMonotime()
|
||||
for _ in 0 ..< Iters:
|
||||
let y = Fq[BLS12_381].fromBig(x)
|
||||
let stop = getMonotime()
|
||||
|
||||
echo &"Time for {Iters} iterations: {inMilliseconds(stop-start)} ms"
|
||||
|
||||
|
||||
main()
|
||||
# 1_000_000 iterations with -d:danger on i9-9980XE all-core turbo 4.1GHz
|
||||
# Montgomery Multiplication based: 254ms
|
||||
# shlAddMod based (using assembly div2n1n!!): 907 ms
|
||||
# Note: shlAddMod will be even slower when division is made constant-time
|
||||
@ -1,155 +0,0 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of modular exponentiation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# 2 implementations are available
|
||||
# - 1 is constant time
|
||||
# - 1 exposes the exponent bits to:
|
||||
# timing attack,
|
||||
# memory access analysis,
|
||||
# power analysis (i.e. oscilloscopes on embedded)
|
||||
# It is suitable for public exponents for example
|
||||
# to compute modular inversion via the Fermat method
|
||||
|
||||
import
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/[bigints, finite_fields],
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
random, std/monotimes, times, strformat,
|
||||
./timers
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
|
||||
randomize(1234)
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles"
|
||||
|
||||
proc addBench() =
|
||||
var x, y: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# BLS12-381 prime - 2
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x += y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Addition", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
|
||||
addBench()
|
||||
|
||||
proc subBench() =
|
||||
var x, y: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# BLS12-381 prime - 2
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x -= y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Substraction", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
subBench()
|
||||
|
||||
proc negBench() =
|
||||
var r, x: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.neg(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Negation", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
negBench()
|
||||
|
||||
proc mulBench() =
|
||||
var r, x, y: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# BLS12-381 prime - 2
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.prod(x, y)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Multiplication", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
mulBench()
|
||||
|
||||
proc sqrBench() =
|
||||
var r, x: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.square(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Squaring", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
sqrBench()
|
||||
|
||||
proc invBench() =
|
||||
# TODO: having x on the stack triggers stack smashing detection. To be investigated
|
||||
var x: ref Fp[BLS12_381]
|
||||
new x
|
||||
# BN254 field modulus
|
||||
x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< InvIters:
|
||||
# Note: we don't copy the original x so x is alterning between x and x^-1
|
||||
inv(x[])
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Inversion", "Fp[BLS12_381]", start, stop, startClk, stopClk, InvIters)
|
||||
|
||||
invBench()
|
||||
@ -1,155 +0,0 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of modular exponentiation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# 2 implementations are available
|
||||
# - 1 is constant time
|
||||
# - 1 exposes the exponent bits to:
|
||||
# timing attack,
|
||||
# memory access analysis,
|
||||
# power analysis (i.e. oscilloscopes on embedded)
|
||||
# It is suitable for public exponents for example
|
||||
# to compute modular inversion via the Fermat method
|
||||
|
||||
import
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/[bigints, finite_fields],
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
random, std/monotimes, times, strformat,
|
||||
./timers
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
|
||||
randomize(1234)
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles"
|
||||
|
||||
proc addBench() =
|
||||
var x, y: Fp[BN254]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x += y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Addition", "Fp[BN254]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
|
||||
addBench()
|
||||
|
||||
proc subBench() =
|
||||
var x, y: Fp[BN254]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x -= y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Substraction", "Fp[BN254]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
subBench()
|
||||
|
||||
proc negBench() =
|
||||
var r, x: Fp[BN254]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.neg(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Negation", "Fp[BN254]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
negBench()
|
||||
|
||||
proc mulBench() =
|
||||
var r, x, y: Fp[BN254]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.prod(x, y)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Multiplication", "Fp[BN254]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
mulBench()
|
||||
|
||||
proc sqrBench() =
|
||||
var r, x: Fp[BN254]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.square(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Squaring", "Fp[BN254]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
sqrBench()
|
||||
|
||||
proc invBench() =
|
||||
# TODO: having x on the stack triggers stack smashing detection. To be investigated
|
||||
var x: ref Fp[BN254]
|
||||
new x
|
||||
# BN254 field modulus
|
||||
x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< InvIters:
|
||||
# Note: we don't copy the original x so x is alterning between x and x^-1
|
||||
inv(x[])
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Inversion", "Fp[BN254]", start, stop, startClk, stopClk, InvIters)
|
||||
|
||||
invBench()
|
||||
@ -1,69 +0,0 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of modular exponentiation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# 2 implementations are available
|
||||
# - 1 is constant time
|
||||
# - 1 exposes the exponent bits to:
|
||||
# timing attack,
|
||||
# memory access analysis,
|
||||
# power analysis (i.e. oscilloscopes on embedded)
|
||||
# It is suitable for public exponents for example
|
||||
# to compute modular inversion via the Fermat method
|
||||
|
||||
import
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/[bigints_checked, finite_fields],
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
random, std/monotimes, times, strformat
|
||||
|
||||
const Iters = 10_000
|
||||
|
||||
randomize(1234)
|
||||
|
||||
proc mainCT() =
|
||||
var x: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# BLS12-381 prime - 2
|
||||
let exponent = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||
|
||||
let start = getMonotime()
|
||||
for _ in 0 ..< Iters:
|
||||
var y = x
|
||||
x.pow(exponent)
|
||||
let stop = getMonotime()
|
||||
|
||||
echo &"Time for {Iters} iterations (constant-time 381-bit): {inMilliseconds(stop-start)} ms"
|
||||
|
||||
proc mainUnsafe() =
|
||||
var x: Fp[BLS12_381]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# BLS12-381 prime - 2
|
||||
let exponent = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9")
|
||||
|
||||
let start = getMonotime()
|
||||
for _ in 0 ..< Iters:
|
||||
var y = x
|
||||
x.powUnsafeExponent(exponent)
|
||||
let stop = getMonotime()
|
||||
|
||||
echo &"Time for {Iters} iterations (unsafe 381-bit): {inMilliseconds(stop-start)} ms"
|
||||
|
||||
mainCT()
|
||||
mainUnsafe()
|
||||
# 10_000 iterations with -d:danger on i9-9980XE all-core turbo 4.1GHz
|
||||
|
||||
# Time for 10000 iterations (constant-time 381-bit): 1349 ms
|
||||
# Time for 10000 iterations (unsafe 381-bit): 1226 ms
|
||||
@ -1,155 +0,0 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark of modular exponentiation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# 2 implementations are available
|
||||
# - 1 is constant time
|
||||
# - 1 exposes the exponent bits to:
|
||||
# timing attack,
|
||||
# memory access analysis,
|
||||
# power analysis (i.e. oscilloscopes on embedded)
|
||||
# It is suitable for public exponents for example
|
||||
# to compute modular inversion via the Fermat method
|
||||
|
||||
import
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/[bigints, finite_fields],
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
random, std/monotimes, times, strformat,
|
||||
./timers
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
|
||||
randomize(1234)
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "==========================================================================================================\n"
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles"
|
||||
|
||||
proc addBench() =
|
||||
var x, y: Fp[Secp256k1]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x += y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Addition", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
|
||||
addBench()
|
||||
|
||||
proc subBench() =
|
||||
var x, y: Fp[Secp256k1]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
x -= y
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Substraction", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
subBench()
|
||||
|
||||
proc negBench() =
|
||||
var r, x: Fp[Secp256k1]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.neg(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Negation", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
negBench()
|
||||
|
||||
proc mulBench() =
|
||||
var r, x, y: Fp[Secp256k1]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
# Truncated BLS12-381 prime
|
||||
y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.prod(x, y)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Multiplication", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
mulBench()
|
||||
|
||||
proc sqrBench() =
|
||||
var r, x: Fp[Secp256k1]
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< Iters:
|
||||
r.square(x)
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Squaring", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters)
|
||||
|
||||
sqrBench()
|
||||
|
||||
proc invBench() =
|
||||
# TODO: having x on the stack triggers stack smashing detection. To be investigated
|
||||
var x: ref Fp[Secp256k1]
|
||||
new x
|
||||
# BN254 field modulus
|
||||
x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let start = getMonotime()
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< InvIters:
|
||||
# Note: we don't copy the original x so x is alterning between x and x^-1
|
||||
inv(x[])
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
report("Inversion", "Fp[Secp256k1]", start, stop, startClk, stopClk, InvIters)
|
||||
|
||||
invBench()
|
||||
@ -1,49 +0,0 @@
|
||||
when defined(i386) or defined(amd64):
|
||||
# From Linux
|
||||
#
|
||||
# The RDTSC instruction is not ordered relative to memory
|
||||
# access. The Intel SDM and the AMD APM are both vague on this
|
||||
# point, but empirically an RDTSC instruction can be
|
||||
# speculatively executed before prior loads. An RDTSC
|
||||
# immediately after an appropriate barrier appears to be
|
||||
# ordered as a normal load, that is, it provides the same
|
||||
# ordering guarantees as reading from a global memory location
|
||||
# that some other imaginary CPU is updating continuously with a
|
||||
# time stamp.
|
||||
#
|
||||
# From Intel SDM
|
||||
# https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
|
||||
when not defined(vcc):
|
||||
when defined(amd64):
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
var lo, hi: int64
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`lo`), "=d"(`hi`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
return (hi shl 32) or lo
|
||||
else:
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`result`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
else:
|
||||
proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "<intrin.h>".}
|
||||
proc lfence() {.importc: "__mm_lfence", header: "<intrin.h>".}
|
||||
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
lfence()
|
||||
return rdtsc()
|
||||
else:
|
||||
{.error: "getticks is not supported on this CPU architecture".}
|
||||
@ -110,3 +110,18 @@ task test_no_gmp, "Run tests that don't require GMP":
|
||||
|
||||
# 𝔽p2
|
||||
test "", "tests/test_fp2.nim"
|
||||
|
||||
task bench, "Run benchmark with your default compiler":
|
||||
if not dirExists "build":
|
||||
mkDir "build"
|
||||
exec "nim c -d:danger --verbosity:0 -o:build/bench_finite_fields_default -r --hints:off --warnings:off benchmarks/bench_finite_fields"
|
||||
|
||||
task bench_gcc, "Run benchmark with gcc":
|
||||
if not dirExists "build":
|
||||
mkDir "build"
|
||||
exec "nim c --cc:gcc -d:danger --verbosity:0 -o:build/bench_finite_fields_gcc -r --hints:off --warnings:off benchmarks/bench_finite_fields"
|
||||
|
||||
task bench_clang, "Run benchmark with clang":
|
||||
if not dirExists "build":
|
||||
mkDir "build"
|
||||
exec "nim c --cc:clang -d:danger --verbosity:0 -o:build/bench_finite_fields_clang -r --hints:off --warnings:off benchmarks/bench_finite_fields"
|
||||
|
||||
17
constantine/arithmetic.nim
Normal file
17
constantine/arithmetic.nim
Normal file
@ -0,0 +1,17 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
arithmetic/[limbs, limbs_modular, limbs_montgomery],
|
||||
arithmetic/bigints,
|
||||
arithmetic/[finite_fields, finite_fields_inversion]
|
||||
|
||||
export
|
||||
limbs, limbs_modular, limbs_montgomery,
|
||||
bigints,
|
||||
finite_fields, finite_fields_inversion
|
||||
@ -9,8 +9,7 @@
|
||||
import
|
||||
../config/common,
|
||||
../primitives,
|
||||
./limbs,
|
||||
./montgomery
|
||||
./limbs, ./limbs_montgomery, ./limbs_modular
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -87,25 +86,16 @@ debug:
|
||||
func `$`*(a: BigInt): string =
|
||||
result = "BigInt["
|
||||
result.add $BigInt.bits
|
||||
result.add "](limbs: ["
|
||||
result.add $BaseType(a.limbs[0]) & " (0x" & toHex(BaseType(a.limbs[0])) & ')'
|
||||
for i in 1 ..< a.limbs.len:
|
||||
result.add ", "
|
||||
result.add $BaseType(a.limbs[i]) & " (0x" & toHex(BaseType(a.limbs[i])) & ')'
|
||||
result.add "])"
|
||||
result.add "](limbs: "
|
||||
result.add a.limbs.toString()
|
||||
result.add ")"
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
{.push inline.}
|
||||
|
||||
func `==`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if 2 big ints are equal
|
||||
## Comparison is constant-time
|
||||
a.limbs == b.limbs
|
||||
|
||||
func isZero*(a: BigInt): CTBool[Word] =
|
||||
## Returns true if a big int is equal to zero
|
||||
a.limbs.isZero
|
||||
# Initialization
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func setZero*(a: var BigInt) =
|
||||
## Set a BigInt to 0
|
||||
@ -115,6 +105,51 @@ func setOne*(a: var BigInt) =
|
||||
## Set a BigInt to 1
|
||||
a.limbs.setOne()
|
||||
|
||||
# Copy
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func ccopy*(a: var BigInt, b: BigInt, ctl: CTBool[Word]) =
|
||||
## Constant-time conditional copy
|
||||
## If ctl is true: b is copied into a
|
||||
## if ctl is false: b is not copied and a is untouched
|
||||
## Time and memory accesses are the same whether a copy occurs or not
|
||||
ccopy(a.limbs, b.limbs, ctl)
|
||||
|
||||
func cswap*(a, b: var BigInt, ctl: CTBool) =
|
||||
## Swap ``a`` and ``b`` if ``ctl`` is true
|
||||
##
|
||||
## Constant-time:
|
||||
## Whether ``ctl`` is true or not, the same
|
||||
## memory accesses are done (unless the compiler tries to be clever)
|
||||
cswap(a.limbs, b.limbs, ctl)
|
||||
|
||||
# Comparison
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func `==`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if 2 big ints are equal
|
||||
## Comparison is constant-time
|
||||
a.limbs == b.limbs
|
||||
|
||||
func `<`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if a < b
|
||||
a.limbs < b.limbs
|
||||
|
||||
func `<=`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if a <= b
|
||||
a.limbs <= b.limbs
|
||||
|
||||
func isZero*(a: BigInt): CTBool[Word] =
|
||||
## Returns true if a big int is equal to zero
|
||||
a.limbs.isZero
|
||||
|
||||
func isOdd*(a: BigInt): CTBool[Word] =
|
||||
## Returns true if a is odd
|
||||
a.limbs.isOdd
|
||||
|
||||
# Arithmetic
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func cadd*(a: var BigInt, b: BigInt, ctl: CTBool[Word]): CTBool[Word] =
|
||||
## Constant-time in-place conditional addition
|
||||
## The addition is only performed if ctl is "true"
|
||||
@ -133,19 +168,16 @@ func cdouble*(a: var BigInt, ctl: CTBool[Word]): CTBool[Word] =
|
||||
## The result carry is always computed.
|
||||
(CTBool[Word]) cadd(a.limbs, a.limbs, ctl)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# BigInt Primitives Optimized for speed
|
||||
#
|
||||
# ############################################################
|
||||
#
|
||||
# TODO: fallback to cadd / csub with a "size" compile-option
|
||||
|
||||
func add*(a: var BigInt, b: BigInt): CTBool[Word] =
|
||||
## Constant-time in-place addition
|
||||
## Returns the carry
|
||||
(CTBool[Word]) add(a.limbs, b.limbs)
|
||||
|
||||
func add*(a: var BigInt, b: Word): CTBool[Word] =
|
||||
## Constant-time in-place addition
|
||||
## Returns the carry
|
||||
(CTBool[Word]) add(a.limbs, b)
|
||||
|
||||
func sub*(a: var BigInt, b: BigInt): CTBool[Word] =
|
||||
## Constant-time in-place substraction
|
||||
## Returns the borrow
|
||||
@ -177,19 +209,23 @@ func double*(r: var BigInt, a: BigInt): CTBool[Word] =
|
||||
## Returns the carry
|
||||
(CTBool[Word]) sum(r.limbs, a.limbs, a.limbs)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Comparisons
|
||||
#
|
||||
# ############################################################
|
||||
func div2*(a: var BigInt) =
|
||||
## In-place divide ``a`` by 2
|
||||
a.limbs.shiftRight(1)
|
||||
|
||||
func `<`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if a < b
|
||||
a.limbs < b.limbs
|
||||
func cneg*(a: var BigInt, ctl: CTBool) =
|
||||
## Conditional negation.
|
||||
## Negate if ``ctl`` is true
|
||||
a.limbs.cneg(ctl)
|
||||
|
||||
func `<=`*(a, b: BigInt): CTBool[Word] =
|
||||
## Returns true if a <= b
|
||||
a.limbs <= b.limbs
|
||||
# Bit Manipulation
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func shiftRight*(a: var BigInt, k: int) =
|
||||
## Shift right by k.
|
||||
##
|
||||
## k MUST be less than the base word size (2^31 or 2^63)
|
||||
a.limbs.shiftRight(k)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -209,6 +245,22 @@ func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBi
|
||||
# pass a pointer+length to a fixed session of the BSS.
|
||||
reduce(r.limbs, a.limbs, aBits, M.limbs, mBits)
|
||||
|
||||
func steinsGCD*[bits](r: var BigInt[bits], a, F, M, mp1div2: BigInt[bits]) =
|
||||
## Compute F multiplied the modular inverse of ``a`` modulo M
|
||||
## r ≡ F . a^-1 (mod M)
|
||||
##
|
||||
## M MUST be odd, M does not need to be prime.
|
||||
## ``a`` MUST be less than M.
|
||||
r.limbs.steinsGCD(a.limbs, F.limbs, M.limbs, bits, mp1div2.limbs)
|
||||
|
||||
func invmod*[bits](r: var BigInt[bits], a, M, mp1div2: BigInt[bits]) =
|
||||
## Compute the modular inverse of ``a`` modulo M
|
||||
##
|
||||
## The modulus ``M`` MUST be odd
|
||||
var one {.noInit.}: BigInt[bits]
|
||||
one.setOne()
|
||||
r.steinsGCD(a, one, M, mp1div2)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Montgomery Arithmetic
|
||||
@ -335,3 +387,6 @@ func montyPowUnsafeExponent*[mBits: static int](
|
||||
else: (1 shl windowSize) + 1
|
||||
var scratchSpace {.noInit.}: array[scratchLen, Limbs[mBits.wordsRequired]]
|
||||
montyPowUnsafeExponent(a.limbs, exponent, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul)
|
||||
|
||||
{.pop.} # inline
|
||||
{.pop.} # raises no exceptions
|
||||
|
||||
@ -27,7 +27,7 @@
|
||||
import
|
||||
../primitives,
|
||||
../config/[common, curves],
|
||||
./bigints, ./montgomery
|
||||
./bigints, ./limbs_montgomery
|
||||
|
||||
# type
|
||||
# `Fp`*[C: static Curve] = object
|
||||
@ -64,7 +64,7 @@ func fromBig*[C: static Curve](dst: var Fp[C], src: BigInt) {.noInit.} =
|
||||
dst.mres.montyResidue(src, C.Mod.mres, C.getR2modP(), C.getNegInvModWord(), C.canUseNoCarryMontyMul())
|
||||
|
||||
func toBig*(src: Fp): auto {.noInit.} =
|
||||
## Convert a finite-field element to a BigInt in natral representation
|
||||
## Convert a finite-field element to a BigInt in natural representation
|
||||
var r {.noInit.}: typeof(src.mres)
|
||||
r.redc(src.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord(), Fp.C.canUseNoCarryMontyMul())
|
||||
return r
|
||||
@ -193,17 +193,6 @@ func powUnsafeExponent*(a: var Fp, exponent: BigInt) =
|
||||
Fp.C.canUseNoCarryMontyMul()
|
||||
)
|
||||
|
||||
func inv*(a: var Fp) =
|
||||
## Inversion modulo p
|
||||
## Warning ⚠️ :
|
||||
## - This assumes that `Fp` is a prime field
|
||||
const windowSize = 5 # TODO: find best window size for each curves
|
||||
a.mres.montyPowUnsafeExponent(
|
||||
Fp.C.getInvModExponent(),
|
||||
Fp.C.Mod.mres, Fp.C.getMontyOne(),
|
||||
Fp.C.getNegInvModWord(), windowSize,
|
||||
Fp.C.canUseNoCarryMontyMul()
|
||||
)
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -235,12 +224,8 @@ func `*`*(a, b: Fp): Fp {.noInit.} =
|
||||
|
||||
func `*=`*(a: var Fp, b: Fp) =
|
||||
## Multiplication modulo p
|
||||
##
|
||||
## Implementation note:
|
||||
## - This requires a temporary field element
|
||||
##
|
||||
## Cost
|
||||
## Stack: 1 * ModulusBitSize
|
||||
var tmp{.noInit.}: Fp
|
||||
tmp.prod(a, b)
|
||||
a = tmp
|
||||
a.prod(a, b)
|
||||
|
||||
func square*(a: var Fp) =
|
||||
## Squaring modulo p
|
||||
a.mres.montySquare(a.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord(), Fp.C.canUseNoCarryMontySquare())
|
||||
|
||||
133
constantine/arithmetic/finite_fields_inversion.nim
Normal file
133
constantine/arithmetic/finite_fields_inversion.nim
Normal file
@ -0,0 +1,133 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../config/[common, curves],
|
||||
./finite_fields
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Specialized inversions
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# Field-specific inversion routines
|
||||
template repeat(num: int, body: untyped) =
|
||||
for _ in 0 ..< num:
|
||||
body
|
||||
|
||||
# Secp256k1
|
||||
# ------------------------------------------------------------
|
||||
func invmod_addchain(r: var Fp[Secp256k1], a: Fp[Secp256k1]) =
|
||||
## We invert via Little Fermat's theorem
|
||||
## a^(-1) ≡ a^(p-2) (mod p)
|
||||
## with p = "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F"
|
||||
## We take advantage of the prime special form to hardcode
|
||||
## the sequence of squarings and multiplications for the modular exponentiation
|
||||
##
|
||||
## See libsecp256k1
|
||||
##
|
||||
## The binary representation of (p - 2) has 5 blocks of 1s, with lengths in
|
||||
## { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
|
||||
## [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
|
||||
|
||||
var
|
||||
x2{.noInit.}: Fp[Secp256k1]
|
||||
x3{.noInit.}: Fp[Secp256k1]
|
||||
x6{.noInit.}: Fp[Secp256k1]
|
||||
x9{.noInit.}: Fp[Secp256k1]
|
||||
x11{.noInit.}: Fp[Secp256k1]
|
||||
x22{.noInit.}: Fp[Secp256k1]
|
||||
x44{.noInit.}: Fp[Secp256k1]
|
||||
x88{.noInit.}: Fp[Secp256k1]
|
||||
x176{.noInit.}: Fp[Secp256k1]
|
||||
x220{.noInit.}: Fp[Secp256k1]
|
||||
x223{.noInit.}: Fp[Secp256k1]
|
||||
|
||||
x2.square(a)
|
||||
x2 *= a
|
||||
|
||||
x3.square(x2)
|
||||
x3 *= a
|
||||
|
||||
x6 = x3
|
||||
repeat 3: x6.square()
|
||||
x6 *= x3
|
||||
|
||||
x9 = x6
|
||||
repeat 3: x9.square()
|
||||
x9 *= x3
|
||||
|
||||
x11 = x9
|
||||
repeat 2: x11.square()
|
||||
x11 *= x2
|
||||
|
||||
x22 = x11
|
||||
repeat 11: x22.square()
|
||||
x22 *= x11
|
||||
|
||||
x44 = x22
|
||||
repeat 22: x44.square()
|
||||
x44 *= x22
|
||||
|
||||
x88 = x44
|
||||
repeat 44: x88.square()
|
||||
x88 *= x44
|
||||
|
||||
x176 = x88
|
||||
repeat 88: x88.square()
|
||||
x176 *= x88
|
||||
|
||||
x220 = x176
|
||||
repeat 44: x220.square()
|
||||
x220 *= x44
|
||||
|
||||
x223 = x220
|
||||
repeat 3: x223.square()
|
||||
x223 *= x3
|
||||
|
||||
# The final result is then assembled using a sliding window over the blocks
|
||||
r = x223
|
||||
repeat 23: r.square()
|
||||
r *= x22
|
||||
repeat 5: r.square()
|
||||
r *= a
|
||||
repeat 3: r.square()
|
||||
r *= x2
|
||||
repeat 2: r.square()
|
||||
r *= a
|
||||
|
||||
# BN Curves
|
||||
# ------------------------------------------------------------
|
||||
# Efficient Pairings and ECC for Embedded Systems
|
||||
# Thomas Unterluggauer and Erich Wenger
|
||||
# https://eprint.iacr.org/2014/800.pdf
|
||||
#
|
||||
# BN curve field modulus are of the form:
|
||||
# p = 36u^4 + 36u^3 + 24u^2 + 6u + 1
|
||||
#
|
||||
# We construct the following multiplication-squaring chain
|
||||
# a^-1 mod p = a^(p-2) mod p (Little Fermat Theorem)
|
||||
# = a^(36 u^4 + 36 u^3 + 24 u^2 + 6u + 1 - 2) mod p
|
||||
# = a^(36 u^4) . a^(36 u^3) . a^(24 u^2) . a^(6u-1) mod p
|
||||
#
|
||||
# Note: it only works for u positive, in particular BN254 doesn't work :/
|
||||
# Is there a way to only use a^-u or even powers?
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Dispatch
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
func inv*(r: var Fp, a: Fp) =
|
||||
## Inversion modulo p
|
||||
# For now we don't activate the addition chain.
|
||||
# Performance is equal to GCD and it does not pass test on 𝔽p2
|
||||
# We need faster squaring/multiplications
|
||||
r.mres.steinsGCD(a.mres, Fp.C.getR2modP(), Fp.C.Mod.mres, Fp.C.getPrimePlus1div2())
|
||||
@ -43,6 +43,22 @@ type Limbs*[N: static int] = array[N, Word]
|
||||
##
|
||||
## but for unknown reason, it prevents semchecking `bits`
|
||||
|
||||
debug:
|
||||
import strutils
|
||||
|
||||
func toString*(a: Limbs): string =
|
||||
result = "["
|
||||
result.add $BaseType(a[0]) & " (0x" & toHex(BaseType(a[0])) & ')'
|
||||
for i in 1 ..< a.len:
|
||||
result.add ", "
|
||||
result.add $BaseType(a[i]) & " (0x" & toHex(BaseType(a[i])) & ')'
|
||||
result.add "])"
|
||||
|
||||
func toHex*(a: Limbs): string =
|
||||
result = "0x"
|
||||
for i in countdown(a.len-1, 0):
|
||||
result.add toHex(BaseType(a[i]))
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
@ -82,20 +98,8 @@ type Limbs*[N: static int] = array[N, Word]
|
||||
# as we avoid the parmeter packing/unpacking ceremony at function entry/exit
|
||||
# and unrolling overhead is minimal.
|
||||
|
||||
func `==`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if 2 limbs are equal
|
||||
## Comparison is constant-time
|
||||
var accum = Zero
|
||||
for i in 0 ..< a.len:
|
||||
accum = accum or (a[i] xor b[i])
|
||||
result = accum.isZero()
|
||||
|
||||
func isZero*(a: Limbs): CTBool[Word] =
|
||||
## Returns true if ``a`` is equal to zero
|
||||
var accum = Zero
|
||||
for i in 0 ..< a.len:
|
||||
accum = accum or a[i]
|
||||
result = accum.isZero()
|
||||
# Initialization
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func setZero*(a: var Limbs) =
|
||||
## Set ``a`` to 0
|
||||
@ -107,6 +111,9 @@ func setOne*(a: var Limbs) =
|
||||
when a.len > 1:
|
||||
zeroMem(a[1].addr, (a.len - 1) * sizeof(Word))
|
||||
|
||||
# Copy
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func ccopy*(a: var Limbs, b: Limbs, ctl: CTBool[Word]) =
|
||||
## Constant-time conditional copy
|
||||
## If ctl is true: b is copied into a
|
||||
@ -118,6 +125,65 @@ func ccopy*(a: var Limbs, b: Limbs, ctl: CTBool[Word]) =
|
||||
for i in 0 ..< a.len:
|
||||
ctl.ccopy(a[i], b[i])
|
||||
|
||||
func cswap*(a, b: var Limbs, ctl: CTBool) =
|
||||
## Swap ``a`` and ``b`` if ``ctl`` is true
|
||||
##
|
||||
## Constant-time:
|
||||
## Whether ``ctl`` is true or not, the same
|
||||
## memory accesses are done (unless the compiler tries to be clever)
|
||||
|
||||
var mask = -(Word ctl)
|
||||
for i in 0 ..< a.len:
|
||||
let t = mask and (a[i] xor b[i])
|
||||
a[i] = a[i] xor t
|
||||
b[i] = b[i] xor t
|
||||
|
||||
# Comparison
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func `==`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if 2 limbs are equal
|
||||
## Comparison is constant-time
|
||||
var accum = Zero
|
||||
for i in 0 ..< a.len:
|
||||
accum = accum or (a[i] xor b[i])
|
||||
result = accum.isZero()
|
||||
|
||||
func `<`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if a < b
|
||||
## Comparison is constant-time
|
||||
var diff: Word
|
||||
var borrow: Borrow
|
||||
for i in 0 ..< a.len:
|
||||
subB(borrow, diff, a[i], b[i], borrow)
|
||||
|
||||
result = (CTBool[Word])(borrow)
|
||||
|
||||
func `<=`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if a <= b
|
||||
## Comparison is constant-time
|
||||
not(b < a)
|
||||
|
||||
func isZero*(a: Limbs): CTBool[Word] =
|
||||
## Returns true if ``a`` is equal to zero
|
||||
var accum = Zero
|
||||
for i in 0 ..< a.len:
|
||||
accum = accum or a[i]
|
||||
result = accum.isZero()
|
||||
|
||||
func isOne*(a: Limbs): CTBool[Word] =
|
||||
## Returns true if ``a`` is equal to one
|
||||
result = a[0] == Word(1)
|
||||
for i in 1 ..< a.len:
|
||||
result = result and a[i].isZero()
|
||||
|
||||
func isOdd*(a: Limbs): CTBool[Word] =
|
||||
## Returns true if a is odd
|
||||
CTBool[Word](a[0] and Word(1))
|
||||
|
||||
# Arithmetic
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func add*(a: var Limbs, b: Limbs): Carry =
|
||||
## Limbs addition
|
||||
## Returns the carry
|
||||
@ -125,6 +191,14 @@ func add*(a: var Limbs, b: Limbs): Carry =
|
||||
for i in 0 ..< a.len:
|
||||
addC(result, a[i], a[i], b[i], result)
|
||||
|
||||
func add*(a: var Limbs, w: Word): Carry =
|
||||
## Limbs addition, add a number that fits in a word
|
||||
## Returns the carry
|
||||
result = Carry(0)
|
||||
addC(result, a[0], a[0], w, result)
|
||||
for i in 1 ..< a.len:
|
||||
addC(result, a[i], a[i], Zero, result)
|
||||
|
||||
func cadd*(a: var Limbs, b: Limbs, ctl: CTBool[Word]): Carry =
|
||||
## Limbs conditional addition
|
||||
## Returns the carry
|
||||
@ -180,266 +254,46 @@ func diff*(r: var Limbs, a, b: Limbs): Borrow =
|
||||
for i in 0 ..< a.len:
|
||||
subB(result, r[i], a[i], b[i], result)
|
||||
|
||||
func `<`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if a < b
|
||||
## Comparison is constant-time
|
||||
var diff: Word
|
||||
var borrow: Borrow
|
||||
func cneg*(a: var Limbs, ctl: CTBool) =
|
||||
## Conditional negation.
|
||||
## Negate if ``ctl`` is true
|
||||
|
||||
# Algorithm:
|
||||
# In two-complement representation
|
||||
# -x <=> not(x) + 1 <=> x xor 0xFF... + 1
|
||||
# and
|
||||
# x <=> x xor 0x00...<=> x xor 0x00... + 0
|
||||
#
|
||||
# So we need to xor all words and then add 1
|
||||
# The "+1" might carry
|
||||
# So we fuse the 2 steps
|
||||
let mask = -Word(ctl) # Obtain a 0xFF... or 0x00... mask
|
||||
var carry = Word(ctl)
|
||||
for i in 0 ..< a.len:
|
||||
subB(borrow, diff, a[i], b[i], borrow)
|
||||
let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively
|
||||
carry = Word(t < carry) # Carry on
|
||||
a[i] = t
|
||||
|
||||
result = (CTBool[Word])(borrow)
|
||||
# Bit manipulation
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func `<=`*(a, b: Limbs): CTBool[Word] =
|
||||
## Returns true if a <= b
|
||||
## Comparison is constant-time
|
||||
not(b < a)
|
||||
func shiftRight*(a: var Limbs, k: int) =
|
||||
## Shift right by k.
|
||||
##
|
||||
## k MUST be less than the base word size (2^32 or 2^64)
|
||||
# We don't reuse shr as this is an in-place operation
|
||||
# Do we need to return the shifted out part?
|
||||
#
|
||||
# Note: for speed, loading a[i] and a[i+1]
|
||||
# instead of a[i-1] and a[i]
|
||||
# is probably easier to parallelize for the compiler
|
||||
# (antidependence WAR vs loop-carried dependence RAW)
|
||||
|
||||
# checkWordShift(k)
|
||||
|
||||
for i in 0 ..< a.len-1:
|
||||
a[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k))
|
||||
a[a.len-1] = a[a.len-1] shr k
|
||||
|
||||
{.pop.} # inline
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Modular BigInt
|
||||
#
|
||||
# ############################################################
|
||||
#
|
||||
# To avoid code-size explosion due to monomorphization
|
||||
# and given that reductions are not in hot path in Constantine
|
||||
# we use type-erased procedures, instead of instantiating
|
||||
# one per number of limbs combination
|
||||
|
||||
# Type-erasure
|
||||
# ------------------------------------------------------------
|
||||
|
||||
type
|
||||
LimbsView = ptr UncheckedArray[Word]
|
||||
## Type-erased fixed-precision limbs
|
||||
##
|
||||
## This type mirrors the Limb type and is used
|
||||
## for some low-level computation API
|
||||
## This design
|
||||
## - avoids code bloat due to generic monomorphization
|
||||
## otherwise limbs routines would have an instantiation for
|
||||
## each number of words.
|
||||
##
|
||||
## Accesses should be done via BigIntViewConst / BigIntViewConst
|
||||
## to have the compiler check for mutability
|
||||
|
||||
# "Indirection" to enforce pointer types deep immutability
|
||||
LimbsViewConst = distinct LimbsView
|
||||
## Immutable view into the limbs of a BigInt
|
||||
LimbsViewMut = distinct LimbsView
|
||||
## Mutable view into a BigInt
|
||||
LimbsViewAny = LimbsViewConst or LimbsViewMut
|
||||
|
||||
# Deep Mutability safety
|
||||
# ------------------------------------------------------------
|
||||
|
||||
template view(a: Limbs): LimbsViewConst =
|
||||
## Returns a borrowed type-erased immutable view to a bigint
|
||||
LimbsViewConst(cast[LimbsView](a.unsafeAddr))
|
||||
|
||||
template view(a: var Limbs): LimbsViewMut =
|
||||
## Returns a borrowed type-erased mutable view to a mutable bigint
|
||||
LimbsViewMut(cast[LimbsView](a.addr))
|
||||
|
||||
template `[]`*(v: LimbsViewConst, limbIdx: int): Word =
|
||||
LimbsView(v)[limbIdx]
|
||||
|
||||
template `[]`*(v: LimbsViewMut, limbIdx: int): var Word =
|
||||
LimbsView(v)[limbIdx]
|
||||
|
||||
template `[]=`*(v: LimbsViewMut, limbIdx: int, val: Word) =
|
||||
LimbsView(v)[limbIdx] = val
|
||||
|
||||
# Type-erased add-sub
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func cadd(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Carry =
|
||||
## Type-erased conditional addition
|
||||
## Returns the carry
|
||||
##
|
||||
## if ctl is true: a <- a + b
|
||||
## if ctl is false: a <- a
|
||||
## The carry is always computed whether ctl is true or false
|
||||
##
|
||||
## Time and memory accesses are the same whether a copy occurs or not
|
||||
result = Carry(0)
|
||||
var sum: Word
|
||||
for i in 0 ..< len:
|
||||
addC(result, sum, a[i], b[i], result)
|
||||
ctl.ccopy(a[i], sum)
|
||||
|
||||
func csub(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Borrow =
|
||||
## Type-erased conditional addition
|
||||
## Returns the borrow
|
||||
##
|
||||
## if ctl is true: a <- a - b
|
||||
## if ctl is false: a <- a
|
||||
## The borrow is always computed whether ctl is true or false
|
||||
##
|
||||
## Time and memory accesses are the same whether a copy occurs or not
|
||||
result = Borrow(0)
|
||||
var diff: Word
|
||||
for i in 0 ..< len:
|
||||
subB(result, diff, a[i], b[i], result)
|
||||
ctl.ccopy(a[i], diff)
|
||||
|
||||
# Modular reduction
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func numWordsFromBits(bits: int): int {.inline.} =
|
||||
const divShiftor = log2(uint32(WordBitWidth))
|
||||
result = (bits + WordBitWidth - 1) shr divShiftor
|
||||
|
||||
func shlAddMod_estimate(a: LimbsViewMut, aLen: int,
|
||||
c: Word, M: LimbsViewConst, mBits: int
|
||||
): tuple[neg, tooBig: CTBool[Word]] =
|
||||
## Estimate a <- a shl 2^w + c (mod M)
|
||||
##
|
||||
## with w the base word width, usually 32 on 32-bit platforms and 64 on 64-bit platforms
|
||||
##
|
||||
## Updates ``a`` and returns ``neg`` and ``tooBig``
|
||||
## If ``neg``, the estimate in ``a`` is negative and ``M`` must be added to it.
|
||||
## If ``tooBig``, the estimate in ``a`` overflowed and ``M`` must be substracted from it.
|
||||
|
||||
# Aliases
|
||||
# ----------------------------------------------------------------------
|
||||
let MLen = numWordsFromBits(mBits)
|
||||
|
||||
# Captures aLen and MLen
|
||||
template `[]`(v: untyped, limbIdxFromEnd: BackwardsIndex): Word {.dirty.}=
|
||||
v[`v Len` - limbIdxFromEnd.int]
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Assuming 64-bit words
|
||||
let hi = a[^1] # Save the high word to detect carries
|
||||
let R = mBits and (WordBitWidth - 1) # R = mBits mod 64
|
||||
|
||||
var a0, a1, m0: Word
|
||||
if R == 0: # If the number of mBits is a multiple of 64
|
||||
a0 = a[^1] #
|
||||
moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words
|
||||
a[0] = c # and replace the first one by c
|
||||
a1 = a[^1]
|
||||
m0 = M[^1]
|
||||
else: # Else: need to deal with partial word shifts at the edge.
|
||||
a0 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R)
|
||||
moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof)
|
||||
a[0] = c
|
||||
a1 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R)
|
||||
m0 = (M[^1] shl (WordBitWidth-R)) or (M[^2] shr R)
|
||||
|
||||
# m0 has its high bit set. (a0, a1)/p0 fits in a limb.
|
||||
# Get a quotient q, at most we will be 2 iterations off
|
||||
# from the true quotient
|
||||
var q, r: Word
|
||||
unsafeDiv2n1n(q, r, a0, a1, m0) # Estimate quotient
|
||||
q = mux( # If n_hi == divisor
|
||||
a0 == m0, MaxWord, # Quotient == MaxWord (0b1111...1111)
|
||||
mux(
|
||||
q.isZero, Zero, # elif q == 0, true quotient = 0
|
||||
q - One # else instead of being of by 0, 1 or 2
|
||||
) # we returning q-1 to be off by -1, 0 or 1
|
||||
)
|
||||
|
||||
# Now substract a*2^64 - q*p
|
||||
var carry = Zero
|
||||
var over_p = CtTrue # Track if quotient greater than the modulus
|
||||
|
||||
for i in 0 ..< MLen:
|
||||
var qp_lo: Word
|
||||
|
||||
block: # q*p
|
||||
# q * p + carry (doubleword) carry from previous limb
|
||||
muladd1(carry, qp_lo, q, M[i], Word carry)
|
||||
|
||||
block: # a*2^64 - q*p
|
||||
var borrow: Borrow
|
||||
subB(borrow, a[i], a[i], qp_lo, Borrow(0))
|
||||
carry += Word(borrow) # Adjust if borrow
|
||||
|
||||
over_p = mux(
|
||||
a[i] == M[i], over_p,
|
||||
a[i] > M[i]
|
||||
)
|
||||
|
||||
# Fix quotient, the true quotient is either q-1, q or q+1
|
||||
#
|
||||
# if carry < q or carry == q and over_p we must do "a -= p"
|
||||
# if carry > hi (negative result) we must do "a += p"
|
||||
|
||||
result.neg = Word(carry) > hi
|
||||
result.tooBig = not(result.neg) and (over_p or (Word(carry) < hi))
|
||||
|
||||
func shlAddMod(a: LimbsViewMut, aLen: int,
|
||||
c: Word, M: LimbsViewConst, mBits: int) =
|
||||
## Fused modular left-shift + add
|
||||
## Shift input `a` by a word and add `c` modulo `M`
|
||||
##
|
||||
## With a word W = 2^WordBitSize and a modulus M
|
||||
## Does a <- a * W + c (mod M)
|
||||
##
|
||||
## The modulus `M` most-significant bit at `mBits` MUST be set.
|
||||
if mBits <= WordBitWidth:
|
||||
# If M fits in a single limb
|
||||
|
||||
# We normalize M with R so that the MSB is set
|
||||
# And normalize (a * 2^64 + c) by R as well to maintain the result
|
||||
# This ensures that (a0, a1)/p0 fits in a limb.
|
||||
let R = mBits and (WordBitWidth - 1)
|
||||
|
||||
# (hi, lo) = a * 2^64 + c
|
||||
let hi = (a[0] shl (WordBitWidth-R)) or (c shr R)
|
||||
let lo = c shl (WordBitWidth-R)
|
||||
let m0 = M[0] shl (WordBitWidth-R)
|
||||
|
||||
var q, r: Word
|
||||
unsafeDiv2n1n(q, r, hi, lo, m0) # (hi, lo) mod M
|
||||
|
||||
a[0] = r shr (WordBitWidth-R)
|
||||
|
||||
else:
|
||||
## Multiple limbs
|
||||
let (neg, tooBig) = shlAddMod_estimate(a, aLen, c, M, mBits)
|
||||
discard a.cadd(M, ctl = neg, aLen)
|
||||
discard a.csub(M, ctl = tooBig, aLen)
|
||||
|
||||
func reduce(r: LimbsViewMut,
|
||||
a: LimbsViewAny, aBits: int,
|
||||
M: LimbsViewConst, mBits: int) =
|
||||
## Reduce `a` modulo `M` and store the result in `r`
|
||||
let aLen = numWordsFromBits(aBits)
|
||||
let mLen = numWordsFromBits(mBits)
|
||||
let rLen = mLen
|
||||
|
||||
if aBits < mBits:
|
||||
# if a uses less bits than the modulus,
|
||||
# it is guaranteed < modulus.
|
||||
# This relies on the precondition that the modulus uses all declared bits
|
||||
copyMem(r[0].addr, a[0].unsafeAddr, aLen * sizeof(Word))
|
||||
for i in aLen ..< mLen:
|
||||
r[i] = Zero
|
||||
else:
|
||||
# a length i at least equal to the modulus.
|
||||
# we can copy modulus.limbs-1 words
|
||||
# and modular shift-left-add the rest
|
||||
let aOffset = aLen - mLen
|
||||
copyMem(r[0].addr, a[aOffset+1].unsafeAddr, (mLen-1) * sizeof(Word))
|
||||
r[rLen - 1] = Zero
|
||||
# Now shift-left the copied words while adding the new word modulo M
|
||||
for i in countdown(aOffset, 0):
|
||||
shlAddMod(r, rLen, a[i], M, mBits)
|
||||
|
||||
func reduce*[aLen, mLen](r: var Limbs[mLen],
|
||||
a: Limbs[aLen], aBits: static int,
|
||||
M: Limbs[mLen], mBits: static int
|
||||
) {.inline.} =
|
||||
## Reduce `a` modulo `M` and store the result in `r`
|
||||
##
|
||||
## Warning ⚠: At the moment this is NOT constant-time
|
||||
## as it relies on hardware division.
|
||||
# This is implemented via type-erased indirection to avoid
|
||||
# a significant amount of code duplication if instantiated for
|
||||
# varying bitwidth.
|
||||
reduce(r.view(), a.view(), aBits, M.view(), mBits)
|
||||
{.pop.} # raises no exceptions
|
||||
|
||||
371
constantine/arithmetic/limbs_modular.nim
Normal file
371
constantine/arithmetic/limbs_modular.nim
Normal file
@ -0,0 +1,371 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../config/common,
|
||||
../primitives,
|
||||
./limbs
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Modular inversion
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# Generic (odd-only modulus)
|
||||
# ------------------------------------------------------------
|
||||
# Algorithm by Niels Möller,
|
||||
# a modified version of Stein's Algorithm (binary Extended Euclid GCD)
|
||||
#
|
||||
# Algorithm 5 in
|
||||
# Fast Software Polynomial Multiplication on ARM Processors Using the NEON Engine
|
||||
# Danilo Camara, Conrado P. L. Gouvea, Julio Lopez, and Ricardo Dahab
|
||||
# https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf
|
||||
#
|
||||
# Input: integer x, odd integer n, x < n
|
||||
# Output: x−1 (mod n)
|
||||
# 1: function ModInv(x, n)
|
||||
# 2: (a, b, u, v) ← (x, n, 1, 1)
|
||||
# 3: ℓ ← ⌊log2 n⌋ + 1 ⮚ number of bits in n
|
||||
# 4: for i ← 0 to 2ℓ − 1 do
|
||||
# 5: odd ← a & 1
|
||||
# 6: if odd and a ≥ b then
|
||||
# 7: a ← a − b
|
||||
# 8: else if odd and a < b then
|
||||
# 9: (a, b, u, v) ← (b − a, a, v, u)
|
||||
# 10: a ← a >> 1
|
||||
# 11: if odd then u ← u − v
|
||||
# 12: if u < 0 then u ← u + n
|
||||
# 13: if u & 1 then u ← u + n
|
||||
# 14: u ← u >> 1
|
||||
# 15: return v
|
||||
#
|
||||
# Warning ⚠️: v should be 0 at initialization
|
||||
#
|
||||
# We modify it to return F . a^-1
|
||||
# So that we can pass an adjustment factor F
|
||||
# And directly compute modular division or Montgomery inversion
|
||||
|
||||
func steinsGCD*(v: var Limbs, a: Limbs, F, M: Limbs, bits: int, mp1div2: Limbs) =
|
||||
## Compute F multiplied the modular inverse of ``a`` modulo M
|
||||
## r ≡ F . a^-1 (mod M)
|
||||
##
|
||||
## M MUST be odd, M does not need to be prime.
|
||||
## ``a`` MUST be less than M.
|
||||
##
|
||||
## No information about ``a`` in particular its actual length in bits is leaked.
|
||||
##
|
||||
## This takes (M+1)/2 (mp1div2) as a precomputed parameter as a slight optimization
|
||||
## in stack size and speed.
|
||||
|
||||
# Ideally we need registers for a, b, u, v
|
||||
# but:
|
||||
# - Even with ~256-bit primes, that's 4 limbs = 4*4 => 16 registers
|
||||
# - x86_64 only has 16 general purposes registers
|
||||
# - Registers are needed for the loop counter and comparison results
|
||||
# - CMOV is reg <- RM so can move registers/memory into registers
|
||||
# but cannot move into memory.
|
||||
# so we choose to keep "v" from the algorithm in memory as `r`
|
||||
|
||||
# TODO: the inlining of primitives like `csub` is bad for codesize
|
||||
# but there is a 80% slowdown without it.
|
||||
# TODO: The `cmov` in `cadd` and `csub` always retest the condition
|
||||
# which is probably costly here given how many we have.
|
||||
|
||||
var a = a
|
||||
var b = M
|
||||
var u = F
|
||||
v.setZero()
|
||||
|
||||
for i in 0 ..< 2 * bits:
|
||||
debug: doAssert bool(b.isOdd)
|
||||
let isOddA = a.isOdd()
|
||||
|
||||
# if isOddA: a -= b
|
||||
let aLessThanB = isOddA and (CTBool[Word]) a.csub(b, isOddA)
|
||||
# if a < b and the sub was processed
|
||||
# in that case, b <- a = a - b + b
|
||||
discard b.cadd(a, aLessThanB)
|
||||
# and a <- -new_a = (b-a)
|
||||
a.cneg(aLessThanB)
|
||||
debug: doAssert not bool(a.isOdd)
|
||||
a.shiftRight(1)
|
||||
|
||||
# Swap u and v is a < b
|
||||
u.cswap(v, aLessThanB)
|
||||
# if isOddA: u -= v (mod M)
|
||||
let neg = isOddA and (CTBool[Word]) u.csub(v, isOddA)
|
||||
let corrected = u.cadd(M, neg)
|
||||
|
||||
let isOddU = u.isOdd()
|
||||
# if u.isOdd:
|
||||
# u += n
|
||||
# u = u shr 1
|
||||
#
|
||||
# Warning ⚠️: u += n will overflow the BigInt
|
||||
# and we might lose a bit on the next shift
|
||||
# Instead we shift first and then add hald the modulus rounded up
|
||||
u.shiftRight(1)
|
||||
let carry = u.cadd(mp1div2, isOddU)
|
||||
debug: doAssert not carry.bool
|
||||
|
||||
debug:
|
||||
doAssert bool a.isZero()
|
||||
doAssert bool b.isOne() # GCD always exist if a and M are relatively prime
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Modular Reduction
|
||||
#
|
||||
# ############################################################
|
||||
#
|
||||
# To avoid code-size explosion due to monomorphization
|
||||
# and given that reductions are not in hot path in Constantine
|
||||
# we use type-erased procedures, instead of instantiating
|
||||
# one per number of limbs combination
|
||||
|
||||
# Type-erasure
|
||||
# ------------------------------------------------------------
|
||||
|
||||
type
|
||||
LimbsView = ptr UncheckedArray[Word]
|
||||
## Type-erased fixed-precision limbs
|
||||
##
|
||||
## This type mirrors the Limb type and is used
|
||||
## for some low-level computation API
|
||||
## This design
|
||||
## - avoids code bloat due to generic monomorphization
|
||||
## otherwise limbs routines would have an instantiation for
|
||||
## each number of words.
|
||||
##
|
||||
## Accesses should be done via BigIntViewConst / BigIntViewConst
|
||||
## to have the compiler check for mutability
|
||||
|
||||
# "Indirection" to enforce pointer types deep immutability
|
||||
LimbsViewConst = distinct LimbsView
|
||||
## Immutable view into the limbs of a BigInt
|
||||
LimbsViewMut = distinct LimbsView
|
||||
## Mutable view into a BigInt
|
||||
LimbsViewAny = LimbsViewConst or LimbsViewMut
|
||||
|
||||
# Deep Mutability safety
|
||||
# ------------------------------------------------------------
|
||||
|
||||
template view(a: Limbs): LimbsViewConst =
|
||||
## Returns a borrowed type-erased immutable view to a bigint
|
||||
LimbsViewConst(cast[LimbsView](a.unsafeAddr))
|
||||
|
||||
template view(a: var Limbs): LimbsViewMut =
|
||||
## Returns a borrowed type-erased mutable view to a mutable bigint
|
||||
LimbsViewMut(cast[LimbsView](a.addr))
|
||||
|
||||
template `[]`*(v: LimbsViewConst, limbIdx: int): Word =
|
||||
LimbsView(v)[limbIdx]
|
||||
|
||||
template `[]`*(v: LimbsViewMut, limbIdx: int): var Word =
|
||||
LimbsView(v)[limbIdx]
|
||||
|
||||
template `[]=`*(v: LimbsViewMut, limbIdx: int, val: Word) =
|
||||
LimbsView(v)[limbIdx] = val
|
||||
|
||||
# Type-erased add-sub
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func cadd(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Carry =
|
||||
## Type-erased conditional addition
|
||||
## Returns the carry
|
||||
##
|
||||
## if ctl is true: a <- a + b
|
||||
## if ctl is false: a <- a
|
||||
## The carry is always computed whether ctl is true or false
|
||||
##
|
||||
## Time and memory accesses are the same whether a copy occurs or not
|
||||
result = Carry(0)
|
||||
var sum: Word
|
||||
for i in 0 ..< len:
|
||||
addC(result, sum, a[i], b[i], result)
|
||||
ctl.ccopy(a[i], sum)
|
||||
|
||||
func csub(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Borrow =
|
||||
## Type-erased conditional addition
|
||||
## Returns the borrow
|
||||
##
|
||||
## if ctl is true: a <- a - b
|
||||
## if ctl is false: a <- a
|
||||
## The borrow is always computed whether ctl is true or false
|
||||
##
|
||||
## Time and memory accesses are the same whether a copy occurs or not
|
||||
result = Borrow(0)
|
||||
var diff: Word
|
||||
for i in 0 ..< len:
|
||||
subB(result, diff, a[i], b[i], result)
|
||||
ctl.ccopy(a[i], diff)
|
||||
|
||||
# Modular reduction
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func numWordsFromBits(bits: int): int {.inline.} =
|
||||
const divShiftor = log2(uint32(WordBitWidth))
|
||||
result = (bits + WordBitWidth - 1) shr divShiftor
|
||||
|
||||
func shlAddMod_estimate(a: LimbsViewMut, aLen: int,
|
||||
c: Word, M: LimbsViewConst, mBits: int
|
||||
): tuple[neg, tooBig: CTBool[Word]] =
|
||||
## Estimate a <- a shl 2^w + c (mod M)
|
||||
##
|
||||
## with w the base word width, usually 32 on 32-bit platforms and 64 on 64-bit platforms
|
||||
##
|
||||
## Updates ``a`` and returns ``neg`` and ``tooBig``
|
||||
## If ``neg``, the estimate in ``a`` is negative and ``M`` must be added to it.
|
||||
## If ``tooBig``, the estimate in ``a`` overflowed and ``M`` must be substracted from it.
|
||||
|
||||
# Aliases
|
||||
# ----------------------------------------------------------------------
|
||||
let MLen = numWordsFromBits(mBits)
|
||||
|
||||
# Captures aLen and MLen
|
||||
template `[]`(v: untyped, limbIdxFromEnd: BackwardsIndex): Word {.dirty.}=
|
||||
v[`v Len` - limbIdxFromEnd.int]
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Assuming 64-bit words
|
||||
let hi = a[^1] # Save the high word to detect carries
|
||||
let R = mBits and (WordBitWidth - 1) # R = mBits mod 64
|
||||
|
||||
var a0, a1, m0: Word
|
||||
if R == 0: # If the number of mBits is a multiple of 64
|
||||
a0 = a[^1] #
|
||||
moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words
|
||||
a[0] = c # and replace the first one by c
|
||||
a1 = a[^1]
|
||||
m0 = M[^1]
|
||||
else: # Else: need to deal with partial word shifts at the edge.
|
||||
a0 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R)
|
||||
moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof)
|
||||
a[0] = c
|
||||
a1 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R)
|
||||
m0 = (M[^1] shl (WordBitWidth-R)) or (M[^2] shr R)
|
||||
|
||||
# m0 has its high bit set. (a0, a1)/p0 fits in a limb.
|
||||
# Get a quotient q, at most we will be 2 iterations off
|
||||
# from the true quotient
|
||||
var q, r: Word
|
||||
unsafeDiv2n1n(q, r, a0, a1, m0) # Estimate quotient
|
||||
q = mux( # If n_hi == divisor
|
||||
a0 == m0, MaxWord, # Quotient == MaxWord (0b1111...1111)
|
||||
mux(
|
||||
q.isZero, Zero, # elif q == 0, true quotient = 0
|
||||
q - One # else instead of being of by 0, 1 or 2
|
||||
) # we returning q-1 to be off by -1, 0 or 1
|
||||
)
|
||||
|
||||
# Now substract a*2^64 - q*p
|
||||
var carry = Zero
|
||||
var over_p = CtTrue # Track if quotient greater than the modulus
|
||||
|
||||
for i in 0 ..< MLen:
|
||||
var qp_lo: Word
|
||||
|
||||
block: # q*p
|
||||
# q * p + carry (doubleword) carry from previous limb
|
||||
muladd1(carry, qp_lo, q, M[i], Word carry)
|
||||
|
||||
block: # a*2^64 - q*p
|
||||
var borrow: Borrow
|
||||
subB(borrow, a[i], a[i], qp_lo, Borrow(0))
|
||||
carry += Word(borrow) # Adjust if borrow
|
||||
|
||||
over_p = mux(
|
||||
a[i] == M[i], over_p,
|
||||
a[i] > M[i]
|
||||
)
|
||||
|
||||
# Fix quotient, the true quotient is either q-1, q or q+1
|
||||
#
|
||||
# if carry < q or carry == q and over_p we must do "a -= p"
|
||||
# if carry > hi (negative result) we must do "a += p"
|
||||
|
||||
result.neg = Word(carry) > hi
|
||||
result.tooBig = not(result.neg) and (over_p or (Word(carry) < hi))
|
||||
|
||||
func shlAddMod(a: LimbsViewMut, aLen: int,
|
||||
c: Word, M: LimbsViewConst, mBits: int) =
|
||||
## Fused modular left-shift + add
|
||||
## Shift input `a` by a word and add `c` modulo `M`
|
||||
##
|
||||
## With a word W = 2^WordBitSize and a modulus M
|
||||
## Does a <- a * W + c (mod M)
|
||||
##
|
||||
## The modulus `M` most-significant bit at `mBits` MUST be set.
|
||||
if mBits <= WordBitWidth:
|
||||
# If M fits in a single limb
|
||||
|
||||
# We normalize M with R so that the MSB is set
|
||||
# And normalize (a * 2^64 + c) by R as well to maintain the result
|
||||
# This ensures that (a0, a1)/p0 fits in a limb.
|
||||
let R = mBits and (WordBitWidth - 1)
|
||||
|
||||
# (hi, lo) = a * 2^64 + c
|
||||
let hi = (a[0] shl (WordBitWidth-R)) or (c shr R)
|
||||
let lo = c shl (WordBitWidth-R)
|
||||
let m0 = M[0] shl (WordBitWidth-R)
|
||||
|
||||
var q, r: Word
|
||||
unsafeDiv2n1n(q, r, hi, lo, m0) # (hi, lo) mod M
|
||||
|
||||
a[0] = r shr (WordBitWidth-R)
|
||||
|
||||
else:
|
||||
## Multiple limbs
|
||||
let (neg, tooBig) = shlAddMod_estimate(a, aLen, c, M, mBits)
|
||||
discard a.cadd(M, ctl = neg, aLen)
|
||||
discard a.csub(M, ctl = tooBig, aLen)
|
||||
|
||||
func reduce(r: LimbsViewMut,
|
||||
a: LimbsViewAny, aBits: int,
|
||||
M: LimbsViewConst, mBits: int) =
|
||||
## Reduce `a` modulo `M` and store the result in `r`
|
||||
let aLen = numWordsFromBits(aBits)
|
||||
let mLen = numWordsFromBits(mBits)
|
||||
let rLen = mLen
|
||||
|
||||
if aBits < mBits:
|
||||
# if a uses less bits than the modulus,
|
||||
# it is guaranteed < modulus.
|
||||
# This relies on the precondition that the modulus uses all declared bits
|
||||
copyMem(r[0].addr, a[0].unsafeAddr, aLen * sizeof(Word))
|
||||
for i in aLen ..< mLen:
|
||||
r[i] = Zero
|
||||
else:
|
||||
# a length i at least equal to the modulus.
|
||||
# we can copy modulus.limbs-1 words
|
||||
# and modular shift-left-add the rest
|
||||
let aOffset = aLen - mLen
|
||||
copyMem(r[0].addr, a[aOffset+1].unsafeAddr, (mLen-1) * sizeof(Word))
|
||||
r[rLen - 1] = Zero
|
||||
# Now shift-left the copied words while adding the new word modulo M
|
||||
for i in countdown(aOffset, 0):
|
||||
shlAddMod(r, rLen, a[i], M, mBits)
|
||||
|
||||
func reduce*[aLen, mLen](r: var Limbs[mLen],
|
||||
a: Limbs[aLen], aBits: static int,
|
||||
M: Limbs[mLen], mBits: static int
|
||||
) {.inline.} =
|
||||
## Reduce `a` modulo `M` and store the result in `r`
|
||||
##
|
||||
## Warning ⚠: At the moment this is NOT constant-time
|
||||
## as it relies on hardware division.
|
||||
# This is implemented via type-erased indirection to avoid
|
||||
# a significant amount of code duplication if instantiated for
|
||||
# varying bitwidth.
|
||||
reduce(r.view(), a.view(), aBits, M.view(), mBits)
|
||||
|
||||
{.pop.} # raises no exceptions
|
||||
@ -21,7 +21,7 @@ import
|
||||
# Note: Montgomery multiplications and squarings are the biggest bottlenecks
|
||||
# of an elliptic curve library, asymptotically 100% of the costly algorithms:
|
||||
# - field exponentiation
|
||||
# - field inversion
|
||||
# - field inversion via Little Fermat
|
||||
# - extension towers multiplication, squarings, inversion
|
||||
# - elliptic curve point addition
|
||||
# - elliptic curve point doubling
|
||||
@ -80,6 +80,9 @@ macro staticFor(idx: untyped{nkIdent}, start, stopEx: static int, body: untyped)
|
||||
body.replaceNodes(idx, newLit i)
|
||||
)
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# Implementation
|
||||
# ------------------------------------------------------------
|
||||
# Note: the low-level implementations should not use static parameter
|
||||
@ -565,3 +568,5 @@ func montyPowUnsafeExponent*(
|
||||
# scratchspace[1] holds the original `a`
|
||||
scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord, canUseNoCarryMontyMul)
|
||||
a = scratchspace[0]
|
||||
|
||||
{.pop.} # raises no exceptions
|
||||
@ -67,6 +67,19 @@ func subB(bOut, diff: var BaseType, a, b, bIn: BaseType) =
|
||||
bOut = BaseType(noBorrowHi == 0)
|
||||
diff = merge(rHi, rLo)
|
||||
|
||||
func add(a: var BigInt, w: BaseType): bool =
|
||||
## Limbs addition, add a number that fits in a word
|
||||
## Returns the carry
|
||||
var carry, sum: BaseType
|
||||
addC(carry, sum, BaseType(a.limbs[0]), w, carry)
|
||||
a.limbs[0] = Word(sum)
|
||||
for i in 1 ..< a.limbs.len:
|
||||
let ai = BaseType(a.limbs[i])
|
||||
addC(carry, sum, ai, 0, carry)
|
||||
a.limbs[i] = Word(sum)
|
||||
|
||||
result = bool(carry)
|
||||
|
||||
func dbl(a: var BigInt): bool =
|
||||
## In-place multiprecision double
|
||||
## a -> 2a
|
||||
@ -246,9 +259,24 @@ func primeMinus2_BE*[bits: static int](
|
||||
): array[(bits+7) div 8, byte] {.noInit.} =
|
||||
## Compute an input prime-2
|
||||
## and return the result as a canonical byte array / octet string
|
||||
## For use to precompute modular inverse exponent.
|
||||
## For use to precompute modular inverse exponent
|
||||
## when using inversion by Little Fermat Theorem a^-1 = a^(p-2) mod p
|
||||
|
||||
var tmp = P
|
||||
discard tmp.csub(BigInt[bits].fromRawUint([byte 2], bigEndian), true)
|
||||
|
||||
result.exportRawUint(tmp, bigEndian)
|
||||
|
||||
func primePlus1div2*(P: BigInt): BigInt =
|
||||
## Compute (P+1)/2, assumes P is odd
|
||||
## For use in constant-time modular inversion
|
||||
checkOddModulus(P)
|
||||
|
||||
# (P+1)/2 = P/2 + 1 if P is odd,
|
||||
# this avoids overflowing if the prime uses all bits
|
||||
# i.e. in the form (2^64)^w - 1 or (2^32)^w - 1
|
||||
|
||||
result = P
|
||||
result.shiftRight(1)
|
||||
let carry = result.add(1)
|
||||
doAssert not carry
|
||||
|
||||
@ -13,6 +13,7 @@ import
|
||||
./curves_parser, ./common,
|
||||
../arithmetic/[precomputed, bigints]
|
||||
|
||||
{.push used.}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -40,27 +41,23 @@ import
|
||||
# which returns the field modulus of the curve
|
||||
when not defined(testingCurves):
|
||||
declareCurves:
|
||||
# Barreto-Naehrig curve, pairing-friendly, Prime 254 bit, ~100-bit security
|
||||
# https://eprint.iacr.org/2013/879.pdf
|
||||
# Usage: Zero-Knowledge Proofs / zkSNARKs in ZCash and Ethereum 1
|
||||
# https://eips.ethereum.org/EIPS/eip-196
|
||||
curve P224: # NIST P-224
|
||||
bitsize: 224
|
||||
modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001"
|
||||
curve BN254:
|
||||
bitsize: 254
|
||||
modulus: "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"
|
||||
# Equation: Y^2 = X^3 + 3
|
||||
curve BLS12_381:
|
||||
bitsize: 381
|
||||
modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"
|
||||
# Equation: y^2 = x^3 + 4
|
||||
curve P224: # NIST P-224
|
||||
bitsize: 224
|
||||
modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001"
|
||||
curve P256: # secp256r1 / NIST P-256
|
||||
bitsize: 256
|
||||
modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"
|
||||
curve Secp256k1: # Bitcoin curve
|
||||
bitsize: 256
|
||||
modulus: "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F"
|
||||
curve BLS12_381:
|
||||
bitsize: 381
|
||||
modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"
|
||||
# Equation: y^2 = x^3 + 4
|
||||
else:
|
||||
# Fake curve for testing field arithmetic
|
||||
declareCurves:
|
||||
@ -76,12 +73,75 @@ else:
|
||||
curve P224: # NIST P-224
|
||||
bitsize: 224
|
||||
modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001"
|
||||
curve BN254: # Zero-Knowledge proofs curve (SNARKS, STARKS)
|
||||
bitsize: 254
|
||||
modulus: "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"
|
||||
# Equation: Y^2 = X^3 + 3
|
||||
curve P256: # secp256r1 / NIST P-256
|
||||
bitsize: 256
|
||||
modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"
|
||||
curve Secp256k1: # Bitcoin curve
|
||||
bitsize: 256
|
||||
modulus: "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F"
|
||||
curve BLS12_381:
|
||||
bitsize: 381
|
||||
modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab"
|
||||
# Equation: y^2 = x^3 + 4
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Curve Families
|
||||
#
|
||||
# ############################################################
|
||||
type CurveFamily = enum
|
||||
None
|
||||
BN # Barreto-Naehrig
|
||||
BLS # Barreto-Lynn-Scott
|
||||
|
||||
func family*(curve: Curve): CurveFamily =
|
||||
case curve
|
||||
of BN254:
|
||||
BN
|
||||
of BLS12_381:
|
||||
BLS
|
||||
else:
|
||||
None
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Curve Specific Parameters
|
||||
#
|
||||
# ############################################################
|
||||
#
|
||||
# In the form CurveXXX_ParameterName where CurveXXX is the curve name + number of bits
|
||||
# of the field modulus
|
||||
|
||||
# BN Curves
|
||||
# ------------------------------------------------------------
|
||||
# See https://tools.ietf.org/id/draft-yonezawa-pairing-friendly-curves-00.html
|
||||
#
|
||||
# The prime p and order r are primes and of the form
|
||||
# p = 36u^4 + 36u^3 + 24u^2 + 6u + 1
|
||||
# r = 36u^4 + 36u^3 + 18u^2 + 6u + 1
|
||||
#
|
||||
# https://eprint.iacr.org/2010/429.pdf
|
||||
# https://eprint.iacr.org/2013/879.pdf
|
||||
# Usage: Zero-Knowledge Proofs / zkSNARKs in ZCash and Ethereum 1
|
||||
# https://eips.ethereum.org/EIPS/eip-196
|
||||
|
||||
# BLS Curves
|
||||
# ------------------------------------------------------------
|
||||
# See https://tools.ietf.org/id/draft-yonezawa-pairing-friendly-curves-00.html
|
||||
#
|
||||
# BLS12 curves
|
||||
# The prime p and order r are primes and of the form
|
||||
# p = (u - 1)^2 (u^4 - u^2 + 1)/3 + u
|
||||
# r = u^4 - u^2 + 1
|
||||
#
|
||||
# BLS48 curves
|
||||
# The prime p and order r are primes and of the form
|
||||
# p = (u - 1)^2 (u^16 - u^8 + 1)/3 + u
|
||||
# r = u^16 - u^8 + 1
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -147,6 +207,7 @@ macro genMontyMagics(T: typed): untyped =
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# const MyCurve_NegInvModWord = negInvModWord(MyCurve_Modulus)
|
||||
result.add newConstStmt(
|
||||
ident($curve & "_NegInvModWord"), newCall(
|
||||
@ -177,6 +238,16 @@ macro genMontyMagics(T: typed): untyped =
|
||||
)
|
||||
)
|
||||
)
|
||||
# const MyCurve_PrimePlus1div2 = primePlus1div2(MyCurve_Modulus)
|
||||
result.add newConstStmt(
|
||||
ident($curve & "_PrimePlus1div2"), newCall(
|
||||
bindSym"primePlus1div2",
|
||||
nnkDotExpr.newTree(
|
||||
bindSym($curve & "_Modulus"),
|
||||
ident"mres"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# echo result.toStrLit
|
||||
|
||||
@ -208,6 +279,10 @@ macro getInvModExponent*(C: static Curve): untyped =
|
||||
## Get modular inversion exponent (Modulus-2 in canonical representation)
|
||||
result = bindSym($C & "_InvModExponent")
|
||||
|
||||
macro getPrimePlus1div2*(C: static Curve): untyped =
|
||||
## Get (P+1) / 2 for an odd prime
|
||||
result = bindSym($C & "_PrimePlus1div2")
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Debug info printed at compile-time
|
||||
@ -234,5 +309,5 @@ macro debugConsts(): untyped =
|
||||
result.add quote do:
|
||||
echo "----------------------------------------------------------------------------"
|
||||
|
||||
debug:
|
||||
debugConsts()
|
||||
# debug:
|
||||
# debugConsts()
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../arithmetic/finite_fields,
|
||||
../arithmetic,
|
||||
../config/common,
|
||||
../primitives
|
||||
|
||||
|
||||
@ -44,7 +44,7 @@
|
||||
# TODO: Clarify some assumptions about the prime p ≡ 3 (mod 4)
|
||||
|
||||
import
|
||||
../arithmetic/finite_fields,
|
||||
../arithmetic,
|
||||
../config/curves,
|
||||
./abelian_groups
|
||||
|
||||
@ -151,7 +151,7 @@ func inv*(r: var Fp2, a: Fp2) =
|
||||
t0 += t1 # t0 = a0² + a1² (the norm / squared magnitude of a)
|
||||
|
||||
# [1 Inv, 2 Sqr, 1 Add]
|
||||
inv(t0) # t0 = 1 / (a0² + a1²)
|
||||
t0.inv(t0) # t0 = 1 / (a0² + a1²)
|
||||
|
||||
# [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg]
|
||||
r.c0.prod(a.c0, t0) # r0 = a0 / (a0² + a1²)
|
||||
|
||||
@ -33,7 +33,7 @@
|
||||
|
||||
|
||||
import
|
||||
../arithmetic/finite_fields,
|
||||
../arithmetic,
|
||||
../config/curves,
|
||||
./abelian_groups
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
# https://eprint.iacr.org/2010/354
|
||||
|
||||
import
|
||||
../arithmetic/finite_fields,
|
||||
../arithmetic,
|
||||
../config/curves,
|
||||
./abelian_groups
|
||||
|
||||
|
||||
@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
when isMainModule:
|
||||
import random, std/monotimes, times, strformat, ./timers
|
||||
import random, std/monotimes, times, strformat, ../helpers/timers
|
||||
|
||||
const Iters = 1_000_000
|
||||
const InvIters = 1000
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
when defined(i386) or defined(amd64):
|
||||
# From Linux
|
||||
#
|
||||
# The RDTSC instruction is not ordered relative to memory
|
||||
# access. The Intel SDM and the AMD APM are both vague on this
|
||||
# point, but empirically an RDTSC instruction can be
|
||||
# speculatively executed before prior loads. An RDTSC
|
||||
# immediately after an appropriate barrier appears to be
|
||||
# ordered as a normal load, that is, it provides the same
|
||||
# ordering guarantees as reading from a global memory location
|
||||
# that some other imaginary CPU is updating continuously with a
|
||||
# time stamp.
|
||||
#
|
||||
# From Intel SDM
|
||||
# https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
|
||||
when not defined(vcc):
|
||||
when defined(amd64):
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
var lo, hi: int64
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`lo`), "=d"(`hi`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
return (hi shl 32) or lo
|
||||
else:
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`result`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
else:
|
||||
proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "<intrin.h>".}
|
||||
proc lfence() {.importc: "__mm_lfence", header: "<intrin.h>".}
|
||||
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
lfence()
|
||||
return rdtsc()
|
||||
else:
|
||||
{.error: "getticks is not supported on this CPU architecture".}
|
||||
3
helpers/README.md
Normal file
3
helpers/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Helpers, utilities, tools, miscellaneous
|
||||
|
||||
This folder holds helper functions that are not part of Constantine but facilitates testing and benchmarking.
|
||||
@ -98,5 +98,5 @@ func random[T](rng: var RngState, a: var T, C: static Curve) {.noInit.}=
|
||||
rng.random(field, C)
|
||||
|
||||
func random*(rng: var RngState, T: typedesc): T =
|
||||
## Create a random Field or Extension FIeld Element
|
||||
## Create a random Field or Extension Field Element
|
||||
rng.random(result, T.C)
|
||||
28
helpers/static_for.nim
Normal file
28
helpers/static_for.nim
Normal file
@ -0,0 +1,28 @@
|
||||
import macros
|
||||
|
||||
proc replaceNodes(ast: NimNode, what: NimNode, by: NimNode): NimNode =
|
||||
# Replace "what" ident node by "by"
|
||||
proc inspect(node: NimNode): NimNode =
|
||||
case node.kind:
|
||||
of {nnkIdent, nnkSym}:
|
||||
if node.eqIdent(what):
|
||||
return by
|
||||
return node
|
||||
of nnkEmpty:
|
||||
return node
|
||||
of nnkLiterals:
|
||||
return node
|
||||
else:
|
||||
var rTree = node.kind.newTree()
|
||||
for child in node:
|
||||
rTree.add inspect(child)
|
||||
return rTree
|
||||
result = inspect(ast)
|
||||
|
||||
macro staticFor*(idx: untyped{nkIdent}, start, stopEx: static int, body: untyped): untyped =
|
||||
result = newStmtList()
|
||||
for i in start ..< stopEx:
|
||||
result.add nnkBlockStmt.newTree(
|
||||
ident("unrolledIter_" & $idx & $i),
|
||||
body.replaceNodes(idx, newLit i)
|
||||
)
|
||||
14
helpers/timers.nim
Normal file
14
helpers/timers.nim
Normal file
@ -0,0 +1,14 @@
|
||||
when defined(i386) or defined(amd64):
|
||||
import x86
|
||||
export getTicks
|
||||
|
||||
# This doesn't always work unfortunately ...
|
||||
|
||||
proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} =
|
||||
discard
|
||||
|
||||
template preventOptimAway*[T](x: var T) =
|
||||
volatilize(cast[ptr byte](unsafeAddr x))
|
||||
|
||||
template preventOptimAway*[T](x: T) =
|
||||
volatilize(cast[ptr byte](x))
|
||||
74
helpers/x86.nim
Normal file
74
helpers/x86.nim
Normal file
@ -0,0 +1,74 @@
|
||||
# Cpu Name
|
||||
# -------------------------------------------------------
|
||||
|
||||
{.passC:"-std=gnu99".} # TODO may conflict with milagro "-std=c99"
|
||||
|
||||
proc cpuID(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] =
|
||||
when defined(vcc):
|
||||
proc cpuidVcc(cpuInfo: ptr int32; functionID: int32)
|
||||
{.importc: "__cpuidex", header: "intrin.h".}
|
||||
cpuidVcc(addr result.eax, eaxi, ecxi)
|
||||
else:
|
||||
var (eaxr, ebxr, ecxr, edxr) = (0'i32, 0'i32, 0'i32, 0'i32)
|
||||
asm """
|
||||
cpuid
|
||||
:"=a"(`eaxr`), "=b"(`ebxr`), "=c"(`ecxr`), "=d"(`edxr`)
|
||||
:"a"(`eaxi`), "c"(`ecxi`)"""
|
||||
(eaxr, ebxr, ecxr, edxr)
|
||||
|
||||
proc cpuName*(): string =
|
||||
var leaves {.global.} = cast[array[48, char]]([
|
||||
cpuID(eaxi = 0x80000002'i32, ecxi = 0),
|
||||
cpuID(eaxi = 0x80000003'i32, ecxi = 0),
|
||||
cpuID(eaxi = 0x80000004'i32, ecxi = 0)])
|
||||
result = $cast[cstring](addr leaves[0])
|
||||
|
||||
# Counting cycles
|
||||
# -------------------------------------------------------
|
||||
|
||||
# From Linux
|
||||
#
|
||||
# The RDTSC instruction is not ordered relative to memory
|
||||
# access. The Intel SDM and the AMD APM are both vague on this
|
||||
# point, but empirically an RDTSC instruction can be
|
||||
# speculatively executed before prior loads. An RDTSC
|
||||
# immediately after an appropriate barrier appears to be
|
||||
# ordered as a normal load, that is, it provides the same
|
||||
# ordering guarantees as reading from a global memory location
|
||||
# that some other imaginary CPU is updating continuously with a
|
||||
# time stamp.
|
||||
#
|
||||
# From Intel SDM
|
||||
# https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
|
||||
|
||||
proc getTicks*(): int64 {.inline.} =
|
||||
when defined(vcc):
|
||||
proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "<intrin.h>".}
|
||||
proc lfence() {.importc: "__mm_lfence", header: "<intrin.h>".}
|
||||
|
||||
lfence()
|
||||
return rdtsc()
|
||||
|
||||
else:
|
||||
when defined(amd64):
|
||||
var lo, hi: int64
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`lo`), "=d"(`hi`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
return (hi shl 32) or lo
|
||||
else: # 32-bit x86
|
||||
# TODO: Provide a compile-time flag for RDTSCP support
|
||||
# and use it instead of lfence + RDTSC
|
||||
{.emit: """asm volatile(
|
||||
"lfence\n"
|
||||
"rdtsc\n"
|
||||
: "=a"(`result`)
|
||||
:
|
||||
: "memory"
|
||||
);""".}
|
||||
@ -8,11 +8,11 @@
|
||||
|
||||
import unittest,
|
||||
../constantine/io/io_bigints,
|
||||
../constantine/arithmetic/bigints,
|
||||
../constantine/arithmetic,
|
||||
../constantine/config/common,
|
||||
../constantine/primitives
|
||||
|
||||
proc main() =
|
||||
proc mainArith() =
|
||||
suite "isZero":
|
||||
test "isZero for zero":
|
||||
var x: BigInt[128]
|
||||
@ -128,6 +128,16 @@ proc main() =
|
||||
bool(a == c)
|
||||
not bool(carry)
|
||||
|
||||
suite "BigInt + Word":
|
||||
test "Addition limbs carry":
|
||||
block: # P256 / 2
|
||||
var a = BigInt[256].fromhex"0x7fffffff800000008000000000000000000000007fffffffffffffffffffffff"
|
||||
|
||||
let expected = BigInt[256].fromHex"7fffffff80000000800000000000000000000000800000000000000000000000"
|
||||
|
||||
discard a.add(Word 1)
|
||||
check: bool(a == expected)
|
||||
|
||||
suite "Modular operations - small modulus":
|
||||
# Vectors taken from Stint - https://github.com/status-im/nim-stint
|
||||
test "100 mod 13":
|
||||
@ -205,4 +215,249 @@ proc main() =
|
||||
check:
|
||||
bool(r == BigInt[40].fromUint(u mod v))
|
||||
|
||||
main()
|
||||
proc mainNeg() =
|
||||
suite "Conditional negation":
|
||||
test "Conditional negation":
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE")
|
||||
var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtTrue)
|
||||
b.cneg(CtTrue)
|
||||
|
||||
discard a.add(a2)
|
||||
discard b.add(b2)
|
||||
|
||||
check:
|
||||
bool(a.isZero)
|
||||
bool(b.isZero)
|
||||
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE")
|
||||
var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtFalse)
|
||||
b.cneg(CtFalse)
|
||||
|
||||
check:
|
||||
bool(a == a2)
|
||||
bool(b == b2)
|
||||
|
||||
test "Conditional negation with carries":
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_FFFFFFFF")
|
||||
var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_00000000_00000000")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtTrue)
|
||||
b.cneg(CtTrue)
|
||||
|
||||
discard a.add(a2)
|
||||
discard b.add(b2)
|
||||
|
||||
check:
|
||||
bool(a.isZero)
|
||||
bool(b.isZero)
|
||||
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_00000000_00321321_FFFFFFFF")
|
||||
var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_00000000_00000000")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtFalse)
|
||||
b.cneg(CtFalse)
|
||||
|
||||
check:
|
||||
bool(a == a2)
|
||||
bool(b == b2)
|
||||
|
||||
test "Conditional all-zero bit or all-one bit":
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000000")
|
||||
var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtTrue)
|
||||
b.cneg(CtTrue)
|
||||
|
||||
discard a.add(a2)
|
||||
discard b.add(b2)
|
||||
|
||||
check:
|
||||
bool(a.isZero)
|
||||
bool(b.isZero)
|
||||
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000000")
|
||||
var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF")
|
||||
|
||||
let a2 = a
|
||||
let b2 = b
|
||||
|
||||
a.cneg(CtFalse)
|
||||
b.cneg(CtFalse)
|
||||
|
||||
check:
|
||||
bool(a == a2)
|
||||
bool(b == b2)
|
||||
|
||||
proc mainCopySwap() =
|
||||
suite "Copy and Swap":
|
||||
test "Conditional copy":
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE")
|
||||
let b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF")
|
||||
|
||||
var expected = a
|
||||
a.ccopy(b, CtFalse)
|
||||
|
||||
check: bool(expected == a)
|
||||
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x00000000_FFFFFFFF_FFFFFFFF_FFFFFFFF")
|
||||
let b = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000001")
|
||||
|
||||
var expected = b
|
||||
a.ccopy(b, CtTrue)
|
||||
|
||||
check: bool(expected == b)
|
||||
|
||||
test "Conditional swap":
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE")
|
||||
var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF")
|
||||
|
||||
let eA = a
|
||||
let eB = b
|
||||
|
||||
a.cswap(b, CtFalse)
|
||||
check:
|
||||
bool(eA == a)
|
||||
bool(eB == b)
|
||||
|
||||
block:
|
||||
var a = fromHex(BigInt[128], "0x00000000_FFFFFFFF_FFFFFFFF_FFFFFFFF")
|
||||
var b = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000001")
|
||||
|
||||
let eA = b
|
||||
let eB = a
|
||||
|
||||
a.cswap(b, CtTrue)
|
||||
check:
|
||||
bool(eA == a)
|
||||
bool(eB == b)
|
||||
|
||||
proc mainModularInverse() =
|
||||
suite "Modular Inverse (with odd modulus)":
|
||||
# Note: We don't define multi-precision multiplication
|
||||
# because who needs it when you have Montgomery?
|
||||
# ¯\_(ツ)_/¯
|
||||
test "42^-1 (mod 2017) = 1969":
|
||||
block: # small int
|
||||
let a = BigInt[16].fromUint(42'u16)
|
||||
let M = BigInt[16].fromUint(2017'u16)
|
||||
|
||||
var mp1div2 = M
|
||||
discard mp1div2.add(Word 1)
|
||||
mp1div2.shiftRight(1)
|
||||
|
||||
let expected = BigInt[16].fromUint(1969'u16)
|
||||
var r {.noInit.}: BigInt[16]
|
||||
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
block: # huge int
|
||||
let a = BigInt[381].fromUint(42'u16)
|
||||
let M = BigInt[381].fromUint(2017'u16)
|
||||
|
||||
var mp1div2 = M
|
||||
discard mp1div2.add(Word 1)
|
||||
mp1div2.shiftRight(1)
|
||||
|
||||
let expected = BigInt[381].fromUint(1969'u16)
|
||||
var r {.noInit.}: BigInt[381]
|
||||
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
test "271^-1 (mod 383) = 106":
|
||||
block: # small int
|
||||
let a = BigInt[16].fromUint(271'u16)
|
||||
let M = BigInt[16].fromUint(383'u16)
|
||||
|
||||
var mp1div2 = M
|
||||
discard mp1div2.add(Word 1)
|
||||
mp1div2.shiftRight(1)
|
||||
|
||||
let expected = BigInt[16].fromUint(106'u16)
|
||||
var r {.noInit.}: BigInt[16]
|
||||
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
block: # huge int
|
||||
let a = BigInt[381].fromUint(271'u16)
|
||||
let M = BigInt[381].fromUint(383'u16)
|
||||
|
||||
var mp1div2 = M
|
||||
discard mp1div2.add(Word 1)
|
||||
mp1div2.shiftRight(1)
|
||||
|
||||
let expected = BigInt[381].fromUint(106'u16)
|
||||
var r {.noInit.}: BigInt[381]
|
||||
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
test "BN254_Modulus^-1 (mod BLS12_381)":
|
||||
let a = BigInt[381].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab")
|
||||
|
||||
var mp1div2 = M
|
||||
discard mp1div2.add(Word 1)
|
||||
mp1div2.shiftRight(1)
|
||||
|
||||
let expected = BigInt[381].fromHex("0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8")
|
||||
|
||||
var r {.noInit.}: BigInt[381]
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
test "0^-1 (mod 0) = 0 (need for tower of extension fields)":
|
||||
let a = BigInt[16].fromUint(0'u16)
|
||||
let M = BigInt[16].fromUint(2017'u16)
|
||||
|
||||
var mp1div2 = M
|
||||
mp1div2.shiftRight(1)
|
||||
discard mp1div2.add(Word 1)
|
||||
|
||||
let expected = BigInt[16].fromUint(0'u16)
|
||||
var r {.noInit.}: BigInt[16]
|
||||
|
||||
r.invmod(a, M, mp1div2)
|
||||
|
||||
check: bool(r == expected)
|
||||
|
||||
|
||||
mainArith()
|
||||
mainNeg()
|
||||
mainCopySwap()
|
||||
mainModularInverse()
|
||||
|
||||
@ -11,7 +11,7 @@ import
|
||||
unittest,
|
||||
# Third-party
|
||||
../constantine/io/io_bigints,
|
||||
../constantine/arithmetic/bigints,
|
||||
../constantine/arithmetic,
|
||||
../constantine/primitives
|
||||
|
||||
proc main() =
|
||||
|
||||
@ -13,8 +13,8 @@ import
|
||||
gmp, stew/byteutils,
|
||||
# Internal
|
||||
../constantine/io/io_bigints,
|
||||
../constantine/arithmetic/bigints,
|
||||
../constantine/primitives/constant_time
|
||||
../constantine/arithmetic,
|
||||
../constantine/primitives
|
||||
|
||||
# We test up to 1024-bit, more is really slow
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import unittest,
|
||||
../constantine/arithmetic/finite_fields,
|
||||
../constantine/arithmetic,
|
||||
../constantine/io/io_fields,
|
||||
../constantine/config/curves
|
||||
|
||||
|
||||
@ -7,11 +7,11 @@
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import std/unittest, std/times,
|
||||
../constantine/arithmetic/[bigints, finite_fields],
|
||||
../constantine/arithmetic,
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
../constantine/config/curves,
|
||||
# Test utilities
|
||||
./prng
|
||||
../helpers/prng
|
||||
|
||||
const Iters = 128
|
||||
|
||||
@ -25,17 +25,22 @@ static: doAssert defined(testingCurves), "This modules requires the -d:testingCu
|
||||
import ../constantine/config/common
|
||||
|
||||
proc sanity(C: static Curve) =
|
||||
test "Squaring 0,1,2 with "& $C & " [FastSquaring = " & $Fake101.canUseNoCarryMontySquare & "]":
|
||||
test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $C.canUseNoCarryMontySquare & "]":
|
||||
block: # 0² mod
|
||||
var n: Fp[C]
|
||||
|
||||
n.fromUint(0'u32)
|
||||
let expected = n
|
||||
|
||||
# Out-of-place
|
||||
var r: Fp[C]
|
||||
r.square(n)
|
||||
# In-place
|
||||
n.square()
|
||||
|
||||
check: bool(r == expected)
|
||||
check:
|
||||
bool(r == expected)
|
||||
bool(n == expected)
|
||||
|
||||
block: # 1² mod
|
||||
var n: Fp[C]
|
||||
@ -43,10 +48,15 @@ proc sanity(C: static Curve) =
|
||||
n.fromUint(1'u32)
|
||||
let expected = n
|
||||
|
||||
# Out-of-place
|
||||
var r: Fp[C]
|
||||
r.square(n)
|
||||
# In-place
|
||||
n.square()
|
||||
|
||||
check: bool(r == expected)
|
||||
check:
|
||||
bool(r == expected)
|
||||
bool(n == expected)
|
||||
|
||||
block: # 2² mod
|
||||
var n, expected: Fp[C]
|
||||
@ -54,10 +64,15 @@ proc sanity(C: static Curve) =
|
||||
n.fromUint(2'u32)
|
||||
expected.fromUint(4'u32)
|
||||
|
||||
# Out-of-place
|
||||
var r: Fp[C]
|
||||
r.square(n)
|
||||
# In-place
|
||||
n.square()
|
||||
|
||||
check: bool(r == expected)
|
||||
check:
|
||||
bool(r == expected)
|
||||
bool(n == expected)
|
||||
|
||||
proc mainSanity() =
|
||||
suite "Modular squaring is consistent with multiplication on special elements":
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import unittest,
|
||||
../constantine/arithmetic/[bigints, finite_fields],
|
||||
../constantine/arithmetic,
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
../constantine/config/curves
|
||||
|
||||
@ -144,14 +144,14 @@ proc main() =
|
||||
|
||||
suite "Modular inversion over prime fields":
|
||||
test "x^(-1) mod p":
|
||||
var x: Fp[BLS12_381]
|
||||
var r, x: Fp[BLS12_381]
|
||||
|
||||
# BN254 field modulus
|
||||
x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
|
||||
|
||||
let expected = "0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8"
|
||||
x.inv()
|
||||
let computed = x.toHex()
|
||||
r.inv(x)
|
||||
let computed = r.toHex()
|
||||
|
||||
check:
|
||||
computed == expected
|
||||
|
||||
@ -13,18 +13,21 @@ import
|
||||
gmp, stew/byteutils,
|
||||
# Internal
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
../constantine/arithmetic/[finite_fields, bigints],
|
||||
../constantine/primitives/constant_time,
|
||||
../constantine/arithmetic,
|
||||
../constantine/primitives,
|
||||
../constantine/config/curves
|
||||
|
||||
# We test up to 1024-bit, more is really slow
|
||||
|
||||
var RNG {.compileTime.} = initRand(1234)
|
||||
const CurveParams = [
|
||||
P224: (224, "0xffffffffffffffffffffffffffffffff000000000000000000000001"),
|
||||
BN254: (254, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"),
|
||||
P256: (256, "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"),
|
||||
Secp256k1: (256, "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F"),
|
||||
BLS12_381: (381, "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab")
|
||||
]
|
||||
|
||||
const AvailableCurves = [P224, BN254, P256, Secp256k1, BLS12_381]
|
||||
|
||||
const # https://gmplib.org/manual/Integer-Import-and-Export.html
|
||||
GMP_WordLittleEndian = -1'i32
|
||||
GMP_WordNativeEndian = 0'i32
|
||||
@ -53,7 +56,7 @@ proc binary_prologue[C: static Curve, N: static int](
|
||||
mpz_urandomb(b, gmpRng, uint bits)
|
||||
# Set modulus to curve modulus
|
||||
let err = mpz_set_str(p, CurveParams[C][1], 0)
|
||||
doAssert err == 0
|
||||
doAssert err == 0, "Error on prime for curve " & $Curve(C)
|
||||
|
||||
#########################################################
|
||||
# Conversion buffers
|
||||
@ -189,8 +192,8 @@ proc invTests(gmpRng: var gmp_randstate_t, a, b, p, r: var mpz_t, C: static Curv
|
||||
let exist = mpz_invert(r, a, p)
|
||||
doAssert exist != 0
|
||||
|
||||
var rTest = aTest
|
||||
rTest.inv()
|
||||
var rTest {.noInit.}: Fp[C]
|
||||
rTest.inv(aTest)
|
||||
|
||||
binary_epilogue(r, a, b, rTest, aBuf, bBuf, "Inversion (b is unused)")
|
||||
|
||||
@ -206,7 +209,7 @@ macro randomTests(numTests: static int, curveSym, body: untyped): untyped =
|
||||
result = newStmtList()
|
||||
|
||||
for _ in 0 ..< numTests:
|
||||
let curve = RNG.rand([BN254, BLS12_381])
|
||||
let curve = RNG.rand(AvailableCurves)
|
||||
|
||||
result.add quote do:
|
||||
block:
|
||||
|
||||
@ -12,9 +12,9 @@ import
|
||||
# Internals
|
||||
../constantine/tower_field_extensions/[abelian_groups, fp2_complex],
|
||||
../constantine/config/[common, curves],
|
||||
../constantine/arithmetic/bigints,
|
||||
../constantine/arithmetic,
|
||||
# Test utilities
|
||||
./prng
|
||||
../helpers/prng
|
||||
|
||||
const Iters = 128
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@
|
||||
import unittest, random,
|
||||
../constantine/io/io_bigints,
|
||||
../constantine/config/common,
|
||||
../constantine/arithmetic/bigints
|
||||
../constantine/arithmetic
|
||||
|
||||
randomize(0xDEADBEEF) # Random seed for reproducibility
|
||||
type T = BaseType
|
||||
|
||||
@ -10,7 +10,7 @@ import unittest, random,
|
||||
../constantine/io/[io_bigints, io_fields],
|
||||
../constantine/config/curves,
|
||||
../constantine/config/common,
|
||||
../constantine/arithmetic/[bigints, finite_fields]
|
||||
../constantine/arithmetic
|
||||
|
||||
randomize(0xDEADBEEF) # Random seed for reproducibility
|
||||
type T = BaseType
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user