diff --git a/benchmarks/bench_eth_curves.nim b/benchmarks/bench_eth_curves.nim deleted file mode 100644 index 2caf0e0..0000000 --- a/benchmarks/bench_eth_curves.nim +++ /dev/null @@ -1,6 +0,0 @@ -# Benchmark of Ethereum 1 and Ethereum 2 elliptic curves - -import - ./secp256k1_fp, - ./bn254_fp, - ./bls12_381_fp diff --git a/benchmarks/bench_finite_fields.nim b/benchmarks/bench_finite_fields.nim new file mode 100644 index 0000000..d670c93 --- /dev/null +++ b/benchmarks/bench_finite_fields.nim @@ -0,0 +1,157 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# Benchmark of finite fields +# +# ############################################################ + +import + # Internals + ../constantine/config/[common, curves], + ../constantine/arithmetic, + ../constantine/io/[io_bigints, io_fields], + ../constantine/primitives, + # Helpers + ../helpers/[timers, prng, static_for], + # Standard library + std/[monotimes, times, strformat, strutils, macros] + +const Iters = 1_000_000 +const InvIters = 1000 +const AvailableCurves = [ + P224, + BN254, + P256, + Secp256k1, + BLS12_381 +] + +var rng: RngState +let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32 +rng.seed(seed) +echo "bench_finite_field xoshiro512** seed: ", seed + +# warmup +proc warmup*() = + # Warmup - make sure cpu is on max perf + let start = cpuTime() + var foo = 123 + for i in 0 ..< 300_000_000: + foo += i*i mod 456 + foo = foo mod 789 + + # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects + let stop = cpuTime() + echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n" + +warmup() + +echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." +echo "==========================================================================================================\n" +echo "All benchmarks are using constant-time implementations to protect against side-channel attacks." +when defined(gcc): + echo "\nCompiled with GCC" +elif defined(clang): + echo "\nCompiled with Clang" +elif defined(vcc): + echo "\nCompiled with MSVC" +elif defined(icc): + echo "\nCompiled with ICC" +else: + echo "\nCompiled with an unknown compiler" + +when defined(i386) or defined(amd64): + import ../helpers/x86 + echo "Running on ", cpuName(), "\n\n" + +proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = + echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles" + +macro fixFieldDisplay(T: typedesc): untyped = + # At compile-time, enums are integers and their display is buggy + # we get the Curve ID instead of the curve name. + let instantiated = T.getTypeInst() + var name = $instantiated[1][0] # Fp + name.add "[" & $Curve(instantiated[1][1].intVal) & "]" + result = newLit name + +# Compilers are smart with dead code (but not with multiprecision arithmetic :/) +var globalsAreNotOptimizedAway: Word + +template bench(op: string, T: typedesc, iters: int, body: untyped): untyped = + let start = getMonotime() + let startClk = getTicks() + for _ in 0 ..< iters: + body + let stopClk = getTicks() + let stop = getMonotime() + + report(op, fixFieldDisplay(T), start, stop, startClk, stopClk, iters) + +proc addBench(T: typedesc) = + var x = rng.random(T) + let y = rng.random(T) + bench("Addition", T, Iters): + x += y + globalsAreNotOptimizedAway += x.mres.limbs[^1] + +proc subBench(T: typedesc) = + var x = rng.random(T) + let y = rng.random(T) + preventOptimAway(x) + bench("Substraction", T, Iters): + x -= y + globalsAreNotOptimizedAway += x.mres.limbs[^1] + +proc negBench(T: typedesc) = + var r: T + let x = rng.random(T) + bench("Negation", T, Iters): + r.neg(x) + globalsAreNotOptimizedAway += r.mres.limbs[^1] + +proc mulBench(T: typedesc) = + var r: T + let x = rng.random(T) + let y = rng.random(T) + preventOptimAway(r) + bench("Multiplication", T, Iters): + r.prod(x, y) + +proc sqrBench(T: typedesc) = + var r: T + let x = rng.random(T) + preventOptimAway(r) + bench("Squaring", T, Iters): + r.square(x) + +proc invBench(T: typedesc) = + var r: T + let x = rng.random(T) + preventOptimAway(r) + bench("Inversion", T, InvIters): + r.inv(x) + +proc main() = + echo "-".repeat(80) + staticFor i, 0, AvailableCurves.len: + const curve = AvailableCurves[i] + addBench(Fp[curve]) + subBench(Fp[curve]) + negBench(Fp[curve]) + mulBench(Fp[curve]) + sqrBench(Fp[curve]) + invBench(Fp[curve]) + echo "-".repeat(80) + +main() + +echo "Notes:" +echo " GCC is significantly slower than Clang on multiprecision arithmetic." diff --git a/benchmarks/big_to_fq.nim b/benchmarks/big_to_fq.nim deleted file mode 100644 index 419f770..0000000 --- a/benchmarks/big_to_fq.nim +++ /dev/null @@ -1,48 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -# ############################################################ -# -# Benchmark of the conversion from Big Int to Fq -# -# ############################################################ - -# 2 implementations are possible -# - 1 based on Montgomery Multiplication -# - 1 based on modular left shift which involves multiple divisions - -import - ../constantine/config/[common, curves], - ../constantine/arithmetic/[bigints_checked, finite_fields], - random, std/monotimes, times, strformat - -const Iters = 1_000_000 - -randomize(1234) - -proc main() = - var x: BigInt[381] - x.setInternalBitLength() - for i in 0 ..< x.limbs.len - 1: - # Set x to a random value guaranteed below the prime - x.limbs[i] = Word(rand(BaseType.high.int)) - - - let start = getMonotime() - for _ in 0 ..< Iters: - let y = Fq[BLS12_381].fromBig(x) - let stop = getMonotime() - - echo &"Time for {Iters} iterations: {inMilliseconds(stop-start)} ms" - - -main() -# 1_000_000 iterations with -d:danger on i9-9980XE all-core turbo 4.1GHz -# Montgomery Multiplication based: 254ms -# shlAddMod based (using assembly div2n1n!!): 907 ms -# Note: shlAddMod will be even slower when division is made constant-time diff --git a/benchmarks/bls12_381_fp.nim b/benchmarks/bls12_381_fp.nim deleted file mode 100644 index 553f53f..0000000 --- a/benchmarks/bls12_381_fp.nim +++ /dev/null @@ -1,155 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -# ############################################################ -# -# Benchmark of modular exponentiation -# -# ############################################################ - -# 2 implementations are available -# - 1 is constant time -# - 1 exposes the exponent bits to: -# timing attack, -# memory access analysis, -# power analysis (i.e. oscilloscopes on embedded) -# It is suitable for public exponents for example -# to compute modular inversion via the Fermat method - -import - ../constantine/config/[common, curves], - ../constantine/arithmetic/[bigints, finite_fields], - ../constantine/io/[io_bigints, io_fields], - random, std/monotimes, times, strformat, - ./timers - -const Iters = 1_000_000 -const InvIters = 1000 - -randomize(1234) - -# warmup -proc warmup*() = - # Warmup - make sure cpu is on max perf - let start = cpuTime() - var foo = 123 - for i in 0 ..< 300_000_000: - foo += i*i mod 456 - foo = foo mod 789 - - # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects - let stop = cpuTime() - echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n" - -warmup() - -echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." -echo "==========================================================================================================\n" - -proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = - echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles" - -proc addBench() = - var x, y: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # BLS12-381 prime - 2 - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x += y - let stopClk = getTicks() - let stop = getMonotime() - report("Addition", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters) - - -addBench() - -proc subBench() = - var x, y: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # BLS12-381 prime - 2 - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x -= y - let stopClk = getTicks() - let stop = getMonotime() - report("Substraction", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters) - -subBench() - -proc negBench() = - var r, x: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.neg(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Negation", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters) - -negBench() - -proc mulBench() = - var r, x, y: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # BLS12-381 prime - 2 - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.prod(x, y) - let stopClk = getTicks() - let stop = getMonotime() - report("Multiplication", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters) - -mulBench() - -proc sqrBench() = - var r, x: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.square(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Squaring", "Fp[BLS12_381]", start, stop, startClk, stopClk, Iters) - -sqrBench() - -proc invBench() = - # TODO: having x on the stack triggers stack smashing detection. To be investigated - var x: ref Fp[BLS12_381] - new x - # BN254 field modulus - x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< InvIters: - # Note: we don't copy the original x so x is alterning between x and x^-1 - inv(x[]) - let stopClk = getTicks() - let stop = getMonotime() - report("Inversion", "Fp[BLS12_381]", start, stop, startClk, stopClk, InvIters) - -invBench() diff --git a/benchmarks/bn254_fp.nim b/benchmarks/bn254_fp.nim deleted file mode 100644 index 1321482..0000000 --- a/benchmarks/bn254_fp.nim +++ /dev/null @@ -1,155 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -# ############################################################ -# -# Benchmark of modular exponentiation -# -# ############################################################ - -# 2 implementations are available -# - 1 is constant time -# - 1 exposes the exponent bits to: -# timing attack, -# memory access analysis, -# power analysis (i.e. oscilloscopes on embedded) -# It is suitable for public exponents for example -# to compute modular inversion via the Fermat method - -import - ../constantine/config/[common, curves], - ../constantine/arithmetic/[bigints, finite_fields], - ../constantine/io/[io_bigints, io_fields], - random, std/monotimes, times, strformat, - ./timers - -const Iters = 1_000_000 -const InvIters = 1000 - -randomize(1234) - -# warmup -proc warmup*() = - # Warmup - make sure cpu is on max perf - let start = cpuTime() - var foo = 123 - for i in 0 ..< 300_000_000: - foo += i*i mod 456 - foo = foo mod 789 - - # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects - let stop = cpuTime() - echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n" - -warmup() - -echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." -echo "==========================================================================================================\n" - -proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = - echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles" - -proc addBench() = - var x, y: Fp[BN254] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x += y - let stopClk = getTicks() - let stop = getMonotime() - report("Addition", "Fp[BN254]", start, stop, startClk, stopClk, Iters) - - -addBench() - -proc subBench() = - var x, y: Fp[BN254] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x -= y - let stopClk = getTicks() - let stop = getMonotime() - report("Substraction", "Fp[BN254]", start, stop, startClk, stopClk, Iters) - -subBench() - -proc negBench() = - var r, x: Fp[BN254] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.neg(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Negation", "Fp[BN254]", start, stop, startClk, stopClk, Iters) - -negBench() - -proc mulBench() = - var r, x, y: Fp[BN254] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.prod(x, y) - let stopClk = getTicks() - let stop = getMonotime() - report("Multiplication", "Fp[BN254]", start, stop, startClk, stopClk, Iters) - -mulBench() - -proc sqrBench() = - var r, x: Fp[BN254] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.square(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Squaring", "Fp[BN254]", start, stop, startClk, stopClk, Iters) - -sqrBench() - -proc invBench() = - # TODO: having x on the stack triggers stack smashing detection. To be investigated - var x: ref Fp[BN254] - new x - # BN254 field modulus - x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< InvIters: - # Note: we don't copy the original x so x is alterning between x and x^-1 - inv(x[]) - let stopClk = getTicks() - let stop = getMonotime() - report("Inversion", "Fp[BN254]", start, stop, startClk, stopClk, InvIters) - -invBench() diff --git a/benchmarks/powmod.nim b/benchmarks/powmod.nim deleted file mode 100644 index 1aeee5b..0000000 --- a/benchmarks/powmod.nim +++ /dev/null @@ -1,69 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -# ############################################################ -# -# Benchmark of modular exponentiation -# -# ############################################################ - -# 2 implementations are available -# - 1 is constant time -# - 1 exposes the exponent bits to: -# timing attack, -# memory access analysis, -# power analysis (i.e. oscilloscopes on embedded) -# It is suitable for public exponents for example -# to compute modular inversion via the Fermat method - -import - ../constantine/config/[common, curves], - ../constantine/arithmetic/[bigints_checked, finite_fields], - ../constantine/io/[io_bigints, io_fields], - random, std/monotimes, times, strformat - -const Iters = 10_000 - -randomize(1234) - -proc mainCT() = - var x: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # BLS12-381 prime - 2 - let exponent = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9") - - let start = getMonotime() - for _ in 0 ..< Iters: - var y = x - x.pow(exponent) - let stop = getMonotime() - - echo &"Time for {Iters} iterations (constant-time 381-bit): {inMilliseconds(stop-start)} ms" - -proc mainUnsafe() = - var x: Fp[BLS12_381] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # BLS12-381 prime - 2 - let exponent = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9") - - let start = getMonotime() - for _ in 0 ..< Iters: - var y = x - x.powUnsafeExponent(exponent) - let stop = getMonotime() - - echo &"Time for {Iters} iterations (unsafe 381-bit): {inMilliseconds(stop-start)} ms" - -mainCT() -mainUnsafe() -# 10_000 iterations with -d:danger on i9-9980XE all-core turbo 4.1GHz - -# Time for 10000 iterations (constant-time 381-bit): 1349 ms -# Time for 10000 iterations (unsafe 381-bit): 1226 ms diff --git a/benchmarks/secp256k1_fp.nim b/benchmarks/secp256k1_fp.nim deleted file mode 100644 index 5191c16..0000000 --- a/benchmarks/secp256k1_fp.nim +++ /dev/null @@ -1,155 +0,0 @@ -# Constantine -# Copyright (c) 2018-2019 Status Research & Development GmbH -# Copyright (c) 2020-Present Mamy André-Ratsimbazafy -# Licensed and distributed under either of -# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). -# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). -# at your option. This file may not be copied, modified, or distributed except according to those terms. - -# ############################################################ -# -# Benchmark of modular exponentiation -# -# ############################################################ - -# 2 implementations are available -# - 1 is constant time -# - 1 exposes the exponent bits to: -# timing attack, -# memory access analysis, -# power analysis (i.e. oscilloscopes on embedded) -# It is suitable for public exponents for example -# to compute modular inversion via the Fermat method - -import - ../constantine/config/[common, curves], - ../constantine/arithmetic/[bigints, finite_fields], - ../constantine/io/[io_bigints, io_fields], - random, std/monotimes, times, strformat, - ./timers - -const Iters = 1_000_000 -const InvIters = 1000 - -randomize(1234) - -# warmup -proc warmup*() = - # Warmup - make sure cpu is on max perf - let start = cpuTime() - var foo = 123 - for i in 0 ..< 300_000_000: - foo += i*i mod 456 - foo = foo mod 789 - - # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects - let stop = cpuTime() - echo &"\n\nWarmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n" - -warmup() - -echo "\n⚠️ Measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them." -echo "==========================================================================================================\n" - -proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = - echo &"{op:<15} {field:<15} {inNanoseconds((stop-start) div iters):>9} ns {(stopClk - startClk) div iters:>9} cycles" - -proc addBench() = - var x, y: Fp[Secp256k1] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x += y - let stopClk = getTicks() - let stop = getMonotime() - report("Addition", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters) - - -addBench() - -proc subBench() = - var x, y: Fp[Secp256k1] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - x -= y - let stopClk = getTicks() - let stop = getMonotime() - report("Substraction", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters) - -subBench() - -proc negBench() = - var r, x: Fp[Secp256k1] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.neg(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Negation", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters) - -negBench() - -proc mulBench() = - var r, x, y: Fp[Secp256k1] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - # Truncated BLS12-381 prime - y.fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.prod(x, y) - let stopClk = getTicks() - let stop = getMonotime() - report("Multiplication", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters) - -mulBench() - -proc sqrBench() = - var r, x: Fp[Secp256k1] - # BN254 field modulus - x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< Iters: - r.square(x) - let stopClk = getTicks() - let stop = getMonotime() - report("Squaring", "Fp[Secp256k1]", start, stop, startClk, stopClk, Iters) - -sqrBench() - -proc invBench() = - # TODO: having x on the stack triggers stack smashing detection. To be investigated - var x: ref Fp[Secp256k1] - new x - # BN254 field modulus - x[].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") - - let start = getMonotime() - let startClk = getTicks() - for _ in 0 ..< InvIters: - # Note: we don't copy the original x so x is alterning between x and x^-1 - inv(x[]) - let stopClk = getTicks() - let stop = getMonotime() - report("Inversion", "Fp[Secp256k1]", start, stop, startClk, stopClk, InvIters) - -invBench() diff --git a/benchmarks/timers.nim b/benchmarks/timers.nim deleted file mode 100644 index 45d415f..0000000 --- a/benchmarks/timers.nim +++ /dev/null @@ -1,49 +0,0 @@ -when defined(i386) or defined(amd64): - # From Linux - # - # The RDTSC instruction is not ordered relative to memory - # access. The Intel SDM and the AMD APM are both vague on this - # point, but empirically an RDTSC instruction can be - # speculatively executed before prior loads. An RDTSC - # immediately after an appropriate barrier appears to be - # ordered as a normal load, that is, it provides the same - # ordering guarantees as reading from a global memory location - # that some other imaginary CPU is updating continuously with a - # time stamp. - # - # From Intel SDM - # https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf - when not defined(vcc): - when defined(amd64): - proc getTicks*(): int64 {.inline.} = - var lo, hi: int64 - # TODO: Provide a compile-time flag for RDTSCP support - # and use it instead of lfence + RDTSC - {.emit: """asm volatile( - "lfence\n" - "rdtsc\n" - : "=a"(`lo`), "=d"(`hi`) - : - : "memory" - );""".} - return (hi shl 32) or lo - else: - proc getTicks*(): int64 {.inline.} = - # TODO: Provide a compile-time flag for RDTSCP support - # and use it instead of lfence + RDTSC - {.emit: """asm volatile( - "lfence\n" - "rdtsc\n" - : "=a"(`result`) - : - : "memory" - );""".} - else: - proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "".} - proc lfence() {.importc: "__mm_lfence", header: "".} - - proc getTicks*(): int64 {.inline.} = - lfence() - return rdtsc() -else: - {.error: "getticks is not supported on this CPU architecture".} diff --git a/constantine.nimble b/constantine.nimble index 15d2e2a..83267e7 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -110,3 +110,18 @@ task test_no_gmp, "Run tests that don't require GMP": # 𝔽p2 test "", "tests/test_fp2.nim" + +task bench, "Run benchmark with your default compiler": + if not dirExists "build": + mkDir "build" + exec "nim c -d:danger --verbosity:0 -o:build/bench_finite_fields_default -r --hints:off --warnings:off benchmarks/bench_finite_fields" + +task bench_gcc, "Run benchmark with gcc": + if not dirExists "build": + mkDir "build" + exec "nim c --cc:gcc -d:danger --verbosity:0 -o:build/bench_finite_fields_gcc -r --hints:off --warnings:off benchmarks/bench_finite_fields" + +task bench_clang, "Run benchmark with clang": + if not dirExists "build": + mkDir "build" + exec "nim c --cc:clang -d:danger --verbosity:0 -o:build/bench_finite_fields_clang -r --hints:off --warnings:off benchmarks/bench_finite_fields" diff --git a/constantine/arithmetic.nim b/constantine/arithmetic.nim new file mode 100644 index 0000000..7d2c6ca --- /dev/null +++ b/constantine/arithmetic.nim @@ -0,0 +1,17 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + arithmetic/[limbs, limbs_modular, limbs_montgomery], + arithmetic/bigints, + arithmetic/[finite_fields, finite_fields_inversion] + +export + limbs, limbs_modular, limbs_montgomery, + bigints, + finite_fields, finite_fields_inversion diff --git a/constantine/arithmetic/bigints.nim b/constantine/arithmetic/bigints.nim index 9399ed9..a3f85e0 100644 --- a/constantine/arithmetic/bigints.nim +++ b/constantine/arithmetic/bigints.nim @@ -9,8 +9,7 @@ import ../config/common, ../primitives, - ./limbs, - ./montgomery + ./limbs, ./limbs_montgomery, ./limbs_modular # ############################################################ # @@ -87,25 +86,16 @@ debug: func `$`*(a: BigInt): string = result = "BigInt[" result.add $BigInt.bits - result.add "](limbs: [" - result.add $BaseType(a.limbs[0]) & " (0x" & toHex(BaseType(a.limbs[0])) & ')' - for i in 1 ..< a.limbs.len: - result.add ", " - result.add $BaseType(a.limbs[i]) & " (0x" & toHex(BaseType(a.limbs[i])) & ')' - result.add "])" + result.add "](limbs: " + result.add a.limbs.toString() + result.add ")" # No exceptions allowed {.push raises: [].} {.push inline.} -func `==`*(a, b: BigInt): CTBool[Word] = - ## Returns true if 2 big ints are equal - ## Comparison is constant-time - a.limbs == b.limbs - -func isZero*(a: BigInt): CTBool[Word] = - ## Returns true if a big int is equal to zero - a.limbs.isZero +# Initialization +# ------------------------------------------------------------ func setZero*(a: var BigInt) = ## Set a BigInt to 0 @@ -115,6 +105,51 @@ func setOne*(a: var BigInt) = ## Set a BigInt to 1 a.limbs.setOne() +# Copy +# ------------------------------------------------------------ + +func ccopy*(a: var BigInt, b: BigInt, ctl: CTBool[Word]) = + ## Constant-time conditional copy + ## If ctl is true: b is copied into a + ## if ctl is false: b is not copied and a is untouched + ## Time and memory accesses are the same whether a copy occurs or not + ccopy(a.limbs, b.limbs, ctl) + +func cswap*(a, b: var BigInt, ctl: CTBool) = + ## Swap ``a`` and ``b`` if ``ctl`` is true + ## + ## Constant-time: + ## Whether ``ctl`` is true or not, the same + ## memory accesses are done (unless the compiler tries to be clever) + cswap(a.limbs, b.limbs, ctl) + +# Comparison +# ------------------------------------------------------------ + +func `==`*(a, b: BigInt): CTBool[Word] = + ## Returns true if 2 big ints are equal + ## Comparison is constant-time + a.limbs == b.limbs + +func `<`*(a, b: BigInt): CTBool[Word] = + ## Returns true if a < b + a.limbs < b.limbs + +func `<=`*(a, b: BigInt): CTBool[Word] = + ## Returns true if a <= b + a.limbs <= b.limbs + +func isZero*(a: BigInt): CTBool[Word] = + ## Returns true if a big int is equal to zero + a.limbs.isZero + +func isOdd*(a: BigInt): CTBool[Word] = + ## Returns true if a is odd + a.limbs.isOdd + +# Arithmetic +# ------------------------------------------------------------ + func cadd*(a: var BigInt, b: BigInt, ctl: CTBool[Word]): CTBool[Word] = ## Constant-time in-place conditional addition ## The addition is only performed if ctl is "true" @@ -133,19 +168,16 @@ func cdouble*(a: var BigInt, ctl: CTBool[Word]): CTBool[Word] = ## The result carry is always computed. (CTBool[Word]) cadd(a.limbs, a.limbs, ctl) -# ############################################################ -# -# BigInt Primitives Optimized for speed -# -# ############################################################ -# -# TODO: fallback to cadd / csub with a "size" compile-option - func add*(a: var BigInt, b: BigInt): CTBool[Word] = ## Constant-time in-place addition ## Returns the carry (CTBool[Word]) add(a.limbs, b.limbs) +func add*(a: var BigInt, b: Word): CTBool[Word] = + ## Constant-time in-place addition + ## Returns the carry + (CTBool[Word]) add(a.limbs, b) + func sub*(a: var BigInt, b: BigInt): CTBool[Word] = ## Constant-time in-place substraction ## Returns the borrow @@ -177,19 +209,23 @@ func double*(r: var BigInt, a: BigInt): CTBool[Word] = ## Returns the carry (CTBool[Word]) sum(r.limbs, a.limbs, a.limbs) -# ############################################################ -# -# Comparisons -# -# ############################################################ +func div2*(a: var BigInt) = + ## In-place divide ``a`` by 2 + a.limbs.shiftRight(1) -func `<`*(a, b: BigInt): CTBool[Word] = - ## Returns true if a < b - a.limbs < b.limbs +func cneg*(a: var BigInt, ctl: CTBool) = + ## Conditional negation. + ## Negate if ``ctl`` is true + a.limbs.cneg(ctl) -func `<=`*(a, b: BigInt): CTBool[Word] = - ## Returns true if a <= b - a.limbs <= b.limbs +# Bit Manipulation +# ------------------------------------------------------------ + +func shiftRight*(a: var BigInt, k: int) = + ## Shift right by k. + ## + ## k MUST be less than the base word size (2^31 or 2^63) + a.limbs.shiftRight(k) # ############################################################ # @@ -209,6 +245,22 @@ func reduce*[aBits, mBits](r: var BigInt[mBits], a: BigInt[aBits], M: BigInt[mBi # pass a pointer+length to a fixed session of the BSS. reduce(r.limbs, a.limbs, aBits, M.limbs, mBits) +func steinsGCD*[bits](r: var BigInt[bits], a, F, M, mp1div2: BigInt[bits]) = + ## Compute F multiplied the modular inverse of ``a`` modulo M + ## r ≡ F . a^-1 (mod M) + ## + ## M MUST be odd, M does not need to be prime. + ## ``a`` MUST be less than M. + r.limbs.steinsGCD(a.limbs, F.limbs, M.limbs, bits, mp1div2.limbs) + +func invmod*[bits](r: var BigInt[bits], a, M, mp1div2: BigInt[bits]) = + ## Compute the modular inverse of ``a`` modulo M + ## + ## The modulus ``M`` MUST be odd + var one {.noInit.}: BigInt[bits] + one.setOne() + r.steinsGCD(a, one, M, mp1div2) + # ############################################################ # # Montgomery Arithmetic @@ -335,3 +387,6 @@ func montyPowUnsafeExponent*[mBits: static int]( else: (1 shl windowSize) + 1 var scratchSpace {.noInit.}: array[scratchLen, Limbs[mBits.wordsRequired]] montyPowUnsafeExponent(a.limbs, exponent, M.limbs, one.limbs, negInvModWord, scratchSpace, canUseNoCarryMontyMul) + +{.pop.} # inline +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/finite_fields.nim b/constantine/arithmetic/finite_fields.nim index 93c9f93..8463f7d 100644 --- a/constantine/arithmetic/finite_fields.nim +++ b/constantine/arithmetic/finite_fields.nim @@ -27,7 +27,7 @@ import ../primitives, ../config/[common, curves], - ./bigints, ./montgomery + ./bigints, ./limbs_montgomery # type # `Fp`*[C: static Curve] = object @@ -64,7 +64,7 @@ func fromBig*[C: static Curve](dst: var Fp[C], src: BigInt) {.noInit.} = dst.mres.montyResidue(src, C.Mod.mres, C.getR2modP(), C.getNegInvModWord(), C.canUseNoCarryMontyMul()) func toBig*(src: Fp): auto {.noInit.} = - ## Convert a finite-field element to a BigInt in natral representation + ## Convert a finite-field element to a BigInt in natural representation var r {.noInit.}: typeof(src.mres) r.redc(src.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord(), Fp.C.canUseNoCarryMontyMul()) return r @@ -193,17 +193,6 @@ func powUnsafeExponent*(a: var Fp, exponent: BigInt) = Fp.C.canUseNoCarryMontyMul() ) -func inv*(a: var Fp) = - ## Inversion modulo p - ## Warning ⚠️ : - ## - This assumes that `Fp` is a prime field - const windowSize = 5 # TODO: find best window size for each curves - a.mres.montyPowUnsafeExponent( - Fp.C.getInvModExponent(), - Fp.C.Mod.mres, Fp.C.getMontyOne(), - Fp.C.getNegInvModWord(), windowSize, - Fp.C.canUseNoCarryMontyMul() - ) # ############################################################ # @@ -235,12 +224,8 @@ func `*`*(a, b: Fp): Fp {.noInit.} = func `*=`*(a: var Fp, b: Fp) = ## Multiplication modulo p - ## - ## Implementation note: - ## - This requires a temporary field element - ## - ## Cost - ## Stack: 1 * ModulusBitSize - var tmp{.noInit.}: Fp - tmp.prod(a, b) - a = tmp + a.prod(a, b) + +func square*(a: var Fp) = + ## Squaring modulo p + a.mres.montySquare(a.mres, Fp.C.Mod.mres, Fp.C.getNegInvModWord(), Fp.C.canUseNoCarryMontySquare()) diff --git a/constantine/arithmetic/finite_fields_inversion.nim b/constantine/arithmetic/finite_fields_inversion.nim new file mode 100644 index 0000000..d6f2004 --- /dev/null +++ b/constantine/arithmetic/finite_fields_inversion.nim @@ -0,0 +1,133 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/[common, curves], + ./finite_fields + +# ############################################################ +# +# Specialized inversions +# +# ############################################################ + +# Field-specific inversion routines +template repeat(num: int, body: untyped) = + for _ in 0 ..< num: + body + +# Secp256k1 +# ------------------------------------------------------------ +func invmod_addchain(r: var Fp[Secp256k1], a: Fp[Secp256k1]) = + ## We invert via Little Fermat's theorem + ## a^(-1) ≡ a^(p-2) (mod p) + ## with p = "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F" + ## We take advantage of the prime special form to hardcode + ## the sequence of squarings and multiplications for the modular exponentiation + ## + ## See libsecp256k1 + ## + ## The binary representation of (p - 2) has 5 blocks of 1s, with lengths in + ## { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: + ## [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] + + var + x2{.noInit.}: Fp[Secp256k1] + x3{.noInit.}: Fp[Secp256k1] + x6{.noInit.}: Fp[Secp256k1] + x9{.noInit.}: Fp[Secp256k1] + x11{.noInit.}: Fp[Secp256k1] + x22{.noInit.}: Fp[Secp256k1] + x44{.noInit.}: Fp[Secp256k1] + x88{.noInit.}: Fp[Secp256k1] + x176{.noInit.}: Fp[Secp256k1] + x220{.noInit.}: Fp[Secp256k1] + x223{.noInit.}: Fp[Secp256k1] + + x2.square(a) + x2 *= a + + x3.square(x2) + x3 *= a + + x6 = x3 + repeat 3: x6.square() + x6 *= x3 + + x9 = x6 + repeat 3: x9.square() + x9 *= x3 + + x11 = x9 + repeat 2: x11.square() + x11 *= x2 + + x22 = x11 + repeat 11: x22.square() + x22 *= x11 + + x44 = x22 + repeat 22: x44.square() + x44 *= x22 + + x88 = x44 + repeat 44: x88.square() + x88 *= x44 + + x176 = x88 + repeat 88: x88.square() + x176 *= x88 + + x220 = x176 + repeat 44: x220.square() + x220 *= x44 + + x223 = x220 + repeat 3: x223.square() + x223 *= x3 + + # The final result is then assembled using a sliding window over the blocks + r = x223 + repeat 23: r.square() + r *= x22 + repeat 5: r.square() + r *= a + repeat 3: r.square() + r *= x2 + repeat 2: r.square() + r *= a + +# BN Curves +# ------------------------------------------------------------ +# Efficient Pairings and ECC for Embedded Systems +# Thomas Unterluggauer and Erich Wenger +# https://eprint.iacr.org/2014/800.pdf +# +# BN curve field modulus are of the form: +# p = 36u^4 + 36u^3 + 24u^2 + 6u + 1 +# +# We construct the following multiplication-squaring chain +# a^-1 mod p = a^(p-2) mod p (Little Fermat Theorem) +# = a^(36 u^4 + 36 u^3 + 24 u^2 + 6u + 1 - 2) mod p +# = a^(36 u^4) . a^(36 u^3) . a^(24 u^2) . a^(6u-1) mod p +# +# Note: it only works for u positive, in particular BN254 doesn't work :/ +# Is there a way to only use a^-u or even powers? + +# ############################################################ +# +# Dispatch +# +# ############################################################ + +func inv*(r: var Fp, a: Fp) = + ## Inversion modulo p + # For now we don't activate the addition chain. + # Performance is equal to GCD and it does not pass test on 𝔽p2 + # We need faster squaring/multiplications + r.mres.steinsGCD(a.mres, Fp.C.getR2modP(), Fp.C.Mod.mres, Fp.C.getPrimePlus1div2()) diff --git a/constantine/arithmetic/limbs.nim b/constantine/arithmetic/limbs.nim index ae8c7c2..3b18839 100644 --- a/constantine/arithmetic/limbs.nim +++ b/constantine/arithmetic/limbs.nim @@ -43,6 +43,22 @@ type Limbs*[N: static int] = array[N, Word] ## ## but for unknown reason, it prevents semchecking `bits` +debug: + import strutils + + func toString*(a: Limbs): string = + result = "[" + result.add $BaseType(a[0]) & " (0x" & toHex(BaseType(a[0])) & ')' + for i in 1 ..< a.len: + result.add ", " + result.add $BaseType(a[i]) & " (0x" & toHex(BaseType(a[i])) & ')' + result.add "])" + + func toHex*(a: Limbs): string = + result = "0x" + for i in countdown(a.len-1, 0): + result.add toHex(BaseType(a[i])) + # No exceptions allowed {.push raises: [].} @@ -82,20 +98,8 @@ type Limbs*[N: static int] = array[N, Word] # as we avoid the parmeter packing/unpacking ceremony at function entry/exit # and unrolling overhead is minimal. -func `==`*(a, b: Limbs): CTBool[Word] = - ## Returns true if 2 limbs are equal - ## Comparison is constant-time - var accum = Zero - for i in 0 ..< a.len: - accum = accum or (a[i] xor b[i]) - result = accum.isZero() - -func isZero*(a: Limbs): CTBool[Word] = - ## Returns true if ``a`` is equal to zero - var accum = Zero - for i in 0 ..< a.len: - accum = accum or a[i] - result = accum.isZero() +# Initialization +# ------------------------------------------------------------ func setZero*(a: var Limbs) = ## Set ``a`` to 0 @@ -107,6 +111,9 @@ func setOne*(a: var Limbs) = when a.len > 1: zeroMem(a[1].addr, (a.len - 1) * sizeof(Word)) +# Copy +# ------------------------------------------------------------ + func ccopy*(a: var Limbs, b: Limbs, ctl: CTBool[Word]) = ## Constant-time conditional copy ## If ctl is true: b is copied into a @@ -118,6 +125,65 @@ func ccopy*(a: var Limbs, b: Limbs, ctl: CTBool[Word]) = for i in 0 ..< a.len: ctl.ccopy(a[i], b[i]) +func cswap*(a, b: var Limbs, ctl: CTBool) = + ## Swap ``a`` and ``b`` if ``ctl`` is true + ## + ## Constant-time: + ## Whether ``ctl`` is true or not, the same + ## memory accesses are done (unless the compiler tries to be clever) + + var mask = -(Word ctl) + for i in 0 ..< a.len: + let t = mask and (a[i] xor b[i]) + a[i] = a[i] xor t + b[i] = b[i] xor t + +# Comparison +# ------------------------------------------------------------ + +func `==`*(a, b: Limbs): CTBool[Word] = + ## Returns true if 2 limbs are equal + ## Comparison is constant-time + var accum = Zero + for i in 0 ..< a.len: + accum = accum or (a[i] xor b[i]) + result = accum.isZero() + +func `<`*(a, b: Limbs): CTBool[Word] = + ## Returns true if a < b + ## Comparison is constant-time + var diff: Word + var borrow: Borrow + for i in 0 ..< a.len: + subB(borrow, diff, a[i], b[i], borrow) + + result = (CTBool[Word])(borrow) + +func `<=`*(a, b: Limbs): CTBool[Word] = + ## Returns true if a <= b + ## Comparison is constant-time + not(b < a) + +func isZero*(a: Limbs): CTBool[Word] = + ## Returns true if ``a`` is equal to zero + var accum = Zero + for i in 0 ..< a.len: + accum = accum or a[i] + result = accum.isZero() + +func isOne*(a: Limbs): CTBool[Word] = + ## Returns true if ``a`` is equal to one + result = a[0] == Word(1) + for i in 1 ..< a.len: + result = result and a[i].isZero() + +func isOdd*(a: Limbs): CTBool[Word] = + ## Returns true if a is odd + CTBool[Word](a[0] and Word(1)) + +# Arithmetic +# ------------------------------------------------------------ + func add*(a: var Limbs, b: Limbs): Carry = ## Limbs addition ## Returns the carry @@ -125,6 +191,14 @@ func add*(a: var Limbs, b: Limbs): Carry = for i in 0 ..< a.len: addC(result, a[i], a[i], b[i], result) +func add*(a: var Limbs, w: Word): Carry = + ## Limbs addition, add a number that fits in a word + ## Returns the carry + result = Carry(0) + addC(result, a[0], a[0], w, result) + for i in 1 ..< a.len: + addC(result, a[i], a[i], Zero, result) + func cadd*(a: var Limbs, b: Limbs, ctl: CTBool[Word]): Carry = ## Limbs conditional addition ## Returns the carry @@ -180,266 +254,46 @@ func diff*(r: var Limbs, a, b: Limbs): Borrow = for i in 0 ..< a.len: subB(result, r[i], a[i], b[i], result) -func `<`*(a, b: Limbs): CTBool[Word] = - ## Returns true if a < b - ## Comparison is constant-time - var diff: Word - var borrow: Borrow +func cneg*(a: var Limbs, ctl: CTBool) = + ## Conditional negation. + ## Negate if ``ctl`` is true + + # Algorithm: + # In two-complement representation + # -x <=> not(x) + 1 <=> x xor 0xFF... + 1 + # and + # x <=> x xor 0x00...<=> x xor 0x00... + 0 + # + # So we need to xor all words and then add 1 + # The "+1" might carry + # So we fuse the 2 steps + let mask = -Word(ctl) # Obtain a 0xFF... or 0x00... mask + var carry = Word(ctl) for i in 0 ..< a.len: - subB(borrow, diff, a[i], b[i], borrow) + let t = (a[i] xor mask) + carry # XOR with mask and add 0x01 or 0x00 respectively + carry = Word(t < carry) # Carry on + a[i] = t - result = (CTBool[Word])(borrow) +# Bit manipulation +# ------------------------------------------------------------ -func `<=`*(a, b: Limbs): CTBool[Word] = - ## Returns true if a <= b - ## Comparison is constant-time - not(b < a) +func shiftRight*(a: var Limbs, k: int) = + ## Shift right by k. + ## + ## k MUST be less than the base word size (2^32 or 2^64) + # We don't reuse shr as this is an in-place operation + # Do we need to return the shifted out part? + # + # Note: for speed, loading a[i] and a[i+1] + # instead of a[i-1] and a[i] + # is probably easier to parallelize for the compiler + # (antidependence WAR vs loop-carried dependence RAW) + + # checkWordShift(k) + + for i in 0 ..< a.len-1: + a[i] = (a[i] shr k) or (a[i+1] shl (WordBitWidth - k)) + a[a.len-1] = a[a.len-1] shr k {.pop.} # inline - -# ############################################################ -# -# Modular BigInt -# -# ############################################################ -# -# To avoid code-size explosion due to monomorphization -# and given that reductions are not in hot path in Constantine -# we use type-erased procedures, instead of instantiating -# one per number of limbs combination - -# Type-erasure -# ------------------------------------------------------------ - -type - LimbsView = ptr UncheckedArray[Word] - ## Type-erased fixed-precision limbs - ## - ## This type mirrors the Limb type and is used - ## for some low-level computation API - ## This design - ## - avoids code bloat due to generic monomorphization - ## otherwise limbs routines would have an instantiation for - ## each number of words. - ## - ## Accesses should be done via BigIntViewConst / BigIntViewConst - ## to have the compiler check for mutability - - # "Indirection" to enforce pointer types deep immutability - LimbsViewConst = distinct LimbsView - ## Immutable view into the limbs of a BigInt - LimbsViewMut = distinct LimbsView - ## Mutable view into a BigInt - LimbsViewAny = LimbsViewConst or LimbsViewMut - -# Deep Mutability safety -# ------------------------------------------------------------ - -template view(a: Limbs): LimbsViewConst = - ## Returns a borrowed type-erased immutable view to a bigint - LimbsViewConst(cast[LimbsView](a.unsafeAddr)) - -template view(a: var Limbs): LimbsViewMut = - ## Returns a borrowed type-erased mutable view to a mutable bigint - LimbsViewMut(cast[LimbsView](a.addr)) - -template `[]`*(v: LimbsViewConst, limbIdx: int): Word = - LimbsView(v)[limbIdx] - -template `[]`*(v: LimbsViewMut, limbIdx: int): var Word = - LimbsView(v)[limbIdx] - -template `[]=`*(v: LimbsViewMut, limbIdx: int, val: Word) = - LimbsView(v)[limbIdx] = val - -# Type-erased add-sub -# ------------------------------------------------------------ - -func cadd(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Carry = - ## Type-erased conditional addition - ## Returns the carry - ## - ## if ctl is true: a <- a + b - ## if ctl is false: a <- a - ## The carry is always computed whether ctl is true or false - ## - ## Time and memory accesses are the same whether a copy occurs or not - result = Carry(0) - var sum: Word - for i in 0 ..< len: - addC(result, sum, a[i], b[i], result) - ctl.ccopy(a[i], sum) - -func csub(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Borrow = - ## Type-erased conditional addition - ## Returns the borrow - ## - ## if ctl is true: a <- a - b - ## if ctl is false: a <- a - ## The borrow is always computed whether ctl is true or false - ## - ## Time and memory accesses are the same whether a copy occurs or not - result = Borrow(0) - var diff: Word - for i in 0 ..< len: - subB(result, diff, a[i], b[i], result) - ctl.ccopy(a[i], diff) - -# Modular reduction -# ------------------------------------------------------------ - -func numWordsFromBits(bits: int): int {.inline.} = - const divShiftor = log2(uint32(WordBitWidth)) - result = (bits + WordBitWidth - 1) shr divShiftor - -func shlAddMod_estimate(a: LimbsViewMut, aLen: int, - c: Word, M: LimbsViewConst, mBits: int - ): tuple[neg, tooBig: CTBool[Word]] = - ## Estimate a <- a shl 2^w + c (mod M) - ## - ## with w the base word width, usually 32 on 32-bit platforms and 64 on 64-bit platforms - ## - ## Updates ``a`` and returns ``neg`` and ``tooBig`` - ## If ``neg``, the estimate in ``a`` is negative and ``M`` must be added to it. - ## If ``tooBig``, the estimate in ``a`` overflowed and ``M`` must be substracted from it. - - # Aliases - # ---------------------------------------------------------------------- - let MLen = numWordsFromBits(mBits) - - # Captures aLen and MLen - template `[]`(v: untyped, limbIdxFromEnd: BackwardsIndex): Word {.dirty.}= - v[`v Len` - limbIdxFromEnd.int] - - # ---------------------------------------------------------------------- - # Assuming 64-bit words - let hi = a[^1] # Save the high word to detect carries - let R = mBits and (WordBitWidth - 1) # R = mBits mod 64 - - var a0, a1, m0: Word - if R == 0: # If the number of mBits is a multiple of 64 - a0 = a[^1] # - moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words - a[0] = c # and replace the first one by c - a1 = a[^1] - m0 = M[^1] - else: # Else: need to deal with partial word shifts at the edge. - a0 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R) - moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) - a[0] = c - a1 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R) - m0 = (M[^1] shl (WordBitWidth-R)) or (M[^2] shr R) - - # m0 has its high bit set. (a0, a1)/p0 fits in a limb. - # Get a quotient q, at most we will be 2 iterations off - # from the true quotient - var q, r: Word - unsafeDiv2n1n(q, r, a0, a1, m0) # Estimate quotient - q = mux( # If n_hi == divisor - a0 == m0, MaxWord, # Quotient == MaxWord (0b1111...1111) - mux( - q.isZero, Zero, # elif q == 0, true quotient = 0 - q - One # else instead of being of by 0, 1 or 2 - ) # we returning q-1 to be off by -1, 0 or 1 - ) - - # Now substract a*2^64 - q*p - var carry = Zero - var over_p = CtTrue # Track if quotient greater than the modulus - - for i in 0 ..< MLen: - var qp_lo: Word - - block: # q*p - # q * p + carry (doubleword) carry from previous limb - muladd1(carry, qp_lo, q, M[i], Word carry) - - block: # a*2^64 - q*p - var borrow: Borrow - subB(borrow, a[i], a[i], qp_lo, Borrow(0)) - carry += Word(borrow) # Adjust if borrow - - over_p = mux( - a[i] == M[i], over_p, - a[i] > M[i] - ) - - # Fix quotient, the true quotient is either q-1, q or q+1 - # - # if carry < q or carry == q and over_p we must do "a -= p" - # if carry > hi (negative result) we must do "a += p" - - result.neg = Word(carry) > hi - result.tooBig = not(result.neg) and (over_p or (Word(carry) < hi)) - -func shlAddMod(a: LimbsViewMut, aLen: int, - c: Word, M: LimbsViewConst, mBits: int) = - ## Fused modular left-shift + add - ## Shift input `a` by a word and add `c` modulo `M` - ## - ## With a word W = 2^WordBitSize and a modulus M - ## Does a <- a * W + c (mod M) - ## - ## The modulus `M` most-significant bit at `mBits` MUST be set. - if mBits <= WordBitWidth: - # If M fits in a single limb - - # We normalize M with R so that the MSB is set - # And normalize (a * 2^64 + c) by R as well to maintain the result - # This ensures that (a0, a1)/p0 fits in a limb. - let R = mBits and (WordBitWidth - 1) - - # (hi, lo) = a * 2^64 + c - let hi = (a[0] shl (WordBitWidth-R)) or (c shr R) - let lo = c shl (WordBitWidth-R) - let m0 = M[0] shl (WordBitWidth-R) - - var q, r: Word - unsafeDiv2n1n(q, r, hi, lo, m0) # (hi, lo) mod M - - a[0] = r shr (WordBitWidth-R) - - else: - ## Multiple limbs - let (neg, tooBig) = shlAddMod_estimate(a, aLen, c, M, mBits) - discard a.cadd(M, ctl = neg, aLen) - discard a.csub(M, ctl = tooBig, aLen) - -func reduce(r: LimbsViewMut, - a: LimbsViewAny, aBits: int, - M: LimbsViewConst, mBits: int) = - ## Reduce `a` modulo `M` and store the result in `r` - let aLen = numWordsFromBits(aBits) - let mLen = numWordsFromBits(mBits) - let rLen = mLen - - if aBits < mBits: - # if a uses less bits than the modulus, - # it is guaranteed < modulus. - # This relies on the precondition that the modulus uses all declared bits - copyMem(r[0].addr, a[0].unsafeAddr, aLen * sizeof(Word)) - for i in aLen ..< mLen: - r[i] = Zero - else: - # a length i at least equal to the modulus. - # we can copy modulus.limbs-1 words - # and modular shift-left-add the rest - let aOffset = aLen - mLen - copyMem(r[0].addr, a[aOffset+1].unsafeAddr, (mLen-1) * sizeof(Word)) - r[rLen - 1] = Zero - # Now shift-left the copied words while adding the new word modulo M - for i in countdown(aOffset, 0): - shlAddMod(r, rLen, a[i], M, mBits) - -func reduce*[aLen, mLen](r: var Limbs[mLen], - a: Limbs[aLen], aBits: static int, - M: Limbs[mLen], mBits: static int - ) {.inline.} = - ## Reduce `a` modulo `M` and store the result in `r` - ## - ## Warning ⚠: At the moment this is NOT constant-time - ## as it relies on hardware division. - # This is implemented via type-erased indirection to avoid - # a significant amount of code duplication if instantiated for - # varying bitwidth. - reduce(r.view(), a.view(), aBits, M.view(), mBits) +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/limbs_modular.nim b/constantine/arithmetic/limbs_modular.nim new file mode 100644 index 0000000..12bc7c2 --- /dev/null +++ b/constantine/arithmetic/limbs_modular.nim @@ -0,0 +1,371 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../config/common, + ../primitives, + ./limbs + +# No exceptions allowed +{.push raises: [].} + +# ############################################################ +# +# Modular inversion +# +# ############################################################ + +# Generic (odd-only modulus) +# ------------------------------------------------------------ +# Algorithm by Niels Möller, +# a modified version of Stein's Algorithm (binary Extended Euclid GCD) +# +# Algorithm 5 in +# Fast Software Polynomial Multiplication on ARM Processors Using the NEON Engine +# Danilo Camara, Conrado P. L. Gouvea, Julio Lopez, and Ricardo Dahab +# https://link.springer.com/content/pdf/10.1007%2F978-3-642-40588-4_10.pdf +# +# Input: integer x, odd integer n, x < n +# Output: x−1 (mod n) +# 1: function ModInv(x, n) +# 2: (a, b, u, v) ← (x, n, 1, 1) +# 3: ℓ ← ⌊log2 n⌋ + 1 ⮚ number of bits in n +# 4: for i ← 0 to 2ℓ − 1 do +# 5: odd ← a & 1 +# 6: if odd and a ≥ b then +# 7: a ← a − b +# 8: else if odd and a < b then +# 9: (a, b, u, v) ← (b − a, a, v, u) +# 10: a ← a >> 1 +# 11: if odd then u ← u − v +# 12: if u < 0 then u ← u + n +# 13: if u & 1 then u ← u + n +# 14: u ← u >> 1 +# 15: return v +# +# Warning ⚠️: v should be 0 at initialization +# +# We modify it to return F . a^-1 +# So that we can pass an adjustment factor F +# And directly compute modular division or Montgomery inversion + +func steinsGCD*(v: var Limbs, a: Limbs, F, M: Limbs, bits: int, mp1div2: Limbs) = + ## Compute F multiplied the modular inverse of ``a`` modulo M + ## r ≡ F . a^-1 (mod M) + ## + ## M MUST be odd, M does not need to be prime. + ## ``a`` MUST be less than M. + ## + ## No information about ``a`` in particular its actual length in bits is leaked. + ## + ## This takes (M+1)/2 (mp1div2) as a precomputed parameter as a slight optimization + ## in stack size and speed. + + # Ideally we need registers for a, b, u, v + # but: + # - Even with ~256-bit primes, that's 4 limbs = 4*4 => 16 registers + # - x86_64 only has 16 general purposes registers + # - Registers are needed for the loop counter and comparison results + # - CMOV is reg <- RM so can move registers/memory into registers + # but cannot move into memory. + # so we choose to keep "v" from the algorithm in memory as `r` + + # TODO: the inlining of primitives like `csub` is bad for codesize + # but there is a 80% slowdown without it. + # TODO: The `cmov` in `cadd` and `csub` always retest the condition + # which is probably costly here given how many we have. + + var a = a + var b = M + var u = F + v.setZero() + + for i in 0 ..< 2 * bits: + debug: doAssert bool(b.isOdd) + let isOddA = a.isOdd() + + # if isOddA: a -= b + let aLessThanB = isOddA and (CTBool[Word]) a.csub(b, isOddA) + # if a < b and the sub was processed + # in that case, b <- a = a - b + b + discard b.cadd(a, aLessThanB) + # and a <- -new_a = (b-a) + a.cneg(aLessThanB) + debug: doAssert not bool(a.isOdd) + a.shiftRight(1) + + # Swap u and v is a < b + u.cswap(v, aLessThanB) + # if isOddA: u -= v (mod M) + let neg = isOddA and (CTBool[Word]) u.csub(v, isOddA) + let corrected = u.cadd(M, neg) + + let isOddU = u.isOdd() + # if u.isOdd: + # u += n + # u = u shr 1 + # + # Warning ⚠️: u += n will overflow the BigInt + # and we might lose a bit on the next shift + # Instead we shift first and then add hald the modulus rounded up + u.shiftRight(1) + let carry = u.cadd(mp1div2, isOddU) + debug: doAssert not carry.bool + + debug: + doAssert bool a.isZero() + doAssert bool b.isOne() # GCD always exist if a and M are relatively prime + +# ############################################################ +# +# Modular Reduction +# +# ############################################################ +# +# To avoid code-size explosion due to monomorphization +# and given that reductions are not in hot path in Constantine +# we use type-erased procedures, instead of instantiating +# one per number of limbs combination + +# Type-erasure +# ------------------------------------------------------------ + +type + LimbsView = ptr UncheckedArray[Word] + ## Type-erased fixed-precision limbs + ## + ## This type mirrors the Limb type and is used + ## for some low-level computation API + ## This design + ## - avoids code bloat due to generic monomorphization + ## otherwise limbs routines would have an instantiation for + ## each number of words. + ## + ## Accesses should be done via BigIntViewConst / BigIntViewConst + ## to have the compiler check for mutability + + # "Indirection" to enforce pointer types deep immutability + LimbsViewConst = distinct LimbsView + ## Immutable view into the limbs of a BigInt + LimbsViewMut = distinct LimbsView + ## Mutable view into a BigInt + LimbsViewAny = LimbsViewConst or LimbsViewMut + +# Deep Mutability safety +# ------------------------------------------------------------ + +template view(a: Limbs): LimbsViewConst = + ## Returns a borrowed type-erased immutable view to a bigint + LimbsViewConst(cast[LimbsView](a.unsafeAddr)) + +template view(a: var Limbs): LimbsViewMut = + ## Returns a borrowed type-erased mutable view to a mutable bigint + LimbsViewMut(cast[LimbsView](a.addr)) + +template `[]`*(v: LimbsViewConst, limbIdx: int): Word = + LimbsView(v)[limbIdx] + +template `[]`*(v: LimbsViewMut, limbIdx: int): var Word = + LimbsView(v)[limbIdx] + +template `[]=`*(v: LimbsViewMut, limbIdx: int, val: Word) = + LimbsView(v)[limbIdx] = val + +# Type-erased add-sub +# ------------------------------------------------------------ + +func cadd(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Carry = + ## Type-erased conditional addition + ## Returns the carry + ## + ## if ctl is true: a <- a + b + ## if ctl is false: a <- a + ## The carry is always computed whether ctl is true or false + ## + ## Time and memory accesses are the same whether a copy occurs or not + result = Carry(0) + var sum: Word + for i in 0 ..< len: + addC(result, sum, a[i], b[i], result) + ctl.ccopy(a[i], sum) + +func csub(a: LimbsViewMut, b: LimbsViewAny, ctl: CTBool[Word], len: int): Borrow = + ## Type-erased conditional addition + ## Returns the borrow + ## + ## if ctl is true: a <- a - b + ## if ctl is false: a <- a + ## The borrow is always computed whether ctl is true or false + ## + ## Time and memory accesses are the same whether a copy occurs or not + result = Borrow(0) + var diff: Word + for i in 0 ..< len: + subB(result, diff, a[i], b[i], result) + ctl.ccopy(a[i], diff) + +# Modular reduction +# ------------------------------------------------------------ + +func numWordsFromBits(bits: int): int {.inline.} = + const divShiftor = log2(uint32(WordBitWidth)) + result = (bits + WordBitWidth - 1) shr divShiftor + +func shlAddMod_estimate(a: LimbsViewMut, aLen: int, + c: Word, M: LimbsViewConst, mBits: int + ): tuple[neg, tooBig: CTBool[Word]] = + ## Estimate a <- a shl 2^w + c (mod M) + ## + ## with w the base word width, usually 32 on 32-bit platforms and 64 on 64-bit platforms + ## + ## Updates ``a`` and returns ``neg`` and ``tooBig`` + ## If ``neg``, the estimate in ``a`` is negative and ``M`` must be added to it. + ## If ``tooBig``, the estimate in ``a`` overflowed and ``M`` must be substracted from it. + + # Aliases + # ---------------------------------------------------------------------- + let MLen = numWordsFromBits(mBits) + + # Captures aLen and MLen + template `[]`(v: untyped, limbIdxFromEnd: BackwardsIndex): Word {.dirty.}= + v[`v Len` - limbIdxFromEnd.int] + + # ---------------------------------------------------------------------- + # Assuming 64-bit words + let hi = a[^1] # Save the high word to detect carries + let R = mBits and (WordBitWidth - 1) # R = mBits mod 64 + + var a0, a1, m0: Word + if R == 0: # If the number of mBits is a multiple of 64 + a0 = a[^1] # + moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) # we can just shift words + a[0] = c # and replace the first one by c + a1 = a[^1] + m0 = M[^1] + else: # Else: need to deal with partial word shifts at the edge. + a0 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R) + moveMem(a[1].addr, a[0].addr, (aLen-1) * Word.sizeof) + a[0] = c + a1 = (a[^1] shl (WordBitWidth-R)) or (a[^2] shr R) + m0 = (M[^1] shl (WordBitWidth-R)) or (M[^2] shr R) + + # m0 has its high bit set. (a0, a1)/p0 fits in a limb. + # Get a quotient q, at most we will be 2 iterations off + # from the true quotient + var q, r: Word + unsafeDiv2n1n(q, r, a0, a1, m0) # Estimate quotient + q = mux( # If n_hi == divisor + a0 == m0, MaxWord, # Quotient == MaxWord (0b1111...1111) + mux( + q.isZero, Zero, # elif q == 0, true quotient = 0 + q - One # else instead of being of by 0, 1 or 2 + ) # we returning q-1 to be off by -1, 0 or 1 + ) + + # Now substract a*2^64 - q*p + var carry = Zero + var over_p = CtTrue # Track if quotient greater than the modulus + + for i in 0 ..< MLen: + var qp_lo: Word + + block: # q*p + # q * p + carry (doubleword) carry from previous limb + muladd1(carry, qp_lo, q, M[i], Word carry) + + block: # a*2^64 - q*p + var borrow: Borrow + subB(borrow, a[i], a[i], qp_lo, Borrow(0)) + carry += Word(borrow) # Adjust if borrow + + over_p = mux( + a[i] == M[i], over_p, + a[i] > M[i] + ) + + # Fix quotient, the true quotient is either q-1, q or q+1 + # + # if carry < q or carry == q and over_p we must do "a -= p" + # if carry > hi (negative result) we must do "a += p" + + result.neg = Word(carry) > hi + result.tooBig = not(result.neg) and (over_p or (Word(carry) < hi)) + +func shlAddMod(a: LimbsViewMut, aLen: int, + c: Word, M: LimbsViewConst, mBits: int) = + ## Fused modular left-shift + add + ## Shift input `a` by a word and add `c` modulo `M` + ## + ## With a word W = 2^WordBitSize and a modulus M + ## Does a <- a * W + c (mod M) + ## + ## The modulus `M` most-significant bit at `mBits` MUST be set. + if mBits <= WordBitWidth: + # If M fits in a single limb + + # We normalize M with R so that the MSB is set + # And normalize (a * 2^64 + c) by R as well to maintain the result + # This ensures that (a0, a1)/p0 fits in a limb. + let R = mBits and (WordBitWidth - 1) + + # (hi, lo) = a * 2^64 + c + let hi = (a[0] shl (WordBitWidth-R)) or (c shr R) + let lo = c shl (WordBitWidth-R) + let m0 = M[0] shl (WordBitWidth-R) + + var q, r: Word + unsafeDiv2n1n(q, r, hi, lo, m0) # (hi, lo) mod M + + a[0] = r shr (WordBitWidth-R) + + else: + ## Multiple limbs + let (neg, tooBig) = shlAddMod_estimate(a, aLen, c, M, mBits) + discard a.cadd(M, ctl = neg, aLen) + discard a.csub(M, ctl = tooBig, aLen) + +func reduce(r: LimbsViewMut, + a: LimbsViewAny, aBits: int, + M: LimbsViewConst, mBits: int) = + ## Reduce `a` modulo `M` and store the result in `r` + let aLen = numWordsFromBits(aBits) + let mLen = numWordsFromBits(mBits) + let rLen = mLen + + if aBits < mBits: + # if a uses less bits than the modulus, + # it is guaranteed < modulus. + # This relies on the precondition that the modulus uses all declared bits + copyMem(r[0].addr, a[0].unsafeAddr, aLen * sizeof(Word)) + for i in aLen ..< mLen: + r[i] = Zero + else: + # a length i at least equal to the modulus. + # we can copy modulus.limbs-1 words + # and modular shift-left-add the rest + let aOffset = aLen - mLen + copyMem(r[0].addr, a[aOffset+1].unsafeAddr, (mLen-1) * sizeof(Word)) + r[rLen - 1] = Zero + # Now shift-left the copied words while adding the new word modulo M + for i in countdown(aOffset, 0): + shlAddMod(r, rLen, a[i], M, mBits) + +func reduce*[aLen, mLen](r: var Limbs[mLen], + a: Limbs[aLen], aBits: static int, + M: Limbs[mLen], mBits: static int + ) {.inline.} = + ## Reduce `a` modulo `M` and store the result in `r` + ## + ## Warning ⚠: At the moment this is NOT constant-time + ## as it relies on hardware division. + # This is implemented via type-erased indirection to avoid + # a significant amount of code duplication if instantiated for + # varying bitwidth. + reduce(r.view(), a.view(), aBits, M.view(), mBits) + +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/montgomery.nim b/constantine/arithmetic/limbs_montgomery.nim similarity index 99% rename from constantine/arithmetic/montgomery.nim rename to constantine/arithmetic/limbs_montgomery.nim index 964f975..ae7a7fc 100644 --- a/constantine/arithmetic/montgomery.nim +++ b/constantine/arithmetic/limbs_montgomery.nim @@ -21,7 +21,7 @@ import # Note: Montgomery multiplications and squarings are the biggest bottlenecks # of an elliptic curve library, asymptotically 100% of the costly algorithms: # - field exponentiation -# - field inversion +# - field inversion via Little Fermat # - extension towers multiplication, squarings, inversion # - elliptic curve point addition # - elliptic curve point doubling @@ -80,6 +80,9 @@ macro staticFor(idx: untyped{nkIdent}, start, stopEx: static int, body: untyped) body.replaceNodes(idx, newLit i) ) +# No exceptions allowed +{.push raises: [].} + # Implementation # ------------------------------------------------------------ # Note: the low-level implementations should not use static parameter @@ -565,3 +568,5 @@ func montyPowUnsafeExponent*( # scratchspace[1] holds the original `a` scratchspace[0].montyMul(a, scratchspace[1], M, negInvModWord, canUseNoCarryMontyMul) a = scratchspace[0] + +{.pop.} # raises no exceptions diff --git a/constantine/arithmetic/precomputed.nim b/constantine/arithmetic/precomputed.nim index ec93e07..64da688 100644 --- a/constantine/arithmetic/precomputed.nim +++ b/constantine/arithmetic/precomputed.nim @@ -67,6 +67,19 @@ func subB(bOut, diff: var BaseType, a, b, bIn: BaseType) = bOut = BaseType(noBorrowHi == 0) diff = merge(rHi, rLo) +func add(a: var BigInt, w: BaseType): bool = + ## Limbs addition, add a number that fits in a word + ## Returns the carry + var carry, sum: BaseType + addC(carry, sum, BaseType(a.limbs[0]), w, carry) + a.limbs[0] = Word(sum) + for i in 1 ..< a.limbs.len: + let ai = BaseType(a.limbs[i]) + addC(carry, sum, ai, 0, carry) + a.limbs[i] = Word(sum) + + result = bool(carry) + func dbl(a: var BigInt): bool = ## In-place multiprecision double ## a -> 2a @@ -246,9 +259,24 @@ func primeMinus2_BE*[bits: static int]( ): array[(bits+7) div 8, byte] {.noInit.} = ## Compute an input prime-2 ## and return the result as a canonical byte array / octet string - ## For use to precompute modular inverse exponent. + ## For use to precompute modular inverse exponent + ## when using inversion by Little Fermat Theorem a^-1 = a^(p-2) mod p var tmp = P discard tmp.csub(BigInt[bits].fromRawUint([byte 2], bigEndian), true) result.exportRawUint(tmp, bigEndian) + +func primePlus1div2*(P: BigInt): BigInt = + ## Compute (P+1)/2, assumes P is odd + ## For use in constant-time modular inversion + checkOddModulus(P) + + # (P+1)/2 = P/2 + 1 if P is odd, + # this avoids overflowing if the prime uses all bits + # i.e. in the form (2^64)^w - 1 or (2^32)^w - 1 + + result = P + result.shiftRight(1) + let carry = result.add(1) + doAssert not carry diff --git a/constantine/config/curves.nim b/constantine/config/curves.nim index 9615043..7419200 100644 --- a/constantine/config/curves.nim +++ b/constantine/config/curves.nim @@ -13,6 +13,7 @@ import ./curves_parser, ./common, ../arithmetic/[precomputed, bigints] +{.push used.} # ############################################################ # @@ -40,27 +41,23 @@ import # which returns the field modulus of the curve when not defined(testingCurves): declareCurves: - # Barreto-Naehrig curve, pairing-friendly, Prime 254 bit, ~100-bit security - # https://eprint.iacr.org/2013/879.pdf - # Usage: Zero-Knowledge Proofs / zkSNARKs in ZCash and Ethereum 1 - # https://eips.ethereum.org/EIPS/eip-196 + curve P224: # NIST P-224 + bitsize: 224 + modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001" curve BN254: bitsize: 254 modulus: "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47" # Equation: Y^2 = X^3 + 3 - curve BLS12_381: - bitsize: 381 - modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab" - # Equation: y^2 = x^3 + 4 - curve P224: # NIST P-224 - bitsize: 224 - modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001" curve P256: # secp256r1 / NIST P-256 bitsize: 256 modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff" curve Secp256k1: # Bitcoin curve bitsize: 256 modulus: "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F" + curve BLS12_381: + bitsize: 381 + modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab" + # Equation: y^2 = x^3 + 4 else: # Fake curve for testing field arithmetic declareCurves: @@ -76,12 +73,75 @@ else: curve P224: # NIST P-224 bitsize: 224 modulus: "0xffffffff_ffffffff_ffffffff_ffffffff_00000000_00000000_00000001" + curve BN254: # Zero-Knowledge proofs curve (SNARKS, STARKS) + bitsize: 254 + modulus: "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47" + # Equation: Y^2 = X^3 + 3 curve P256: # secp256r1 / NIST P-256 bitsize: 256 modulus: "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff" + curve Secp256k1: # Bitcoin curve + bitsize: 256 + modulus: "0xFFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F" curve BLS12_381: bitsize: 381 modulus: "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab" + # Equation: y^2 = x^3 + 4 + +# ############################################################ +# +# Curve Families +# +# ############################################################ +type CurveFamily = enum + None + BN # Barreto-Naehrig + BLS # Barreto-Lynn-Scott + +func family*(curve: Curve): CurveFamily = + case curve + of BN254: + BN + of BLS12_381: + BLS + else: + None + +# ############################################################ +# +# Curve Specific Parameters +# +# ############################################################ +# +# In the form CurveXXX_ParameterName where CurveXXX is the curve name + number of bits +# of the field modulus + +# BN Curves +# ------------------------------------------------------------ +# See https://tools.ietf.org/id/draft-yonezawa-pairing-friendly-curves-00.html +# +# The prime p and order r are primes and of the form +# p = 36u^4 + 36u^3 + 24u^2 + 6u + 1 +# r = 36u^4 + 36u^3 + 18u^2 + 6u + 1 +# +# https://eprint.iacr.org/2010/429.pdf +# https://eprint.iacr.org/2013/879.pdf +# Usage: Zero-Knowledge Proofs / zkSNARKs in ZCash and Ethereum 1 +# https://eips.ethereum.org/EIPS/eip-196 + +# BLS Curves +# ------------------------------------------------------------ +# See https://tools.ietf.org/id/draft-yonezawa-pairing-friendly-curves-00.html +# +# BLS12 curves +# The prime p and order r are primes and of the form +# p = (u - 1)^2 (u^4 - u^2 + 1)/3 + u +# r = u^4 - u^2 + 1 +# +# BLS48 curves +# The prime p and order r are primes and of the form +# p = (u - 1)^2 (u^16 - u^8 + 1)/3 + u +# r = u^16 - u^8 + 1 # ############################################################ # @@ -147,6 +207,7 @@ macro genMontyMagics(T: typed): untyped = ) ) ) + # const MyCurve_NegInvModWord = negInvModWord(MyCurve_Modulus) result.add newConstStmt( ident($curve & "_NegInvModWord"), newCall( @@ -177,6 +238,16 @@ macro genMontyMagics(T: typed): untyped = ) ) ) + # const MyCurve_PrimePlus1div2 = primePlus1div2(MyCurve_Modulus) + result.add newConstStmt( + ident($curve & "_PrimePlus1div2"), newCall( + bindSym"primePlus1div2", + nnkDotExpr.newTree( + bindSym($curve & "_Modulus"), + ident"mres" + ) + ) + ) # echo result.toStrLit @@ -208,6 +279,10 @@ macro getInvModExponent*(C: static Curve): untyped = ## Get modular inversion exponent (Modulus-2 in canonical representation) result = bindSym($C & "_InvModExponent") +macro getPrimePlus1div2*(C: static Curve): untyped = + ## Get (P+1) / 2 for an odd prime + result = bindSym($C & "_PrimePlus1div2") + # ############################################################ # # Debug info printed at compile-time @@ -234,5 +309,5 @@ macro debugConsts(): untyped = result.add quote do: echo "----------------------------------------------------------------------------" -debug: - debugConsts() +# debug: +# debugConsts() diff --git a/constantine/tower_field_extensions/abelian_groups.nim b/constantine/tower_field_extensions/abelian_groups.nim index 0ee83b7..07b7ca0 100644 --- a/constantine/tower_field_extensions/abelian_groups.nim +++ b/constantine/tower_field_extensions/abelian_groups.nim @@ -7,7 +7,7 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import - ../arithmetic/finite_fields, + ../arithmetic, ../config/common, ../primitives diff --git a/constantine/tower_field_extensions/fp2_complex.nim b/constantine/tower_field_extensions/fp2_complex.nim index bdcf628..5f74040 100644 --- a/constantine/tower_field_extensions/fp2_complex.nim +++ b/constantine/tower_field_extensions/fp2_complex.nim @@ -44,7 +44,7 @@ # TODO: Clarify some assumptions about the prime p ≡ 3 (mod 4) import - ../arithmetic/finite_fields, + ../arithmetic, ../config/curves, ./abelian_groups @@ -151,7 +151,7 @@ func inv*(r: var Fp2, a: Fp2) = t0 += t1 # t0 = a0² + a1² (the norm / squared magnitude of a) # [1 Inv, 2 Sqr, 1 Add] - inv(t0) # t0 = 1 / (a0² + a1²) + t0.inv(t0) # t0 = 1 / (a0² + a1²) # [1 Inv, 2 Mul, 2 Sqr, 1 Add, 1 Neg] r.c0.prod(a.c0, t0) # r0 = a0 / (a0² + a1²) diff --git a/constantine/tower_field_extensions/fp2_sqrt_minus2.nim b/constantine/tower_field_extensions/fp2_sqrt_minus2.nim index 56418e2..0d44826 100644 --- a/constantine/tower_field_extensions/fp2_sqrt_minus2.nim +++ b/constantine/tower_field_extensions/fp2_sqrt_minus2.nim @@ -33,7 +33,7 @@ import - ../arithmetic/finite_fields, + ../arithmetic, ../config/curves, ./abelian_groups diff --git a/constantine/tower_field_extensions/fp2_sqrt_minus5.nim b/constantine/tower_field_extensions/fp2_sqrt_minus5.nim index 3339dd5..c8a3b34 100644 --- a/constantine/tower_field_extensions/fp2_sqrt_minus5.nim +++ b/constantine/tower_field_extensions/fp2_sqrt_minus5.nim @@ -32,7 +32,7 @@ # https://eprint.iacr.org/2010/354 import - ../arithmetic/finite_fields, + ../arithmetic, ../config/curves, ./abelian_groups diff --git a/formal_verification/bls12_381_q_64.nim b/formal_verification/bls12_381_q_64.nim index 109ffc6..53f2fb0 100644 --- a/formal_verification/bls12_381_q_64.nim +++ b/formal_verification/bls12_381_q_64.nim @@ -128,7 +128,7 @@ func fromHex(output: var openArray[byte], hexStr: string, order: static[Endianne # ------------------------------------------------------------------------- when isMainModule: - import random, std/monotimes, times, strformat, ./timers + import random, std/monotimes, times, strformat, ../helpers/timers const Iters = 1_000_000 const InvIters = 1000 diff --git a/formal_verification/timers.nim b/formal_verification/timers.nim deleted file mode 100644 index 45d415f..0000000 --- a/formal_verification/timers.nim +++ /dev/null @@ -1,49 +0,0 @@ -when defined(i386) or defined(amd64): - # From Linux - # - # The RDTSC instruction is not ordered relative to memory - # access. The Intel SDM and the AMD APM are both vague on this - # point, but empirically an RDTSC instruction can be - # speculatively executed before prior loads. An RDTSC - # immediately after an appropriate barrier appears to be - # ordered as a normal load, that is, it provides the same - # ordering guarantees as reading from a global memory location - # that some other imaginary CPU is updating continuously with a - # time stamp. - # - # From Intel SDM - # https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf - when not defined(vcc): - when defined(amd64): - proc getTicks*(): int64 {.inline.} = - var lo, hi: int64 - # TODO: Provide a compile-time flag for RDTSCP support - # and use it instead of lfence + RDTSC - {.emit: """asm volatile( - "lfence\n" - "rdtsc\n" - : "=a"(`lo`), "=d"(`hi`) - : - : "memory" - );""".} - return (hi shl 32) or lo - else: - proc getTicks*(): int64 {.inline.} = - # TODO: Provide a compile-time flag for RDTSCP support - # and use it instead of lfence + RDTSC - {.emit: """asm volatile( - "lfence\n" - "rdtsc\n" - : "=a"(`result`) - : - : "memory" - );""".} - else: - proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "".} - proc lfence() {.importc: "__mm_lfence", header: "".} - - proc getTicks*(): int64 {.inline.} = - lfence() - return rdtsc() -else: - {.error: "getticks is not supported on this CPU architecture".} diff --git a/helpers/README.md b/helpers/README.md new file mode 100644 index 0000000..4035900 --- /dev/null +++ b/helpers/README.md @@ -0,0 +1,3 @@ +# Helpers, utilities, tools, miscellaneous + +This folder holds helper functions that are not part of Constantine but facilitates testing and benchmarking. diff --git a/tests/prng.nim b/helpers/prng.nim similarity index 98% rename from tests/prng.nim rename to helpers/prng.nim index 43d68af..50ca9ae 100644 --- a/tests/prng.nim +++ b/helpers/prng.nim @@ -98,5 +98,5 @@ func random[T](rng: var RngState, a: var T, C: static Curve) {.noInit.}= rng.random(field, C) func random*(rng: var RngState, T: typedesc): T = - ## Create a random Field or Extension FIeld Element + ## Create a random Field or Extension Field Element rng.random(result, T.C) diff --git a/helpers/static_for.nim b/helpers/static_for.nim new file mode 100644 index 0000000..289e973 --- /dev/null +++ b/helpers/static_for.nim @@ -0,0 +1,28 @@ +import macros + +proc replaceNodes(ast: NimNode, what: NimNode, by: NimNode): NimNode = + # Replace "what" ident node by "by" + proc inspect(node: NimNode): NimNode = + case node.kind: + of {nnkIdent, nnkSym}: + if node.eqIdent(what): + return by + return node + of nnkEmpty: + return node + of nnkLiterals: + return node + else: + var rTree = node.kind.newTree() + for child in node: + rTree.add inspect(child) + return rTree + result = inspect(ast) + +macro staticFor*(idx: untyped{nkIdent}, start, stopEx: static int, body: untyped): untyped = + result = newStmtList() + for i in start ..< stopEx: + result.add nnkBlockStmt.newTree( + ident("unrolledIter_" & $idx & $i), + body.replaceNodes(idx, newLit i) + ) diff --git a/helpers/timers.nim b/helpers/timers.nim new file mode 100644 index 0000000..9a31232 --- /dev/null +++ b/helpers/timers.nim @@ -0,0 +1,14 @@ +when defined(i386) or defined(amd64): + import x86 + export getTicks + +# This doesn't always work unfortunately ... + +proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} = + discard + +template preventOptimAway*[T](x: var T) = + volatilize(cast[ptr byte](unsafeAddr x)) + +template preventOptimAway*[T](x: T) = + volatilize(cast[ptr byte](x)) diff --git a/helpers/x86.nim b/helpers/x86.nim new file mode 100644 index 0000000..dbbcfea --- /dev/null +++ b/helpers/x86.nim @@ -0,0 +1,74 @@ +# Cpu Name +# ------------------------------------------------------- + +{.passC:"-std=gnu99".} # TODO may conflict with milagro "-std=c99" + +proc cpuID(eaxi, ecxi: int32): tuple[eax, ebx, ecx, edx: int32] = + when defined(vcc): + proc cpuidVcc(cpuInfo: ptr int32; functionID: int32) + {.importc: "__cpuidex", header: "intrin.h".} + cpuidVcc(addr result.eax, eaxi, ecxi) + else: + var (eaxr, ebxr, ecxr, edxr) = (0'i32, 0'i32, 0'i32, 0'i32) + asm """ + cpuid + :"=a"(`eaxr`), "=b"(`ebxr`), "=c"(`ecxr`), "=d"(`edxr`) + :"a"(`eaxi`), "c"(`ecxi`)""" + (eaxr, ebxr, ecxr, edxr) + +proc cpuName*(): string = + var leaves {.global.} = cast[array[48, char]]([ + cpuID(eaxi = 0x80000002'i32, ecxi = 0), + cpuID(eaxi = 0x80000003'i32, ecxi = 0), + cpuID(eaxi = 0x80000004'i32, ecxi = 0)]) + result = $cast[cstring](addr leaves[0]) + +# Counting cycles +# ------------------------------------------------------- + +# From Linux +# +# The RDTSC instruction is not ordered relative to memory +# access. The Intel SDM and the AMD APM are both vague on this +# point, but empirically an RDTSC instruction can be +# speculatively executed before prior loads. An RDTSC +# immediately after an appropriate barrier appears to be +# ordered as a normal load, that is, it provides the same +# ordering guarantees as reading from a global memory location +# that some other imaginary CPU is updating continuously with a +# time stamp. +# +# From Intel SDM +# https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf + +proc getTicks*(): int64 {.inline.} = + when defined(vcc): + proc rdtsc(): int64 {.sideeffect, importc: "__rdtsc", header: "".} + proc lfence() {.importc: "__mm_lfence", header: "".} + + lfence() + return rdtsc() + + else: + when defined(amd64): + var lo, hi: int64 + # TODO: Provide a compile-time flag for RDTSCP support + # and use it instead of lfence + RDTSC + {.emit: """asm volatile( + "lfence\n" + "rdtsc\n" + : "=a"(`lo`), "=d"(`hi`) + : + : "memory" + );""".} + return (hi shl 32) or lo + else: # 32-bit x86 + # TODO: Provide a compile-time flag for RDTSCP support + # and use it instead of lfence + RDTSC + {.emit: """asm volatile( + "lfence\n" + "rdtsc\n" + : "=a"(`result`) + : + : "memory" + );""".} diff --git a/tests/test_bigints.nim b/tests/test_bigints.nim index c747ce3..9010d1d 100644 --- a/tests/test_bigints.nim +++ b/tests/test_bigints.nim @@ -8,11 +8,11 @@ import unittest, ../constantine/io/io_bigints, - ../constantine/arithmetic/bigints, + ../constantine/arithmetic, ../constantine/config/common, ../constantine/primitives -proc main() = +proc mainArith() = suite "isZero": test "isZero for zero": var x: BigInt[128] @@ -128,6 +128,16 @@ proc main() = bool(a == c) not bool(carry) + suite "BigInt + Word": + test "Addition limbs carry": + block: # P256 / 2 + var a = BigInt[256].fromhex"0x7fffffff800000008000000000000000000000007fffffffffffffffffffffff" + + let expected = BigInt[256].fromHex"7fffffff80000000800000000000000000000000800000000000000000000000" + + discard a.add(Word 1) + check: bool(a == expected) + suite "Modular operations - small modulus": # Vectors taken from Stint - https://github.com/status-im/nim-stint test "100 mod 13": @@ -205,4 +215,249 @@ proc main() = check: bool(r == BigInt[40].fromUint(u mod v)) -main() +proc mainNeg() = + suite "Conditional negation": + test "Conditional negation": + block: + var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE") + var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF") + + let a2 = a + let b2 = b + + a.cneg(CtTrue) + b.cneg(CtTrue) + + discard a.add(a2) + discard b.add(b2) + + check: + bool(a.isZero) + bool(b.isZero) + + block: + var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE") + var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF") + + let a2 = a + let b2 = b + + a.cneg(CtFalse) + b.cneg(CtFalse) + + check: + bool(a == a2) + bool(b == b2) + + test "Conditional negation with carries": + block: + var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_FFFFFFFF") + var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_00000000_00000000") + + let a2 = a + let b2 = b + + a.cneg(CtTrue) + b.cneg(CtTrue) + + discard a.add(a2) + discard b.add(b2) + + check: + bool(a.isZero) + bool(b.isZero) + + block: + var a = fromHex(BigInt[128], "0x12345678_00000000_00321321_FFFFFFFF") + var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_00000000_00000000") + + let a2 = a + let b2 = b + + a.cneg(CtFalse) + b.cneg(CtFalse) + + check: + bool(a == a2) + bool(b == b2) + + test "Conditional all-zero bit or all-one bit": + block: + var a = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000000") + var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF") + + let a2 = a + let b2 = b + + a.cneg(CtTrue) + b.cneg(CtTrue) + + discard a.add(a2) + discard b.add(b2) + + check: + bool(a.isZero) + bool(b.isZero) + + block: + var a = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000000") + var b = fromHex(BigInt[128], "0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF") + + let a2 = a + let b2 = b + + a.cneg(CtFalse) + b.cneg(CtFalse) + + check: + bool(a == a2) + bool(b == b2) + +proc mainCopySwap() = + suite "Copy and Swap": + test "Conditional copy": + block: + var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE") + let b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF") + + var expected = a + a.ccopy(b, CtFalse) + + check: bool(expected == a) + + block: + var a = fromHex(BigInt[128], "0x00000000_FFFFFFFF_FFFFFFFF_FFFFFFFF") + let b = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000001") + + var expected = b + a.ccopy(b, CtTrue) + + check: bool(expected == b) + + test "Conditional swap": + block: + var a = fromHex(BigInt[128], "0x12345678_FF11FFAA_00321321_CAFECAFE") + var b = fromHex(BigInt[128], "0xDEADBEEF_DEADBEEF_DEADBEEF_DEADBEEF") + + let eA = a + let eB = b + + a.cswap(b, CtFalse) + check: + bool(eA == a) + bool(eB == b) + + block: + var a = fromHex(BigInt[128], "0x00000000_FFFFFFFF_FFFFFFFF_FFFFFFFF") + var b = fromHex(BigInt[128], "0x00000000_00000000_00000000_00000001") + + let eA = b + let eB = a + + a.cswap(b, CtTrue) + check: + bool(eA == a) + bool(eB == b) + +proc mainModularInverse() = + suite "Modular Inverse (with odd modulus)": + # Note: We don't define multi-precision multiplication + # because who needs it when you have Montgomery? + # ¯\_(ツ)_/¯ + test "42^-1 (mod 2017) = 1969": + block: # small int + let a = BigInt[16].fromUint(42'u16) + let M = BigInt[16].fromUint(2017'u16) + + var mp1div2 = M + discard mp1div2.add(Word 1) + mp1div2.shiftRight(1) + + let expected = BigInt[16].fromUint(1969'u16) + var r {.noInit.}: BigInt[16] + + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + block: # huge int + let a = BigInt[381].fromUint(42'u16) + let M = BigInt[381].fromUint(2017'u16) + + var mp1div2 = M + discard mp1div2.add(Word 1) + mp1div2.shiftRight(1) + + let expected = BigInt[381].fromUint(1969'u16) + var r {.noInit.}: BigInt[381] + + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + test "271^-1 (mod 383) = 106": + block: # small int + let a = BigInt[16].fromUint(271'u16) + let M = BigInt[16].fromUint(383'u16) + + var mp1div2 = M + discard mp1div2.add(Word 1) + mp1div2.shiftRight(1) + + let expected = BigInt[16].fromUint(106'u16) + var r {.noInit.}: BigInt[16] + + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + block: # huge int + let a = BigInt[381].fromUint(271'u16) + let M = BigInt[381].fromUint(383'u16) + + var mp1div2 = M + discard mp1div2.add(Word 1) + mp1div2.shiftRight(1) + + let expected = BigInt[381].fromUint(106'u16) + var r {.noInit.}: BigInt[381] + + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + test "BN254_Modulus^-1 (mod BLS12_381)": + let a = BigInt[381].fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") + let M = BigInt[381].fromHex("0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab") + + var mp1div2 = M + discard mp1div2.add(Word 1) + mp1div2.shiftRight(1) + + let expected = BigInt[381].fromHex("0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8") + + var r {.noInit.}: BigInt[381] + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + test "0^-1 (mod 0) = 0 (need for tower of extension fields)": + let a = BigInt[16].fromUint(0'u16) + let M = BigInt[16].fromUint(2017'u16) + + var mp1div2 = M + mp1div2.shiftRight(1) + discard mp1div2.add(Word 1) + + let expected = BigInt[16].fromUint(0'u16) + var r {.noInit.}: BigInt[16] + + r.invmod(a, M, mp1div2) + + check: bool(r == expected) + + +mainArith() +mainNeg() +mainCopySwap() +mainModularInverse() diff --git a/tests/test_bigints_multimod.nim b/tests/test_bigints_multimod.nim index 0a9fd12..e020b4c 100644 --- a/tests/test_bigints_multimod.nim +++ b/tests/test_bigints_multimod.nim @@ -11,7 +11,7 @@ import unittest, # Third-party ../constantine/io/io_bigints, - ../constantine/arithmetic/bigints, + ../constantine/arithmetic, ../constantine/primitives proc main() = diff --git a/tests/test_bigints_vs_gmp.nim b/tests/test_bigints_vs_gmp.nim index 2bfe2b4..fe71145 100644 --- a/tests/test_bigints_vs_gmp.nim +++ b/tests/test_bigints_vs_gmp.nim @@ -13,8 +13,8 @@ import gmp, stew/byteutils, # Internal ../constantine/io/io_bigints, - ../constantine/arithmetic/bigints, - ../constantine/primitives/constant_time + ../constantine/arithmetic, + ../constantine/primitives # We test up to 1024-bit, more is really slow diff --git a/tests/test_finite_fields.nim b/tests/test_finite_fields.nim index 61c181d..f85f70b 100644 --- a/tests/test_finite_fields.nim +++ b/tests/test_finite_fields.nim @@ -7,7 +7,7 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import unittest, - ../constantine/arithmetic/finite_fields, + ../constantine/arithmetic, ../constantine/io/io_fields, ../constantine/config/curves diff --git a/tests/test_finite_fields_mulsquare.nim b/tests/test_finite_fields_mulsquare.nim index 25066ae..a340730 100644 --- a/tests/test_finite_fields_mulsquare.nim +++ b/tests/test_finite_fields_mulsquare.nim @@ -7,11 +7,11 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import std/unittest, std/times, - ../constantine/arithmetic/[bigints, finite_fields], + ../constantine/arithmetic, ../constantine/io/[io_bigints, io_fields], ../constantine/config/curves, # Test utilities - ./prng + ../helpers/prng const Iters = 128 @@ -25,17 +25,22 @@ static: doAssert defined(testingCurves), "This modules requires the -d:testingCu import ../constantine/config/common proc sanity(C: static Curve) = - test "Squaring 0,1,2 with "& $C & " [FastSquaring = " & $Fake101.canUseNoCarryMontySquare & "]": + test "Squaring 0,1,2 with "& $Curve(C) & " [FastSquaring = " & $C.canUseNoCarryMontySquare & "]": block: # 0² mod var n: Fp[C] n.fromUint(0'u32) let expected = n + # Out-of-place var r: Fp[C] r.square(n) + # In-place + n.square() - check: bool(r == expected) + check: + bool(r == expected) + bool(n == expected) block: # 1² mod var n: Fp[C] @@ -43,10 +48,15 @@ proc sanity(C: static Curve) = n.fromUint(1'u32) let expected = n + # Out-of-place var r: Fp[C] r.square(n) + # In-place + n.square() - check: bool(r == expected) + check: + bool(r == expected) + bool(n == expected) block: # 2² mod var n, expected: Fp[C] @@ -54,10 +64,15 @@ proc sanity(C: static Curve) = n.fromUint(2'u32) expected.fromUint(4'u32) + # Out-of-place var r: Fp[C] r.square(n) + # In-place + n.square() - check: bool(r == expected) + check: + bool(r == expected) + bool(n == expected) proc mainSanity() = suite "Modular squaring is consistent with multiplication on special elements": diff --git a/tests/test_finite_fields_powinv.nim b/tests/test_finite_fields_powinv.nim index ffb712e..f0bc51d 100644 --- a/tests/test_finite_fields_powinv.nim +++ b/tests/test_finite_fields_powinv.nim @@ -7,7 +7,7 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms. import unittest, - ../constantine/arithmetic/[bigints, finite_fields], + ../constantine/arithmetic, ../constantine/io/[io_bigints, io_fields], ../constantine/config/curves @@ -144,14 +144,14 @@ proc main() = suite "Modular inversion over prime fields": test "x^(-1) mod p": - var x: Fp[BLS12_381] + var r, x: Fp[BLS12_381] # BN254 field modulus x.fromHex("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47") let expected = "0x0636759a0f3034fa47174b2c0334902f11e9915b7bd89c6a2b3082b109abbc9837da17201f6d8286fe6203caa1b9d4c8" - x.inv() - let computed = x.toHex() + r.inv(x) + let computed = r.toHex() check: computed == expected diff --git a/tests/test_finite_fields_vs_gmp.nim b/tests/test_finite_fields_vs_gmp.nim index 0c29b0d..a4b2e42 100644 --- a/tests/test_finite_fields_vs_gmp.nim +++ b/tests/test_finite_fields_vs_gmp.nim @@ -13,18 +13,21 @@ import gmp, stew/byteutils, # Internal ../constantine/io/[io_bigints, io_fields], - ../constantine/arithmetic/[finite_fields, bigints], - ../constantine/primitives/constant_time, + ../constantine/arithmetic, + ../constantine/primitives, ../constantine/config/curves -# We test up to 1024-bit, more is really slow - var RNG {.compileTime.} = initRand(1234) const CurveParams = [ + P224: (224, "0xffffffffffffffffffffffffffffffff000000000000000000000001"), BN254: (254, "0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"), + P256: (256, "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff"), + Secp256k1: (256, "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F"), BLS12_381: (381, "0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab") ] +const AvailableCurves = [P224, BN254, P256, Secp256k1, BLS12_381] + const # https://gmplib.org/manual/Integer-Import-and-Export.html GMP_WordLittleEndian = -1'i32 GMP_WordNativeEndian = 0'i32 @@ -53,7 +56,7 @@ proc binary_prologue[C: static Curve, N: static int]( mpz_urandomb(b, gmpRng, uint bits) # Set modulus to curve modulus let err = mpz_set_str(p, CurveParams[C][1], 0) - doAssert err == 0 + doAssert err == 0, "Error on prime for curve " & $Curve(C) ######################################################### # Conversion buffers @@ -189,8 +192,8 @@ proc invTests(gmpRng: var gmp_randstate_t, a, b, p, r: var mpz_t, C: static Curv let exist = mpz_invert(r, a, p) doAssert exist != 0 - var rTest = aTest - rTest.inv() + var rTest {.noInit.}: Fp[C] + rTest.inv(aTest) binary_epilogue(r, a, b, rTest, aBuf, bBuf, "Inversion (b is unused)") @@ -206,7 +209,7 @@ macro randomTests(numTests: static int, curveSym, body: untyped): untyped = result = newStmtList() for _ in 0 ..< numTests: - let curve = RNG.rand([BN254, BLS12_381]) + let curve = RNG.rand(AvailableCurves) result.add quote do: block: diff --git a/tests/test_fp2.nim b/tests/test_fp2.nim index 86e96c3..05cc0fa 100644 --- a/tests/test_fp2.nim +++ b/tests/test_fp2.nim @@ -12,9 +12,9 @@ import # Internals ../constantine/tower_field_extensions/[abelian_groups, fp2_complex], ../constantine/config/[common, curves], - ../constantine/arithmetic/bigints, + ../constantine/arithmetic, # Test utilities - ./prng + ../helpers/prng const Iters = 128 diff --git a/tests/test_io_bigints.nim b/tests/test_io_bigints.nim index f5a87e1..2a506c5 100644 --- a/tests/test_io_bigints.nim +++ b/tests/test_io_bigints.nim @@ -9,7 +9,7 @@ import unittest, random, ../constantine/io/io_bigints, ../constantine/config/common, - ../constantine/arithmetic/bigints + ../constantine/arithmetic randomize(0xDEADBEEF) # Random seed for reproducibility type T = BaseType diff --git a/tests/test_io_fields.nim b/tests/test_io_fields.nim index 177f488..fbee1e2 100644 --- a/tests/test_io_fields.nim +++ b/tests/test_io_fields.nim @@ -10,7 +10,7 @@ import unittest, random, ../constantine/io/[io_bigints, io_fields], ../constantine/config/curves, ../constantine/config/common, - ../constantine/arithmetic/[bigints, finite_fields] + ../constantine/arithmetic randomize(0xDEADBEEF) # Random seed for reproducibility type T = BaseType