mirror of
https://github.com/codex-storage/constantine.git
synced 2025-01-27 02:54:56 +00:00
SHA256 Hash function
This commit is contained in:
parent
c89c78d2d9
commit
e89429e822
108
benchmarks/bench_blueprint.nim
Normal file
108
benchmarks/bench_blueprint.nim
Normal file
@ -0,0 +1,108 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Benchmark blueprint
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
import
|
||||
# Internal
|
||||
../constantine/config/common,
|
||||
# Helpers
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
|
||||
export strformat, platforms, times, monotimes, macros
|
||||
|
||||
# Shared PRNG state used by every benchmark that imports this blueprint.
var rng*: RngState
# Seed from the wall clock, keeping only the low 32 bits (unixTime mod 2^32).
let seed = uint32(getTime().toUnix() and 0xFFFF_FFFF'i64)
rng.seed(seed)
# Print the seed so that a run can be identified/reproduced from its log.
echo "bench xoshiro512** seed: ", seed
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
  ## Spins the CPU on a throw-away integer loop so that it has a chance to
  ## reach its maximum performance state before any measurement starts.
  ## The elapsed CPU time and the loop result are printed.
  let start = cpuTime()
  # Use 64-bit arithmetic explicitly: `i*i` reaches ~9e16, which overflows the
  # platform `int` on 32-bit targets and would raise with overflow checks on.
  var foo = 123'i64
  for i in 0'i64 ..< 300_000_000'i64:
    foo += i*i mod 456
    foo = foo mod 789

  # The compiler shouldn't optimize away the result, as cpuTime relies on side effects
  let stop = cpuTime()
  echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
# Run the warmup once, at import time, before any benchmark is measured.
warmup()

# Which C compiler backend produced this binary.
const compilerBanner =
  when defined(gcc): "\nCompiled with GCC"
  elif defined(clang): "\nCompiled with Clang"
  elif defined(vcc): "\nCompiled with MSVC"
  elif defined(icc): "\nCompiled with ICC"
  else: "\nCompiled with an unknown compiler"
echo compilerBanner

# Build flags that influence the measured numbers.
echo "Optimization level => "
echo " no optimization: ", not defined(release)
echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

# Limb size: 32-bit limbs are used on 32-bit platforms or when forced.
const limbBanner =
  when (sizeof(int) == 4) or defined(Constantine32):
    "⚠️ Warning: using Constantine with 32-bit limbs"
  else:
    "Using Constantine with 64-bit limbs"
echo limbBanner

when SupportsCPUName:
  echo "Running on ", cpuName(), ""

when SupportsGetTicks:
  echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
  echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"

echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*(length: int) =
  ## Prints a horizontal rule made of `length` dash characters.
  echo repeat('-', length)
|
||||
|
||||
proc notes*() =
  ## Prints the explanatory footer shared by the benchmarks: compiler caveats
  ## and how to run the benchmarks with or without the compile-time assembler.
  echo "Notes:"
  echo " - Compilers:"
  echo " Compilers are severely limited on multiprecision arithmetic."
  echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
  echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
  echo " GCC also seems to have issues with large temporaries and register spilling."
  echo " This is somewhat alleviated by Constantine compile-time assembler."
  echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc\" or \"nimble bench_ec_g1_clang\"."
  # The `_noasm` tasks are the ones that disable the assembler.
  echo " Bench on specific compiler without assembler: \"nimble bench_ec_g1_gcc_noasm\" or \"nimble bench_ec_g1_clang_noasm\"."
  echo " - The simplest operations might be optimized away by the compiler."
  echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"
|
||||
|
||||
# Runs `body` `iters` times and declares, in the caller's scope, the four
# identifiers passed as `startTime`/`stopTime` (monotonic timestamps) and
# `startClk`/`stopClk` (tick counter readings, or -1 when `SupportsGetTicks`
# is false).
# The tick counter is read inside the monotonic-clock window:
#   startTime -> startClk -> loop -> stopClk -> stopTime
template measure*(iters: int,
                  startTime, stopTime: untyped,
                  startClk, stopClk: untyped,
                  body: untyped): untyped =
  let startTime = getMonotime()
  when SupportsGetTicks:
    let startClk = getTicks()
  else:
    let startClk = -1'i64

  for _ in 0 ..< iters:
    body

  when SupportsGetTicks:
    let stopClk = getTicks()
  else:
    let stopClk = -1'i64
  let stopTime = getMonotime()
|
@ -21,85 +21,12 @@ import
|
||||
# Helpers
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros],
|
||||
./bench_blueprint,
|
||||
# Reference unsafe scalar multiplication
|
||||
../tests/support/ec_reference_scalar_mult
|
||||
|
||||
var rng: RngState
|
||||
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
|
||||
rng.seed(seed)
|
||||
echo "bench xoshiro512** seed: ", seed
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
echo "\nCompiled with Clang"
|
||||
elif defined(vcc):
|
||||
echo "\nCompiled with MSVC"
|
||||
elif defined(icc):
|
||||
echo "\nCompiled with ICC"
|
||||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
echo "Optimization level => "
|
||||
echo " no optimization: ", not defined(release)
|
||||
echo " release: ", defined(release)
|
||||
echo " danger: ", defined(danger)
|
||||
echo " inline assembly: ", UseASM_X86_64
|
||||
|
||||
when (sizeof(int) == 4) or defined(Constantine32):
|
||||
echo "⚠️ Warning: using Constantine with 32-bit limbs"
|
||||
else:
|
||||
echo "Using Constantine with 64-bit limbs"
|
||||
|
||||
when SupportsCPUName:
|
||||
echo "Running on ", cpuName(), ""
|
||||
|
||||
when SupportsGetTicks:
|
||||
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
|
||||
|
||||
echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*() =
|
||||
echo "-".repeat(177)
|
||||
|
||||
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stop-start) div iters)
|
||||
let throughput = 1e9 / float64(ns)
|
||||
when SupportsGetTicks:
|
||||
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
|
||||
else:
|
||||
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||
|
||||
proc notes*() =
|
||||
echo "Notes:"
|
||||
echo " - Compilers:"
|
||||
echo " Compilers are severely limited on multiprecision arithmetic."
|
||||
echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
|
||||
echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
|
||||
echo " GCC also seems to have issues with large temporaries and register spilling."
|
||||
echo " This is somewhat alleviated by Constantine compile-time assembler."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc\" or \"nimble bench_ec_g1_clang\"."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc_noasm\" or \"nimble bench_ec_g1_clang_noasm\"."
|
||||
echo " - The simplest operations might be optimized away by the compiler."
|
||||
echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"
|
||||
export notes
|
||||
proc separator*() = separator(177)
|
||||
|
||||
macro fixEllipticDisplay(T: typedesc): untyped =
|
||||
# At compile-time, enums are integers and their display is buggy
|
||||
@ -111,21 +38,17 @@ macro fixEllipticDisplay(T: typedesc): untyped =
|
||||
name.add "[" & fieldName & "[" & curveName & "]]"
|
||||
result = newLit name
|
||||
|
||||
proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stop-start) div iters)
|
||||
let throughput = 1e9 / float64(ns)
|
||||
when SupportsGetTicks:
|
||||
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
|
||||
else:
|
||||
echo &"{op:<60} {elliptic:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||
|
||||
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
|
||||
let start = getMonotime()
|
||||
when SupportsGetTicks:
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< iters:
|
||||
body
|
||||
when SupportsGetTicks:
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
|
||||
when not SupportsGetTicks:
|
||||
let startClk = -1'i64
|
||||
let stopClk = -1'i64
|
||||
|
||||
report(op, fixEllipticDisplay(T), start, stop, startClk, stopClk, iters)
|
||||
measure(iters, startTime, stopTime, startClk, stopClk, body)
|
||||
report(op, fixEllipticDisplay(T), startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
proc addBench*(T: typedesc, iters: int) =
|
||||
const G1_or_G2 = when T.F is Fp: "G1" else: "G2"
|
||||
|
@ -19,63 +19,10 @@ import
|
||||
../constantine/towers,
|
||||
# Helpers
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
./bench_blueprint
|
||||
|
||||
var rng: RngState
|
||||
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
|
||||
rng.seed(seed)
|
||||
echo "bench xoshiro512** seed: ", seed
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
echo "\nCompiled with Clang"
|
||||
elif defined(vcc):
|
||||
echo "\nCompiled with MSVC"
|
||||
elif defined(icc):
|
||||
echo "\nCompiled with ICC"
|
||||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
echo "Optimization level => "
|
||||
echo " no optimization: ", not defined(release)
|
||||
echo " release: ", defined(release)
|
||||
echo " danger: ", defined(danger)
|
||||
echo " inline assembly: ", UseASM_X86_64
|
||||
|
||||
when (sizeof(int) == 4) or defined(Constantine32):
|
||||
echo "⚠️ Warning: using Constantine with 32-bit limbs"
|
||||
else:
|
||||
echo "Using Constantine with 64-bit limbs"
|
||||
|
||||
when SupportsCPUName:
|
||||
echo "Running on ", cpuName(), ""
|
||||
|
||||
when SupportsGetTicks:
|
||||
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
|
||||
|
||||
echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*() =
|
||||
echo "-".repeat(145)
|
||||
export notes
|
||||
proc separator*() = separator(145)
|
||||
|
||||
proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stop-start) div iters)
|
||||
@ -85,19 +32,6 @@ proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64,
|
||||
else:
|
||||
echo &"{op:<50} {field:<18} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||
|
||||
proc notes*() =
|
||||
echo "Notes:"
|
||||
echo " - Compilers:"
|
||||
echo " Compilers are severely limited on multiprecision arithmetic."
|
||||
echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
|
||||
echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
|
||||
echo " GCC also seems to have issues with large temporaries and register spilling."
|
||||
echo " This is somewhat alleviated by Constantine compile-time assembler."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_fp_gcc\" or \"nimble bench_fp_clang\"."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_fp_gcc_noasm\" or \"nimble bench_fp_clang_noasm\"."
|
||||
echo " - The simplest operations might be optimized away by the compiler."
|
||||
echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"
|
||||
|
||||
macro fixFieldDisplay(T: typedesc): untyped =
|
||||
# At compile-time, enums are integers and their display is buggy
|
||||
# we get the Curve ID instead of the curve name.
|
||||
@ -107,20 +41,8 @@ macro fixFieldDisplay(T: typedesc): untyped =
|
||||
result = newLit name
|
||||
|
||||
template bench(op: string, T: typedesc, iters: int, body: untyped): untyped =
|
||||
let start = getMonotime()
|
||||
when SupportsGetTicks:
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< iters:
|
||||
body
|
||||
when SupportsGetTicks:
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
|
||||
when not SupportsGetTicks:
|
||||
let startClk = -1'i64
|
||||
let stopClk = -1'i64
|
||||
|
||||
report(op, fixFieldDisplay(T), start, stop, startClk, stopClk, iters)
|
||||
measure(iters, startTime, stopTime, startClk, stopClk, body)
|
||||
report(op, fixFieldDisplay(T), startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
proc addBench*(T: typedesc, iters: int) =
|
||||
var x = rng.random_unsafe(T)
|
||||
|
@ -28,101 +28,23 @@ import
|
||||
pairing_bn
|
||||
],
|
||||
# Helpers
|
||||
../helpers/[prng_unsafe, static_for],
|
||||
./platforms,
|
||||
# Standard library
|
||||
std/[monotimes, times, strformat, strutils, macros]
|
||||
../helpers/prng_unsafe,
|
||||
./bench_blueprint
|
||||
|
||||
var rng: RngState
|
||||
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
|
||||
rng.seed(seed)
|
||||
echo "bench xoshiro512** seed: ", seed
|
||||
export notes
|
||||
proc separator*() = separator(177)
|
||||
|
||||
# warmup
|
||||
proc warmup*() =
|
||||
# Warmup - make sure cpu is on max perf
|
||||
let start = cpuTime()
|
||||
var foo = 123
|
||||
for i in 0 ..< 300_000_000:
|
||||
foo += i*i mod 456
|
||||
foo = foo mod 789
|
||||
|
||||
# Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
|
||||
let stop = cpuTime()
|
||||
echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"
|
||||
|
||||
warmup()
|
||||
|
||||
when defined(gcc):
|
||||
echo "\nCompiled with GCC"
|
||||
elif defined(clang):
|
||||
echo "\nCompiled with Clang"
|
||||
elif defined(vcc):
|
||||
echo "\nCompiled with MSVC"
|
||||
elif defined(icc):
|
||||
echo "\nCompiled with ICC"
|
||||
else:
|
||||
echo "\nCompiled with an unknown compiler"
|
||||
|
||||
echo "Optimization level => "
|
||||
echo " no optimization: ", not defined(release)
|
||||
echo " release: ", defined(release)
|
||||
echo " danger: ", defined(danger)
|
||||
echo " inline assembly: ", UseASM_X86_64
|
||||
|
||||
when (sizeof(int) == 4) or defined(Constantine32):
|
||||
echo "⚠️ Warning: using Constantine with 32-bit limbs"
|
||||
else:
|
||||
echo "Using Constantine with 64-bit limbs"
|
||||
|
||||
when SupportsCPUName:
|
||||
echo "Running on ", cpuName(), ""
|
||||
|
||||
when SupportsGetTicks:
|
||||
echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
|
||||
echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"
|
||||
|
||||
echo "\n=================================================================================================================\n"
|
||||
|
||||
proc separator*() =
|
||||
echo "-".repeat(177)
|
||||
|
||||
proc report(op, curve: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stop-start) div iters)
|
||||
proc report(op, curve: string, startTime, stopTime: MonoTime, startClk, stopClk: int64, iters: int) =
|
||||
let ns = inNanoseconds((stopTime-startTime) div iters)
|
||||
let throughput = 1e9 / float64(ns)
|
||||
when SupportsGetTicks:
|
||||
echo &"{op:<60} {curve:<15} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
|
||||
else:
|
||||
echo &"{op:<60} {curve:<15} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||
|
||||
proc notes*() =
|
||||
echo "Notes:"
|
||||
echo " - Compilers:"
|
||||
echo " Compilers are severely limited on multiprecision arithmetic."
|
||||
echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
|
||||
echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
|
||||
echo " GCC also seems to have issues with large temporaries and register spilling."
|
||||
echo " This is somewhat alleviated by Constantine compile-time assembler."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc\" or \"nimble bench_ec_g1_clang\"."
|
||||
echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc_noasm\" or \"nimble bench_ec_g1_clang_noasm\"."
|
||||
echo " - The simplest operations might be optimized away by the compiler."
|
||||
echo " - Fast Squaring and Fast Multiplication are possible if there are spare bits in the prime representation (i.e. the prime uses 254 bits out of 256 bits)"
|
||||
|
||||
template bench(op: string, C: static Curve, iters: int, body: untyped): untyped =
|
||||
let start = getMonotime()
|
||||
when SupportsGetTicks:
|
||||
let startClk = getTicks()
|
||||
for _ in 0 ..< iters:
|
||||
body
|
||||
when SupportsGetTicks:
|
||||
let stopClk = getTicks()
|
||||
let stop = getMonotime()
|
||||
|
||||
when not SupportsGetTicks:
|
||||
let startClk = -1'i64
|
||||
let stopClk = -1'i64
|
||||
|
||||
report(op, $C, start, stop, startClk, stopClk, iters)
|
||||
measure(iters, startTime, stopTime, startClk, stopClk, body)
|
||||
report(op, $C, startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
func random_point*(rng: var RngState, EC: typedesc): EC {.noInit.} =
|
||||
result = rng.random_unsafe(EC)
|
||||
|
58
benchmarks/bench_sha256.nim
Normal file
58
benchmarks/bench_sha256.nim
Normal file
@ -0,0 +1,58 @@
|
||||
import
|
||||
# Internals
|
||||
../constantine/hashes/h_sha256,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe,
|
||||
./bench_blueprint
|
||||
|
||||
proc separator*() =
  ## Prints a horizontal rule 69 characters wide for this benchmark.
  separator(69)
|
||||
|
||||
# OpenSSL exports `SHA256` from libcrypto (libssl merely depends on it), and
# the shared-library file name differs per platform and OpenSSL version.
# The parenthesised alternatives are tried in order by Nim's dynlib loader.
const libCrypto =
  when defined(windows): "libcrypto-(3|1_1)(-x64|).dll"
  elif defined(macosx): "libcrypto(|.3|.1.1).dylib"
  else: "libcrypto.so(|.3|.1.1)"

# Binding to OpenSSL's one-shot SHA-256 function.
#   msg:    bytes (or chars) to hash
#   digest: destination for the 32-byte hash; per the OpenSSL documentation a
#           nil pointer makes OpenSSL use an internal static buffer instead
# Returns the pointer to the buffer holding the digest.
proc SHA256[T: byte|char](
       msg: openarray[T],
       digest: ptr array[32, byte] = nil
     ): ptr array[32, byte] {.cdecl, dynlib: libCrypto, importc.}
|
||||
|
||||
proc SHA256_OpenSSL[T: byte|char](
       digest: var array[32, byte],
       s: openarray[T]) =
  ## Hashes `s` with the imported `SHA256` and writes the 32-byte result into
  ## `digest`. Argument order mirrors the `sha256.hash(digest, msg)` call used
  ## in this file.
  let digestPtr = addr digest
  # The returned pointer is not needed: the result is already in `digest`.
  discard SHA256(s, digestPtr)
|
||||
|
||||
proc report(op: string, bytes: int, startTime, stopTime: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one result line for `op`: operations per second and nanoseconds
  ## per operation, plus cycles and cycles per byte when `SupportsGetTicks`
  ## is true. All figures are averages over `iters` iterations.
  let
    perIter = (stopTime - startTime) div iters
    ns = perIter.inNanoseconds
    throughput = 1e9 / ns.float64
  when not SupportsGetTicks:
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op"
  else:
    let
      cycles = (stopClk - startClk) div iters
      cyclePerByte = float64(cycles) / float64(bytes)
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op {cycles:>10} cycles {cyclePerByte:>5.2f} cycles/byte"
|
||||
|
||||
# Times `body` over `iters` iterations with `measure`, then prints the result
# line for `op` through `report`. `bytes` is the message size used for the
# cycles/byte figure.
template bench(op: string, bytes: int, iters: int, body: untyped): untyped =
  measure(iters, startTime, stopTime, startClk, stopClk):
    body
  report(op, bytes, startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
proc benchSHA256_constantine[T](msg: openarray[T], msgComment: string, iters: int) =
  ## Benchmarks `sha256.hash` on `msg` for `iters` iterations.
  ## `msgComment` is appended to the label of the printed result line.
  var digest: array[32, byte]
  let size = msg.len
  bench("SHA256 - Constantine - " & msgComment, size, iters):
    sha256.hash(digest, msg)
|
||||
|
||||
proc benchSHA256_openssl[T](msg: openarray[T], msgComment: string, iters: int) =
  ## Benchmarks the OpenSSL binding (`SHA256_OpenSSL`) on `msg` for `iters`
  ## iterations. `msgComment` is appended to the label of the result line.
  var digest: array[32, byte]
  let size = msg.len
  bench("SHA256 - OpenSSL - " & msgComment, size, iters):
    SHA256_OpenSSL(digest, msg)
|
||||
|
||||
when isMainModule:
  proc main() =
    ## Benchmarks both SHA256 implementations on random messages of
    ## increasing size, with fewer iterations for the larger messages.
    const workloads = [
      (size: 128, label: "128B", iters: 128),
      (size: 5_000_000, label: "5MB", iters: 16),
      (size: 100_000_000, label: "100MB", iters: 3)
    ]
    for w in workloads:
      # Both implementations hash the exact same random message.
      let msg = rng.random_byte_seq(w.size)
      benchSHA256_constantine(msg, w.label, w.iters)
      benchSHA256_openssl(msg, w.label, w.iters)

  main()
|
@ -129,17 +129,40 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||
("tests/t_pairing_bn254_snarks_optate.nim", false),
|
||||
("tests/t_pairing_bls12_377_optate.nim", false),
|
||||
("tests/t_pairing_bls12_381_optate.nim", false),
|
||||
|
||||
# Hashing vs OpenSSL
|
||||
("tests/t_hash_sha256_vs_openssl.nim", true),
|
||||
]
|
||||
|
||||
# For temporary (hopefully) investigation that can only be reproduced in CI
|
||||
const useDebug = [
|
||||
"tests/t_bigints.nim"
|
||||
"tests/t_bigints.nim",
|
||||
"tests/t_hash_sha256_vs_openssl.nim",
|
||||
]
|
||||
|
||||
# Tests that use sequences require Nim GC, stack scanning and nil pointer passed to openarray
|
||||
# In particular the tests that use the json test vectors, don't sanitize them.
|
||||
# we do use gc:none to help
|
||||
const skipSanitizers = [
|
||||
"tests/t_ec_sage_bn254_nogami.nim",
|
||||
"tests/t_ec_sage_bn254_snarks.nim",
|
||||
"tests/t_ec_sage_bls12_377.nim",
|
||||
"tests/t_ec_sage_bls12_381.nim",
|
||||
]
|
||||
|
||||
const sanitizers =
|
||||
" --passC:-fsanitize=undefined --passL:-fsanitize=undefined" &
|
||||
" --passC:-fno-sanitize-recover" & # Enforce crash on undefined behaviour
|
||||
" --gc:none" # The conservative stack scanning of Nim default GC triggers, alignment UB and stack-buffer-overflow check.
|
||||
# " --passC:-fsanitize=address --passL:-fsanitize=address" & # Requires too much stack for the inline assembly
|
||||
|
||||
|
||||
# Helper functions
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
proc clearParallelBuild() =
  ## Truncates (or creates) the `buildParallel` command file through a shell
  ## redirection, so that a fresh list of test commands can be appended to it.
  let truncateCmd = "> " & buildParallel
  exec truncateCmd
|
||||
|
||||
proc test(flags, path: string, commandFile = false) =
|
||||
# commandFile should be a "file" but Nimscript doesn't support IO
|
||||
if not dirExists "build":
|
||||
@ -153,6 +176,7 @@ proc test(flags, path: string, commandFile = false) =
|
||||
if existsEnv"CC":
|
||||
cc = " --cc:" & getEnv"CC"
|
||||
|
||||
var flags = flags & " --passC:-fstack-protector-all"
|
||||
let command = "nim " & lang & cc & " " & flags &
|
||||
" --verbosity:0 --outdir:build/testsuite -r --hints:off --warnings:off " &
|
||||
" --nimcache:nimcache/" & path & " " &
|
||||
@ -160,11 +184,10 @@ proc test(flags, path: string, commandFile = false) =
|
||||
|
||||
if not commandFile:
|
||||
echo "\n=============================================================================================="
|
||||
echo "Running [flags: ", flags, "] ", path
|
||||
echo "Running [flags:", flags, "] ", path
|
||||
echo "=============================================================================================="
|
||||
exec command
|
||||
else:
|
||||
# commandFile.writeLine command
|
||||
exec "echo \'" & command & "\' >> " & buildParallel
|
||||
|
||||
proc runBench(benchName: string, compiler = "", useAsm = true) =
|
||||
@ -181,24 +204,29 @@ proc runBench(benchName: string, compiler = "", useAsm = true) =
|
||||
" --nimcache:nimcache/" & benchName & "_" & compiler & "_" & (if useAsm: "useASM" else: "noASM") &
|
||||
" -r --hints:off --warnings:off benchmarks/" & benchName & ".nim"
|
||||
|
||||
proc runTests(requireGMP: bool, dumpCmdFile = false, test32bit = false, testASM = true) =
  ## Runs (or, with `dumpCmdFile`, records) every entry of `testDesc`.
  ##   requireGMP:  when false, tests flagged `useGMP` are skipped
  ##   dumpCmdFile: forwarded to `test` as its `commandFile` argument
  ##   test32bit:   adds `-d:Constantine32`
  ##   testASM:     when false, adds `-d:ConstantineASM=false`
  for td in testDesc:
    # Skip GMP-dependent tests when GMP is not available/required.
    if td.useGMP and not requireGMP:
      continue

    var flags = ""
    if not testASM:
      flags.add " -d:ConstantineASM=false"
    if test32bit:
      flags.add " -d:Constantine32"
    if td.path in useDebug:
      flags.add " -d:debugConstantine"
    if td.path notin skipSanitizers:
      flags.add sanitizers

    test(flags, td.path, dumpCmdFile)
|
||||
|
||||
# Tasks
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
task test, "Run all tests":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
|
||||
for td in testDesc:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine", td.path
|
||||
else:
|
||||
test "", td.path
|
||||
runTests(requireGMP = true)
|
||||
|
||||
# if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
# for td in testDesc:
|
||||
# if td.path in useDebug:
|
||||
# test "-d:Constantine32 -d:debugConstantine", td.path
|
||||
# else:
|
||||
# test "-d:Constantine32", td.path
|
||||
# runTests(requireGMP = true, test32bit = true)
|
||||
|
||||
# Ensure benchmarks stay relevant. Ignore Windows 32-bit at the moment
|
||||
if not defined(windows) or not (existsEnv"UCPU" or getEnv"UCPU" == "i686"):
|
||||
@ -213,23 +241,14 @@ task test, "Run all tests":
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task test_no_gmp, "Run tests that don't require GMP":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine", td.path
|
||||
else:
|
||||
test "", td.path
|
||||
runTests(requireGMP = false)
|
||||
|
||||
if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:Constantine32 -d:debugConstantine", td.path
|
||||
else:
|
||||
test "-d:Constantine32", td.path
|
||||
# if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
# runTests(requireGMP = true, test32bit = true)
|
||||
|
||||
# Ensure benchmarks stay relevant. Ignore Windows 32-bit at the moment
|
||||
if not defined(windows) or not (existsEnv"UCPU" or getEnv"UCPU" == "i686"):
|
||||
@ -243,31 +262,17 @@ task test_no_gmp, "Run tests that don't require GMP":
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task test_parallel, "Run all tests in parallel (via GNU parallel)":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
let cmdFile = true # open(buildParallel, mode = fmWrite) # Nimscript doesn't support IO :/
|
||||
exec "> " & buildParallel
|
||||
|
||||
for td in testDesc:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine", td.path, cmdFile
|
||||
else:
|
||||
test "", td.path, cmdFile
|
||||
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = true, dumpCmdFile = true)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
exec "> " & buildParallel
|
||||
if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
for td in testDesc:
|
||||
if td.path in useDebug:
|
||||
test "-d:Constantine32 -d:debugConstantine", td.path, cmdFile
|
||||
else:
|
||||
test "-d:Constantine32", td.path, cmdFile
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = true, dumpCmdFile = true, test32bit = true)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
# Now run the benchmarks
|
||||
@ -286,31 +291,18 @@ task test_parallel, "Run all tests in parallel (via GNU parallel)":
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task test_parallel_no_assembler, "Run all tests (without macro assembler) in parallel (via GNU parallel)":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
let cmdFile = true # open(buildParallel, mode = fmWrite) # Nimscript doesn't support IO :/
|
||||
exec "> " & buildParallel
|
||||
|
||||
for td in testDesc:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine -d:ConstantineASM=false", td.path, cmdFile
|
||||
else:
|
||||
test " -d:ConstantineASM=false", td.path, cmdFile
|
||||
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = true, dumpCmdFile = true, testASM = false)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
exec "> " & buildParallel
|
||||
if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
for td in testDesc:
|
||||
if td.path in useDebug:
|
||||
test "-d:Constantine32 -d:debugConstantine -d:ConstantineASM=false", td.path, cmdFile
|
||||
else:
|
||||
test "-d:Constantine32 -d:ConstantineASM=false", td.path, cmdFile
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = true, dumpCmdFile = true, test32bit = true, testASM = false)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
# Now run the benchmarks
|
||||
@ -329,33 +321,17 @@ task test_parallel_no_assembler, "Run all tests (without macro assembler) in par
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task test_parallel_no_gmp, "Run all tests in parallel (via GNU parallel)":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
let cmdFile = true # open(buildParallel, mode = fmWrite) # Nimscript doesn't support IO :/
|
||||
exec "> " & buildParallel
|
||||
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine", td.path, cmdFile
|
||||
else:
|
||||
test "", td.path, cmdFile
|
||||
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = false, dumpCmdFile = true)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
exec "> " & buildParallel
|
||||
if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:Constantine32 -d:debugConstantine", td.path, cmdFile
|
||||
else:
|
||||
test "-d:Constantine32", td.path, cmdFile
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = false, dumpCmdFile = true, test32bit = true)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
# Now run the benchmarks
|
||||
@ -374,33 +350,18 @@ task test_parallel_no_gmp, "Run all tests in parallel (via GNU parallel)":
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task test_parallel_no_gmp_no_assembler, "Run all tests in parallel (via GNU parallel)":
|
||||
# -d:testingCurves is configured in a *.nim.cfg for convenience
|
||||
let cmdFile = true # open(buildParallel, mode = fmWrite) # Nimscript doesn't support IO :/
|
||||
exec "> " & buildParallel
|
||||
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:debugConstantine -d:ConstantineASM=false", td.path, cmdFile
|
||||
else:
|
||||
test "-d:ConstantineASM=false", td.path, cmdFile
|
||||
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = false, dumpCmdFile = true, testASM = false)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
exec "> " & buildParallel
|
||||
if sizeof(int) == 8: # 32-bit tests on 64-bit arch
|
||||
for td in testDesc:
|
||||
if not td.useGMP:
|
||||
if td.path in useDebug:
|
||||
test "-d:Constantine32 -d:debugConstantine", td.path, cmdFile
|
||||
else:
|
||||
test "-d:Constantine32", td.path, cmdFile
|
||||
# cmdFile.close()
|
||||
# Execute everything in parallel with GNU parallel
|
||||
clearParallelBuild()
|
||||
runTests(requireGMP = false, dumpCmdFile = true, test32bit = true, testASM = false)
|
||||
exec "parallel --keep-order --group < " & buildParallel
|
||||
|
||||
# Now run the benchmarks
|
||||
@ -419,6 +380,7 @@ task test_parallel_no_gmp_no_assembler, "Run all tests in parallel (via GNU para
|
||||
runBench("bench_pairing_bls12_381")
|
||||
runBench("bench_pairing_bn254_nogami")
|
||||
runBench("bench_pairing_bn254_snarks")
|
||||
runBench("bench_sha256")
|
||||
|
||||
task bench_fp, "Run benchmark 𝔽p with your default compiler":
|
||||
runBench("bench_fp")
|
||||
@ -599,3 +561,6 @@ task bench_pairing_bn254_snarks_gcc_noasm, "Run pairings benchmarks for BN254-Sn
|
||||
|
||||
task bench_pairing_bn254_snarks_clang_noasm, "Run pairings benchmarks for BN254-Snarks - Clang no Assembly":
|
||||
runBench("bench_pairing_bn254_snarks", "clang", useAsm = false)
|
||||
|
||||
task bench_sha256, "Run SHA256 benchmarks":
  # Hand the benchmark name to the shared runner, leaving its other
  # arguments at their defaults.
  runBench("bench_sha256")
|
||||
|
348
constantine/hashes/h_sha256.nim
Normal file
348
constantine/hashes/h_sha256.nim
Normal file
@ -0,0 +1,348 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../config/common,
|
||||
../io/endians
|
||||
|
||||
# SHA256, a hash function from the SHA2 family
|
||||
# --------------------------------------------------------------------------------
|
||||
#
|
||||
# References:
|
||||
# - NIST: https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.180-4.pdf
|
||||
# - IETF: US Secure Hash Algorithms (SHA and HMAC-SHA) https://tools.ietf.org/html/rfc4634
|
||||
# - Intel optimization https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/sha-256-implementations-paper.pdf
|
||||
# - Parallelizing message schedules
|
||||
# to accelerate the computations of hash functions
|
||||
# Shay Gueron, Vlad Krasnov, 2012
|
||||
# https://eprint.iacr.org/2012/067.pdf
|
||||
#
|
||||
# Vectors:
|
||||
# - https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Standards-and-Guidelines/documents/examples/SHA256.pdf
|
||||
|
||||
# Types and constants
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
const
  DigestSize = 32
    ## Size in bytes of a SHA256 digest
  BlockSize = 64
    ## Number of message bytes compressed per block
  HashSize = DigestSize div sizeof(uint32)
    ## Number of 32-bit words in the internal hash state (8)
|
||||
|
||||
type
  Sha256Context* = object
    ## State of an incremental SHA256 computation.
    ## `H` and `buf` are aligned to 64 bytes
    ## for cache line and SIMD friendliness.
    H{.align: 64}: array[HashSize, uint32]   # running hash state words
    buf{.align: 64}: array[BlockSize, byte]  # bytes not yet forming a full block
    msgLen: uint64                           # total bytes received via `update`
    bufIdx: uint8                            # index of the next free byte in `buf`

  sha256* = Sha256Context
    ## Alias used to select the hash function, as in `sha256.hash(...)`
|
||||
|
||||
# Internal
|
||||
# ----------------------------------------------------------------
|
||||
# TODO: vectorized implementations
|
||||
|
||||
# No exceptions allowed in core cryptographic operations
|
||||
{.push raises: [].}
|
||||
{.push checks: off.}
|
||||
|
||||
template rotr(x, n: uint32): uint32 =
  ## Rotate the bits of the 32-bit word `x` right by `n` positions.
  # Every call site in this file passes a constant with 0 < n < 32,
  # so neither shift amount reaches the word width
  # and there is no undefined behaviour.
  # (n == 0 would shift left by 32 and must not be used.)
  (x shr n) or (x shl (32 - n))
|
||||
|
||||
template ch(x, y, z: uint32): uint32 =
  ## "Choose" function of SHA256:
  ## bit i of the output is bit i of `y` when bit i of `x` is set,
  ## and bit i of `z` otherwise.
  ##
  ## FIPS 180-4 writes it as `(x and y) xor (not(x) and z)`;
  ## the equivalent RFC4634 formulation is used here.
  z xor (x and (y xor z))
|
||||
|
||||
template maj(x, y, z: uint32): uint32 =
  ## "Majority" function of SHA256:
  ## bit i of the output is set when at least two of
  ## `x`, `y`, `z` have bit i set.
  ##
  ## FIPS 180-4 writes it as `(x and y) xor (x and z) xor (y and z)`;
  ## the equivalent RFC4634 formulation is used here.
  (y and z) or (x and (y or z))
|
||||
|
||||
template S0(x: uint32): uint32 =
  ## Σ₀: xor of `x` rotated right by 2, 13 and 22 bits
  rotr(x, 22) xor rotr(x, 13) xor rotr(x, 2)
|
||||
|
||||
template S1(x: uint32): uint32 =
  ## Σ₁: xor of `x` rotated right by 6, 11 and 25 bits
  rotr(x, 25) xor rotr(x, 11) xor rotr(x, 6)
|
||||
|
||||
template s0(x: uint32): uint32 =
  ## σ₀: xor of `x` rotated right by 7 and 18 bits
  ## and `x` shifted right by 3 bits
  (x shr 3) xor rotr(x, 18) xor rotr(x, 7)
|
||||
|
||||
template s1(x: uint32): uint32 =
  ## σ₁: xor of `x` rotated right by 17 and 19 bits
  ## and `x` shifted right by 10 bits
  (x shr 10) xor rotr(x, 19) xor rotr(x, 17)
|
||||
|
||||
func setZero[N](a: var array[N, SomeNumber]){.inline.} =
  ## Overwrite every element of `a` with 0
  for item in a.mitems:
    item = 0
|
||||
|
||||
func hashMessageBlocks[T: byte|char](
       H: var array[HashSize, uint32],
       message: openarray[T]): uint =
  ## Hash a message block by block.
  ## The SHA256 block size is 64 bytes, hence
  ## a message is processed 64 bytes at a time.
  ## FIPS.180-4 6.2.2. SHA-256 Hash Computation
  ##
  ## Only complete blocks are consumed: the hash state `H` is updated
  ## in place and the number of bytes consumed (a multiple of 64)
  ## is returned. Trailing bytes that do not fill a block are ignored
  ## and must be buffered by the caller.

  result = 0
  let numBlocks = message.len.uint div BlockSize
  if numBlocks == 0:
    return 0

  const K256 = [
    0x428a2f98'u32, 0x71374491'u32, 0xb5c0fbcf'u32, 0xe9b5dba5'u32, 0x3956c25b'u32, 0x59f111f1'u32, 0x923f82a4'u32, 0xab1c5ed5'u32,
    0xd807aa98'u32, 0x12835b01'u32, 0x243185be'u32, 0x550c7dc3'u32, 0x72be5d74'u32, 0x80deb1fe'u32, 0x9bdc06a7'u32, 0xc19bf174'u32,
    0xe49b69c1'u32, 0xefbe4786'u32, 0x0fc19dc6'u32, 0x240ca1cc'u32, 0x2de92c6f'u32, 0x4a7484aa'u32, 0x5cb0a9dc'u32, 0x76f988da'u32,
    0x983e5152'u32, 0xa831c66d'u32, 0xb00327c8'u32, 0xbf597fc7'u32, 0xc6e00bf3'u32, 0xd5a79147'u32, 0x06ca6351'u32, 0x14292967'u32,
    0x27b70a85'u32, 0x2e1b2138'u32, 0x4d2c6dfc'u32, 0x53380d13'u32, 0x650a7354'u32, 0x766a0abb'u32, 0x81c2c92e'u32, 0x92722c85'u32,
    0xa2bfe8a1'u32, 0xa81a664b'u32, 0xc24b8b70'u32, 0xc76c51a3'u32, 0xd192e819'u32, 0xd6990624'u32, 0xf40e3585'u32, 0x106aa070'u32,
    0x19a4c116'u32, 0x1e376c08'u32, 0x2748774c'u32, 0x34b0bcb5'u32, 0x391c0cb3'u32, 0x4ed8aa4a'u32, 0x5b9cca4f'u32, 0x682e6ff3'u32,
    0x748f82ee'u32, 0x78a5636f'u32, 0x84c87814'u32, 0x8cc70208'u32, 0x90befffa'u32, 0xa4506ceb'u32, 0xbef9a3f7'u32, 0xc67178f2'u32
  ]

  # Working variables, carried from one block to the next
  var
    a = H[0]
    b = H[1]
    c = H[2]
    d = H[3]
    e = H[4]
    f = H[5]
    g = H[6]
    h = H[7]

  template compress(k, w: uint32) =
    ## One SHA256 round on the working variables a..h,
    ## with round constant `k` and message schedule word `w`.
    let tmp1 = h + S1(e) + ch(e, f, g) + k + w
    let tmp2 = S0(a) + maj(a, b, c)
    h = g
    g = f
    f = e
    e = d + tmp1
    d = c
    c = b
    b = a
    a = tmp1+tmp2

  for _ in 0 ..< numBlocks:
    # The first 16 rounds take their schedule word straight
    # from the message, rounds 16..<64 derive it from earlier words.
    # The schedule is kept as a rolling window of 16 words:
    # a full array[64, uint32] is 256 bytes and would span
    # several cache lines, impacting performance.

    # Workspace with message schedule Wₜ
    var W{.noInit.}: array[16, uint32]

    for t in 0'u32 ..< 16'u32: # Wₜ = Mⁱₜ
      # `result` doubles as the read cursor: it advances by 4 bytes per word
      W[t].parseFromBlob(message, result, bigEndian)
      compress(K256[t], W[t])

    for t in 16'u32 ..< 64'u32:
      # Slot (t mod 16) still holds Wₜ₋₁₆, hence the in-place `+=`
      W[t mod 16] += s1(W[(t-2) mod 16]) +
                     W[(t-7) mod 16] +
                     s0(W[(t-15) mod 16])
      compress(K256[t], W[t mod 16])

    # Fold the block result into the hash state.
    # a..h end up equal to H[0..7], ready for the next block.
    a += H[0]; H[0] = a
    b += H[1]; H[1] = b
    c += H[2]; H[2] = c
    d += H[3]; H[3] = d
    e += H[4]; H[4] = e
    f += H[5]; H[5] = f
    g += H[6]; H[6] = g
    h += H[7]; H[7] = h
|
||||
|
||||
func dumpHash(
       digest: var array[DigestSize, byte],
       H: array[HashSize, uint32]) =
  ## Convert the internal hash into a message digest:
  ## the words of `H` are written one after the other,
  ## each in big-endian byte order.
  var offset = 0'u
  for word in H:
    digest.dumpRawInt(word, offset, bigEndian)
    offset += uint sizeof(uint32)
|
||||
|
||||
func copy[N: static int, T: byte|char](
       dst: var array[N, byte],
       dStart: SomeInteger,
       src: openArray[T],
       sStart: SomeInteger,
       len: SomeInteger
     ) =
  ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ##
  ## Contrary to the standard library equivalent,
  ## this cannot throw, not even a defect.
  ## A source made of chars is converted to bytes on the fly.
  debug:
    doAssert 0 <= dStart and dStart+len <= dst.len.uint
    doAssert 0 <= sStart and sStart+len <= src.len.uint

  var offset = typeof(len)(0)
  while offset < len:
    dst[dStart + offset] = byte src[sStart + offset]
    offset += 1
|
||||
|
||||
func hashBuffer(ctx: var Sha256Context) =
  ## Compress the content of the internal buffer into the hash state,
  ## then reset the buffer to zeros and rewind its write index.
  discard hashMessageBlocks(ctx.H, ctx.buf)
  ctx.bufIdx = 0
  ctx.buf.setZero()
|
||||
|
||||
# Public API
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
func init*(ctx: var Sha256Context) =
  ## Initialize or reinitialize a Sha256 context:
  ## the message length and buffer are reset
  ## and the hash state is loaded with the SHA256 initial values.

  ctx.bufIdx = 0
  ctx.msgLen = 0
  ctx.buf.setZero()

  ctx.H = [
    0x6a09e667'u32, 0xbb67ae85'u32, 0x3c6ef372'u32, 0xa54ff53a'u32,
    0x510e527f'u32, 0x9b05688c'u32, 0x1f83d9ab'u32, 0x5be0cd19'u32
  ]
|
||||
|
||||
func update*[T: char|byte](ctx: var Sha256Context, message: openarray[T]) =
  ## Feed `message` into a SHA256 context,
  ## allowing a digest to be computed incrementally
  ## over several calls.
  ##
  ## Security note: the tail of your message might be stored
  ## in an internal buffer.
  ## If sensitive content is used, ensure that
  ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
  ## Additionally ensure that the message(s) passed were stored
  ## in memory considered secure for your threat model.
  ##
  ## For passwords and secret keys, you MUST NOT use raw SHA-256
  ## use a Key Derivation Function instead (KDF)

  debug:
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  var
    pos = 0'u                        # next unread index in `message`
    remaining = message.len.uint     # bytes of `message` not yet handled

  ctx.msgLen += remaining

  if ctx.bufIdx != 0:
    # A previous update left a partial block in the buffer
    let filled = ctx.bufIdx.uint
    let room = ctx.buf.sizeof().uint - filled

    if room > remaining:
      # Everything fits without completing a block: just store it
      ctx.buf.copy(dStart = filled, message, sStart = 0, len = remaining)
      ctx.bufIdx += remaining.uint8
      return

    # Complete the buffered block and hash it
    ctx.buf.copy(dStart = filled, message, sStart = 0, len = room)
    ctx.hashBuffer()

    pos = room
    remaining -= room

  # Hash all the complete 64-byte blocks directly from the message
  let consumed = ctx.H.hashMessageBlocks(
    message.toOpenArray(int pos, message.len-1))
  pos += consumed
  remaining -= consumed

  if remaining == 0:
    return

  # Keep the tail in the buffer for the next update or finish
  debug: # TODO: state machine formal verification - https://nim-lang.org/docs/drnim.html
    doAssert ctx.bufIdx == 0
    doAssert pos + remaining == message.len.uint

  ctx.buf.copy(dStart = 0'u, message, sStart = pos, len = remaining)
  ctx.bufIdx = uint8 remaining
|
||||
|
||||
func finish*(ctx: var Sha256Context, digest: var array[32, byte]) =
  ## Terminate a SHA256 computation: pad the buffered tail,
  ## hash it and write the message digest into `digest`.
  ##
  ## Security note: this does not clear the internal buffer.
  ## if sensitive content is used, use "ctx.clear()"
  ## and also make sure that the message(s) passed were stored
  ## in memory considered secure for your threat model.
  ##
  ## For passwords and secret keys, you MUST NOT use raw SHA-256
  ## use a Key Derivation Function instead (KDF)

  debug:
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  # The padding is a '1' bit, then k zero bits
  # so that msgLenBits + 1 + k ≡ 448 mod 512,
  # i.e. in bytes msgLen + 1 + K ≡ 56 mod 64,
  # followed by the message length in bits as a 64-bit big-endian integer.
  const padZone = 56

  # '1' bit followed by 7 zero bits; the rest of the buffer is already zero
  ctx.buf[ctx.bufIdx] = 0x80

  if ctx.bufIdx >= padZone:
    # The marker landed in bytes 56..<64, where the length must go:
    # hash this block and put the length in a fresh zeroed one.
    ctx.hashBuffer()

  let bitLen = ctx.msgLen * 8
  ctx.buf.dumpRawInt(bitLen, padZone, bigEndian)
  discard hashMessageBlocks(ctx.H, ctx.buf)
  dumpHash(digest, ctx.H)
|
||||
|
||||
func clear*(ctx: var Sha256Context) =
  ## Zero the internal message buffer of the context.
  ## The hash state, message length and buffer index are left as-is.
  ## Security note:
  ## For passwords and secret keys, you MUST NOT use raw SHA-256
  ## use a Key Derivation Function instead (KDF)
  # TODO: ensure compiler cannot optimize the code away
  setZero(ctx.buf)
|
||||
|
||||
func hash*[T: char|byte](
       HashKind: type sha256,
       digest: var array[32, byte],
       message: openarray[T],
       clearMem = false) =
  ## One-shot SHA256: write the digest of `message` into `digest`.
  ## With `clearMem`, the temporary context buffer is cleared before returning.
  var ctx {.noInit.}: HashKind
  init(ctx)
  update(ctx, message)
  finish(ctx, digest)

  if clearMem:
    clear(ctx)
|
||||
|
||||
func hash*[T: char|byte](
       HashKind: type sha256,
       message: openarray[T],
       clearMem = false): array[32, byte] =
  ## Produce a SHA256 digest from a message and return it.
  ## `clearMem` is forwarded to the out-parameter overload of `hash`.
  # The parameter spelling matches its use below and the sibling overload
  # so that the module compiles under --styleCheck:error.
  # Callers using `clearmem = ...` are unaffected: Nim identifiers
  # are case-insensitive past the first character.
  HashKind.hash(result, message, clearMem)
|
78
constantine/io/endians.nim
Normal file
78
constantine/io/endians.nim
Normal file
@ -0,0 +1,78 @@
|
||||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import ../config/common
|
||||
|
||||
# perf critical we don't want bound checks here
|
||||
# So no checks and we avoid signed int to ensure no exceptions.
|
||||
# TODO: Nim formal verification: https://nim-lang.org/docs/drnim.html
|
||||
{.push checks:off, raises: [].}
|
||||
|
||||
template toByte*(x: SomeUnsignedInt): byte =
  ## Convert an unsigned integer to a byte.
  ## The conversion is range-checked when evaluated at compile-time,
  ## so the value is masked first in the VM.
  ## At runtime we want a plain register-level conversion,
  ## a single "mov byte" instruction.
  when nimvm:
    byte(x and 0xFF)
  else:
    x.byte
|
||||
|
||||
func parseFromBlob*[T: byte|char](
           dst: var SomeUnsignedInt,
           src: openArray[T],
           cursor: var uint, endian: static Endianness) {.inline.} =
  ## Deserialize an unsigned integer from a raw binary blob.
  ## Reading starts at index `cursor` of `src`; on return `cursor`
  ## has advanced by N bytes where N is the size of `dst` type in bytes.
  ## The binary blob is interpreted as:
  ## - an array of words traversed from 0 ..< len (little-endian), via an incremented `cursor`
  ## - with each word being of `endian` ordering for deserialization purpose.
  debug:
    doAssert 0 <= cursor and cursor < src.len.uint
    doAssert cursor + sizeof(dst).uint <= src.len.uint,
      "cursor (" & $cursor & ") + sizeof(dst) (" & $sizeof(dst) &
      ") <= src.len (" & $src.len & ")"

  type U = typeof(dst)
  const L = sizeof(dst)

  var word: U = 0
  for i in 0'u ..< L:
    when endian == littleEndian:
      # byte i is the i-th least significant one
      word = word or (U(src[cursor+i]) shl (i * 8))
    else:
      # byte 0 is the most significant one
      word = word or (U(src[cursor+i]) shl ((L - 1 - i) * 8))

  dst = word
  cursor.inc(L)
|
||||
|
||||
func dumpRawInt*[T: byte|char](
           dst: var openArray[T],
           src: SomeUnsignedInt,
           cursor: uint, endian: static Endianness) {.inline.} =
  ## Dump an integer into raw binary form.
  ## The N bytes of `src`, where N is the size of `src` type in bytes,
  ## are written to dst[cursor ..< cursor+N].
  ## `cursor` is passed by value and is NOT updated:
  ## the caller is responsible for advancing its own index.
  ## The binary blob is interpreted as:
  ## - an array of words traversed from 0 ..< len (little-endian)
  ## - with each word being of `endian` ordering for serialization purpose.
  debug:
    doAssert 0 <= cursor and cursor < dst.len.uint
    doAssert cursor + sizeof(src).uint <= dst.len.uint,
      "cursor (" & $cursor & ") + sizeof(src) (" & $sizeof(src) &
      ") <= dst.len (" & $dst.len & ")"

  const L = uint sizeof(src)

  # The explicit T(...) conversion lets the char instantiation type-check;
  # it is a no-op when T is byte.
  when endian == littleEndian:
    for i in 0'u ..< L:
      dst[cursor+i] = T(toByte(src shr (i * 8)))
  else:
    for i in 0'u ..< L:
      dst[cursor+i] = T(toByte(src shr ((L-i-1) * 8)))
|
@ -12,7 +12,8 @@
|
||||
|
||||
import
|
||||
../primitives/constant_time,
|
||||
../config/[common, type_bigint]
|
||||
../config/[common, type_bigint],
|
||||
./endians
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
@ -152,24 +153,17 @@ func fromUint*(
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
template toByte(x: SomeUnsignedInt): byte =
|
||||
## At compile-time, conversion to bytes checks the range
|
||||
## we want to ensure this is done at the register level
|
||||
## at runtime in a single "mov byte" instruction
|
||||
when nimvm:
|
||||
byte(x and 0xFF)
|
||||
else:
|
||||
byte(x)
|
||||
|
||||
template blobFrom(dst: var openArray[byte], src: SomeUnsignedInt, startIdx: int, endian: static Endianness) =
|
||||
## Write an integer into a raw binary blob
|
||||
## Swapping endianness if needed
|
||||
## startidx is the first written array item if littleEndian is requested
|
||||
## or the last if bigEndian is requested
|
||||
when endian == cpuEndian:
|
||||
for i in 0 ..< sizeof(src):
|
||||
dst[startIdx+i] = toByte((src shr (i * 8)))
|
||||
dst[startIdx+i] = toByte(src shr (i * 8))
|
||||
else:
|
||||
for i in 0 ..< sizeof(src):
|
||||
dst[startIdx+sizeof(src)-1-i] = toByte((src shr (i * 8)))
|
||||
dst[startIdx+sizeof(src)-1-i] = toByte(src shr (i * 8))
|
||||
|
||||
func exportRawUintLE(
|
||||
dst: var openarray[byte],
|
||||
|
@ -364,6 +364,14 @@ func random_long01Seq_with_randZ*(rng: var RngState, T: typedesc[ECP_ShortW_Proj
|
||||
## Skewed towards long bitstrings of 0 or 1
|
||||
rng.random_long01Seq_with_randZ(result)
|
||||
|
||||
# Byte sequences
|
||||
# ------------------------------------------------------------
|
||||
|
||||
func random_byte_seq*(rng: var RngState, length: int): seq[byte] =
  ## Return a sequence of `length` bytes,
  ## each converted from one `rng.next()` draw.
  result = newSeq[byte](length)
  for i in 0 ..< length:
    result[i] = byte rng.next()
|
||||
|
||||
# Sanity checks
|
||||
# ------------------------------------------------------------
|
||||
|
||||
|
115
tests/t_hash_sha256_vs_openssl.nim
Normal file
115
tests/t_hash_sha256_vs_openssl.nim
Normal file
@ -0,0 +1,115 @@
|
||||
import
|
||||
# Internals
|
||||
../constantine/hashes/h_sha256,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe,
|
||||
# Third-party
|
||||
stew/byteutils
|
||||
|
||||
# Binding to the `SHA256` symbol loaded dynamically from "libssl.so".
# The digest is written through `digest` and a pointer to a digest is returned.
proc SHA256[T: byte|char](
       msg: openarray[T],
       digest: ptr array[32, byte] = nil
     ): ptr array[32, byte] {.importc, cdecl, dynlib: "libssl.so".}
|
||||
|
||||
proc SHA256_OpenSSL[T: byte|char](
       digest: var array[32, byte],
       s: openarray[T]) =
  ## Reference implementation: hash `s` with the library binding,
  ## writing into `digest`. The returned pointer is not needed.
  discard SHA256(s, addr digest)
|
||||
|
||||
echo "\n------------------------------------------------------\n"
|
||||
const SmallSizeIters = 128
|
||||
const LargeSizeIters = 10
|
||||
|
||||
proc sanityABC =
  ## Known-answer test for the message "abc"
  let expected = hexToByteArray[32](
    "BA7816BF8F01CFEA414140DE5DAE2223" &
    "B00361A396177A9CB410FF61F20015AD")
  let msg = "abc"

  var digest: array[32, byte]
  sha256.hash(digest, msg)

  doAssert digest == expected
|
||||
|
||||
proc sanityABC2 =
  ## Known-answer test for a 56-byte message,
  ## a length for which the padding spills into a second block
  let expected = hexToByteArray[32](
    "248D6A61D20638B8E5C026930C3E6039" &
    "A33CE45964FF2167F6ECEDD419DB06C1")
  let msg = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"

  var digest: array[32, byte]
  sha256.hash(digest, msg)

  doAssert digest == expected
|
||||
|
||||
proc innerTest(rng: var RngState, sizeRange: Slice[int]) =
  ## Differential test: hash a random message whose size is drawn
  ## from `sizeRange` with both implementations and compare the digests.
  let size = rng.random_unsafe(sizeRange)
  let msg = rng.random_byte_seq(size)

  var bufCt: array[32, byte]
  var bufOssl: array[32, byte]

  hash(sha256, bufCt, msg)
  bufOssl.SHA256_OpenSSL(msg)

  doAssert bufCt == bufOssl
|
||||
|
||||
proc chunkTest(rng: var RngState, sizeRange: Slice[int]) =
  ## Check that hashing a random message incrementally, in chunks of
  ## random sizes, gives the same digest as hashing it in one pass.
  ## The message size is drawn from `sizeRange`.
  let size = rng.random_unsafe(sizeRange)
  let msg = rng.random_byte_seq(size)

  var bufOnePass: array[32, byte]
  sha256.hash(bufOnePass, msg)

  var bufChunked: array[32, byte]
  let maxChunk = max(2, sizeRange.b div 10) # Consume up to 10% at once

  var ctx: Sha256Context
  ctx.init()
  var cur = 0
  while size - cur > 0:
    # A chunk size of 0 is possible: it exercises the empty update
    # and leaves `cur` unchanged for this iteration.
    let chunkSize = rng.random_unsafe(0 ..< maxChunk)
    let stop = min(cur+chunkSize-1, size-1)
    let consumed = stop-cur+1
    ctx.update(msg.toOpenArray(cur, stop))
    cur += consumed

  ctx.finish(bufChunked)

  doAssert bufOnePass == bufChunked
|
||||
|
||||
proc main() =
  ## Run the known-answer tests, then the differential and chunked tests
  ## over increasing message sizes, with a fixed seed for reproducibility.
  echo "SHA256 - sanity checks"
  sanityABC()
  sanityABC2()

  echo "SHA256 - Starting differential testing vs OpenSSL"

  var rng: RngState
  rng.seed(0xFACADE)

  echo "SHA256 - 0 <= size < 64 - exhaustive"
  for i in 0 ..< 64:
    rng.innerTest(i .. i)

  echo "SHA256 - 0 <= size < 64 - exhaustive chunked"
  for i in 0 ..< 64:
    rng.chunkTest(i .. i)

  # The labels below state the range actually sampled (0 ..< 1024)
  echo "SHA256 - 0 <= size < 1024B"
  for _ in 0 ..< SmallSizeIters:
    rng.innerTest(0 ..< 1024)

  echo "SHA256 - 0 <= size < 1024B - chunked"
  for _ in 0 ..< SmallSizeIters:
    rng.chunkTest(0 ..< 1024)

  echo "SHA256 - 1MB <= size < 50MB"
  for _ in 0 ..< LargeSizeIters:
    rng.innerTest(1_000_000 ..< 50_000_000)

  echo "SHA256 - Differential testing vs OpenSSL - SUCCESS"
|
||||
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user