Poly1305 Message Authentication Code (#186)
* Groundwork for Poly1305 MAC * Implement fast reduction for Poly1305 * don't import assembly files when compiling without assembly
This commit is contained in:
parent
c2eb42b769
commit
742cecce08
|
@ -0,0 +1,65 @@
|
||||||
|
import
|
||||||
|
# Internals
|
||||||
|
../constantine/mac/mac_poly1305,
|
||||||
|
# Helpers
|
||||||
|
../helpers/prng_unsafe,
|
||||||
|
./bench_blueprint,
|
||||||
|
# C API
|
||||||
|
system/ansi_c
|
||||||
|
|
||||||
|
proc separator*() =
  ## Print a separator line of width 69 by forwarding to the
  ## one-argument `separator` overload (defined outside this file).
  separator(69)
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
|
proc report(op: string, bytes: int, startTime, stopTime: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Print one benchmark result line for `op`.
  ##
  ## - `bytes`: size of the processed message, used for the cycles/byte figure
  ## - `startTime`/`stopTime`: monotonic timestamps around the whole run
  ## - `startClk`/`stopClk`: cycle counter readings around the whole run
  ## - `iters`: number of iterations the run was made of
  ##
  ## Time and cycles are averaged per iteration. The cycle columns are only
  ## printed when `SupportsGetTicks` is true.
  let
    elapsedPerIter = (stopTime - startTime) div iters
    ns = elapsedPerIter.inNanoseconds()
    throughput = 1e9 / ns.float64 # operations per second

  when SupportsGetTicks:
    let
      cycles = (stopClk - startClk) div iters
      cyclePerByte = float64(cycles) / float64(bytes)
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op {cycles:>10} cycles {cyclePerByte:>5.2f} cycles/byte"
  else:
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op"
|
||||||
|
|
||||||
|
template bench(op: string, bytes: int, iters: int, body: untyped): untyped =
  ## Run `body` through `measure` for `iters` iterations, then print
  ## the result line with `report`.
  ##
  ## `startTime`, `stopTime`, `startClk` and `stopClk` are handed to
  ## `measure` as names; presumably `measure` declares them at the
  ## call site - confirm in bench_blueprint.
  measure(
    iters,
    startTime, stopTime,
    startClk, stopClk,
    body
  )
  report(
    op, bytes,
    startTime, stopTime,
    startClk, stopClk,
    iters
  )
|
||||||
|
|
||||||
|
proc benchPoly1305_constantine[T](msg: openArray[T], msgComment: string, iters: int) =
  ## Benchmark `poly1305.auth` over `msg` for `iters` iterations.
  ## `msgComment` is appended to the label printed in the report line
  ## (typically the message size, e.g. "32B").
  let label = "Poly1305 - Constantine - " & msgComment

  # Fixed 32-byte key; only timing matters here, the tag is discarded.
  let ikm = [
    byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
         0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
         0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
         0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
  ]
  var tag: array[16, byte]

  bench(label, msg.len, iters):
    poly1305.auth(tag, msg, ikm)
|
||||||
|
|
||||||
|
when isMainModule:
  proc main() =
    ## Benchmark Poly1305 on random messages of increasing size.
    ## Each entry is (message size in bytes, label, iteration count);
    ## entries run in order, each on a freshly generated message.
    const workloads = [
      (size: 32,          label: "32B",   iters: 100),
      (size: 64,          label: "64B",   iters: 100),
      (size: 128,         label: "128B",  iters: 100),
      (size: 576,         label: "576B",  iters: 50),
      (size: 8192,        label: "8192B", iters: 25),
      (size: 1_000_000,   label: "1MB",   iters: 16),
      (size: 10_000_000,  label: "10MB",  iters: 16),
      (size: 100_000_000, label: "100MB", iters: 3)
    ]

    for workload in workloads:
      let msg = rng.random_byte_seq(workload.size)
      benchPoly1305_constantine(msg, workload.label, workload.iters)

  main()
|
|
@ -69,17 +69,33 @@ proc benchSHA256_openssl[T](msg: openarray[T], msgComment: string, iters: int) =
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
proc main() =
|
proc main() =
|
||||||
block:
|
block:
|
||||||
let msg128B = rng.random_byte_seq(32)
|
let msg32B = rng.random_byte_seq(32)
|
||||||
benchSHA256_constantine(msg128B, "32B", 32)
|
benchSHA256_constantine(msg32B, "32B", 100)
|
||||||
benchSHA256_openssl(msg128B, "32B", 32)
|
benchSHA256_openssl(msg32B, "32B", 100)
|
||||||
|
block:
|
||||||
|
let msg64B = rng.random_byte_seq(64)
|
||||||
|
benchSHA256_constantine(msg64B, "64B", 100)
|
||||||
|
benchSHA256_openssl(msg64B, "64B", 100)
|
||||||
block:
|
block:
|
||||||
let msg128B = rng.random_byte_seq(128)
|
let msg128B = rng.random_byte_seq(128)
|
||||||
benchSHA256_constantine(msg128B, "128B", 128)
|
benchSHA256_constantine(msg128B, "128B", 100)
|
||||||
benchSHA256_openssl(msg128B, "128B", 128)
|
benchSHA256_openssl(msg128B, "128B", 100)
|
||||||
block:
|
block:
|
||||||
let msg5MB = rng.random_byte_seq(5_000_000)
|
let msg576B = rng.random_byte_seq(576)
|
||||||
benchSHA256_constantine(msg5MB, "5MB", 16)
|
benchSHA256_constantine(msg576B, "576B", 50)
|
||||||
benchSHA256_openssl(msg5MB, "5MB", 16)
|
benchSHA256_openssl(msg576B, "576B", 50)
|
||||||
|
block:
|
||||||
|
let msg8192B = rng.random_byte_seq(8192)
|
||||||
|
benchSHA256_constantine(msg8192B, "8192B", 25)
|
||||||
|
benchSHA256_openssl(msg8192B, "8192B", 25)
|
||||||
|
block:
|
||||||
|
let msg1MB = rng.random_byte_seq(1_000_000)
|
||||||
|
benchSHA256_constantine(msg1MB, "1MB", 16)
|
||||||
|
benchSHA256_openssl(msg1MB, "1MB", 16)
|
||||||
|
block:
|
||||||
|
let msg10MB = rng.random_byte_seq(10_000_000)
|
||||||
|
benchSHA256_constantine(msg10MB, "10MB", 16)
|
||||||
|
benchSHA256_openssl(msg10MB, "10MB", 16)
|
||||||
block:
|
block:
|
||||||
let msg100MB = rng.random_byte_seq(100_000_000)
|
let msg100MB = rng.random_byte_seq(100_000_000)
|
||||||
benchSHA256_constantine(msg100MB, "100MB", 3)
|
benchSHA256_constantine(msg100MB, "100MB", 3)
|
||||||
|
|
|
@ -192,6 +192,10 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
||||||
# ----------------------------------------------------------
|
# ----------------------------------------------------------
|
||||||
("tests/t_cipher_chacha20.nim", false),
|
("tests/t_cipher_chacha20.nim", false),
|
||||||
|
|
||||||
|
# Message Authentication Code
|
||||||
|
# ----------------------------------------------------------
|
||||||
|
("tests/t_mac_poly1305.nim", false),
|
||||||
|
|
||||||
# Protocols
|
# Protocols
|
||||||
# ----------------------------------------------------------
|
# ----------------------------------------------------------
|
||||||
("tests/t_ethereum_evm_precompiles.nim", false),
|
("tests/t_ethereum_evm_precompiles.nim", false),
|
||||||
|
|
|
@ -58,7 +58,7 @@ func hash*[DigestSize: static int, T: char|byte](
|
||||||
func hash*[T: char|byte](
|
func hash*[T: char|byte](
|
||||||
HashKind: type CryptoHash,
|
HashKind: type CryptoHash,
|
||||||
message: openarray[T],
|
message: openarray[T],
|
||||||
clearmem = false): array[HashKind.sizeInBytes, byte] =
|
clearmem = false): array[HashKind.sizeInBytes, byte] {.noInit.} =
|
||||||
## Produce a digest from a message
|
## Produce a digest from a message
|
||||||
HashKind.hash(result, message, clearMem)
|
HashKind.hash(result, message, clearMem)
|
||||||
|
|
||||||
|
|
|
@ -51,10 +51,6 @@ type
|
||||||
{.push raises: [].}
|
{.push raises: [].}
|
||||||
{.push checks: off.}
|
{.push checks: off.}
|
||||||
|
|
||||||
func setZero[N](a: var array[N, SomeNumber]){.inline.} =
|
|
||||||
for i in 0 ..< a.len:
|
|
||||||
a[i] = 0
|
|
||||||
|
|
||||||
template rotr(x, n: uint32): uint32 =
|
template rotr(x, n: uint32): uint32 =
|
||||||
## Rotate right the bits
|
## Rotate right the bits
|
||||||
# We always use it with constants in 0 ..< 32
|
# We always use it with constants in 0 ..< 32
|
||||||
|
@ -272,24 +268,6 @@ func dumpHash(
|
||||||
digest.dumpRawInt(H[i], dstIdx, bigEndian)
|
digest.dumpRawInt(H[i], dstIdx, bigEndian)
|
||||||
dstIdx += uint sizeof(uint32)
|
dstIdx += uint sizeof(uint32)
|
||||||
|
|
||||||
func copy[N: static int, T: byte|char](
|
|
||||||
dst: var array[N, byte],
|
|
||||||
dStart: SomeInteger,
|
|
||||||
src: openArray[T],
|
|
||||||
sStart: SomeInteger,
|
|
||||||
len: SomeInteger
|
|
||||||
) =
|
|
||||||
## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
|
|
||||||
## Unlike the standard library, this cannot throw
|
|
||||||
## even a defect.
|
|
||||||
## It also handles copy of char into byte arrays
|
|
||||||
debug:
|
|
||||||
doAssert 0 <= dStart and dStart+len <= dst.len.uint
|
|
||||||
doAssert 0 <= sStart and sStart+len <= src.len.uint
|
|
||||||
|
|
||||||
for i in 0 ..< len:
|
|
||||||
dst[dStart + i] = byte src[sStart + i]
|
|
||||||
|
|
||||||
func hashBuffer(ctx: var Sha256Context) =
|
func hashBuffer(ctx: var Sha256Context) =
|
||||||
discard ctx.H.hashMessageBlocks(ctx.buf)
|
discard ctx.H.hashMessageBlocks(ctx.buf)
|
||||||
ctx.buf.setZero()
|
ctx.buf.setZero()
|
||||||
|
@ -445,4 +423,7 @@ func clear*(ctx: var Sha256Context) =
|
||||||
## For passwords and secret keys, you MUST NOT use raw SHA-256
|
## For passwords and secret keys, you MUST NOT use raw SHA-256
|
||||||
## use a Key Derivation Function instead (KDF)
|
## use a Key Derivation Function instead (KDF)
|
||||||
# TODO: ensure compiler cannot optimize the code away
|
# TODO: ensure compiler cannot optimize the code away
|
||||||
|
ctx.H.setZero()
|
||||||
ctx.buf.setZero()
|
ctx.buf.setZero()
|
||||||
|
ctx.msgLen = 0
|
||||||
|
ctx.bufIdx = 0
|
|
@ -0,0 +1,355 @@
|
||||||
|
# Constantine
|
||||||
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||||
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||||
|
# Licensed and distributed under either of
|
||||||
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||||
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||||
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||||
|
|
||||||
|
import
|
||||||
|
../platforms/abstractions,
|
||||||
|
../math/arithmetic/bigints,
|
||||||
|
../math/arithmetic/[limbs, limbs_extmul],
|
||||||
|
../math/io/io_bigints
|
||||||
|
|
||||||
|
when UseASM_X86_64:
|
||||||
|
import ../math/arithmetic/assembly/limbs_asm_modular_x86
|
||||||
|
|
||||||
|
# No exceptions allowed
|
||||||
|
{.push raises: [].}
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Poly1305 Message Authentication Code
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
# TODO: instead of using a saturated representation,
|
||||||
|
# since there are 62 extra bits unused in the last limb
|
||||||
|
# use an unsaturated representation and remove all carry dependency chains.
|
||||||
|
# Given the number of add with carries, this would significantly
|
||||||
|
# improve instruction level parallelism.
|
||||||
|
#
|
||||||
|
# Also vectorizing the code requires removing carry chains anyway.
|
||||||
|
|
||||||
|
const P1305 = BigInt[130].fromHex"0x3fffffffffffffffffffffffffffffffb"
|
||||||
|
|
||||||
|
func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
  ## Partially reduce the wide value `a` modulo p = 2¹³⁰-5 into `r`.
  ##
  ## The prime 2¹³⁰-5 has a special form 2ᵐ-c
  ## called "Crandall prime" or Pseudo-Mersenne Prime
  ## in the literature
  ## which allows fast reduction from the fact that
  ##        2ᵐ-c ≡  0     (mod p)
  ##   <=>  2ᵐ   ≡  c     (mod p)   [1]
  ##   <=> a2ᵐ+b ≡ ac + b (mod p)
  ##
  ## This partially reduces the input in range [0, 2¹³⁰)
  ##
  ## `r` receives the result, `a` is the unreduced input.
  ## The limb counts are checked at compile time:
  ## (N1, N2) = (3, 4) with 64-bit words, (5, 8) otherwise.
  #
  # Assuming 64-bit words,
  #   N1 = 3 words (192-bit necessary for 2¹³⁰-1)
  #   N2 = 4 words (256-bit necessary for 2¹³¹.2¹²⁴)
  # Assuming 32-bit words,
  #   N1 = 5 words (160-bit necessary for 2¹³⁰-1)
  #   N2 = 8 words (256-bit necessary for 2¹³¹.2¹²⁴)
  #
  # from 64-bit, starting from [1]
  #   2ᵐ      ≡ c     (mod p)
  #   2¹³⁰    ≡ 5     (mod p)
  # 2¹³⁰.2⁶²  ≡ 5.2⁶² (mod p)
  #   2¹⁹²    ≡ 5.2⁶² (mod p)
  #
  # Hence if we call a the [2¹⁹², 2²⁵⁶) range
  # and b the [0, 2¹⁹²) range
  # we have
  # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p)
  #
  # Then we can handle the highest word which has
  # 62 bits that should be folded back as well
  #
  # Similarly for 32-bit
  #   2¹⁶⁰    ≡ 5.2³⁰ (mod p)
  # and we need to fold back the top 30 bits
  #
  # But there is a twist. 5.2⁶² need 65-bit not 64
  # and 5.2³⁰ need 33-bit not 32

  when WordBitwidth == 64:
    static:
      doAssert N1 == 3
      doAssert N2 == 4

    block:
      # First pass, fold everything greater than 2¹⁹²-1
      # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p)
      # scale by 5.2⁶¹ first as 5.2⁶² does not fit in 64-bit words
      const c = SecretWord 5
      const cExcess = c shl 61

      # NOTE(review): the carry left after each 3-limb addition below is
      # not propagated any further - presumably the input bound guarantees
      # it is zero; confirm.
      var carry: Carry
      var hi, lo: SecretWord
      mul(hi, lo, a[3], cExcess)
      addC(carry, r[0], lo, a[0], Carry(0))
      addC(carry, r[1], hi, a[1], carry)
      addC(carry, r[2], Zero, a[2], carry)
      # finally double to scale by 5.2⁶²
      addC(carry, r[0], lo, r[0], Carry(0))
      addC(carry, r[1], hi, r[1], carry)
      addC(carry, r[2], Zero, r[2], carry)
  else:
    static:
      doAssert N1 == 5
      doAssert N2 == 8

    block:
      # First pass, fold everything greater than 2¹⁶⁰-1
      # a2¹⁶⁰+b ≡ a.5.2³⁰ + b (mod p)
      # scale by 5.2²⁹ first as 5.2³⁰ does not fit in 32-bit words
      const c = SecretWord 5
      const cExcess = c shl 29

      # Start from the low N1 words of the input ...
      staticFor i, 0, N1:
        r[i] = a[i]

      # ... then fold in the upper words a[5], a[6], a[7]; each call targets
      # a 3-word window of r shifted up by one word from the previous call.
      mulDoubleAcc(r[2], r[1], r[0], a[5], cExcess)
      mulDoubleAcc(r[3], r[2], r[1], a[6], cExcess)
      mulDoubleAcc(r[4], r[3], r[2], a[7], cExcess)

  const bits = 130
  const excessBits = wordsRequired(bits)*WordBitWidth - bits

  # Second pass, fold everything greater than 2¹³⁰-1
  # into the lower bits
  var carry, carry2: Carry
  var hi = r[N1-1] shr (WordBitWidth - excessBits)
  r[N1-1] = r[N1-1] and (MaxWord shr excessBits)

  # hi *= 5, with overflow stored in carry
  let hi4 = hi shl 2                     # Cannot overflow as we have 2 spare bits
  addC(carry2, hi, hi, hi4, Carry(0))    # Use the carry bit for storing a 63/31 bit result

  # Process with actual fold
  addC(carry, r[0], r[0], hi, Carry(0))
  addC(carry, r[1], r[1], SecretWord(carry2), carry)
  staticFor i, 2, N1:
    addC(carry, r[i], r[i], Zero, carry)
|
||||||
|
|
||||||
|
func finalReduce_1305[N: static int](a: var Limbs[N]) =
  ## Bring `a`, given in the redundant range [0, 2¹³¹-10),
  ## to its canonical representative in [0, 2¹³⁰-5).
  #
  # Method: subtract p = 2¹³⁰-5 once, and add p back
  # if that subtraction borrowed.
  when UseASM_X86_64 and a.len <= 6:
    # Assembly path: modular subtraction of p with modulus p.
    submod_asm(a, a, P1305.limbs, P1305.limbs)
  else:
    let borrow = SecretBool sub(a, P1305.limbs)
    discard a.cadd(P1305.limbs, borrow)
|
||||||
|
|
||||||
|
const BlockSize = 16
|
||||||
|
|
||||||
|
type
  Poly1305_CTX = object
    ## State of an incremental Poly1305 computation.
    acc: BigInt[130+1]          # Accumulator. After an unreduced sum, up to 131 bits may be used
    r: BigInt[124]              # First key half; 124 bits remain after clamping
    s: BigInt[128]              # Second key half, added to the accumulator when finishing
    buf: array[BlockSize, byte] # Holds the bytes of a not-yet-complete block
    msgLen: uint64              # Total number of message bytes received so far
    bufIdx: uint8               # Number of bytes currently stored in `buf`

  # Exported name used by callers, e.g. `poly1305.auth(...)`
  poly1305* = Poly1305_CTX
|
||||||
|
|
||||||
|
func macMessageBlocks[T: byte|char](
       acc: var BigInt[130+1],
       r: BigInt[124],
       message: openArray[T],
       blockSize = BlockSize): uint =
  ## Absorb every complete 16-byte block of `message` into the
  ## accumulator `acc`, using the clamped key part `r`.
  ## Returns the number of bytes processed (a multiple of 16,
  ## 0 when `message` holds less than one block).
  ##
  ## `blockSize` only selects where the padding bit is set
  ## (bit `8*blockSize`); data is always read 16 bytes at a time.
  ## When authenticating one partial block stored in a zero-padded
  ## 16-byte buffer, pass the number of meaningful bytes as `blockSize`.
  let blockCount = int(message.len.uint div BlockSize)
  if blockCount == 0:
    return 0

  var chunk {.noInit.}: BigInt[130+1]
  # Product of acc (131 bits) and r (124 bits after clamping)
  var product {.noInit.}: BigInt[130+1+124]

  for blk in 0 ..< blockCount:
    let first = blk*BlockSize
    let last = first + BlockSize - 1

    # Read 16 bytes as a little-endian integer, below 2¹²⁸
    when T is byte:
      chunk.unmarshal(message.toOpenArray(first, last), littleEndian)
    else:
      chunk.unmarshal(message.toOpenArrayByte(first, last), littleEndian)

    chunk.setBit(8*blockSize) # padding bit; bit 128 for a full block
    acc += chunk              # unreduced sum, may use up to 131 bits
    product.prod(acc, r)      # wide product, reduced just below

    partialReduce_1305(acc.limbs, product.limbs)

  result = BlockSize * blockCount.uint
|
||||||
|
|
||||||
|
func macBuffer(ctx: var Poly1305_CTX, blockSize: int) =
  ## Absorb the content of the internal buffer into the accumulator,
  ## then wipe the buffer and reset its fill index.
  ## `blockSize` is the number of meaningful bytes in the buffer
  ## and is forwarded to `macMessageBlocks`.
  discard macMessageBlocks(ctx.acc, ctx.r, ctx.buf, blockSize)
  ctx.bufIdx = 0
  ctx.buf.setZero()
|
||||||
|
|
||||||
|
# Public API
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
|
||||||
|
func init*(ctx: var Poly1305_CTX, nonReusedKey: array[32, byte]) =
  ## Initialize a Poly1305 MAC (Message Authentication Code) context.
  ##
  ## `nonReusedKey` is a unique, never reused, pre-shared key
  ## between the parties that want to authenticate messages
  ## between each other.
  ## Bytes 0..15 become `r` (after clamping), bytes 16..31 become `s`,
  ## both read as little-endian integers.
  const clamp = BigInt[128].fromHex"0x0ffffffc0ffffffc0ffffffc0fffffff"

  # Reset the running state
  ctx.acc.setZero()
  ctx.buf.setZero()
  ctx.msgLen = 0
  ctx.bufIdx = 0

  # Load the first key half and mask it limb by limb with `clamp`
  ctx.r.unmarshal(nonReusedKey.toOpenArray(0, 15), littleEndian)
  staticFor i, 0, ctx.r.limbs.len:
    ctx.r.limbs[i] = ctx.r.limbs[i] and clamp.limbs[i]

  # Load the second key half as is
  ctx.s.unmarshal(nonReusedKey.toOpenArray(16, 31), littleEndian)
|
||||||
|
|
||||||
|
func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
  ## Append `message` to a Poly1305 authentication context,
  ## for incremental Poly1305 computation.
  ##
  ## Security note: the tail of your message might be stored
  ## in an internal buffer.
  ## If sensitive content is used, ensure that
  ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
  ## Additionally ensure that the message(s) passed were stored
  ## in memory considered secure for your threat model.

  debug:
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  if message.len == 0:
    return

  var
    offset = 0'u                    # next unread position in `message`
    remaining = message.len.uint    # bytes of `message` not yet handled

  ctx.msgLen += remaining

  if ctx.bufIdx != 0:
    # A previous update left a partial block in the buffer
    let
      pending = ctx.bufIdx.uint
      room = ctx.buf.sizeof().uint - pending

    if room > remaining:
      # Everything fits and the block stays incomplete: just store it
      ctx.buf.copy(dStart = pending, message, sStart = 0, len = remaining)
      ctx.bufIdx += remaining.uint8
      return

    # Complete the buffered block and absorb it
    ctx.buf.copy(dStart = pending, message, sStart = 0, len = room)
    ctx.macBuffer(blockSize = BlockSize)

    offset = room
    remaining -= room

  # Absorb all complete blocks (16 bytes each) straight from `message`
  let consumed = macMessageBlocks(
    ctx.acc,
    ctx.r,
    message.toOpenArray(int offset, message.len-1),
    blockSize = BlockSize
  )
  offset += consumed
  remaining -= consumed

  if remaining != 0:
    # Keep the incomplete tail for the next update/finish
    debug: # TODO: state machine formal verification - https://nim-lang.org/docs/drnim.html
      doAssert ctx.bufIdx == 0
      doAssert offset + remaining == message.len.uint

    ctx.buf.copy(dStart = 0'u, message, sStart = offset, len = remaining)
    ctx.bufIdx = uint8 remaining
|
||||||
|
|
||||||
|
func finish*(ctx: var Poly1305_CTX, tag: var array[16, byte]) =
  ## Finalize a Poly1305 authentication
  ## and write the authentication tag to the `tag` buffer.
  ##
  ## Security note: this does not clear the internal context.
  ## If sensitive content is used, use "ctx.clear()"
  ## and also make sure that the message(s) passed were stored
  ## in memory considered secure for your threat model.

  debug:
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  # Absorb the last, incomplete block if there is one
  if ctx.bufIdx != 0:
    ctx.macBuffer(blockSize = ctx.bufIdx.int)

  # The accumulator is only partially reduced to [0, 2¹³⁰)
  # Map it to [0, 2¹³⁰-5)
  finalReduce_1305(ctx.acc.limbs)

  # From here on only the 128 least significant bits matter
  var tagInt {.noInit.}: BigInt[128]
  tagInt.copyTruncatedFrom(ctx.acc)
  tagInt += ctx.s

  tag.marshal(tagInt, littleEndian)

  debug:
    doAssert ctx.bufIdx == 0
    for i in 0 ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0
|
||||||
|
|
||||||
|
func clear*(ctx: var Poly1305_CTX) =
  ## Reset every field of the context to zero:
  ## key material (`r`, `s`), accumulator, buffer and counters.
  # TODO: ensure compiler cannot optimize the code away

  # Key material
  ctx.r.setZero()
  ctx.s.setZero()

  # Running state
  ctx.acc.setZero()
  ctx.buf.setZero()
  ctx.bufIdx = 0
  ctx.msgLen = 0
|
||||||
|
|
||||||
|
func auth*[T: char|byte](
       _: type poly1305,
       tag: var array[16, byte],
       message: openArray[T],
       nonReusedKey: array[32, byte],
       clearMem = false) =
  ## One-shot Poly1305: write into `tag` the authentication tag
  ## of `message` under the pre-shared, unique, never reused
  ## secret key `nonReusedKey`.
  ##
  ## With `clearMem = true` the temporary context is wiped
  ## with `clear` before returning.

  var state {.noInit.}: poly1305 # every field is set by `init`
  init(state, nonReusedKey)
  update(state, message)
  finish(state, tag)

  if clearMem:
    clear(state)
|
||||||
|
|
||||||
|
func auth*[T: char|byte](
       _: type poly1305,
       message: openArray[T],
       nonReusedKey: array[32, byte],
       clearMem = false): array[16, byte] {.noInit.} =
  ## One-shot Poly1305 returning the tag by value:
  ## authenticate `message` under the pre-shared, unique,
  ## never reused secret key `nonReusedKey`.
  ## Forwards to the overload that writes the tag into a buffer.
  auth(poly1305, result, message, nonReusedKey, clearMem)
|
|
@ -344,6 +344,16 @@ func bit0*(a: BigInt): Ct[uint8] =
|
||||||
## Access the least significant bit
|
## Access the least significant bit
|
||||||
ct(a.limbs[0] and One, uint8)
|
ct(a.limbs[0] and One, uint8)
|
||||||
|
|
||||||
|
func setBit*[bits: static int](a: var BigInt[bits], index: int) =
  ## Set the bit at position `index` of `a` to 1.
  ## This has no effect if it is already 1.
  ## No explicit range check on `index` is done in this function.
  const SlotShift = log2_vartime(WordBitWidth.uint32) # index -> limb number
  const SelectMask = WordBitWidth - 1                 # index -> bit within the limb

  let limbIdx = index shr SlotShift
  let mask = One shl (index and SelectMask)
  a.limbs[limbIdx] = a.limbs[limbIdx] or mask
|
||||||
|
|
||||||
# Multiplication by small constants
|
# Multiplication by small constants
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -57,13 +57,3 @@ const
|
||||||
const CttASM {.booldefine.} = true
|
const CttASM {.booldefine.} = true
|
||||||
const UseASM_X86_32* = CttASM and X86 and GCC_Compatible
|
const UseASM_X86_32* = CttASM and X86 and GCC_Compatible
|
||||||
const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32
|
const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32
|
||||||
|
|
||||||
# ############################################################
|
|
||||||
#
|
|
||||||
# Instrumentation
|
|
||||||
#
|
|
||||||
# ############################################################
|
|
||||||
|
|
||||||
template debug*(body: untyped): untyped =
|
|
||||||
when defined(debugConstantine):
|
|
||||||
body
|
|
||||||
|
|
|
@ -94,37 +94,6 @@ when sizeof(int) == 8:
|
||||||
#
|
#
|
||||||
# ############################################################
|
# ############################################################
|
||||||
|
|
||||||
func mulDoubleAdd2*[T: Ct[uint32]|Ct[uint64]](r2: var Carry, r1, r0: var T, a, b, c: T, dHi: Carry, dLo: T) {.inline.} =
|
|
||||||
## (r2, r1, r0) <- 2*a*b + c + (dHi, dLo)
|
|
||||||
## with r = (r2, r1, r0) a triple-word number
|
|
||||||
## and d = (dHi, dLo) a double-word number
|
|
||||||
## r2 and dHi are carries, either 0 or 1
|
|
||||||
|
|
||||||
var carry: Carry
|
|
||||||
|
|
||||||
# (r1, r0) <- a*b
|
|
||||||
# Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001)
|
|
||||||
mul(r1, r0, a, b)
|
|
||||||
|
|
||||||
# (r2, r1, r0) <- 2*a*b
|
|
||||||
# Then (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001) * 2
|
|
||||||
# (carry: 1, hi: 0xFFFFFFFF_FFFFFFFC, lo: 0x00000000_00000002)
|
|
||||||
addC(carry, r0, r0, r0, Carry(0))
|
|
||||||
addC(r2, r1, r1, r1, carry)
|
|
||||||
|
|
||||||
# (r1, r0) <- (r1, r0) + c
|
|
||||||
# Adding any uint64 cannot overflow into r2 for example Adding 2^64-1
|
|
||||||
# (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
|
|
||||||
addC(carry, r0, r0, c, Carry(0))
|
|
||||||
addC(carry, r1, r1, T(0), carry)
|
|
||||||
|
|
||||||
# (r1, r0) <- (r1, r0) + (dHi, dLo) with dHi a carry (previous limb r2)
|
|
||||||
# (dHi, dLo) is at most (dhi: 1, dlo: 0xFFFFFFFF_FFFFFFFF)
|
|
||||||
# summing into (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
|
|
||||||
# result at most in (carry: 1, hi: 0xFFFFFFFF_FFFFFFFF, lo: 0x00000000_00000000)
|
|
||||||
addC(carry, r0, r0, dLo, Carry(0))
|
|
||||||
addC(carry, r1, r1, T(dHi), carry)
|
|
||||||
|
|
||||||
func mulAcc*[T: Ct[uint32]|Ct[uint64]](t, u, v: var T, a, b: T) {.inline.} =
|
func mulAcc*[T: Ct[uint32]|Ct[uint64]](t, u, v: var T, a, b: T) {.inline.} =
|
||||||
## (t, u, v) <- (t, u, v) + a * b
|
## (t, u, v) <- (t, u, v) + a * b
|
||||||
var UV: array[2, T]
|
var UV: array[2, T]
|
||||||
|
|
|
@ -104,6 +104,11 @@ template `*`*[T: Ct](x, y: T): T =
|
||||||
# but this is not always true, especially on ARMv7 and ARMv9
|
# but this is not always true, especially on ARMv7 and ARMv9
|
||||||
fmap(x, `*`, y)
|
fmap(x, `*`, y)
|
||||||
|
|
||||||
|
template `*=`*[T: Ct](x, y: T) =
  ## In-place multiplication of `x` by `y`,
  ## forwarded to the wrapped values through `fmapAsgn`.
  # Warning ⚠️ : We assume that mul hardware multiplication is constant time
  # but this is not always true, especially on ARMv7 and ARMv9
  fmapAsgn(x, `*=`, y)
|
||||||
|
|
||||||
# We don't implement div/mod as we can't assume the hardware implementation
|
# We don't implement div/mod as we can't assume the hardware implementation
|
||||||
# is constant-time
|
# is constant-time
|
||||||
|
|
||||||
|
|
|
@ -33,3 +33,41 @@ export
|
||||||
when X86 and GCC_Compatible:
|
when X86 and GCC_Compatible:
|
||||||
import isa/[cpuinfo_x86, macro_assembler_x86]
|
import isa/[cpuinfo_x86, macro_assembler_x86]
|
||||||
export cpuinfo_x86, macro_assembler_x86
|
export cpuinfo_x86, macro_assembler_x86
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Instrumentation
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
template debug*(body: untyped): untyped =
  ## Instrumentation block: `body` is only compiled in
  ## when the symbol `debugConstantine` is defined
  ## (e.g. with `-d:debugConstantine`); otherwise it expands to nothing.
  when defined(debugConstantine):
    body
|
||||||
|
|
||||||
|
# ############################################################
|
||||||
|
#
|
||||||
|
# Buffers
|
||||||
|
#
|
||||||
|
# ############################################################
|
||||||
|
|
||||||
|
func setZero*[N](a: var array[N, SomeNumber]){.inline.} =
  ## Set every element of `a` to 0.
  ##
  ## Elements are visited through `mitems`, so arrays whose index range
  ## does not start at 0 (e.g. `array[2..4, int]`) are handled correctly;
  ## indexing with `0 ..< a.len` would address the wrong slots for them.
  for x in a.mitems:
    x = 0
|
||||||
|
|
||||||
|
func copy*[N: static int, T: byte|char](
       dst: var array[N, byte],
       dStart: SomeInteger,
       src: openArray[T],
       sStart: SomeInteger,
       len: SomeInteger
     ) {.inline.} =
  ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ## Unlike the standard library, this cannot throw
  ## even a defect.
  ## It also handles copy of char into byte arrays
  ##
  ## Both ranges are only validated inside a `debug` block.
  debug:
    doAssert 0 <= dStart and dStart+len <= dst.len.uint, "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len
    doAssert 0 <= sStart and sStart+len <= src.len.uint, "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len

  # Element-wise copy; each source element is converted to `byte`
  for offset in 0 ..< len:
    dst[dStart + offset] = byte src[sStart + offset]
|
|
@ -0,0 +1,31 @@
|
||||||
|
# Constantine
|
||||||
|
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||||
|
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||||
|
# Licensed and distributed under either of
|
||||||
|
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||||
|
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||||
|
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||||
|
|
||||||
|
import
|
||||||
|
std/unittest,
|
||||||
|
../constantine/mac/mac_poly1305
|
||||||
|
|
||||||
|
suite "[Message Authentication Code] Poly1305":
  test "Test vector 1 - RFC8439":
    # 32-byte one-time key
    let ikm = [
      byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
           0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
           0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
           0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
    ]
    let message = "Cryptographic Forum Research Group"

    let expectedTag = [
      byte 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
           0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
    ]

    var tag: array[16, byte]
    poly1305.auth(tag, message, ikm)

    # `check` records the failure in the unittest report and prints both
    # operands, instead of aborting with an AssertionDefect.
    check tag == expectedTag
|
Loading…
Reference in New Issue