Poly1305 Message Authentication Code (#186)
* Groundwork for Poly1305 MAC * Implement fast reduction for Poly1305 * don't import assembly files when compiling without assembly
This commit is contained in:
parent
c2eb42b769
commit
742cecce08
|
@ -0,0 +1,65 @@
|
|||
import
|
||||
# Internals
|
||||
../constantine/mac/mac_poly1305,
|
||||
# Helpers
|
||||
../helpers/prng_unsafe,
|
||||
./bench_blueprint,
|
||||
# C API
|
||||
system/ansi_c
|
||||
|
||||
proc separator*() =
  ## Zero-argument convenience overload: forwards to the one-argument
  ## `separator` with the fixed value 69.
  ## NOTE(review): the one-argument overload is not declared in this file,
  ## presumably it comes from ./bench_blueprint - confirm.
  separator(69)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
proc report(op: string, bytes: int, startTime, stopTime: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Print one benchmark result line for `op`.
  ##
  ## - `bytes`: message size, only used for the cycles/byte column
  ## - `startTime`, `stopTime`: monotonic timestamps taken around the whole run
  ## - `startClk`, `stopClk`: tick counter readings taken around the whole run,
  ##   only used when `SupportsGetTicks` is true
  ## - `iters`: number of iterations in the run
  ##
  ## Every printed figure is an average over `iters`.
  let elapsed = stopTime - startTime
  let ns = inNanoseconds(elapsed div iters)
  let throughput = 1e9 / ns.float64

  when not SupportsGetTicks:
    # No tick counter available: wall-clock figures only
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op"
  else:
    let cycles = (stopClk - startClk) div iters
    let cyclePerByte = float64(cycles) / float64(bytes)
    echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op {cycles:>10} cycles {cyclePerByte:>5.2f} cycles/byte"
|
||||
|
||||
template bench(op: string, bytes: int, iters: int, body: untyped): untyped =
  ## Hand `body` and `iters` to `measure`, then print the outcome
  ## through `report` under the label `op`.
  ##
  ## `startTime`, `stopTime`, `startClk` and `stopClk` are passed to `measure`
  ## first and read back by `report` afterwards.
  ## NOTE(review): presumably `measure` declares and fills them - confirm
  ## in ./bench_blueprint.
  measure(iters, startTime, stopTime, startClk, stopClk, body)
  report(op, bytes, startTime, stopTime, startClk, stopClk, iters)
|
||||
|
||||
proc benchPoly1305_constantine[T](msg: openarray[T], msgComment: string, iters: int) =
  ## Benchmark the one-shot `poly1305.auth` on `msg` with a fixed 32-byte key.
  ##
  ## - `msgComment`: suffix appended to the printed benchmark label
  ## - `iters`: iteration count forwarded to `bench`
  let key = [
    byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
         0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
         0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
         0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
  ]
  var mac: array[16, byte]   # output buffer, overwritten on every iteration
  let label = "Poly1305 - Constantine - " & msgComment

  bench(label, msg.len, iters):
    poly1305.auth(mac, msg, key)
|
||||
|
||||
when isMainModule:
  proc main() =
    ## Benchmark Poly1305 on random messages of increasing size.
    ## NOTE(review): `rng` is not declared in this file, presumably it is
    ## exported by one of the imported helper modules - confirm.

    # One row per benchmark, run from top to bottom
    const workloads = [
      (size: 32,          label: "32B",   iters: 100),
      (size: 64,          label: "64B",   iters: 100),
      (size: 128,         label: "128B",  iters: 100),
      (size: 576,         label: "576B",  iters: 50),
      (size: 8192,        label: "8192B", iters: 25),
      (size: 1_000_000,   label: "1MB",   iters: 16),
      (size: 10_000_000,  label: "10MB",  iters: 16),
      (size: 100_000_000, label: "100MB", iters: 3)
    ]

    for job in workloads:
      # A fresh random message per row, scoped to the iteration
      let msg = rng.random_byte_seq(job.size)
      benchPoly1305_constantine(msg, job.label, job.iters)

  main()
|
|
@ -69,17 +69,33 @@ proc benchSHA256_openssl[T](msg: openarray[T], msgComment: string, iters: int) =
|
|||
when isMainModule:
|
||||
proc main() =
|
||||
block:
|
||||
let msg128B = rng.random_byte_seq(32)
|
||||
benchSHA256_constantine(msg128B, "32B", 32)
|
||||
benchSHA256_openssl(msg128B, "32B", 32)
|
||||
let msg32B = rng.random_byte_seq(32)
|
||||
benchSHA256_constantine(msg32B, "32B", 100)
|
||||
benchSHA256_openssl(msg32B, "32B", 100)
|
||||
block:
|
||||
let msg64B = rng.random_byte_seq(64)
|
||||
benchSHA256_constantine(msg64B, "64B", 100)
|
||||
benchSHA256_openssl(msg64B, "64B", 100)
|
||||
block:
|
||||
let msg128B = rng.random_byte_seq(128)
|
||||
benchSHA256_constantine(msg128B, "128B", 128)
|
||||
benchSHA256_openssl(msg128B, "128B", 128)
|
||||
benchSHA256_constantine(msg128B, "128B", 100)
|
||||
benchSHA256_openssl(msg128B, "128B", 100)
|
||||
block:
|
||||
let msg5MB = rng.random_byte_seq(5_000_000)
|
||||
benchSHA256_constantine(msg5MB, "5MB", 16)
|
||||
benchSHA256_openssl(msg5MB, "5MB", 16)
|
||||
let msg576B = rng.random_byte_seq(576)
|
||||
benchSHA256_constantine(msg576B, "576B", 50)
|
||||
benchSHA256_openssl(msg576B, "576B", 50)
|
||||
block:
|
||||
let msg8192B = rng.random_byte_seq(8192)
|
||||
benchSHA256_constantine(msg8192B, "8192B", 25)
|
||||
benchSHA256_openssl(msg8192B, "8192B", 25)
|
||||
block:
|
||||
let msg1MB = rng.random_byte_seq(1_000_000)
|
||||
benchSHA256_constantine(msg1MB, "1MB", 16)
|
||||
benchSHA256_openssl(msg1MB, "1MB", 16)
|
||||
block:
|
||||
let msg10MB = rng.random_byte_seq(10_000_000)
|
||||
benchSHA256_constantine(msg10MB, "10MB", 16)
|
||||
benchSHA256_openssl(msg10MB, "10MB", 16)
|
||||
block:
|
||||
let msg100MB = rng.random_byte_seq(100_000_000)
|
||||
benchSHA256_constantine(msg100MB, "100MB", 3)
|
||||
|
|
|
@ -192,6 +192,10 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
|
|||
# ----------------------------------------------------------
|
||||
("tests/t_cipher_chacha20.nim", false),
|
||||
|
||||
# Message Authentication Code
|
||||
# ----------------------------------------------------------
|
||||
("tests/t_mac_poly1305.nim", false),
|
||||
|
||||
# Protocols
|
||||
# ----------------------------------------------------------
|
||||
("tests/t_ethereum_evm_precompiles.nim", false),
|
||||
|
|
|
@ -58,7 +58,7 @@ func hash*[DigestSize: static int, T: char|byte](
|
|||
func hash*[T: char|byte](
|
||||
HashKind: type CryptoHash,
|
||||
message: openarray[T],
|
||||
clearmem = false): array[HashKind.sizeInBytes, byte] =
|
||||
clearmem = false): array[HashKind.sizeInBytes, byte] {.noInit.} =
|
||||
## Produce a digest from a message
|
||||
HashKind.hash(result, message, clearMem)
|
||||
|
||||
|
|
|
@ -51,10 +51,6 @@ type
|
|||
{.push raises: [].}
|
||||
{.push checks: off.}
|
||||
|
||||
func setZero[N](a: var array[N, SomeNumber]){.inline.} =
  ## Overwrite every element of `a` with zero.
  for slot in a.mitems:
    slot = 0
|
||||
|
||||
template rotr(x, n: uint32): uint32 =
|
||||
## Rotate right the bits
|
||||
# We always use it with constants in 0 ..< 32
|
||||
|
@ -272,24 +268,6 @@ func dumpHash(
|
|||
digest.dumpRawInt(H[i], dstIdx, bigEndian)
|
||||
dstIdx += uint sizeof(uint32)
|
||||
|
||||
func copy[N: static int, T: byte|char](
       dst: var array[N, byte],
       dStart: SomeInteger,
       src: openArray[T],
       sStart: SomeInteger,
       len: SomeInteger
     ) =
  ## Element-wise copy: dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ##
  ## Contrary to the standard library, this is meant to never throw,
  ## not even a defect.
  ## `char` sources are accepted, each element is converted to `byte`.
  ##
  ## Both ranges are only validated inside the `debug` block.
  debug:
    doAssert 0 <= dStart and dStart+len <= dst.len.uint
    doAssert 0 <= sStart and sStart+len <= src.len.uint

  for offset in 0 ..< len:
    dst[dStart + offset] = byte(src[sStart + offset])
|
||||
|
||||
func hashBuffer(ctx: var Sha256Context) =
|
||||
discard ctx.H.hashMessageBlocks(ctx.buf)
|
||||
ctx.buf.setZero()
|
||||
|
@ -445,4 +423,7 @@ func clear*(ctx: var Sha256Context) =
|
|||
## For passwords and secret keys, you MUST NOT use raw SHA-256
|
||||
## use a Key Derivation Function instead (KDF)
|
||||
# TODO: ensure compiler cannot optimize the code away
|
||||
ctx.H.setZero()
|
||||
ctx.buf.setZero()
|
||||
ctx.msgLen = 0
|
||||
ctx.bufIdx = 0
|
|
@ -0,0 +1,355 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
../platforms/abstractions,
|
||||
../math/arithmetic/bigints,
|
||||
../math/arithmetic/[limbs, limbs_extmul],
|
||||
../math/io/io_bigints
|
||||
|
||||
when UseASM_X86_64:
|
||||
import ../math/arithmetic/assembly/limbs_asm_modular_x86
|
||||
|
||||
# No exceptions allowed
|
||||
{.push raises: [].}
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Poly1305 Message Authentication Code
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
# TODO: instead of using a saturated representation,
|
||||
# since there are 62 extra bits unused in the last limb
|
||||
# use an unsaturated representation and remove all carry dependency chains.
|
||||
# Given the number of add with carries, this would significantly
|
||||
# improve instruction level parallelism.
|
||||
#
|
||||
# Also vectorizing the code requires removing carry chains anyway.
|
||||
|
||||
# The Poly1305 prime modulus p = 2¹³⁰-5
const P1305 = fromHex(BigInt[130], "0x3fffffffffffffffffffffffffffffffb")
|
||||
|
||||
func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) =
  ## Fold the wide value `a` into `r`, partially reduced modulo p = 2¹³⁰-5.
  ##
  ## p has the special form 2ᵐ-c, known in the literature as a
  ## "Crandall prime" or pseudo-Mersenne prime, which gives a fast reduction:
  ##       2ᵐ-c ≡ 0       (mod p)
  ##   <=> 2ᵐ   ≡ c       (mod p)
  ##   <=> x2ᵐ+y ≡ xc + y (mod p)
  ##
  ## The output is partially reduced, intended range [0, 2¹³⁰).
  ##
  ## Limb counts are checked at compile-time:
  ## - 64-bit words: N1 = 3 (192 bits hold 2¹³⁰-1), N2 = 4
  ## - 32-bit words: N1 = 5 (160 bits hold 2¹³⁰-1), N2 = 8
  #
  # Folding at a word boundary, starting from 2¹³⁰ ≡ 5 (mod p):
  #   64-bit: 2¹⁹² = 2¹³⁰.2⁶² ≡ 5.2⁶² (mod p)
  #   32-bit: 2¹⁶⁰ = 2¹³⁰.2³⁰ ≡ 5.2³⁰ (mod p)
  #
  # The twist: 5.2⁶² needs 65 bits and 5.2³⁰ needs 33 bits, one more than
  # a word. Pass 1 therefore scales by half the constant and doubles.
  # Pass 2 then folds the bits of the top limb that sit above 2¹³⁰-1
  # (62 bits with 64-bit words, 30 bits with 32-bit words).

  when WordBitWidth == 64:
    static:
      doAssert N1 == 3
      doAssert N2 == 4

    block:
      # Pass 1: fold everything above 2¹⁹²-1
      #   x2¹⁹²+y ≡ x.5.2⁶² + y (mod p), with x = a[3] and y = a[0..2]
      const halfFold = SecretWord(5) shl 61   # 5.2⁶¹

      var
        foldCarry: Carry
        prodHi, prodLo: SecretWord
      mul(prodHi, prodLo, a[3], halfFold)

      # r <- y + x.5.2⁶¹
      addC(foldCarry, r[0], prodLo, a[0], Carry(0))
      addC(foldCarry, r[1], prodHi, a[1], foldCarry)
      addC(foldCarry, r[2], Zero, a[2], foldCarry)
      # r <- r + x.5.2⁶¹, which totals y + x.5.2⁶²
      addC(foldCarry, r[0], prodLo, r[0], Carry(0))
      addC(foldCarry, r[1], prodHi, r[1], foldCarry)
      addC(foldCarry, r[2], Zero, r[2], foldCarry)
      # NOTE(review): the carry left after each 3-limb chain is discarded,
      # presumably the bound on `a` guarantees it is zero - confirm.
  else:
    static:
      doAssert N1 == 5
      doAssert N2 == 8

    block:
      # Pass 1: fold everything above 2¹⁶⁰-1
      #   x2¹⁶⁰+y ≡ x.5.2³⁰ + y (mod p)
      const halfFold = SecretWord(5) shl 29   # 5.2²⁹

      staticFor i, 0, N1:
        r[i] = a[i]

      # NOTE(review): presumably mulDoubleAcc adds 2*x*y to its 3-word
      # accumulator, which supplies the doubling - confirm in limbs_extmul.
      mulDoubleAcc(r[2], r[1], r[0], a[5], halfFold)
      mulDoubleAcc(r[3], r[2], r[1], a[6], halfFold)
      mulDoubleAcc(r[4], r[3], r[2], a[7], halfFold)

  const
    bits = 130
    excessBits = wordsRequired(bits)*WordBitWidth - bits

  # Pass 2: fold everything above 2¹³⁰-1 into the low limbs
  var
    carry: Carry      # carry chain of the final additions
    overflow: Carry   # extra top bit of 5*top
  var top = r[N1-1] shr (WordBitWidth - excessBits)
  r[N1-1] = r[N1-1] and (MaxWord shr excessBits)

  # top <- 5*top computed as top + 4*top
  let top4 = top shl 2                        # Cannot overflow, 2 bits are spare
  addC(overflow, top, top, top4, Carry(0))    # The carry bit stores a 63/31-bit result

  # The actual fold
  addC(carry, r[0], r[0], top, Carry(0))
  addC(carry, r[1], r[1], SecretWord(overflow), carry)
  staticFor i, 2, N1:
    addC(carry, r[i], r[i], Zero, carry)
|
||||
|
||||
func finalReduce_1305[N: static int](a: var Limbs[N]) =
  ## Bring `a` to its canonical representative modulo p = 2¹³⁰-5.
  ##
  ## Input:  redundant representation in [0, 2¹³¹-10)
  ## Output: canonical representation in [0, 2¹³⁰-5)
  # Strategy: subtract p once, then add it back if that borrowed.
  when UseASM_X86_64 and a.len <= 6:
    submod_asm(a, a, P1305.limbs, P1305.limbs)
  else:
    let borrowed = sub(a, P1305.limbs)
    discard cadd(a, P1305.limbs, SecretBool borrowed)
|
||||
|
||||
const BlockSize = 16   # Poly1305 consumes the message in 16-byte blocks

type
  Poly1305_CTX = object
    # State of an incremental Poly1305 computation
    acc: BigInt[130+1]            # Accumulator. After an unreduced sum, up to 131 bits may be used
    r: BigInt[124]                # First key half, 124-bit after clamping
    s: BigInt[128]                # Second key half, added to the accumulator in `finish`
    buf: array[BlockSize, byte]   # Message tail that does not fill a block yet
    msgLen: uint64                # Running total of the bytes passed to `update`
    bufIdx: uint8                 # Number of bytes currently held in `buf`

  poly1305* = Poly1305_CTX        # Public name of the context type
|
||||
|
||||
func macMessageBlocks[T: byte|char](
       acc: var BigInt[130+1],
       r: BigInt[124],
       message: openArray[T],
       blockSize = BlockSize): uint =
  ## Absorb `message` into the accumulator `acc`, one 16-byte block at a time.
  ## Returns the number of bytes processed, always a multiple of 16.
  ## Trailing bytes that do not fill a block are ignored.
  ##
  ## `blockSize` only selects where the padding bit goes (bit 8*blockSize),
  ## the message itself is always read in 16-byte strides.
  ## To absorb a single partial block, pass a 16-byte buffer holding the
  ## remaining bytes and set `blockSize` to their count.
  let fullBlocks = int(message.len.uint div BlockSize)

  var chunk {.noInit.}: BigInt[130+1]
  # r is 124-bit after clamping, so the product needs 131+124 bits
  var wide {.noInit.}: BigInt[130+1+124]

  for blockIdx in 0 ..< fullBlocks:
    let first = blockIdx * BlockSize
    let last = first + BlockSize - 1

    # Load the 16 bytes of the block as a little-endian integer
    when T is byte:
      chunk.unmarshal(message.toOpenArray(first, last), littleEndian)
    else:
      chunk.unmarshal(message.toOpenArrayByte(first, last), littleEndian)

    chunk.setBit(8*blockSize)   # Padding bit: 2¹²⁸ for a full block
    acc += chunk                # Unreduced sum
    wide.prod(acc, r)           # Full-width product

    acc.limbs.partialReduce_1305(wide.limbs)

  return BlockSize * fullBlocks.uint
|
||||
|
||||
func macBuffer(ctx: var Poly1305_CTX, blockSize: int) =
  ## Absorb the internal buffer into the accumulator as one block
  ## whose padding bit is set at bit 8*blockSize,
  ## then zero the buffer and reset its fill index.
  discard macMessageBlocks(ctx.acc, ctx.r, ctx.buf, blockSize)
  ctx.bufIdx = 0
  ctx.buf.setZero()
|
||||
|
||||
# Public API
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
func init*(ctx: var Poly1305_CTX, nonReusedKey: array[32, byte]) =
  ## Initialize a Poly1305 MAC (Message Authentication Code) context.
  ##
  ## `nonReusedKey` is a unique, never reused, pre-shared key between
  ## the parties that want to authenticate each other's messages.
  ## Its first 16 bytes become `r` (after clamping), its last 16 bytes
  ## become `s`. Both halves are read as little-endian integers.
  const clamp = BigInt[128].fromHex"0x0ffffffc0ffffffc0ffffffc0fffffff"

  # r <- key[0 ..< 16] masked limb by limb with the clamp
  ctx.r.unmarshal(nonReusedKey.toOpenArray(0, 15), littleEndian)
  staticFor i, 0, ctx.r.limbs.len:
    ctx.r.limbs[i] = ctx.r.limbs[i] and clamp.limbs[i]

  # s <- key[16 ..< 32]
  ctx.s.unmarshal(nonReusedKey.toOpenArray(16, 31), littleEndian)

  # Fresh accumulator, empty buffer, zeroed counters
  ctx.acc.setZero()
  ctx.buf.setZero()
  ctx.bufIdx = 0
  ctx.msgLen = 0
|
||||
|
||||
func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) =
  ## Append `message` to a Poly1305 authentication context,
  ## for incremental Poly1305 computation.
  ##
  ## Complete 16-byte blocks are absorbed right away. A tail shorter
  ## than a block is kept in the context buffer until more data
  ## arrives or `finish` is called.
  ##
  ## Security note: the tail of your message might be stored
  ## in an internal buffer.
  ## If sensitive content is used, ensure that
  ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible.
  ## Additionally ensure that the message(s) passed were stored
  ## in memory considered secure for your threat model.

  debug:
    # Invariant: the unused part of the buffer is always zero
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  if message.len == 0:
    return

  var
    pos = 0'u                       # next unread byte of `message`
    remaining = message.len.uint    # bytes of `message` not consumed yet

  ctx.msgLen += remaining

  if ctx.bufIdx != 0:
    # A previous update left a partial block in the buffer
    let used = ctx.bufIdx.uint
    let room = ctx.buf.sizeof().uint - used

    if room > remaining:
      # Still no complete block: stash everything and stop here
      ctx.buf.copy(dStart = used, message, sStart = 0, len = remaining)
      ctx.bufIdx += remaining.uint8
      return

    # Top up the buffer to a complete block and absorb it
    ctx.buf.copy(dStart = used, message, sStart = 0, len = room)
    ctx.macBuffer(blockSize = BlockSize)
    pos = room
    remaining -= room

  # Absorb every complete block straight from the message
  let consumed = ctx.acc.macMessageBlocks(
    ctx.r,
    message.toOpenArray(int pos, message.len-1),
    blockSize = BlockSize
  )
  pos += consumed
  remaining -= consumed

  if remaining != 0:
    # Keep the tail for the next update or for finish
    debug: # TODO: state machine formal verification - https://nim-lang.org/docs/drnim.html
      doAssert ctx.bufIdx == 0
      doAssert pos + remaining == message.len.uint

    ctx.buf.copy(dStart = 0'u, message, sStart = pos, len = remaining)
    ctx.bufIdx = uint8 remaining
|
||||
|
||||
func finish*(ctx: var Poly1305_CTX, tag: var array[16, byte]) =
  ## Finalize a Poly1305 authentication
  ## and write the 16-byte authentication tag to `tag`.
  ##
  ## Security note: this does not clear the internal context.
  ## If sensitive content is used, use "ctx.clear()"
  ## and also make sure that the message(s) passed were stored
  ## in memory considered secure for your threat model.

  debug:
    # Invariant: the unused part of the buffer is always zero
    doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len
    for i in ctx.bufIdx ..< ctx.buf.len:
      doAssert ctx.buf[i] == 0

  # Absorb the pending partial block, if any
  if ctx.bufIdx != 0:
    ctx.macBuffer(blockSize = ctx.bufIdx.int)

  # The accumulator is only partially reduced to [0, 2¹³⁰),
  # map it to the canonical range [0, 2¹³⁰-5)
  finalReduce_1305(ctx.acc.limbs)

  # From here on only the 128 least significant bits matter
  var low128 {.noInit.}: BigInt[128]
  low128.copyTruncatedFrom(ctx.acc)
  low128 += ctx.s

  marshal(tag, low128, littleEndian)

  debug:
    doAssert ctx.bufIdx == 0
    for pending in ctx.buf:
      doAssert pending == 0
|
||||
|
||||
func clear*(ctx: var Poly1305_CTX) =
  ## Zero every field of the context: the counters, the message buffer,
  ## both key halves `r` and `s`, and the accumulator.
  # TODO: ensure compiler cannot optimize the code away
  ctx.bufIdx = 0
  ctx.msgLen = 0
  ctx.buf.setZero()
  ctx.s.setZero()
  ctx.r.setZero()
  ctx.acc.setZero()
|
||||
|
||||
func auth*[T: char|byte](
       _: type poly1305,
       tag: var array[16, byte],
       message: openArray[T],
       nonReusedKey: array[32, byte],
       clearMem = false) =
  ## One-shot Poly1305: write to `tag` the authentication tag of `message`
  ## under the pre-shared, unique, never reused secret key `nonReusedKey`.
  ##
  ## With `clearMem = true` the temporary context is zeroed before returning.

  var state {.noInit.}: poly1305   # every field is set by `init`
  init(state, nonReusedKey)
  update(state, message)
  finish(state, tag)

  if clearMem:
    clear(state)
|
||||
|
||||
func auth*[T: char|byte](
       _: type poly1305,
       message: openArray[T],
       nonReusedKey: array[32, byte],
       clearMem = false): array[16, byte]{.noInit.}=
  ## One-shot Poly1305: return the authentication tag of `message`
  ## under the pre-shared, unique, never reused secret key `nonReusedKey`.
  ##
  ## Thin wrapper over the overload that writes into a `tag` buffer,
  ## `clearMem` is forwarded unchanged.
  auth(poly1305, result, message, nonReusedKey, clearMem)
|
|
@ -344,6 +344,16 @@ func bit0*(a: BigInt): Ct[uint8] =
|
|||
## Access the least significant bit
|
||||
ct(a.limbs[0] and One, uint8)
|
||||
|
||||
func setBit*[bits: static int](a: var BigInt[bits], index: int) =
  ## Set the bit at position `index` of `a` to 1.
  ## Bit 0 is the least significant bit of limb 0.
  ## A bit that is already 1 is left unchanged.
  const
    SlotShift = log2_vartime(WordBitWidth.uint32)   # index shr SlotShift = limb number
    SelectMask = WordBitWidth - 1                   # index and SelectMask = bit within the limb

  let limbIdx = index shr SlotShift
  let bitMask = One shl (index and SelectMask)
  a.limbs[limbIdx] = a.limbs[limbIdx] or bitMask
|
||||
|
||||
# Multiplication by small constants
|
||||
# ------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -56,14 +56,4 @@ const
|
|||
# We need to support register spills for large limbs
|
||||
const CttASM {.booldefine.} = true
|
||||
const UseASM_X86_32* = CttASM and X86 and GCC_Compatible
|
||||
const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Instrumentation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
template debug*(body: untyped): untyped =
  ## Compile `body` only when the `debugConstantine` symbol is defined
  ## (`-d:debugConstantine`). Otherwise the call expands to nothing.
  when defined(debugConstantine):
    body
|
||||
const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32
|
|
@ -94,37 +94,6 @@ when sizeof(int) == 8:
|
|||
#
|
||||
# ############################################################
|
||||
|
||||
func mulDoubleAdd2*[T: Ct[uint32]|Ct[uint64]](r2: var Carry, r1, r0: var T, a, b, c: T, dHi: Carry, dLo: T) {.inline.} =
  ## (r2, r1, r0) <- 2*a*b + c + (dHi, dLo)
  ##
  ## - r = (r2, r1, r0) is a triple-word number
  ## - d = (dHi, dLo) is a double-word number
  ## - r2 and dHi are carries, either 0 or 1
  var cf: Carry

  # Step 1: (r1, r0) <- a*b
  #   Largest case: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001)
  mul(r1, r0, a, b)

  # Step 2: (r2, r1, r0) <- 2*a*b
  #   Largest case doubles to
  #   (carry: 1, hi: 0xFFFFFFFF_FFFFFFFC, lo: 0x00000000_00000002)
  addC(cf, r0, r0, r0, Carry(0))
  addC(r2, r1, r1, r1, cf)

  # Step 3: (r1, r0) <- (r1, r0) + c
  #   Adding any word cannot overflow into r2, for example adding 2^64-1 gives
  #   (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
  addC(cf, r0, r0, c, Carry(0))
  addC(cf, r1, r1, T(0), cf)

  # Step 4: (r1, r0) <- (r1, r0) + (dHi, dLo), with dHi a carry (previous limb r2)
  #   (dHi, dLo) is at most (dhi: 1, dlo: 0xFFFFFFFF_FFFFFFFF), so starting from
  #   (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001)
  #   the result is at most (carry: 1, hi: 0xFFFFFFFF_FFFFFFFF, lo: 0x00000000_00000000)
  addC(cf, r0, r0, dLo, Carry(0))
  addC(cf, r1, r1, T(dHi), cf)
|
||||
|
||||
func mulAcc*[T: Ct[uint32]|Ct[uint64]](t, u, v: var T, a, b: T) {.inline.} =
|
||||
## (t, u, v) <- (t, u, v) + a * b
|
||||
var UV: array[2, T]
|
||||
|
|
|
@ -104,6 +104,11 @@ template `*`*[T: Ct](x, y: T): T =
|
|||
# but this is not always true, especially on ARMv7 and ARMv9
|
||||
fmap(x, `*`, y)
|
||||
|
||||
template `*=`*[T: Ct](x, y: T) =
  ## `x *= y` for `Ct` values, forwarded through `fmapAsgn`.
  # Warning ⚠️ : hardware multiplication is assumed to be constant-time,
  # which is not always true, especially on ARMv7 and ARMv9
  fmapAsgn(x, `*=`, y)
|
||||
|
||||
# We don't implement div/mod as we can't assume the hardware implementation
|
||||
# is constant-time
|
||||
|
||||
|
|
|
@ -33,3 +33,41 @@ export
|
|||
when X86 and GCC_Compatible:
|
||||
import isa/[cpuinfo_x86, macro_assembler_x86]
|
||||
export cpuinfo_x86, macro_assembler_x86
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Instrumentation
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
template debug*(body: untyped): untyped =
  ## Compile `body` only when the `debugConstantine` symbol is defined
  ## (`-d:debugConstantine`). Otherwise the call expands to nothing.
  when defined(debugConstantine):
    body
|
||||
|
||||
# ############################################################
|
||||
#
|
||||
# Buffers
|
||||
#
|
||||
# ############################################################
|
||||
|
||||
func setZero*[N](a: var array[N, SomeNumber]){.inline.} =
  ## Overwrite every element of `a` with zero.
  for slot in a.mitems:
    slot = 0
|
||||
|
||||
func copy*[N: static int, T: byte|char](
       dst: var array[N, byte],
       dStart: SomeInteger,
       src: openArray[T],
       sStart: SomeInteger,
       len: SomeInteger
     ) {.inline.} =
  ## Element-wise copy: dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ##
  ## Contrary to the standard library, this is meant to never throw,
  ## not even a defect.
  ## `char` sources are accepted, each element is converted to `byte`.
  ##
  ## Both ranges are only validated inside the `debug` block.
  debug:
    doAssert 0 <= dStart and dStart+len <= dst.len.uint, "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len
    doAssert 0 <= sStart and sStart+len <= src.len.uint, "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len

  for offset in 0 ..< len:
    dst[dStart + offset] = byte(src[sStart + offset])
|
|
@ -0,0 +1,31 @@
|
|||
# Constantine
|
||||
# Copyright (c) 2018-2019 Status Research & Development GmbH
|
||||
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
|
||||
# Licensed and distributed under either of
|
||||
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
|
||||
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
|
||||
# at your option. This file may not be copied, modified, or distributed except according to those terms.
|
||||
|
||||
import
|
||||
std/unittest,
|
||||
../constantine/mac/mac_poly1305
|
||||
|
||||
suite "[Message Authentication Code] Poly1305":
  test "Test vector 1 - RFC8439":
    # 32-byte one-time key: the first half becomes `r`, the second half `s`
    let ikm = [
      byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
           0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
           0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
           0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b
    ]
    # 34 bytes: two full 16-byte blocks plus a 2-byte tail,
    # so the buffered partial-block path is exercised as well
    let message = "Cryptographic Forum Research Group"

    let expectedTag = [
      byte 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
           0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9
    ]

    var tag: array[16, byte]
    poly1305.auth(tag, message, ikm)

    # `check` reports the failure through unittest and prints both
    # operands, instead of raising an assertion defect
    check tag == expectedTag
|
Loading…
Reference in New Issue