From 742cecce0898c71564f9709003996604f7af34d9 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sat, 5 Mar 2022 23:39:24 +0100 Subject: [PATCH] Poly1305 Message Authentication Code (#186) * Groundwork for Poly1305 MAC * Implement fast reduction for Poly1305 * don't import assembly files when compiling without assembly --- benchmarks/bench_poly1305.nim | 65 ++++ benchmarks/bench_sha256.nim | 32 +- constantine.nimble | 4 + constantine/hashes.nim | 2 +- constantine/hashes/h_sha256.nim | 25 +- constantine/mac/mac_poly1305.nim | 355 ++++++++++++++++++ constantine/math/arithmetic/bigints.nim | 10 + constantine/platforms/abstractions.nim | 12 +- .../compilers/extended_precision.nim | 31 -- .../platforms/constant_time/ct_routines.nim | 5 + constantine/platforms/primitives.nim | 38 ++ tests/t_mac_poly1305.nim | 31 ++ 12 files changed, 537 insertions(+), 73 deletions(-) create mode 100644 benchmarks/bench_poly1305.nim create mode 100644 constantine/mac/mac_poly1305.nim create mode 100644 tests/t_mac_poly1305.nim diff --git a/benchmarks/bench_poly1305.nim b/benchmarks/bench_poly1305.nim new file mode 100644 index 0000000..dbe3e60 --- /dev/null +++ b/benchmarks/bench_poly1305.nim @@ -0,0 +1,65 @@ +import + # Internals + ../constantine/mac/mac_poly1305, + # Helpers + ../helpers/prng_unsafe, + ./bench_blueprint, + # C API + system/ansi_c + +proc separator*() = separator(69) + +# -------------------------------------------------------------------- + +proc report(op: string, bytes: int, startTime, stopTime: MonoTime, startClk, stopClk: int64, iters: int) = + let ns = inNanoseconds((stopTime-startTime) div iters) + let throughput = 1e9 / float64(ns) + when SupportsGetTicks: + let cycles = (stopClk - startClk) div iters + let cyclePerByte = cycles.float64 / bytes.float64 + echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op {cycles:>10} cycles {cyclePerByte:>5.2f} cycles/byte" + else: + echo &"{op:<30} {throughput:>15.3f} ops/s {ns:>9} ns/op" + +template bench(op: string, 
bytes: int, iters: int, body: untyped): untyped = + measure(iters, startTime, stopTime, startClk, stopClk, body) + report(op, bytes, startTime, stopTime, startClk, stopClk, iters) + +proc benchPoly1305_constantine[T](msg: openarray[T], msgComment: string, iters: int) = + var tag: array[16, byte] + let ikm = [ + byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, + 0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8, + 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd, + 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b + ] + bench("Poly1305 - Constantine - " & msgComment, msg.len, iters): + poly1305.auth(tag, msg, ikm) + +when isMainModule: + proc main() = + block: + let msg32B = rng.random_byte_seq(32) + benchPoly1305_constantine(msg32B, "32B", 100) + block: + let msg64B = rng.random_byte_seq(64) + benchPoly1305_constantine(msg64B, "64B", 100) + block: + let msg128B = rng.random_byte_seq(128) + benchPoly1305_constantine(msg128B, "128B", 100) + block: + let msg576B = rng.random_byte_seq(576) + benchPoly1305_constantine(msg576B, "576B", 50) + block: + let msg8192B = rng.random_byte_seq(8192) + benchPoly1305_constantine(msg8192B, "8192B", 25) + block: + let msg1MB = rng.random_byte_seq(1_000_000) + benchPoly1305_constantine(msg1MB, "1MB", 16) + block: + let msg10MB = rng.random_byte_seq(10_000_000) + benchPoly1305_constantine(msg10MB, "10MB", 16) + block: + let msg100MB = rng.random_byte_seq(100_000_000) + benchPoly1305_constantine(msg100MB, "100MB", 3) + main() diff --git a/benchmarks/bench_sha256.nim b/benchmarks/bench_sha256.nim index b10d6da..e6a31d3 100644 --- a/benchmarks/bench_sha256.nim +++ b/benchmarks/bench_sha256.nim @@ -69,17 +69,33 @@ proc benchSHA256_openssl[T](msg: openarray[T], msgComment: string, iters: int) = when isMainModule: proc main() = block: - let msg128B = rng.random_byte_seq(32) - benchSHA256_constantine(msg128B, "32B", 32) - benchSHA256_openssl(msg128B, "32B", 32) + let msg32B = rng.random_byte_seq(32) + benchSHA256_constantine(msg32B, "32B", 100) + 
benchSHA256_openssl(msg32B, "32B", 100) + block: + let msg64B = rng.random_byte_seq(64) + benchSHA256_constantine(msg64B, "64B", 100) + benchSHA256_openssl(msg64B, "64B", 100) block: let msg128B = rng.random_byte_seq(128) - benchSHA256_constantine(msg128B, "128B", 128) - benchSHA256_openssl(msg128B, "128B", 128) + benchSHA256_constantine(msg128B, "128B", 100) + benchSHA256_openssl(msg128B, "128B", 100) block: - let msg5MB = rng.random_byte_seq(5_000_000) - benchSHA256_constantine(msg5MB, "5MB", 16) - benchSHA256_openssl(msg5MB, "5MB", 16) + let msg576B = rng.random_byte_seq(576) + benchSHA256_constantine(msg576B, "576B", 50) + benchSHA256_openssl(msg576B, "576B", 50) + block: + let msg8192B = rng.random_byte_seq(8192) + benchSHA256_constantine(msg8192B, "8192B", 25) + benchSHA256_openssl(msg8192B, "8192B", 25) + block: + let msg1MB = rng.random_byte_seq(1_000_000) + benchSHA256_constantine(msg1MB, "1MB", 16) + benchSHA256_openssl(msg1MB, "1MB", 16) + block: + let msg10MB = rng.random_byte_seq(10_000_000) + benchSHA256_constantine(msg10MB, "10MB", 16) + benchSHA256_openssl(msg10MB, "10MB", 16) block: let msg100MB = rng.random_byte_seq(100_000_000) benchSHA256_constantine(msg100MB, "100MB", 3) diff --git a/constantine.nimble b/constantine.nimble index 7216308..4dd3f95 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -192,6 +192,10 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[ # ---------------------------------------------------------- ("tests/t_cipher_chacha20.nim", false), + # Message Authentication Code + # ---------------------------------------------------------- + ("tests/t_mac_poly1305.nim", false), + # Protocols # ---------------------------------------------------------- ("tests/t_ethereum_evm_precompiles.nim", false), diff --git a/constantine/hashes.nim b/constantine/hashes.nim index caca378..d2986c4 100644 --- a/constantine/hashes.nim +++ b/constantine/hashes.nim @@ -58,7 +58,7 @@ func hash*[DigestSize: static int, T: 
char|byte]( func hash*[T: char|byte]( HashKind: type CryptoHash, message: openarray[T], - clearmem = false): array[HashKind.sizeInBytes, byte] = + clearmem = false): array[HashKind.sizeInBytes, byte] {.noInit.} = ## Produce a digest from a message HashKind.hash(result, message, clearMem) diff --git a/constantine/hashes/h_sha256.nim b/constantine/hashes/h_sha256.nim index 76ea41d..8c39e39 100644 --- a/constantine/hashes/h_sha256.nim +++ b/constantine/hashes/h_sha256.nim @@ -51,10 +51,6 @@ type {.push raises: [].} {.push checks: off.} -func setZero[N](a: var array[N, SomeNumber]){.inline.} = - for i in 0 ..< a.len: - a[i] = 0 - template rotr(x, n: uint32): uint32 = ## Rotate right the bits # We always use it with constants in 0 ..< 32 @@ -272,24 +268,6 @@ func dumpHash( digest.dumpRawInt(H[i], dstIdx, bigEndian) dstIdx += uint sizeof(uint32) -func copy[N: static int, T: byte|char]( - dst: var array[N, byte], - dStart: SomeInteger, - src: openArray[T], - sStart: SomeInteger, - len: SomeInteger - ) = - ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len] - ## Unlike the standard library, this cannot throw - ## even a defect. 
- ## It also handles copy of char into byte arrays - debug: - doAssert 0 <= dStart and dStart+len <= dst.len.uint - doAssert 0 <= sStart and sStart+len <= src.len.uint - - for i in 0 ..< len: - dst[dStart + i] = byte src[sStart + i] - func hashBuffer(ctx: var Sha256Context) = discard ctx.H.hashMessageBlocks(ctx.buf) ctx.buf.setZero() @@ -445,4 +423,7 @@ func clear*(ctx: var Sha256Context) = ## For passwords and secret keys, you MUST NOT use raw SHA-256 ## use a Key Derivation Function instead (KDF) # TODO: ensure compiler cannot optimize the code away + ctx.H.setZero() ctx.buf.setZero() + ctx.msgLen = 0 + ctx.bufIdx = 0 \ No newline at end of file diff --git a/constantine/mac/mac_poly1305.nim b/constantine/mac/mac_poly1305.nim new file mode 100644 index 0000000..45811be --- /dev/null +++ b/constantine/mac/mac_poly1305.nim @@ -0,0 +1,355 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + ../platforms/abstractions, + ../math/arithmetic/bigints, + ../math/arithmetic/[limbs, limbs_extmul], + ../math/io/io_bigints + +when UseASM_X86_64: + import ../math/arithmetic/assembly/limbs_asm_modular_x86 + +# No exceptions allowed +{.push raises: [].} + +# ############################################################ +# +# Poly1305 Message Authentication Code +# +# ############################################################ + +# TODO: instead of using a saturated representation, +# since there is 62 extra bits unused in the last limb +# use an unsaturated representation and remove all carry dependency chains. 
+# Given the number of add with carries, this would significantly +# improve instruction level parallelism. +# +# Also vectorizing the code requires removing carry chains anyway. + +const P1305 = BigInt[130].fromHex"0x3fffffffffffffffffffffffffffffffb" + +func partialReduce_1305[N1, N2: static int](r: var Limbs[N1], a: Limbs[N2]) = + ## The prime 2¹³⁰-5 has a special form 2ᵐ-c + ## called "Crandall prime" or Pseudo-Mersenne Prime + ## in the literature + ## which allows fast reduction from the fact that + ## 2ᵐ-c ≡ 0 (mod p) + ## <=> 2ᵐ ≡ c (mod p) [1] + ## <=> a2ᵐ+b ≡ ac + b (mod p) + ## + ## This partially reduces the input in range [0, 2¹³⁰) + # + # Assuming 64-bit words, + # N1 = 3 words (192-bit necessary for 2¹³⁰-1) + # N2 = 4 words (256-bit necessary for 2¹³¹.2¹²⁴) + # Assuming 32-bit words, + # N1 = 5 words (160-bit necessary for 2¹³⁰-1) + # N2 = 8 words (256-bit necessary for 2¹³¹.2¹²⁴) + # + # from 64-bit, starting from [1] + # 2ᵐ ≡ c (mod p) + # 2¹³⁰ ≡ 5 (mod p) + # 2¹³⁰.2⁶² ≡ 5.2⁶² (mod p) + # 2¹⁹² ≡ 5.2⁶² (mod p) + # + # Hence if we call a the [2¹⁹², 2²⁶⁰) range + # and b the [0, 2¹⁹²) range + # we have + # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p) + # + # Then we can handle the highest word which has + # 62 bits that should be folded back as well + # + # Similarly for 32-bit + # 2¹⁶⁰ ≡ 5.2³⁰ (mod p) + # and we need to fold back the top 30 bits + # + # But there is a twist. 
5.2⁶² need 65-bit not 64 + # and 5.2³⁰ need 33-bit not 32 + + when WordBitwidth == 64: + static: + doAssert N1 == 3 + doAssert N2 == 4 + + block: + # First pass, fold everything greater than 2¹⁹²-1 + # a2¹⁹²+b ≡ a.5.2⁶² + b (mod p) + # scale by 5.2⁶¹ first as 5.2⁶² does not fit in 64-bit words + const c = SecretWord 5 + const cExcess = c shl 61 + + var carry: Carry + var hi, lo: SecretWord + mul(hi, lo, a[3], cExcess) + addC(carry, r[0], lo, a[0], Carry(0)) + addC(carry, r[1], hi, a[1], carry) + addC(carry, r[2], Zero, a[2], carry) + # finally double to scale by 5.2⁶² + addC(carry, r[0], lo, r[0], Carry(0)) + addC(carry, r[1], hi, r[1], carry) + addC(carry, r[2], Zero, r[2], carry) + else: + static: + doAssert N1 == 5 + doAssert N2 == 8 + + block: + # First pass, fold everything greater than 2¹⁶⁰-1 + # a2¹⁶⁰+b ≡ a.5.2³⁰ + b (mod p) + # scale by 5.2²⁹ first as 5.2³⁰ does not fit in 32-bit words + const c = SecretWord 5 + const cExcess = c shl 29 + + staticFor i, 0, N1: + r[i] = a[i] + + mulDoubleAcc(r[2], r[1], r[0], a[5], cExcess) + mulDoubleAcc(r[3], r[2], r[1], a[6], cExcess) + mulDoubleAcc(r[4], r[3], r[2], a[7], cExcess) + + const bits = 130 + const excessBits = wordsRequired(bits)*WordBitWidth - bits + + # Second pass, fold everything greater than 2¹³⁰-1 + # into the lower bits + var carry, carry2: Carry + var hi = r[N1-1] shr (WordBitWidth - excessBits) + r[N1-1] = r[N1-1] and (MaxWord shr excessBits) + + # hi *= 5, with overflow stored in carry + let hi4 = hi shl 2 # Cannot overflow as we have 2 spare bits + addC(carry2, hi, hi, hi4, Carry(0)) # Use the carry bit for storing a 63/31 bit result + + # Process with actual fold + addC(carry, r[0], r[0], hi, Carry(0)) + addC(carry, r[1], r[1], SecretWord(carry2), carry) + staticFor i, 2, N1: + addC(carry, r[i], r[i], Zero, carry) + +func finalReduce_1305[N: static int](a: var Limbs[N]) = + ## Maps an input in redundant representation [0, 2¹³¹-10) + ## to the canonical representation in [0, 2¹³⁰-5) + # Algorithm: 
+ # 1. subtract p = 2¹³⁰-5 + # 2. if borrow, add back p. + when UseASM_X86_64 and a.len <= 6: + submod_asm(a, a, P1305.limbs, P1305.limbs) + else: + let underflowed = SecretBool sub(a, P1305.limbs) + discard cadd(a, P1305.limbs, underflowed) + +const BlockSize = 16 + +type Poly1305_CTX = object + acc: BigInt[130+1] # After an unreduced sum, up to 131 bits may be used + r: BigInt[124] # r is 124-bit after clamping + s: BigInt[128] + buf: array[BlockSize, byte] + msgLen: uint64 + bufIdx: uint8 + +type poly1305* = Poly1305_CTX + +func macMessageBlocks[T: byte|char]( + acc: var BigInt[130+1], + r: BigInt[124], + message: openArray[T], + blockSize = BlockSize): uint = + ## Authenticate a message block by block + ## Poly1305 block size is 16 bytes. + ## Return the number of bytes processed. + ## + ## If hashing one partial block, + ## set blocksize to the remaining bytes to process + + result = 0 + let numBlocks = int(message.len.uint div BlockSize) + if numBlocks == 0: + return 0 + + var input {.noInit.}: BigInt[130+1] + # r is 124-bit after clamping + var t{.noInit.}: BigInt[130+1+124] + + for curBlock in 0 ..< numBlocks: + # range [0, 2¹²⁸-1) + when T is byte: + input.unmarshal( + message.toOpenArray(curBlock*BlockSize, curBlock*BlockSize + BlockSize - 1), + littleEndian + ) + else: + input.unmarshal( + message.toOpenArrayByte(curBlock*BlockSize, curBlock*BlockSize + BlockSize - 1), + littleEndian + ) + input.setBit(8*blockSize) # range [2¹²⁸, 2¹²⁸+2¹²⁸-1) + acc += input # range [2¹²⁸, 2¹³⁰-1+2¹²⁸+2¹²⁸-1) + t.prod(acc, r) # range [2²⁵⁶, (2¹²⁴-1)(2¹³⁰+2(2¹²⁸-1))) + + acc.limbs.partialReduce_1305(t.limbs) + + return BlockSize * numBlocks.uint + +func macBuffer(ctx: var Poly1305_CTX, blockSize: int) = + discard ctx.acc.macMessageBlocks( + ctx.r, ctx.buf, blockSize + ) + ctx.buf.setZero() + ctx.bufIdx = 0 + +# Public API +# ---------------------------------------------------------------- + +func init*(ctx: var Poly1305_CTX, nonReusedKey: array[32, byte]) = + ## Initialize 
Poly1305 MAC (Message Authentication Code) context. + ## nonReusedKey is a unique, non-reused pre-shared key + ## between the parties that want to authenticate messages between each other + ctx.acc.setZero() + + const clamp = BigInt[128].fromHex"0x0ffffffc0ffffffc0ffffffc0fffffff" + ctx.r.unmarshal(nonReusedKey.toOpenArray(0, 15), littleEndian) + staticFor i, 0, ctx.r.limbs.len: + ctx.r.limbs[i] = ctx.r.limbs[i] and clamp.limbs[i] + + ctx.s.unmarshal(nonReusedKey.toOpenArray(16, 31), littleEndian) + ctx.buf.setZero() + ctx.msgLen = 0 + ctx.bufIdx = 0 + +func update*[T: char|byte](ctx: var Poly1305_CTX, message: openArray[T]) = + ## Append a message to a Poly1305 authentication context. + ## for incremental Poly1305 computation + ## + ## Security note: the tail of your message might be stored + ## in an internal buffer. + ## if sensitive content is used, ensure that + ## `ctx.finish(...)` and `ctx.clear()` are called as soon as possible. + ## Additionally ensure that the message(s) passed were stored + ## in memory considered secure for your threat model. 
+ + debug: + doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len + for i in ctx.bufIdx ..< ctx.buf.len: + doAssert ctx.buf[i] == 0 + + if message.len == 0: + return + + var # Message processing state machine + cur = 0'u + bytesLeft = message.len.uint + + ctx.msgLen += bytesLeft + + if ctx.bufIdx != 0: # Previous partial update + let bufIdx = ctx.bufIdx.uint + let free = ctx.buf.sizeof().uint - bufIdx + + if free > bytesLeft: + # Enough free space, store in buffer + ctx.buf.copy(dStart = bufIdx, message, sStart = 0, len = bytesLeft) + ctx.bufIdx += bytesLeft.uint8 + return + else: + # Fill the buffer and do one Poly1305 MAC + ctx.buf.copy(dStart = bufIdx, message, sStart = 0, len = free) + ctx.macBuffer(blockSize = BlockSize) + + # Update message state for further processing + cur = free + bytesLeft -= free + + # Process n blocks (16 bytes each) + let consumed = ctx.acc.macMessageBlocks( + ctx.r, + message.toOpenArray(int cur, message.len-1), + blockSize = BlockSize + ) + cur += consumed + bytesLeft -= consumed + + if bytesLeft != 0: + # Store the tail in buffer + debug: # TODO: state machine formal verification - https://nim-lang.org/docs/drnim.html + doAssert ctx.bufIdx == 0 + doAssert cur + bytesLeft == message.len.uint + + ctx.buf.copy(dStart = 0'u, message, sStart = cur, len = bytesLeft) + ctx.bufIdx = uint8 bytesLeft + +func finish*(ctx: var Poly1305_CTX, tag: var array[16, byte]) = + ## Finalize a Poly1305 authentication + ## and output an authentication tag to the `tag` buffer + ## + ## Security note: this does not clear the internal context. + ## if sensitive content is used, use "ctx.clear()" + ## and also make sure that the message(s) passed were stored + ## in memory considered secure for your threat model. 
+ + debug: + doAssert: 0 <= ctx.bufIdx and ctx.bufIdx.int < ctx.buf.len + for i in ctx.bufIdx ..< ctx.buf.len: + doAssert ctx.buf[i] == 0 + + if ctx.bufIdx != 0: + ctx.macBuffer(blockSize = ctx.bufIdx.int) + + # Input is only partially reduced to [0, 2¹³⁰) + # Map it to [0, 2¹³⁰-5) + ctx.acc.limbs.finalReduce_1305() + + # Starting from now, we only care about the 128 least significant bits + var acc128{.noInit.}: BigInt[128] + acc128.copyTruncatedFrom(ctx.acc) + acc128 += ctx.s + + tag.marshal(acc128, littleEndian) + + debug: + doAssert ctx.bufIdx == 0 + for i in 0 ..< ctx.buf.len: + doAssert ctx.buf[i] == 0 + +func clear*(ctx: var Poly1305_CTX) = + ## Clear the context internal buffers + # TODO: ensure compiler cannot optimize the code away + ctx.acc.setZero() + ctx.r.setZero() + ctx.s.setZero() + ctx.buf.setZero() + ctx.msgLen = 0 + ctx.bufIdx = 0 + +func auth*[T: char|byte]( + _: type poly1305, + tag: var array[16, byte], + message: openArray[T], + nonReusedKey: array[32, byte], + clearMem = false) = + ## Produce an authentication tag from a message + ## and a preshared unique non-reused secret key + + var ctx {.noInit.}: poly1305 + ctx.init(nonReusedKey) + ctx.update(message) + ctx.finish(tag) + + if clearMem: + ctx.clear() + +func auth*[T: char|byte]( + _: type poly1305, + message: openArray[T], + nonReusedKey: array[32, byte], + clearMem = false): array[16, byte]{.noInit.}= + ## Produce an authentication tag from a message + ## and a preshared unique non-reused secret key + poly1305.auth(result, message, nonReusedKey, clearMem) diff --git a/constantine/math/arithmetic/bigints.nim b/constantine/math/arithmetic/bigints.nim index 0dae80c..e99210d 100644 --- a/constantine/math/arithmetic/bigints.nim +++ b/constantine/math/arithmetic/bigints.nim @@ -344,6 +344,16 @@ func bit0*(a: BigInt): Ct[uint8] = ## Access the least significant bit ct(a.limbs[0] and One, uint8) +func setBit*[bits: static int](a: var BigInt[bits], index: int) = + ## Set an individual bit of `a` 
to 1. + ## This has no effect if it is already 1 + const SlotShift = log2_vartime(WordBitWidth.uint32) + const SelectMask = WordBitWidth - 1 + + let slot = a.limbs[index shr SlotShift].addr + let shifted = One shl (index and SelectMask) + slot[] = slot[] or shifted + # Multiplication by small cosntants # ------------------------------------------------------------ diff --git a/constantine/platforms/abstractions.nim b/constantine/platforms/abstractions.nim index c5a9c50..adf07cc 100644 --- a/constantine/platforms/abstractions.nim +++ b/constantine/platforms/abstractions.nim @@ -56,14 +56,4 @@ const # We need to support register spills for large limbs const CttASM {.booldefine.} = true const UseASM_X86_32* = CttASM and X86 and GCC_Compatible -const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32 - -# ############################################################ -# -# Instrumentation -# -# ############################################################ - -template debug*(body: untyped): untyped = - when defined(debugConstantine): - body +const UseASM_X86_64* = WordBitWidth == 64 and UseASM_X86_32 \ No newline at end of file diff --git a/constantine/platforms/compilers/extended_precision.nim b/constantine/platforms/compilers/extended_precision.nim index 117a551..02bd8de 100644 --- a/constantine/platforms/compilers/extended_precision.nim +++ b/constantine/platforms/compilers/extended_precision.nim @@ -94,37 +94,6 @@ when sizeof(int) == 8: # # ############################################################ -func mulDoubleAdd2*[T: Ct[uint32]|Ct[uint64]](r2: var Carry, r1, r0: var T, a, b, c: T, dHi: Carry, dLo: T) {.inline.} = - ## (r2, r1, r0) <- 2*a*b + c + (dHi, dLo) - ## with r = (r2, r1, r0) a triple-word number - ## and d = (dHi, dLo) a double-word number - ## r2 and dHi are carries, either 0 or 1 - - var carry: Carry - - # (r1, r0) <- a*b - # Note: 0xFFFFFFFF_FFFFFFFF² -> (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001) - mul(r1, r0, a, b) - - # (r2, r1, r0) <- 
2*a*b - # Then (hi: 0xFFFFFFFF_FFFFFFFE, lo: 0x00000000_00000001) * 2 - # (carry: 1, hi: 0xFFFFFFFF_FFFFFFFC, lo: 0x00000000_00000002) - addC(carry, r0, r0, r0, Carry(0)) - addC(r2, r1, r1, r1, carry) - - # (r1, r0) <- (r1, r0) + c - # Adding any uint64 cannot overflow into r2 for example Adding 2^64-1 - # (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001) - addC(carry, r0, r0, c, Carry(0)) - addC(carry, r1, r1, T(0), carry) - - # (r1, r0) <- (r1, r0) + (dHi, dLo) with dHi a carry (previous limb r2) - # (dHi, dLo) is at most (dhi: 1, dlo: 0xFFFFFFFF_FFFFFFFF) - # summing into (carry: 1, hi: 0xFFFFFFFF_FFFFFFFD, lo: 0x00000000_00000001) - # result at most in (carry: 1, hi: 0xFFFFFFFF_FFFFFFFF, lo: 0x00000000_00000000) - addC(carry, r0, r0, dLo, Carry(0)) - addC(carry, r1, r1, T(dHi), carry) - func mulAcc*[T: Ct[uint32]|Ct[uint64]](t, u, v: var T, a, b: T) {.inline.} = ## (t, u, v) <- (t, u, v) + a * b var UV: array[2, T] diff --git a/constantine/platforms/constant_time/ct_routines.nim b/constantine/platforms/constant_time/ct_routines.nim index 106a898..3d4764c 100644 --- a/constantine/platforms/constant_time/ct_routines.nim +++ b/constantine/platforms/constant_time/ct_routines.nim @@ -104,6 +104,11 @@ template `*`*[T: Ct](x, y: T): T = # but this is not always true, especially on ARMv7 and ARMv9 fmap(x, `*`, y) +template `*=`*[T: Ct](x, y: T) = + # Warning ⚠️ : We assume that mul hardware multiplication is constant time + # but this is not always true, especially on ARMv7 and ARMv9 + fmapAsgn(x, `*=`, y) + # We don't implement div/mod as we can't assume the hardware implementation # is constant-time diff --git a/constantine/platforms/primitives.nim b/constantine/platforms/primitives.nim index 78ed928..06cef87 100644 --- a/constantine/platforms/primitives.nim +++ b/constantine/platforms/primitives.nim @@ -33,3 +33,41 @@ export when X86 and GCC_Compatible: import isa/[cpuinfo_x86, macro_assembler_x86] export cpuinfo_x86, macro_assembler_x86 + +# 
############################################################ +# +# Instrumentation +# +# ############################################################ + +template debug*(body: untyped): untyped = + when defined(debugConstantine): + body + +# ############################################################ +# +# Buffers +# +# ############################################################ + +func setZero*[N](a: var array[N, SomeNumber]){.inline.} = + for i in 0 ..< a.len: + a[i] = 0 + +func copy*[N: static int, T: byte|char]( + dst: var array[N, byte], + dStart: SomeInteger, + src: openArray[T], + sStart: SomeInteger, + len: SomeInteger + ) {.inline.} = + ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len] + ## Unlike the standard library, this cannot throw + ## even a defect. + ## It also handles copy of char into byte arrays + debug: + doAssert 0 <= dStart and dStart+len <= dst.len.uint, "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len + doAssert 0 <= sStart and sStart+len <= src.len.uint, "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len + + for i in 0 ..< len: + dst[dStart + i] = byte src[sStart + i] \ No newline at end of file diff --git a/tests/t_mac_poly1305.nim b/tests/t_mac_poly1305.nim new file mode 100644 index 0000000..d556887 --- /dev/null +++ b/tests/t_mac_poly1305.nim @@ -0,0 +1,31 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +import + std/unittest, + ../constantine/mac/mac_poly1305 + +suite "[Message Authentication Code] Poly1305": + test "Test vector 1 - RFC8439": + let ikm = [ + byte 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, + 0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8, + 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd, + 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b + ] + let message = "Cryptographic Forum Research Group" + + let expectedTag = [ + byte 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, + 0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9 + ] + + var tag: array[16, byte] + poly1305.auth(tag, message, ikm) + + doAssert tag == expectedTag \ No newline at end of file