From 8fa5b481673c68bbe9d01cdb1f155a3c197fa564 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Thu, 6 Jun 2024 22:38:17 +0200 Subject: [PATCH] double keccak speed https://github.com/nim-lang/Nim/issues/23688 causes the implementation to fall into the `when nimvm` versions of THETA1, CHI etc - these operate on openArray and get interspersed with range checks and other random inefficiencies. This fix doubles the speed of the 256-bit keccak. --- nimcrypto/keccak.nim | 103 ++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 51 deletions(-) diff --git a/nimcrypto/keccak.nim b/nimcrypto/keccak.nim index 0b86a23..c803dc2 100644 --- a/nimcrypto/keccak.nim +++ b/nimcrypto/keccak.nim @@ -57,29 +57,30 @@ type # This difference in implementation was made because Nim VM do not support more # then 256 registers and so it is not enough for it to perform round in # template. +# See also https://github.com/nim-lang/Nim/issues/23688 when nimvm: - proc THETA1(a: var openArray[uint64], b: openArray[uint64], - c: int) {.inline.} = + proc THETA1V(a: var openArray[uint64], b: openArray[uint64], + c: int) {.inline.} = a[c] = b[c] xor b[c + 5] xor b[c + 10] xor b[c + 15] xor b[c + 20] - proc THETA2(a: var uint64, b: openArray[uint64], c: int) {.inline.} = + proc THETA2V(a: var uint64, b: openArray[uint64], c: int) {.inline.} = a = b[(c + 4) mod 5] xor ROL(uint64(b[(c + 1) mod 5]), 1) - proc THETA3(a: var openArray[uint64], b: int, c: uint64) {.inline.} = + proc THETA3V(a: var openArray[uint64], b: int, c: uint64) {.inline.} = a[b] = a[b] xor c a[b + 5] = a[b + 5] xor c a[b + 10] = a[b + 10] xor c a[b + 15] = a[b + 15] xor c a[b + 20] = a[b + 20] xor c - proc RHOPI(a: var openArray[uint64], b: var openArray[uint64], c: var uint64, + proc RHOPIV(a: var openArray[uint64], b: var openArray[uint64], c: var uint64, d, e: int) {.inline.} = a[0] = b[d] b[d] = ROL(c, e) c = uint64(a[0]) - proc CHI(a: var openArray[uint64], b: var openArray[uint64], - c: int) {.inline.} = + proc CHIV(a: var openArray[uint64], b: var openArray[uint64], + c: int) {.inline.} = a[0] = b[c] a[1] = b[c + 1] a[2] = b[c + 2] @@ -94,54 +95,54 @@ when nimvm: proc KECCAKROUNDP(a: var openArray[uint64], b: var openArray[uint64], c: var uint64, r: int) {.inline.} = - THETA1(b, a, 0) - THETA1(b, a, 1) - THETA1(b, a, 2) - THETA1(b, a, 3) - THETA1(b, a, 4) + THETA1V(b, a, 0) + THETA1V(b, a, 1) + THETA1V(b, a, 2) + THETA1V(b, a, 3) + THETA1V(b, a, 4) - THETA2(c, b, 0) - THETA3(a, 0, c) - THETA2(c, b, 1) - THETA3(a, 1, c) - THETA2(c, b, 2) - THETA3(a, 2, c) - THETA2(c, b, 3) - THETA3(a, 3, c) - THETA2(c, b, 4) - THETA3(a, 4, c) + THETA2V(c, b, 0) + THETA3V(a, 0, c) + THETA2V(c, b, 1) + THETA3V(a, 1, c) + THETA2V(c, b, 2) + THETA3V(a, 2, c) + THETA2V(c, b, 3) + THETA3V(a, 3, c) + THETA2V(c, b, 4) + THETA3V(a, 4, c) c = a[1] - RHOPI(b, a, c, 10, 1) - RHOPI(b, a, c, 7, 3) - RHOPI(b, a, c, 11, 6) - RHOPI(b, a, c, 17, 10) - RHOPI(b, a, c, 18, 15) - RHOPI(b, a, c, 3, 21) - RHOPI(b, a, c, 5, 28) - RHOPI(b, a, c, 16, 36) - RHOPI(b, a, c, 8, 45) - RHOPI(b, a, c, 21, 55) - RHOPI(b, a, c, 24, 2) - RHOPI(b, a, c, 4, 14) - RHOPI(b, a, c, 15, 27) - RHOPI(b, a, c, 23, 41) - RHOPI(b, a, c, 19, 56) - RHOPI(b, a, c, 13, 8) - RHOPI(b, a, c, 12, 25) - RHOPI(b, a, c, 2, 43) - RHOPI(b, a, c, 20, 62) - RHOPI(b, a, c, 14, 18) - RHOPI(b, a, c, 22, 39) - RHOPI(b, a, c, 9, 61) - RHOPI(b, a, c, 6, 20) - RHOPI(b, a, c, 1, 44) + RHOPIV(b, a, c, 10, 1) + RHOPIV(b, a, c, 7, 3) + RHOPIV(b, a, c, 11, 6) + RHOPIV(b, a, c, 17, 10) + RHOPIV(b, a, c, 18, 15) + RHOPIV(b, a, c, 3, 21) + RHOPIV(b, a, c, 5, 28) + RHOPIV(b, a, c, 16, 36) + RHOPIV(b, a, c, 8, 45) + RHOPIV(b, a, c, 21, 55) + RHOPIV(b, a, c, 24, 2) + RHOPIV(b, a, c, 4, 14) + RHOPIV(b, a, c, 15, 27) + RHOPIV(b, a, c, 23, 41) + RHOPIV(b, a, c, 19, 56) + RHOPIV(b, a, c, 13, 8) + RHOPIV(b, a, c, 12, 25) + RHOPIV(b, a, c, 2, 43) + RHOPIV(b, a, c, 20, 62) + RHOPIV(b, a, c, 14, 18) + RHOPIV(b, a, c, 22, 39) + RHOPIV(b, a, c, 9, 61) + RHOPIV(b, a, c, 6, 20) + RHOPIV(b, a, c, 1, 44) - CHI(b, a, 0) - CHI(b, a, 5) - CHI(b, a, 10) - CHI(b, a, 15) - CHI(b, a, 20) + CHIV(b, a, 0) + CHIV(b, a, 5) + CHIV(b, a, 10) + CHIV(b, a, 15) + CHIV(b, a, 20) a[0] = a[0] xor RNDC[r]