Merge pull request #904 from mir-protocol/optimize-blake2b

Optimize blake2b
This commit is contained in:
Nicholas Ward 2023-03-15 18:32:50 -07:00 committed by GitHub
commit 35fb14992f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 186 additions and 88 deletions

View File

@ -1,19 +1,3 @@
// Load the initial hash value (the IV, but with params XOR'd into the first word).
// The IV words are pushed in descending index order (7 down to 0) so that word 0
// ends up on top of the stack. Eight words are left on top of whatever the stack
// held before the macro was invoked.
%macro blake2b_initial_hash_value
%blake2b_iv_i(7)
%blake2b_iv_i(6)
%blake2b_iv_i(5)
%blake2b_iv_i(4)
%blake2b_iv_i(3)
%blake2b_iv_i(2)
%blake2b_iv_i(1)
// stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7
// The params word is pushed before IV_0 so the XOR below combines exactly those two.
// NOTE(review): the leading 0x0101 bytes are presumably fanout = depth = 1 from the
// BLAKE2 parameter block -- confirm against RFC 7693.
PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40
%blake2b_iv_i(0)
XOR
// stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7
%endmacro
// Address where the working version of the hash value is stored.
%macro blake2b_hash_value_addr
PUSH 0

View File

@ -2,7 +2,9 @@ global blake2b_compression:
// stack: retdest
PUSH 0
// stack: cur_block = 0, retdest
%blake2b_initial_hash_value
PUSH compression_loop
// stack: compression_loop, cur_block, retdest
%jump(blake2b_initial_hash_value)
compression_loop:
// stack: h_0, ..., h_7, cur_block, retdest
@ -181,40 +183,20 @@ compression_loop:
POP
POP
// stack: cur_block, retdest
%blake2b_internal_state_addr
// stack: start, cur_block, retdest
PUSH 0
// stack: round=0, start, cur_block, retdest
// Run 12 rounds of G functions.
%rep 12
// stack: round, start, cur_block, retdest
%call_blake2b_g_function(0, 4, 8, 12, 0, 1)
%call_blake2b_g_function(1, 5, 9, 13, 2, 3)
%call_blake2b_g_function(2, 6, 10, 14, 4, 5)
%call_blake2b_g_function(3, 7, 11, 15, 6, 7)
%call_blake2b_g_function(0, 5, 10, 15, 8, 9)
%call_blake2b_g_function(1, 6, 11, 12, 10, 11)
%call_blake2b_g_function(2, 7, 8, 13, 12, 13)
%call_blake2b_g_function(3, 4, 9, 14, 14, 15)
// stack: round, start, cur_block, retdest
%increment
// stack: round + 1, start, cur_block, retdest
%endrep
// stack: 12, start, cur_block, retdest
POP
POP
PUSH g_functions_return
// stack: g_functions_return, cur_block, retdest
%blake2b_internal_state_addr
// stack: start, g_functions_return, cur_block, retdest
%jump(run_12_rounds_g_function)
g_functions_return:
// Finalize hash value.
// stack: cur_block, retdest
%blake2b_generate_new_hash_value(7)
%blake2b_generate_new_hash_value(6)
%blake2b_generate_new_hash_value(5)
%blake2b_generate_new_hash_value(4)
%blake2b_generate_new_hash_value(3)
%blake2b_generate_new_hash_value(2)
%blake2b_generate_new_hash_value(1)
%blake2b_generate_new_hash_value(0)
PUSH hash_generate_return
// stack: hash_generate_return, cur_block, retdest
%jump(blake2b_generate_all_hash_values)
hash_generate_return:
// stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest
DUP9
// stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest

View File

@ -3,28 +3,38 @@
// are in the range 0..16) in the internal state.
// The internal state is stored in memory starting at the address start.
// stack: a, b, c, d, x, y, start
%stack (indices: 4) -> (indices, indices)
DUP4
DUP4
DUP4
DUP4
// stack: a, b, c, d, a, b, c, d, x, y, start
DUP11
// stack: start, a, b, c, d, a, b, c, d, x, y, start
%stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start)
// stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start
ADD
%mload_kernel_general
// stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start
%stack (vd, remaining: 6) -> (remaining, vd)
// stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start
// stack: v[a], b, c, d, a, b, c, d, x, y, start
SWAP1
// stack: b, v[a], c, d, a, b, c, d, x, y, start
DUP11
// stack: start, b, v[a], c, d, a, b, c, d, x, y, start
ADD
%mload_kernel_general
%stack (vc, remaining: 4) -> (remaining, vc)
// stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start
// stack: v[b], v[a], c, d, a, b, c, d, x, y, start
SWAP2
// stack: c, v[a], v[b], d, a, b, c, d, x, y, start
DUP11
// stack: start, c, v[a], v[b], d, a, b, c, d, x, y, start
ADD
%mload_kernel_general
// stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start
%stack (vb, remaining: 2) -> (remaining, vb)
// stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start
// stack: v[c], v[a], v[b], d, a, b, c, d, x, y, start
SWAP3
// stack: d, v[a], v[b], v[c], a, b, c, d, x, y, start
DUP11
// stack: start, d, v[a], v[b], v[c], a, b, c, d, x, y, start
ADD
%mload_kernel_general
// stack: v[d], v[a], v[b], v[c], a, b, c, d, x, y, start
%stack (vd, vs: 3) -> (vs, vd)
// stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start
DUP2
// stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start
@ -124,3 +134,45 @@
%blake2b_g_function
// stack: round, start
%endmacro
// Runs a single round: eight invocations of the G function via
// %call_blake2b_g_function, then returns to retdest.
// Pre stack:  round, start, retdest
// Post stack: round, start            (control transferred to retdest)
// NOTE(review): the first four macro arguments look like the state indices
// a, b, c, d and the last two like the indices of the message words x, y;
// the macro definition is not fully visible here -- confirm.
run_g_function_round:
// stack: round, start, retdest
%call_blake2b_g_function(0, 4, 8, 12, 0, 1)
%call_blake2b_g_function(1, 5, 9, 13, 2, 3)
%call_blake2b_g_function(2, 6, 10, 14, 4, 5)
%call_blake2b_g_function(3, 7, 11, 15, 6, 7)
%call_blake2b_g_function(0, 5, 10, 15, 8, 9)
%call_blake2b_g_function(1, 6, 11, 12, 10, 11)
%call_blake2b_g_function(2, 7, 8, 13, 12, 13)
%call_blake2b_g_function(3, 4, 9, 14, 14, 15)
// Each macro call leaves (round, start) on top, so the return address is still third.
// Bring it to the top while keeping round and start in order for the caller.
%stack (r, s, ret) -> (ret, r, s)
// stack: retdest, round, start
JUMP
// Runs run_g_function_round for round = 0, 1, ..., 11, then returns to retdest.
// Pre stack:  start, retdest
// Post stack: (empty)                 (control transferred to retdest)
global run_12_rounds_g_function:
// stack: start, retdest
PUSH 0
// stack: round=0, start, retdest
run_next_round_g_function:
// stack: round, start, retdest
PUSH run_next_round_g_function_return
// stack: run_next_round_g_function_return, round, start, retdest
// The two swaps below move the return label underneath (round, start), which is
// the (round, start, retdest) layout run_g_function_round expects on entry.
SWAP2
// stack: start, round, run_next_round_g_function_return, retdest
SWAP1
// stack: round, start, run_next_round_g_function_return, retdest
%jump(run_g_function_round)
run_next_round_g_function_return:
// stack: round, start, retdest
%increment
// stack: round+1, start, retdest
DUP1
// stack: round+1, round+1, start, retdest
%lt_const(12)
// stack: round+1 < 12, round+1, start, retdest
// Loop again while fewer than 12 rounds have been run.
%jumpi(run_next_round_g_function)
// stack: round+1, start, retdest
// All rounds done: drop the counter and the start address, leaving only retdest.
%pop2
// stack: retdest
JUMP

View File

@ -1,18 +1,54 @@
%macro blake2b_generate_new_hash_value(i)
// Computes one word of the new hash value, h_i' = v_(i+8) ^ v_i ^ h_i, where
// h_i is loaded from %blake2b_hash_value_addr + i and v_i, v_(i+8) are loaded
// from %blake2b_internal_state_addr + i and + i + 8 (all via %mload_kernel_general).
// Pre stack:  i, retdest
// Post stack: h_i'                    (control transferred to retdest)
blake2b_generate_new_hash_value:
// stack: i, retdest
%blake2b_hash_value_addr
%add_const($i)
// stack: addr, i, retdest
DUP2
ADD
%mload_kernel_general
// stack: h_i, ...
// stack: h_i, i, retdest
%blake2b_internal_state_addr
%add_const($i)
// stack: addr, h_i, i, retdest
DUP3
ADD
%mload_kernel_general
// stack: v_i, h_i, ...
// stack: v_i, h_i, i, retdest
%blake2b_internal_state_addr
%add_const($i)
// stack: addr, v_i, h_i, i, retdest
SWAP1
// stack: v_i, addr, h_i, i, retdest
// i is no longer needed after this load, so it is swapped up and consumed by the ADD.
SWAP3
// stack: i, addr, h_i, v_i, retdest
ADD
%add_const(8)
%mload_kernel_general
// stack: v_(i+8), v_i, h_i, ...
// stack: v_(i+8), h_i, v_i, retdest
XOR
XOR
// stack: h_i' = v_(i+8) ^ v_i ^ h_i, ...
%endmacro
// stack: h_i' = v_(i+8) ^ v_i ^ h_i, retdest
SWAP1
// stack: retdest, h_i'
JUMP
// Builds all eight new hash words by calling blake2b_generate_new_hash_value
// for i = 7 down to 0, so that h_0' ends up on top of the stack.
// Pre stack:  retdest
// Post stack: h_0', h_1', ..., h_7'   (control transferred to retdest)
global blake2b_generate_all_hash_values:
// stack: retdest
PUSH 8
// stack: i=8, retdest
blake2b_generate_hash_loop:
// On the first pass (i = 8) no hash words have been produced yet.
// stack: i, h_i', ..., h_7', retdest
%decrement
// stack: i-1, h_i', ..., h_7', retdest
PUSH blake2b_generate_hash_return
// stack: blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest
// Pass a copy of the index as the argument; the original stays below the
// return label and serves as the loop counter.
DUP2
// stack: i-1, blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest
%jump(blake2b_generate_new_hash_value)
blake2b_generate_hash_return:
// stack: h_(i-1)', i-1, h_i', ..., h_7', retdest
SWAP1
// stack: i-1, h_(i-1)', h_i', ..., h_7', retdest
DUP1
// stack: i-1, i-1, h_(i-1)', ..., h_7', retdest
// Keep looping until the counter reaches zero.
%jumpi(blake2b_generate_hash_loop)
// stack: i-1=0, h_0', ..., h_7', retdest
// Drop the exhausted counter and lift retdest above the eight hash words.
%stack (i, h: 8, ret) -> (ret, h)
// stack: retdest, h_0'...h_7'
JUMP

View File

@ -33,30 +33,63 @@ global blake2b_iv_const:
BYTES 91, 224, 205, 25
BYTES 19, 126, 33, 121
%macro blake2b_iv
// stack: i, ...
// Loads the 64-bit word IV_i from the table at blake2b_iv_const in kernel code.
// Each entry occupies 8 bytes and is read as two 32-bit loads: the word at
// offset 8 * i is shifted left by 32 and added to the word at offset 8 * i + 4.
// Pre stack:  i, retdest
// Post stack: IV_i                    (control transferred to retdest)
global blake2b_iv:
// stack: i, retdest
PUSH blake2b_iv_const
// stack: blake2b_iv_const, i, ...
// stack: blake2b_iv_const, i, retdest
SWAP1
// stack: i, blake2b_iv_const, ...
// stack: i, blake2b_iv_const, retdest
%mul_const(8)
ADD
// stack: blake2b_iv_const + 8 * i, ...
// stack: blake2b_iv_const + 8 * i, retdest
DUP1
// stack: blake2b_iv_const + 8 * i, blake2b_iv_const + 8 * i, ...
// stack: blake2b_iv_const + 8 * i, blake2b_iv_const + 8 * i, retdest
%add_const(4)
// stack: blake2b_iv_const + 8 * i + 4, blake2b_iv_const + 8 * i, ...
// stack: blake2b_iv_const + 8 * i + 4, blake2b_iv_const + 8 * i, retdest
%mload_kernel_code_u32
SWAP1
%mload_kernel_code_u32
// stack: IV_i[32:], IV_i[:32], ...
// stack: IV_i[32:], IV_i[:32], retdest
%shl_const(32)
// stack: IV_i[32:] << 32, IV_i[:32], ...
// stack: IV_i[32:] << 32, IV_i[:32], retdest
// The shifted half has its low 32 bits clear, so ADD here acts as a bitwise OR
// (assuming %mload_kernel_code_u32 yields a value below 2^32).
ADD // OR
// stack: IV_i, ...
// stack: IV_i, retdest
SWAP1
// stack: retdest, IV_i
JUMP
// Inline wrapper around the blake2b_iv routine: replaces the index on top of
// the stack with IV_i and continues with the instruction after the macro.
// Pre stack:  i, ...
// Post stack: IV_i, ...
%macro blake2b_iv
// Insert the macro-local return label below i, giving the (i, retdest) layout
// the routine expects.
%stack (i) -> (i, %%after)
%jump(blake2b_iv)
%%after:
%endmacro
// Pushes IV_i for a compile-time constant index $i.
// Pre stack:  ...
// Post stack: IV_i, ...
%macro blake2b_iv_i(i)
PUSH $i
%blake2b_iv
%endmacro
// Load the initial hash value (the IV, but with params XOR'd into the first word).
// Calls blake2b_iv for i = 7 down to 0 so that IV_0 ends up on top, XORs the
// params word into IV_0, then returns to retdest.
// Pre stack:  retdest
// Post stack: IV_0 ^ params, IV_1, ..., IV_7   (control transferred to retdest)
global blake2b_initial_hash_value:
// stack: retdest
PUSH 8
// stack: i=8, retdest
blake2b_initial_hash_loop:
// On the first pass (i = 8) no IV words have been loaded yet.
// stack: i, IV_i, ..., IV_7, retdest
%decrement
// stack: i-1, IV_i, ..., IV_7, retdest
PUSH blake2b_initial_hash_return
// stack: blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest
// Pass a copy of the index as the argument; the original stays below the
// return label and serves as the loop counter.
DUP2
// stack: i-1, blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest
%jump(blake2b_iv)
blake2b_initial_hash_return:
// stack: IV_(i-1), i-1, IV_i, ..., IV_7, retdest
SWAP1
// stack: i-1, IV_(i-1), IV_i, ..., IV_7, retdest
DUP1
// stack: i-1, i-1, IV_(i-1), ..., IV_7, retdest
// Keep looping until the counter reaches zero.
%jumpi(blake2b_initial_hash_loop)
// stack: i-1=0, IV_0, ..., IV_7, retdest
POP
// stack: IV_0, ..., IV_7, retdest
// NOTE(review): the leading 0x0101 bytes are presumably fanout = depth = 1 from the
// BLAKE2 parameter block -- confirm against RFC 7693.
PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40
XOR
// stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest
// Lift retdest above the eight words so it can be consumed by JUMP.
%stack(iv: 8, ret) -> (ret, iv)
// stack: retdest, IV_0 ^ params, IV_1, ..., IV_7
JUMP

View File

@ -58,17 +58,28 @@ global permutation_9_constants:
BYTES 15, 11, 9, 14
BYTES 3, 12, 13, 0
%macro blake2b_permutation
// stack: round, i
// Looks up entry i of the permutation table for the given round. Tables are
// 16 bytes apart starting at permutation_0_constants, and the round is reduced
// modulo 10 before indexing; the byte is read with %mload_kernel_code.
// Pre stack:  i, round, retdest
// Post stack: permutation_(round%10)_constants[i]   (control transferred to retdest)
global blake2b_permutation:
// stack: i, round, retdest
PUSH permutation_0_constants
// stack: permutation_0_constants, round, i
SWAP1
// stack: round, permutation_0_constants, i
// stack: permutation_0_constants, i, round, retdest
SWAP2
// stack: round, i, permutation_0_constants, retdest
%mod_const(10)
// stack: round % 10, permutation_0_constants, i
// stack: round % 10, i, permutation_0_constants, retdest
%mul_const(16)
ADD
// stack: permutation_(round % 10)_constants, i
ADD
%mload_kernel_code
// stack: permutation_(round%10)_constants[i], retdest
SWAP1
// stack: retdest, permutation_(round%10)_constants[i]
JUMP
// Inline wrapper around the blake2b_permutation routine: consumes (round, i)
// from the top of the stack and resumes at the instruction following the macro
// once the routine jumps back to the macro-local label.
// Pre stack:  round, i, ...
%macro blake2b_permutation
// stack: round, i
// Reorder the two arguments into the (i, round, retdest) layout the routine
// takes, with the macro-local label as the return destination.
%stack (round, i) -> (i, round, %%after)
// stack: i, round, %%after
%jump(blake2b_permutation)
%%after:
%endmacro