mirror of
https://github.com/logos-storage/plonky2.git
synced 2026-01-09 01:03:08 +00:00
241 lines
11 KiB
NASM
241 lines
11 KiB
NASM
// We put the message schedule in memory starting at 64 * num_blocks + 2.
|
|
%macro message_schedule_addr_from_num_blocks
|
|
// stack: num_blocks
|
|
%mul_const(64)
|
|
%add_const(2)
|
|
%endmacro
|
|
|
|
// Precodition: stack contains address of one message block, followed by output address
|
|
// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks
|
|
// of message schedule (in four-byte increments)
|
|
gen_message_schedule_from_block:
|
|
// stack: block_addr, output_addr, retdest
|
|
DUP1
|
|
// stack: block_addr, block_addr, output_addr, retdest
|
|
%add_const(32)
|
|
// stack: block_addr + 32, block_addr, output_addr, retdest
|
|
SWAP1
|
|
// stack: block_addr, block_addr + 32, output_addr, retdest
|
|
%mload_kernel_general_u256
|
|
// stack: block[0], block_addr + 32, output_addr, retdest
|
|
SWAP1
|
|
// stack: block_addr + 32, block[0], output_addr, retdest
|
|
%mload_kernel_general_u256
|
|
// stack: block[1], block[0], output_addr, retdest
|
|
SWAP2
|
|
// stack: output_addr, block[0], block[1], retdest
|
|
%add_const(28)
|
|
PUSH 8
|
|
// stack: counter=8, output_addr + 28, block[0], block[1], retdest
|
|
%jump(gen_message_schedule_from_block_0_loop)
|
|
gen_message_schedule_from_block_0_loop:
|
|
// Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule.
|
|
// stack: counter, output_addr, block[0], block[1], retdest
|
|
SWAP2
|
|
// stack: block[0], output_addr, counter, block[1], retdest
|
|
DUP1
|
|
// stack: block[0], block[0], output_addr, counter, block[1], retdest
|
|
%shr_const(32)
|
|
// stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest
|
|
SWAP1
|
|
// stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest
|
|
%as_u32
|
|
// stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
|
|
DUP3
|
|
// stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
|
|
%mstore_kernel_general_u32
|
|
// stack: block[0] >> 32, output_addr, counter, block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, block[0] >> 32, counter, block[1], retdest
|
|
%sub_const(4)
|
|
// stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest
|
|
SWAP1
|
|
// stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest
|
|
SWAP2
|
|
// stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest
|
|
%decrement
|
|
DUP1
|
|
ISZERO
|
|
%jumpi(gen_message_schedule_from_block_0_end)
|
|
%jump(gen_message_schedule_from_block_0_loop)
|
|
gen_message_schedule_from_block_0_end:
|
|
// stack: old counter=0, output_addr, block[0], block[1], retdest
|
|
POP
|
|
PUSH 8
|
|
// stack: counter=8, output_addr, block[0], block[1], retdest
|
|
%stack (counter, out, b0, b1) -> (out, counter, b1, b0)
|
|
// stack: output_addr, counter, block[1], block[0], retdest
|
|
%add_const(64)
|
|
// stack: output_addr + 64, counter, block[1], block[0], retdest
|
|
SWAP1
|
|
// stack: counter, output_addr + 64, block[1], block[0], retdest
|
|
gen_message_schedule_from_block_1_loop:
|
|
// Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule.
|
|
// stack: counter, output_addr, block[1], block[0], retdest
|
|
SWAP2
|
|
// stack: block[1], output_addr, counter, block[0], retdest
|
|
DUP1
|
|
// stack: block[1], block[1], output_addr, counter, block[0], retdest
|
|
%shr_const(32)
|
|
// stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest
|
|
SWAP1
|
|
// stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest
|
|
%as_u32
|
|
// stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
|
|
DUP3
|
|
// stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
|
|
%mstore_kernel_general_u32
|
|
// stack: block[1] >> 32, output_addr, counter, block[0], retdest
|
|
SWAP1
|
|
// stack: output_addr, block[1] >> 32, counter, block[0], retdest
|
|
%sub_const(4)
|
|
// stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest
|
|
SWAP1
|
|
// stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest
|
|
SWAP2
|
|
// stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest
|
|
%decrement
|
|
DUP1
|
|
ISZERO
|
|
%jumpi(gen_message_schedule_from_block_1_end)
|
|
%jump(gen_message_schedule_from_block_1_loop)
|
|
gen_message_schedule_from_block_1_end:
|
|
// stack: old counter=0, output_addr, block[1], block[0], retdest
|
|
POP
|
|
// stack: output_addr, block[0], block[1], retdest
|
|
PUSH 48
|
|
// stack: counter=48, output_addr, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, counter, block[0], block[1], retdest
|
|
%add_const(36)
|
|
// stack: output_addr + 36, counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: counter, output_addr + 36, block[0], block[1], retdest
|
|
gen_message_schedule_remaining_loop:
|
|
// Generate the next 48 chunks of the message schedule, one at a time, from prior chunks.
|
|
// stack: counter, output_addr, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, counter, block[0], block[1], retdest
|
|
DUP1
|
|
// stack: output_addr, output_addr, counter, block[0], block[1], retdest
|
|
PUSH 2
|
|
PUSH 4
|
|
MUL
|
|
SWAP1
|
|
SUB
|
|
// stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest
|
|
%mload_kernel_general_u32
|
|
// stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest
|
|
%sha2_sigma_1
|
|
// stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
DUP1
|
|
// stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
PUSH 7
|
|
PUSH 4
|
|
MUL
|
|
SWAP1
|
|
SUB
|
|
// stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
%mload_kernel_general_u32
|
|
// stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
DUP1
|
|
// stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
PUSH 15
|
|
PUSH 4
|
|
MUL
|
|
SWAP1
|
|
SUB
|
|
// stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
%mload_kernel_general_u32
|
|
// stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
%sha2_sigma_0
|
|
// stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
DUP1
|
|
// stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
PUSH 16
|
|
PUSH 4
|
|
MUL
|
|
SWAP1
|
|
SUB
|
|
// stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
%mload_kernel_general_u32
|
|
// stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
|
|
SWAP4
|
|
// stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
|
|
%add_u32
|
|
%add_u32
|
|
%add_u32
|
|
// stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
|
|
DUP2
|
|
// stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
|
|
%mstore_kernel_general_u32
|
|
// stack: output_addr, counter, block[0], block[1], retdest
|
|
%add_const(4)
|
|
// stack: output_addr + 4, counter, block[0], block[1], retdest
|
|
SWAP1
|
|
// stack: counter, output_addr + 4, block[0], block[1], retdest
|
|
%decrement
|
|
// stack: counter - 1, output_addr + 4, block[0], block[1], retdest
|
|
DUP1
|
|
ISZERO
|
|
%jumpi(gen_message_schedule_remaining_end)
|
|
%jump(gen_message_schedule_remaining_loop)
|
|
gen_message_schedule_remaining_end:
|
|
// stack: counter=0, output_addr, block[0], block[1], retdest
|
|
%pop4
|
|
JUMP
|
|
|
|
// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63]
|
|
// stack contains output_addr
|
|
// Postcondition: starting at output_addr, set of 256 bytes per block
|
|
// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments)
|
|
global sha2_gen_all_message_schedules:
|
|
// stack: output_addr, retdest
|
|
DUP1
|
|
// stack: output_addr, output_addr, retdest
|
|
PUSH 0
|
|
// stack: 0, output_addr, output_addr, retdest
|
|
%mload_kernel_general
|
|
// stack: num_blocks, output_addr, output_addr, retdest
|
|
PUSH 1
|
|
// stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest
|
|
gen_all_message_schedules_loop:
|
|
// stack: cur_addr, counter, cur_output_addr, output_addr, retdest
|
|
PUSH gen_all_message_schedules_loop_end
|
|
// stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest
|
|
DUP4
|
|
// stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
|
|
DUP3
|
|
// stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
|
|
%jump(gen_message_schedule_from_block)
|
|
gen_all_message_schedules_loop_end:
|
|
// stack: cur_addr, counter, cur_output_addr, output_addr, retdest
|
|
%add_const(64)
|
|
// stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest
|
|
SWAP1
|
|
%decrement
|
|
SWAP1
|
|
// stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest
|
|
SWAP2
|
|
%add_const(256)
|
|
SWAP2
|
|
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
|
|
DUP2
|
|
// stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
|
|
ISZERO
|
|
%jumpi(gen_all_message_schedules_end)
|
|
%jump(gen_all_message_schedules_loop)
|
|
gen_all_message_schedules_end:
|
|
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
|
|
%pop3
|
|
// stack: output_addr, retdest
|
|
%jump(sha2_compression)
|