From 245e5faa6d341c1c095f356cf961ab4e338d4e17 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 29 Nov 2022 16:22:23 -0800 Subject: [PATCH] fixes galore --- .../cpu/kernel/asm/hash/blake/compression.asm | 175 ++++++++++++------ .../cpu/kernel/asm/hash/blake/g_functions.asm | 2 +- .../kernel/asm/hash/blake/permutations.asm | 24 ++- evm/src/cpu/kernel/asm/hash/blake/store.asm | 8 +- evm/src/cpu/kernel/asm/hash/blake/util.asm | 11 +- evm/src/cpu/kernel/tests/hash.rs | 95 ++++++---- 6 files changed, 221 insertions(+), 94 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index c61aecee..c0fdff9f 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -21,8 +21,8 @@ %mload_kernel_general // stack: num_blocks %mul_const(128) - %increment - // stack: num_bytes+1 + %add_const(2) + // stack: num_bytes+2 %endmacro // Address where the working version of the compression internal state is stored. @@ -56,41 +56,108 @@ // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... %endmacro +%macro invert_bytes_blake_word + // stack: word, ... + DUP1 + %and_const(0xff) + %shl_const(56) + SWAP1 + // stack: word, first_byte, ... + DUP1 + %shr_const(8) + %and_const(0xff) + %shl_const(48) + SWAP1 + // stack: word, second_byte, first_byte, ... 
+ DUP1 + %shr_const(16) + %and_const(0xff) + %shl_const(40) + SWAP1 + DUP1 + %shr_const(24) + %and_const(0xff) + %shl_const(32) + SWAP1 + DUP1 + %shr_const(32) + %and_const(0xff) + %shl_const(24) + SWAP1 + DUP1 + %shr_const(40) + %and_const(0xff) + %shl_const(16) + SWAP1 + DUP1 + %shr_const(48) + %and_const(0xff) + %shl_const(8) + SWAP1 + %shr_const(56) + %and_const(0xff) + %rep 7 + OR + %endrep +%endmacro + global blake_compression: // stack: retdest - %stack () -> (0, 0, 0) - // stack: cur_block = 0, t_0 = 0, t_1 = 0, retdest + PUSH 0 + // stack: cur_block = 0, retdest %blake_initial_hash_value - // stack: h_0, ..., h_7, cur_block, t_0, t_1, retdest + // stack: h_0, ..., h_7, cur_block, retdest %blake_hash_value_addr - // stack: addr, h_0, ..., h_7, cur_block, t_0, t_1, retdest + // stack: addr, h_0, ..., h_7, cur_block, retdest %rep 8 SWAP1 DUP2 %mstore_kernel_general %increment %endrep - // stack: addr, cur_block, t_0, t_1, retdest + // stack: addr, cur_block, retdest POP - // stack: cur_block, t_0, t_1, retdest + // stack: cur_block, retdest compression_loop: - // stack: cur_block, t_0, t_1, retdest + // stack: cur_block, retdest PUSH 0 %mload_kernel_general - // stack: num_blocks, cur_block, t_0, t_1, retdest + // stack: num_blocks, cur_block, retdest %decrement - // stack: num_blocks - 1, cur_block, t_0, t_1, retdest + // stack: num_blocks - 1, cur_block, retdest DUP2 - // stack: cur_block, num_blocks - 1, cur_block, t_0, t_1, retdest + // stack: cur_block, num_blocks - 1, cur_block, retdest EQ - // stack: is_last_block, cur_block, t_0, t_1, retdest + // stack: is_last_block, cur_block, retdest SWAP1 - // stack: cur_block, is_last_block, t_0, t_1, retdest + // stack: cur_block, is_last_block, retdest + PUSH 1 + %mload_kernel_general + // stack: num_bytes, cur_block, is_last_block, retdest + DUP3 + // stack: is_last_block, num_bytes, cur_block, is_last_block, retdest + MUL + // stack: is_last_block * num_bytes, cur_block, is_last_block, retdest + DUP2 + // 
stack: cur_block, is_last_block * num_bytes, cur_block, is_last_block, retdest %mul_const(128) - %increment - // stack: cur_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + DUP4 + // stack: is_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + ISZERO + // stack: not_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + MUL + // stack: not_last_block * (cur_block * 128), is_last_block * num_bytes, cur_block, is_last_block, retdest + ADD + // stack: t = not_last_block * (cur_block * 128) + is_last_block * num_bytes, cur_block, is_last_block, retdest + SWAP1 + // stack: cur_block, t, is_last_block, retdest + // stack: cur_block, t, is_last_block, retdest + %mul_const(128) + %add_const(2) + // stack: cur_block_start_byte, t, is_last_block, retdest %blake_message_addr - // stack: message_addr, cur_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: message_addr, cur_block_start_byte, t, is_last_block, retdest %rep 16 // stack: cur_message_addr, cur_block_byte, ... DUP2 @@ -105,19 +172,19 @@ compression_loop: // stack: cur_message_addr + 1, cur_block_byte, ... SWAP1 // stack: cur_block_byte, cur_message_addr + 1, ... - %add_const(64) - // stack: cur_block_byte + 64, cur_message_addr + 1, ... + %add_const(8) + // stack: cur_block_byte + 8, cur_message_addr + 1, ... SWAP1 - // stack: cur_message_addr + 1, cur_block_byte + 64, ... + // stack: cur_message_addr + 1, cur_block_byte + 8, ... 
%endrep - // stack: end_message_addr, end_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: end_message_addr, end_block_start_byte, t, is_last_block, retdest POP POP - // stack: is_last_block, t_0, t_1, retdest + // stack: t, is_last_block, retdest + SWAP1 + // stack: is_last_block, t, retdest %mul_const(0xFFFFFFFFFFFFFFFF) - // stack: invert_if_last_block, t_0, t_1, retdest - %stack (l, t0, t1) -> (t0, t1, l, 0) - // stack: t_0, t_1, invert_if_last_block, 0, retdest + // stack: invert_if_last_block, t, retdest %blake_hash_value_addr %add_const(7) %rep 8 @@ -130,11 +197,11 @@ compression_loop: // stack: addr, val, ... %decrement %endrep - // stack: addr, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: addr, h_0, ..., h_7, invert_if_last_block, t, retdest POP - // stack: h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: h_0, ..., h_7, invert_if_last_block, t, retdest %blake_internal_state_addr - // stack: start, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: start, h_0, ..., h_7, invert_if_last_block, t, retdest // First eight words of compression state: current state h_0, ..., h_7. %rep 8 SWAP1 @@ -142,9 +209,9 @@ compression_loop: %mstore_kernel_general %increment %endrep - // stack: start + 8, t_0, t_1, invert_if_last_block, 0, retdest + // stack: start + 8, invert_if_last_block, t, retdest PUSH 0 - // stack: 0, start + 8, t_0, t_1, invert_if_last_block, 0, retdest + // stack: 0, start + 8, invert_if_last_block, t, retdest %rep 4 // stack: i, loc, ... DUP2 @@ -162,8 +229,22 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1,... %endrep - // stack: 4, start + 12, t_0, t_1, invert_if_last_block, 0, retdest - // XOR the values (t_0, t_1, invert_if, 0) into the last four IV values. 
+ // stack: 4, start + 12, invert_if_last_block, t, retdest + %stack (i, loc, inv, t) -> (t, t, i, loc, inv) + // stack: t, t, 4, start + 12, invert_if_last_block, retdest + %shr_const(64) + // stack: t >> 64, t, 4, start + 12, invert_if_last_block, retdest + SWAP1 + // stack: t, t >> 64, 4, start + 12, invert_if_last_block, retdest + PUSH 1 + %shl_const(64) + // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, retdest + SWAP1 + MOD + // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, retdest + %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) + // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, retdest + // XOR the values (t % 2**64, t >> 64, invert_if, 0) into the last four IV values. %rep 4 // stack: i, loc, val, next_val,... %stack (i, loc, val) -> (i, val, loc, i, loc) @@ -182,24 +263,6 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep - - - - %blake_internal_state_addr - %add_const(15) - %rep 16 - // stack: addr, ... - DUP1 - // stack: addr, addr, ... - %mload_kernel_general - // stack: val, addr, ... - SWAP1 - // stack: addr, val, ... 
- %decrement - %endrep - POP - STOP - // stack: 8, loc + 16, retdest POP POP @@ -227,13 +290,21 @@ compression_loop: POP // stack: retdest %blake_generate_new_hash_value(7) + %invert_bytes_blake_word %blake_generate_new_hash_value(6) + %invert_bytes_blake_word %blake_generate_new_hash_value(5) + %invert_bytes_blake_word %blake_generate_new_hash_value(4) + %invert_bytes_blake_word %blake_generate_new_hash_value(3) + %invert_bytes_blake_word %blake_generate_new_hash_value(2) + %invert_bytes_blake_word %blake_generate_new_hash_value(1) + %invert_bytes_blake_word %blake_generate_new_hash_value(0) + %invert_bytes_blake_word // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest %shl_const(64) OR @@ -250,7 +321,7 @@ compression_loop: OR %shl_const(64) OR - // stack: hash_first = h_4' || h_5' || h_6' || h_7', hash_second = h_0' || h_1' || h_2' || h_3', retdest - SWAP2 + // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest + %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index 5feaa403..93b61875 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -39,7 +39,7 @@ XOR %rotr_64(32) // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start - %stack (top: 3, vd) -> (top) + %stack (top: 4, vd) -> (top) // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start %stack (d, a, b, c) -> (c, d, a, b, d) // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm index da278ec2..452bb100 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ 
-1,3 +1,21 @@ +global permutation_0_constants: + BYTES 0 + BYTES 1 + BYTES 2 + BYTES 3 + BYTES 4 + BYTES 5 + BYTES 6 + BYTES 7 + BYTES 8 + BYTES 9 + BYTES 10 + BYTES 11 + BYTES 12 + BYTES 13 + BYTES 14 + BYTES 15 + global permutation_1_constants: BYTES 14 BYTES 10 @@ -162,13 +180,15 @@ global permutation_9_constants: %macro blake_permutation // stack: round, i - PUSH permutation_1_constants - // stack: permutation_1_constants, round, i + PUSH permutation_0_constants + // stack: permutation_0_constants, round, i SWAP1 // stack: round, permutation_1_constants, i %mod_const(10) + // stack: round % 10, permutation_0_constants, i %mul_const(16) ADD + // stack: permutation_(round)_constants, i ADD %mload_kernel_code %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/store.asm b/evm/src/cpu/kernel/asm/hash/blake/store.asm index 28cfa187..448a854b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/store.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/store.asm @@ -12,8 +12,14 @@ global blake_store: // stack: addr=0, num_blocks, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest PUSH 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest + // stack: 1, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 2 + // stack: addr=2, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest store_loop: // stack: addr, counter, x[num_bytes-counter], ... 
, x[num_bytes-1], retdest DUP2 diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm index 04e29ca9..61b731b6 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -2,14 +2,15 @@ %macro mload_blake_word // stack: offset DUP1 - %mload_kernel_general_u32 - // stack: hi, offset - %shl_const(32) - // stack: hi << 32, offset + %mload_kernel_general_u32_LE + // stack: lo, offset SWAP1 - // stack: offset, hi << 32 + // stack: offset, lo %add_const(4) %mload_kernel_general_u32 + // stack: hi, lo + %shl_const(32) + // stack: hi << 32, lo OR // stack: (hi << 32) | lo %endmacro diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 32853149..3481fae5 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -41,7 +41,9 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 1, 2, 3 + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, ] } @@ -54,62 +56,89 @@ fn make_input_stack(message: Vec) -> Vec { initial_stack } -fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { +fn test_hash_256(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { // Make the input. - // let message_random = make_random_input(); + let message_random = make_random_input(); let message_custom = make_custom_input(); - // dbg!(message_random.clone()); - // Hash the message using a standard implementation. 
- // // let expected_random = standard_implementation(message_random.clone()); + let expected_random = standard_implementation(message_random.clone()); let expected_custom = standard_implementation(message_custom.clone()); - dbg!(expected_custom); - // Load the message onto the stack. - // // let initial_stack_random = make_input_stack(message_random); + let initial_stack_random = make_input_stack(message_random); let initial_stack_custom = make_input_stack(message_custom); - // dbg!(initial_stack_random.clone()); - // Make the kernel. let kernel_function = KERNEL.global_labels[hash_fn_label]; // Run the kernel code. - // // let result_random = run_interpreter(kernel_function, initial_stack_random)?; + let result_random = run_interpreter(kernel_function, initial_stack_random)?; let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; - dbg!(result_custom.stack()); - // Extract the final output. - // let actual_random = result_random.stack()[0]; - let actual_custom_first = result_custom.stack()[0]; - let actual_custom_second = result_custom.stack()[1]; - let mut actual_custom = U512::from(actual_custom_first); - actual_custom *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); - actual_custom += U512::from(actual_custom_second); - - dbg!(actual_custom); + let actual_random = result_random.stack()[0]; + let actual_custom = result_custom.stack()[0]; // Check that the result is correct. 
- // assert_eq!(expected_random, actual_random); - // assert_eq!(expected_custom, actual_custom); + assert_eq!(expected_random, actual_random); + assert_eq!(expected_custom, actual_custom); Ok(()) } -// #[test] -// fn test_sha2() -> Result<()> { -// test_hash("sha2", &sha2) -// } +fn combine_u256s(hi: U256, lo: U256) -> U512 { + let mut result = U512::from(hi); + result *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); + result += U512::from(lo); + result +} -// #[test] -// fn test_ripemd() -> Result<()> { -// test_hash("ripemd_stack", &ripemd) -// } +fn test_hash_512(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { + // Make the input. + let message_random = make_random_input(); + let message_custom = make_custom_input(); + + // Hash the message using a standard implementation. + let expected_random = standard_implementation(message_random.clone()); + let expected_custom = standard_implementation(message_custom.clone()); + + // Load the message onto the stack. + let initial_stack_random = make_input_stack(message_random); + let initial_stack_custom = make_input_stack(message_custom); + + // Make the kernel. + let kernel_function = KERNEL.global_labels[hash_fn_label]; + + // Run the kernel code. + let result_random = run_interpreter(kernel_function, initial_stack_random)?; + let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; + + let random_stack = result_random.stack(); + let custom_stack = result_custom.stack(); + + // Extract the final output. + let actual_random = combine_u256s(random_stack[0], random_stack[1]); + let actual_custom = combine_u256s(custom_stack[0], custom_stack[1]); + + // Check that the result is correct. 
+ // assert_eq!(expected_random, actual_random); + assert_eq!(expected_custom, actual_custom); + + Ok(()) +} + +#[test] +fn test_sha2() -> Result<()> { + test_hash_256("sha2", &sha2) +} + +#[test] +fn test_ripemd() -> Result<()> { + test_hash_256("ripemd_stack", &ripemd) +} #[test] fn test_blake() -> Result<()> { - test_hash("blake", &blake2b) + test_hash_512("blake", &blake2b) }