From 24d6627a62a3c84a72c429fe7b15d001fd6a873a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 12 Dec 2022 16:38:40 -0800 Subject: [PATCH] addressed comments --- evm/src/cpu/kernel/aggregator.rs | 1 - .../kernel/asm/hash/blake2b/compression.asm | 53 ++--- .../kernel/asm/hash/blake2b/g_functions.asm | 2 +- evm/src/cpu/kernel/asm/hash/blake2b/ops.asm | 4 - .../kernel/asm/hash/blake2b/permutations.asm | 200 ++++-------------- evm/src/cpu/kernel/asm/hash/blake2b/util.asm | 47 ---- evm/src/cpu/kernel/asm/memory/core.asm | 30 ++- evm/src/cpu/kernel/asm/util/basic_macros.asm | 57 +++++ evm/src/cpu/kernel/tests/hash.rs | 5 +- 9 files changed, 156 insertions(+), 243 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/hash/blake2b/util.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index a7c01cec..f8d2860e 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -47,7 +47,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/hash/blake2b/ops.asm"), include_str!("asm/hash/blake2b/permutations.asm"), include_str!("asm/hash/blake2b/store.asm"), - include_str!("asm/hash/blake2b/util.asm"), include_str!("asm/hash/ripemd/box.asm"), include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index c821da19..e7b81072 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -67,7 +67,7 @@ compression_loop: // stack: cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_block_byte, cur_message_addr, cur_block_byte, ... - %mload_blake2b_word_from_bytes + %mload_kernel_general_u64_LE // stack: m_i, cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_message_addr, m_i, cur_message_addr, cur_block_byte, ... 
@@ -233,35 +233,40 @@ compression_loop: %jump(compression_loop) compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - PUSH 0 - // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest // Invert the bytes of each hash value. + %reverse_bytes_u64_blake + // stack: h_0'', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP1 - %invert_bytes_blake2b_word - SWAP1 - SWAP2 - %invert_bytes_blake2b_word + // stack: h_1', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_1'', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP2 + // stack: h_2', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_2'', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP3 - %invert_bytes_blake2b_word - SWAP3 - SWAP4 - %invert_bytes_blake2b_word + // stack: h_3', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_3'', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP4 + // stack: h_4', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_4'', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest SWAP5 - %invert_bytes_blake2b_word - SWAP5 - SWAP6 - %invert_bytes_blake2b_word + // stack: h_5', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_5'', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest SWAP6 + // stack: h_6', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_6'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', cur_block + 1, retdest SWAP7 - %invert_bytes_blake2b_word - SWAP7 - SWAP8 - %invert_bytes_blake2b_word - SWAP8 - POP + // stack: h_7', h_0'', 
h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_7'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest + %stack (h_7, h_s: 7) -> (h_s, h_7) + // stack: h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest // Combine hash values. %shl_const(64) @@ -270,16 +275,16 @@ compression_end: OR %shl_const(64) OR - // stack: h_0' || h_1' || h_2' || h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + // stack: h_0'' || h_1'' || h_2'' || h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest %stack (first, second: 4, cur) -> (second, first) - // stack: h_4', h_5', h_6', h_7', h_0' || h_1' || h_2' || h_3', retdest + // stack: h_4'', h_5'', h_6'', h_7'', h_0'' || h_1'' || h_2'' || h_3'', retdest %shl_const(64) OR %shl_const(64) OR %shl_const(64) OR - // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest + // stack: hash_second = h_4'' || h_5'' || h_6'' || h_7'', hash_first = h_0'' || h_1'' || h_2'' || h_3'', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second JUMP diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 23844f5d..11e879fc 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -123,4 +123,4 @@ // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start %blake2b_g_function // stack: round, start -%endmacro \ No newline at end of file +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm b/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm index e587abef..2b40db7f 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm @@ -1,7 +1,3 @@ -%macro as_u64 - %and_const(0xffffffffffffffff) -%endmacro - // 64-bit right rotation %macro rotr_64(rot) // stack: value 
diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index c5ee9ba4..d3d2b0e4 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -1,182 +1,62 @@ global permutation_0_constants: - BYTES 0 - BYTES 1 - BYTES 2 - BYTES 3 - BYTES 4 - BYTES 5 - BYTES 6 - BYTES 7 - BYTES 8 - BYTES 9 - BYTES 10 - BYTES 11 - BYTES 12 - BYTES 13 - BYTES 14 - BYTES 15 + BYTES 0, 1, 2, 3 + BYTES 4, 5, 6, 7 + BYTES 8, 9, 10, 11 + BYTES 12, 13, 14, 15 global permutation_1_constants: - BYTES 14 - BYTES 10 - BYTES 4 - BYTES 8 - BYTES 9 - BYTES 15 - BYTES 13 - BYTES 6 - BYTES 1 - BYTES 12 - BYTES 0 - BYTES 2 - BYTES 11 - BYTES 7 - BYTES 5 - BYTES 3 + BYTES 14, 10, 4, 8 + BYTES 9, 15, 13, 6 + BYTES 1, 12, 0, 2 + BYTES 11, 7, 5, 3 global permutation_2_constants: - BYTES 11 - BYTES 8 - BYTES 12 - BYTES 0 - BYTES 5 - BYTES 2 - BYTES 15 - BYTES 13 - BYTES 10 - BYTES 14 - BYTES 3 - BYTES 6 - BYTES 7 - BYTES 1 - BYTES 9 - BYTES 4 + BYTES 11, 8, 12, 0 + BYTES 5, 2, 15, 13 + BYTES 10, 14, 3, 6 + BYTES 7, 1, 9, 4 global permutation_3_constants: - BYTES 7 - BYTES 9 - BYTES 3 - BYTES 1 - BYTES 13 - BYTES 12 - BYTES 11 - BYTES 14 - BYTES 2 - BYTES 6 - BYTES 5 - BYTES 10 - BYTES 4 - BYTES 0 - BYTES 15 - BYTES 8 + BYTES 7, 9, 3, 1 + BYTES 13, 12, 11, 14 + BYTES 2, 6, 5, 10 + BYTES 4, 0, 15, 8 global permutation_4_constants: - BYTES 9 - BYTES 0 - BYTES 5 - BYTES 7 - BYTES 2 - BYTES 4 - BYTES 10 - BYTES 15 - BYTES 14 - BYTES 1 - BYTES 11 - BYTES 12 - BYTES 6 - BYTES 8 - BYTES 3 - BYTES 13 + BYTES 9, 0, 5, 7 + BYTES 2, 4, 10, 15 + BYTES 14, 1, 11, 12 + BYTES 6, 8, 3, 13 global permutation_5_constants: - BYTES 2 - BYTES 12 - BYTES 6 - BYTES 10 - BYTES 0 - BYTES 11 - BYTES 8 - BYTES 3 - BYTES 4 - BYTES 13 - BYTES 7 - BYTES 5 - BYTES 15 - BYTES 14 - BYTES 1 - BYTES 9 + BYTES 2, 12, 6, 10 + BYTES 0, 11, 8, 3 + BYTES 4, 13, 7, 5 + BYTES 15, 14, 1, 9 global permutation_6_constants: - 
BYTES 12 - BYTES 5 - BYTES 1 - BYTES 15 - BYTES 14 - BYTES 13 - BYTES 4 - BYTES 10 - BYTES 0 - BYTES 7 - BYTES 6 - BYTES 3 - BYTES 9 - BYTES 2 - BYTES 8 - BYTES 11 + BYTES 12, 5, 1, 15 + BYTES 14, 13, 4, 10 + BYTES 0, 7, 6, 3 + BYTES 9, 2, 8, 11 global permutation_7_constants: - BYTES 13 - BYTES 11 - BYTES 7 - BYTES 14 - BYTES 12 - BYTES 1 - BYTES 3 - BYTES 9 - BYTES 5 - BYTES 0 - BYTES 15 - BYTES 4 - BYTES 8 - BYTES 6 - BYTES 2 - BYTES 10 + BYTES 13, 11, 7, 14 + BYTES 12, 1, 3, 9 + BYTES 5, 0, 15, 4 + BYTES 8, 6, 2, 10 global permutation_8_constants: - BYTES 6 - BYTES 15 - BYTES 14 - BYTES 9 - BYTES 11 - BYTES 3 - BYTES 0 - BYTES 8 - BYTES 12 - BYTES 2 - BYTES 13 - BYTES 7 - BYTES 1 - BYTES 4 - BYTES 10 - BYTES 5 + BYTES 6, 15, 14, 9 + BYTES 11, 3, 0, 8 + BYTES 12, 2, 13, 7 + BYTES 1, 4, 10, 5 global permutation_9_constants: - BYTES 10 - BYTES 2 - BYTES 8 - BYTES 4 - BYTES 7 - BYTES 6 - BYTES 1 - BYTES 5 - BYTES 15 - BYTES 11 - BYTES 9 - BYTES 14 - BYTES 3 - BYTES 12 - BYTES 13 - BYTES 0 + BYTES 10, 2, 8, 4 + BYTES 7, 6, 1, 5 + BYTES 15, 11, 9, 14 + BYTES 3, 12, 13, 0 %macro blake2b_permutation // stack: round, i diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/util.asm b/evm/src/cpu/kernel/asm/hash/blake2b/util.asm deleted file mode 100644 index 7fdee98d..00000000 --- a/evm/src/cpu/kernel/asm/hash/blake2b/util.asm +++ /dev/null @@ -1,47 +0,0 @@ -// Load a 64-bit word from kernel general memory. -%macro mload_blake2b_word_from_bytes - // stack: offset - DUP1 - %mload_kernel_general_u32_LE - // stack: lo, offset - SWAP1 - // stack: offset, lo - %add_const(4) - %mload_kernel_general_u32_LE - // stack: hi, lo - %shl_const(32) - // stack: hi << 32, lo - OR - // stack: (hi << 32) | lo -%endmacro - -// Invert the order of the four bytes in a word. 
-%macro invert_four_byte_word - // stack: word - %mul_const(0x1000000010000000100) - %and_const(0xff0000ff00ff00000000ff0000) - %mod_const(0xffffffffffff) - // stack: word_inverted -%endmacro - -// Invert the order of the eight bytes in a Blake word. -%macro invert_bytes_blake2b_word - // stack: word - DUP1 - // stack: word, word - %and_const(0xffffffff) - // stack: word_lo, word - SWAP1 - // stack: word, word_lo - %shr_const(32) - // stack: word_hi, word_lo - %invert_four_byte_word - // stack: word_hi_inverted, word_lo - SWAP1 - // stack: word_lo, word_hi_inverted - %invert_four_byte_word - // stack: word_lo_inverted, word_hi_inverted - %shl_const(32) - OR - // stack: word_inverted -%endmacro diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index f6bb99b6..a979f930 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -97,7 +97,7 @@ // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 %endmacro -// Load from the kernel a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3) +// Load from the kernel a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3). %macro mload_kernel_u32_LE(segment) // stack: offset DUP1 @@ -123,6 +123,24 @@ // stack: c0 | (c1 << 8) | (c2 << 16) | (c3 << 24) %endmacro +// Load from the kernel a little-endian u64, consisting of 8 bytes +// (c_0, c_1, c_2, c_3, c_4, c_5, c_6, c_7). +%macro mload_kernel_u64_LE(segment) + // stack: offset + DUP1 + %mload_kernel_u32_LE($segment) + // stack: lo, offset + SWAP1 + // stack: offset, lo + %add_const(4) + %mload_kernel_u32_LE($segment) + // stack: hi, lo + %shl_const(32) + // stack: hi << 32, lo + OR + // stack: (hi << 32) | lo +%endmacro + // Load a u256 (big-endian) from the kernel. 
%macro mload_kernel_u256(segment) // stack: offset @@ -292,7 +310,7 @@ // stack: value %endmacro -// Load a little-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// Load a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3), // from kernel general memory. %macro mload_kernel_general_u32_LE // stack: offset @@ -300,6 +318,14 @@ // stack: value %endmacro +// Load a little-endian u64, consisting of 8 bytes +// (c_0, c_1, c_2, c_3, c_4, c_5, c_6, c_7), from kernel general memory. +%macro mload_kernel_general_u64_LE + // stack: offset + %mload_kernel_u64_LE(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + // Load a u256 (big-endian) from kernel code. %macro mload_kernel_code_u256 // stack: offset diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 6ec13835..aa6b908e 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -240,6 +240,10 @@ %and_const(0xffffffff) %endmacro +%macro as_u64 + %and_const(0xffffffffffffffff) +%endmacro + %macro not_u32 // stack: x PUSH 0xffffffff @@ -310,3 +314,56 @@ OR // stack: dcba %endmacro + +%macro reverse_bytes_u64 + // stack: word + DUP1 + // stack: word, word + %and_const(0xffffffff) + // stack: word_lo, word + SWAP1 + // stack: word, word_lo + %shr_const(32) + // stack: word_hi, word_lo + %reverse_bytes_u32 + // stack: word_hi_inverted, word_lo + SWAP1 + // stack: word_lo, word_hi_inverted + %reverse_bytes_u32 + // stack: word_lo_inverted, word_hi_inverted + %shl_const(32) + OR + // stack: word_inverted +%endmacro + + +// Invert the order of the four bytes in a word. +%macro invert_four_byte_word + // stack: word + %mul_const(0x1000000010000000100) + %and_const(0xff0000ff00ff00000000ff0000) + %mod_const(0xffffffffffff) + // stack: word_inverted +%endmacro + +// Invert the order of the eight bytes in a Blake word. 
+%macro reverse_bytes_u64_blake + // stack: word + DUP1 + // stack: word, word + %and_const(0xffffffff) + // stack: word_lo, word + SWAP1 + // stack: word, word_lo + %shr_const(32) + // stack: word_hi, word_lo + %invert_four_byte_word + // stack: word_hi_inverted, word_lo + SWAP1 + // stack: word_lo, word_hi_inverted + %invert_four_byte_word + // stack: word_lo_inverted, word_hi_inverted + %shl_const(32) + OR + // stack: word_inverted +%endmacro diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 74cf4cd0..b24317e0 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -58,10 +58,7 @@ fn make_input_stack(message: Vec) -> Vec { fn combine_u256s(hi: U256, lo: U256) -> U512 { let mut result = U512::from(hi); - result *= U512::from_big_endian(&[ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, - ]); + result <<= 256; result += U512::from(lo); result }