diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 6db81902..5ee3b1ff 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -8,14 +8,15 @@ edition = "2021" plonky2 = { path = "../plonky2", default-features = false, features = ["rand", "timing"] } plonky2_util = { path = "../util" } eth-trie-utils = { git = "https://github.com/mir-protocol/eth-trie-utils.git", rev = "dd3595b4ba7923f8d465450d210f17a2b4e20f96" } -maybe_rayon = { path = "../maybe_rayon" } anyhow = "1.0.40" env_logger = "0.9.0" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } hex-literal = "0.3.4" itertools = "0.10.3" +keccak-hash = "0.9.0" log = "0.4.14" +maybe_rayon = { path = "../maybe_rayon" } once_cell = "1.13.0" pest = "2.1.3" pest_derive = "2.1.0" @@ -23,7 +24,7 @@ rand = "0.8.5" rand_chacha = "0.3.1" rlp = "0.5.1" serde = { version = "1.0.144", features = ["derive"] } -keccak-hash = "0.9.0" +sha2 = "0.10.2" tiny-keccak = "2.0.2" [dev-dependencies] diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 002a84fb..0c3015f2 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,9 +39,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/metadata.asm"), include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), - include_str!("asm/rlp/encode.asm"), - include_str!("asm/rlp/decode.asm"), - include_str!("asm/rlp/read_to_memory.asm"), include_str!("asm/mpt/hash.asm"), include_str!("asm/mpt/hash_trie_specific.asm"), include_str!("asm/mpt/hex_prefix.asm"), @@ -51,6 +48,16 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/mpt/storage_write.asm"), include_str!("asm/mpt/util.asm"), include_str!("asm/mpt/write.asm"), + include_str!("asm/rlp/encode.asm"), + include_str!("asm/rlp/decode.asm"), + include_str!("asm/rlp/read_to_memory.asm"), + include_str!("asm/sha2/compression.asm"), + include_str!("asm/sha2/constants.asm"), + 
include_str!("asm/sha2/message_schedule.asm"), + include_str!("asm/sha2/ops.asm"), + include_str!("asm/sha2/store_pad.asm"), + include_str!("asm/sha2/temp_words.asm"), + include_str!("asm/sha2/write_length.asm"), include_str!("asm/transactions/router.asm"), include_str!("asm/transactions/type_0.asm"), include_str!("asm/transactions/type_1.asm"), diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 6722b0ca..26196df5 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -55,6 +55,148 @@ // stack: (empty) %endmacro +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from the kernel. +%macro mload_kernel_u32(segment) + // stack: offset + DUP1 + %mload_kernel($segment) + // stack: c_3, offset + %shl_const(8) + // stack: c_3 << 8, offset + DUP2 + %add_const(1) + %mload_kernel($segment) + OR + // stack: (c_3 << 8) | c_2, offset + %shl_const(8) + // stack: ((c_3 << 8) | c_2) << 8, offset + DUP2 + %add_const(2) + %mload_kernel($segment) + OR + // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset + %shl_const(8) + // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset + SWAP1 + %add_const(3) + %mload_kernel($segment) + OR + // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 +%endmacro + +// Load a u256 (big-endian) from the kernel. 
+%macro mload_kernel_u256(segment) + // stack: offset + DUP1 + %mload_kernel_u32($segment) + // stack: c_7, offset + %shl_const(32) + // stack: c_7 << 32, offset + DUP2 + %add_const(4) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 32) | c_6, offset + %shl_const(32) + // stack: ((c_7 << 32) | c_6) << 32, offset + DUP2 + %add_const(8) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset + %shl_const(32) + // stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset + DUP2 + %add_const(12) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset + %shl_const(32) + // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset + DUP2 + %add_const(16) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset + %shl_const(32) + // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset + DUP2 + %add_const(20) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset + %shl_const(32) + // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset + DUP2 + %add_const(24) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset + %shl_const(32) + // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset + DUP2 + %add_const(28) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset + SWAP1 + POP + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to the kernel. 
+%macro mstore_kernel_u32(segment) + // stack: offset, value + SWAP1 + // stack: value, offset + DUP1 + // stack: value, value, offset + %and_const(0xff) + // stack: c_0 = value % (1 << 8), value, offset + SWAP1 + // stack: value, c_0, offset + %shr_const(8) + // stack: value >> 8, c_0, offset + DUP1 + // stack: value >> 8, value >> 8, c_0, offset + %and_const(0xff) + // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, c_0, offset + SWAP1 + // stack: value >> 8, c_1, c_0, offset + %shr_const(8) + // stack: value >> 16, c_1, c_0, offset + DUP1 + // stack: value >> 16, value >> 16, c_1, c_0, offset + %and_const(0xff) + // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, c_1, c_0, offset + SWAP1 + // stack: value >> 16, c_2, c_1, c_0, offset + %shr_const(8) + // stack: value >> 24, c_2, c_1, c_0, offset + %and_const(0xff) + // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset + DUP5 + // stack: offset, c_3, c_2, c_1, c_0, offset + %mstore_kernel($segment) + // stack: c_2, c_1, c_0, offset + DUP4 + // stack: offset, c_2, c_1, c_0, offset + %add_const(1) + %mstore_kernel($segment) + // stack: c_1, c_0, offset + DUP3 + // stack: offset, c_1, c_0, offset + %add_const(2) + %mstore_kernel($segment) + // stack: c_0, offset + SWAP1 + // stack: offset, c_0 + %add_const(3) + %mstore_kernel($segment) +%endmacro + // Load a single byte from kernel code. %macro mload_kernel_code // stack: offset @@ -62,34 +204,41 @@ // stack: value %endmacro +// Load a single byte from kernel general memory. +%macro mload_kernel_general + // stack: offset + %mload_kernel(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + // Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), // from kernel code. 
%macro mload_kernel_code_u32 // stack: offset - DUP1 - %mload_kernel_code - // stack: c_3, offset - %shl_const(8) - // stack: c_3 << 8, offset - DUP2 - %add_const(1) - %mload_kernel_code - OR - // stack: (c_3 << 8) | c_2, offset - %shl_const(8) - // stack: ((c_3 << 8) | c_2) << 8, offset - DUP2 - %add_const(2) - %mload_kernel_code - OR - // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset - %shl_const(8) - // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset - SWAP1 - %add_const(3) - %mload_kernel_code - OR - // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 + %mload_kernel_u32(@SEGMENT_CODE) + // stack: value +%endmacro + +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from kernel general memory. +%macro mload_kernel_general_u32 + // stack: offset + %mload_kernel_u32(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + +// Load a u256 (big-endian) from kernel code. +%macro mload_kernel_code_u256 + // stack: offset + %mload_kernel_u256(@SEGMENT_CODE) + // stack: value +%endmacro + +// Load a u256 (big-endian) from kernel general memory. +%macro mload_kernel_general_u256 + // stack: offset + %mload_kernel_u256(@SEGMENT_KERNEL_GENERAL) + // stack: value %endmacro // Store a single byte to kernel code. @@ -99,6 +248,27 @@ // stack: (empty) %endmacro +// Store a single byte to kernel general memory. +%macro mstore_kernel_general + // stack: offset, value + %mstore_kernel(@SEGMENT_KERNEL_GENERAL) + // stack: (empty) +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to kernel code. +%macro mstore_kernel_code_u32 + // stack: offset, value + %mstore_kernel_u32(@SEGMENT_CODE) +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to kernel general memory. +%macro mstore_kernel_general_u32 + // stack: offset, value + %mstore_kernel_u32(@SEGMENT_KERNEL_GENERAL) +%endmacro + // Store a single byte to @SEGMENT_RLP_RAW. 
%macro mstore_rlp // stack: offset, value diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm new file mode 100644 index 00000000..eb9b73b8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -0,0 +1,306 @@ +// We use memory starting at 320 * num_blocks + 2 (after the message schedule +// space) as scratch space to store stack values. +%macro scratch_space_addr_from_num_blocks + // stack: num_blocks + %mul_const(320) + %add_const(2) +%endmacro + +global sha2_compression: + // stack: message_schedule_addr, retdest + PUSH 0 + // stack: i=0, message_schedule_addr, retdest + SWAP1 + // stack: message_schedule_addr, i=0, retdest + PUSH 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest + DUP1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + SWAP1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(28) + %mload_kernel_code_u32 + // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(24) + %mload_kernel_code_u32 + // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(20) + %mload_kernel_code_u32 + // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(8) + 
%mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +compression_start_block: + // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. + DUP10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP2 + DUP2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP3 + DUP2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP4 + DUP2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], 
g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP5 + DUP2 + // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP6 + DUP2 + // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP7 + DUP2 + // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, 
scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP8 + DUP2 + // stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP9 + DUP2 + // stack: scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + POP + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +compression_loop: + // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
+ // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP11 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + ADD + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + PUSH sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + ADD + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + 
%sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (t, a, b, c) -> (a, b, c, t, a, b, c) + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP3 + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP2 + // stack: T1[i], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, g, h) + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SUB + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + SWAP1 + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + PUSH 256 + MUL + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + ADD + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + SWAP12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + SWAP10 + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + POP + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, 
new_retdest + %and_const(63) + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + SWAP12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + POP + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + DUP12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + ISZERO + %jumpi(compression_end_block) + %jump(compression_loop) +compression_end_block: + // Add the initial values of the eight working variables (from the start of this block's compression) back into them. + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(4) + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + SWAP2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(8) + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(12) + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(16) + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(20) + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(24) + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(28) + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP8 + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + ISZERO + // In this case, we've finished all the blocks. 
+ %jumpi(compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %stack (num_blocks, working: 8) -> (working, num_blocks) + %jump(compression_start_block) +compression_end: + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + POP + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + SWAP3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + SWAP1 + JUMP diff --git a/evm/src/cpu/kernel/asm/sha2/constants.asm b/evm/src/cpu/kernel/asm/sha2/constants.asm new file mode 100644 index 00000000..d39661f8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/constants.asm @@ -0,0 +1,75 @@ +global sha2_constants_k: + BYTES 66, 138, 47, 152 + BYTES 113, 55, 68, 145 + BYTES 181, 192, 251, 207 + BYTES 233, 181, 219, 165 + BYTES 57, 86, 194, 91 + BYTES 89, 241, 17, 241 + BYTES 146, 63, 130, 164 + BYTES 171, 28, 94, 213 + BYTES 216, 7, 170, 152 + BYTES 18, 131, 91, 1 + BYTES 36, 49, 133, 190 + BYTES 85, 12, 125, 195 + BYTES 114, 190, 93, 116 + BYTES 128, 222, 177, 254 + BYTES 155, 220, 6, 167 + BYTES 193, 155, 241, 116 + BYTES 228, 155, 105, 193 
+ BYTES 239, 190, 71, 134 + BYTES 15, 193, 157, 198 + BYTES 36, 12, 161, 204 + BYTES 45, 233, 44, 111 + BYTES 74, 116, 132, 170 + BYTES 92, 176, 169, 220 + BYTES 118, 249, 136, 218 + BYTES 152, 62, 81, 82 + BYTES 168, 49, 198, 109 + BYTES 176, 3, 39, 200 + BYTES 191, 89, 127, 199 + BYTES 198, 224, 11, 243 + BYTES 213, 167, 145, 71 + BYTES 6, 202, 99, 81 + BYTES 20, 41, 41, 103 + BYTES 39, 183, 10, 133 + BYTES 46, 27, 33, 56 + BYTES 77, 44, 109, 252 + BYTES 83, 56, 13, 19 + BYTES 101, 10, 115, 84 + BYTES 118, 106, 10, 187 + BYTES 129, 194, 201, 46 + BYTES 146, 114, 44, 133 + BYTES 162, 191, 232, 161 + BYTES 168, 26, 102, 75 + BYTES 194, 75, 139, 112 + BYTES 199, 108, 81, 163 + BYTES 209, 146, 232, 25 + BYTES 214, 153, 6, 36 + BYTES 244, 14, 53, 133 + BYTES 16, 106, 160, 112 + BYTES 25, 164, 193, 22 + BYTES 30, 55, 108, 8 + BYTES 39, 72, 119, 76 + BYTES 52, 176, 188, 181 + BYTES 57, 28, 12, 179 + BYTES 78, 216, 170, 74 + BYTES 91, 156, 202, 79 + BYTES 104, 46, 111, 243 + BYTES 116, 143, 130, 238 + BYTES 120, 165, 99, 111 + BYTES 132, 200, 120, 20 + BYTES 140, 199, 2, 8 + BYTES 144, 190, 255, 250 + BYTES 164, 80, 108, 235 + BYTES 190, 249, 163, 247 + BYTES 198, 113, 120, 242 + +global sha2_constants_h: + BYTES 106, 9, 230, 103 + BYTES 187, 103, 174, 133 + BYTES 60, 110, 243, 114 + BYTES 165, 79, 245, 58 + BYTES 81, 14, 82, 127 + BYTES 155, 5, 104, 140 + BYTES 31, 131, 217, 171 + BYTES 91, 224, 205, 25 diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm new file mode 100644 index 00000000..78d98634 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -0,0 +1,240 @@ +// We put the message schedule in memory starting at 64 * num_blocks + 2. 
+%macro message_schedule_addr_from_num_blocks + // stack: num_blocks + %mul_const(64) + %add_const(2) +%endmacro + +// Precondition: stack contains address of one message block, followed by output address +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks +// of message schedule (in four-byte increments) +gen_message_schedule_from_block: + // stack: block_addr, output_addr, retdest + DUP1 + // stack: block_addr, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest + SWAP1 + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest + SWAP1 + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 + // stack: block[1], block[0], output_addr, retdest + SWAP2 + // stack: output_addr, block[0], block[1], retdest + %add_const(28) + PUSH 8 + // stack: counter=8, output_addr + 28, block[0], block[1], retdest + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_loop: + // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule. 
+ // stack: counter, output_addr, block[0], block[1], retdest + SWAP2 + // stack: block[0], output_addr, counter, block[1], retdest + DUP1 + // stack: block[0], block[0], output_addr, counter, block[1], retdest + %shr_const(32) + // stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest + SWAP1 + // stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest + %as_u32 + // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + DUP3 + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + %mstore_kernel_general_u32 + // stack: block[0] >> 32, output_addr, counter, block[1], retdest + SWAP1 + // stack: output_addr, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest + SWAP1 + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + SWAP2 + // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest + %decrement + DUP1 + ISZERO + %jumpi(gen_message_schedule_from_block_0_end) + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_end: + // stack: old counter=0, output_addr, block[0], block[1], retdest + POP + PUSH 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + %stack (counter, out, b0, b1) -> (out, counter, b1, b0) + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + SWAP1 + // stack: counter, output_addr + 64, block[1], block[0], retdest +gen_message_schedule_from_block_1_loop: + // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule. 
+ // stack: counter, output_addr, block[1], block[0], retdest + SWAP2 + // stack: block[1], output_addr, counter, block[0], retdest + DUP1 + // stack: block[1], block[1], output_addr, counter, block[0], retdest + %shr_const(32) + // stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest + SWAP1 + // stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest + %as_u32 + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + DUP3 + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + %mstore_kernel_general_u32 + // stack: block[1] >> 32, output_addr, counter, block[0], retdest + SWAP1 + // stack: output_addr, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest + SWAP1 + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + SWAP2 + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest + %decrement + DUP1 + ISZERO + %jumpi(gen_message_schedule_from_block_1_end) + %jump(gen_message_schedule_from_block_1_loop) +gen_message_schedule_from_block_1_end: + // stack: old counter=0, output_addr, block[1], block[0], retdest + POP + // stack: output_addr, block[0], block[1], retdest + PUSH 48 + // stack: counter=48, output_addr, block[0], block[1], retdest + SWAP1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + SWAP1 + // stack: counter, output_addr + 36, block[0], block[1], retdest +gen_message_schedule_remaining_loop: + // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
+ // stack: counter, output_addr, block[0], block[1], retdest + SWAP1 + // stack: output_addr, counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, counter, block[0], block[1], retdest + PUSH 2 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest + %sha2_sigma_1 + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 7 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 15 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %sha2_sigma_0 + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: 
output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 16 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP4 + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %add_u32 + %add_u32 + %add_u32 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + DUP2 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(4) + // stack: output_addr + 4, counter, block[0], block[1], retdest + SWAP1 + // stack: counter, output_addr + 4, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + DUP1 + ISZERO + %jumpi(gen_message_schedule_remaining_end) + %jump(gen_message_schedule_remaining_loop) +gen_message_schedule_remaining_end: + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + JUMP + +// Precondition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// stack contains 
output_addr +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) +global sha2_gen_all_message_schedules: + // stack: output_addr, retdest + DUP1 + // stack: output_addr, output_addr, retdest + PUSH 0 + // stack: 0, output_addr, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, output_addr, retdest + PUSH 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest +gen_all_message_schedules_loop: + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + PUSH gen_all_message_schedules_loop_end + // stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + DUP4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + DUP3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + %jump(gen_message_schedule_from_block) +gen_all_message_schedules_loop_end: + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest + SWAP1 + %decrement + SWAP1 + // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest + SWAP2 + %add_const(256) + SWAP2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + DUP2 + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + ISZERO + %jumpi(gen_all_message_schedules_end) + %jump(gen_all_message_schedules_loop) +gen_all_message_schedules_end: + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + %pop3 + // stack: output_addr, retdest + %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm new file mode 100644 index 
00000000..7d8054ca --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -0,0 +1,130 @@ +// 32-bit right rotation +%macro rotr(rot) + // stack: value + PUSH $rot + // stack: rot, value + DUP2 + DUP2 + // stack: rot, value, rot, value + SHR + // stack: value >> rot, rot, value + %stack (shifted, rot, value) -> (rot, value, shifted) + // stack: rot, value, value >> rot + PUSH 32 + SUB + // stack: 32 - rot, value, value >> rot + SHL + // stack: value << (32 - rot), value >> rot + %as_u32 + // stack: (value << (32 - rot)) % (1 << 32), value >> rot + ADD +%endmacro + +%macro sha2_sigma_0 + // stack: x + DUP1 + // stack: x, x + %rotr(7) + // stack: rotr(x, 7), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 7) + %rotr(18) + // stack: rotr(x, 18), x, rotr(x, 7) + SWAP1 + // stack: x, rotr(x, 18), rotr(x, 7) + PUSH 3 + SHR + // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) + XOR + XOR +%endmacro + +%macro sha2_sigma_1 + // stack: x + DUP1 + // stack: x, x + %rotr(17) + // stack: rotr(x, 17), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 17) + %rotr(19) + // stack: rotr(x, 19), x, rotr(x, 17) + SWAP1 + // stack: x, rotr(x, 19), rotr(x, 17) + PUSH 10 + SHR + // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) + XOR + XOR +%endmacro + +%macro sha2_bigsigma_0 + // stack: x + DUP1 + // stack: x, x + %rotr(2) + // stack: rotr(x, 2), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 2) + %rotr(13) + // stack: rotr(x, 13), x, rotr(x, 2) + SWAP1 + // stack: x, rotr(x, 13), rotr(x, 2) + %rotr(22) + // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) + XOR + XOR +%endmacro + +%macro sha2_bigsigma_1 + // stack: x + DUP1 + // stack: x, x + %rotr(6) + // stack: rotr(x, 6), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 6) + %rotr(11) + // stack: rotr(x, 11), x, rotr(x, 6) + SWAP1 + // stack: x, rotr(x, 11), rotr(x, 6) + %rotr(25) + // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) + XOR + XOR +%endmacro + +%macro 
sha2_choice + // stack: x, y, z + DUP1 + // stack: x, x, y, z + NOT + // stack: not x, x, y, z + %stack (notx, x, y, z) -> (notx, z, x, y) + // stack: not x, z, x, y + AND + // stack: (not x) and z, x, y + %stack (nxz, x, y) -> (x, y, nxz) + // stack: x, y, (not x) and z + AND + // stack: x and y, (not x) and z + OR +%endmacro + +%macro sha2_majority + // stack: x, y, z + %stack (xyz: 3) -> (xyz, xyz) + // stack: x, y, z, x, y, z + AND + // stack: x and y, z, x, y, z + SWAP2 + // stack: x, z, x and y, y, z + AND + // stack: x and z, x and y, y, z + %stack (a: 2, b: 2) -> (b, a) + // stack: y, z, x and z, x and y + AND + // stack: y and z, x and z, x and y + OR + OR +%endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm new file mode 100644 index 00000000..7594eb81 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -0,0 +1,89 @@ +global sha2: + %jump(sha2_store) + +global sha2_store: + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +store_loop: + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + DUP2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + ISZERO + %jumpi(store_end) + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + %stack (addr, counter, val) -> (addr, val, counter, addr) + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... 
, x[num_bytes-1], retdest + SWAP1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest + %jump(store_loop) +store_end: + // stack: addr, counter, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) + +// Precondition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcondition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +global sha2_pad: + // stack: retdest + PUSH 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + PUSH 1 + PUSH 7 + SHL + // stack: 128, num_bytes, retdest + DUP2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + DUP1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + SWAP1 + // stack: num_bytes, num_blocks, retdest + PUSH 8 + MUL + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + DUP2 + // stack: num_blocks, length, num_blocks, retdest + PUSH 64 + MUL + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + DUP1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + PUSH 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm new file mode 100644 
index 00000000..ed610947 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -0,0 +1,32 @@ +// "T_1" in the SHA-256 spec +%macro sha2_temp_word1 + // stack: e, f, g, h, K[i], W[i] + DUP1 + // stack: e, e, f, g, h, K[i], W[i] + %sha2_bigsigma_1 + // stack: Sigma_1(e), e, f, g, h, K[i], W[i] + %stack (sig, e, f, g) -> (e, f, g, sig) + // stack: e, f, g, Sigma_1(e), h, K[i], W[i] + %sha2_choice + // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] + %add_u32 + %add_u32 + %add_u32 + %add_u32 + // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] +%endmacro + +// "T_2" in the SHA-256 spec +%macro sha2_temp_word2 + // stack: a, b, c + DUP1 + // stack: a, a, b, c + %sha2_bigsigma_0 + // stack: Sigma_0(a), a, b, c + SWAP3 + // stack: c, a, b, Sigma_0(a) + %sha2_majority + // stack: Maj(c, a, b), Sigma_0(a) + %add_u32 + // stack: Maj(c, a, b) + Sigma_0(a) +%endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm new file mode 100644 index 00000000..5727498c --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -0,0 +1,119 @@ +%macro sha2_write_length + // stack: last_addr, length + SWAP1 + // stack: length, last_addr + DUP1 + // stack: length, length, last_addr + %and_const(0xff) + // stack: length % (1 << 8), length, last_addr + DUP3 + // stack: last_addr, length % (1 << 8), length, last_addr + %mstore_kernel_general + + // stack: length, last_addr + SWAP1 + %decrement + SWAP1 + // stack: length, last_addr - 1 + %shr_const(8) + // stack: length >> 8, last_addr - 1 + DUP1 + // stack: length >> 8, length >> 8, last_addr - 1 + %and_const(0xff) + // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + DUP3 + // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + %mstore_kernel_general + + // stack: length >> 8, last_addr - 1 + SWAP1 + %decrement + SWAP1 + // stack: length >> 8, last_addr - 2 + %shr_const(8) + // stack: length >> 16, last_addr - 2 + DUP1 + // 
stack: length >> 16, length >> 16, last_addr - 2 + %and_const(0xff) + // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 + DUP3 + // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 + %mstore_kernel_general + + // stack: length >> 16, last_addr - 2 + SWAP1 + %decrement + SWAP1 + // stack: length >> 16, last_addr - 3 + %shr_const(8) + // stack: length >> 24, last_addr - 3 + DUP1 + // stack: length >> 24, length >> 24, last_addr - 3 + %and_const(0xff) + // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 + DUP3 + // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 + %mstore_kernel_general + + // stack: length >> 24, last_addr - 3 + SWAP1 + %decrement + SWAP1 + // stack: length >> 24, last_addr - 4 + %shr_const(8) + // stack: length >> 32, last_addr - 4 + DUP1 + // stack: length >> 32, length >> 32, last_addr - 4 + %and_const(0xff) + // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 + DUP3 + // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 + %mstore_kernel_general + + // stack: length >> 32, last_addr - 4 + SWAP1 + %decrement + SWAP1 + // stack: length >> 32, last_addr - 5 + %shr_const(8) + // stack: length >> 40, last_addr - 5 + DUP1 + // stack: length >> 40, length >> 40, last_addr - 5 + %and_const(0xff) + // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 + DUP3 + // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 + %mstore_kernel_general + + // stack: length >> 40, last_addr - 5 + SWAP1 + %decrement + SWAP1 + // stack: length >> 40, last_addr - 6 + %shr_const(8) + // stack: length >> 48, last_addr - 6 + DUP1 + // stack: length >> 48, length >> 48, last_addr - 6 + %and_const(0xff) + // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 + DUP3 + // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 + %mstore_kernel_general + + // stack: length >> 48, last_addr 
- 6 + SWAP1 + %decrement + SWAP1 + // stack: length >> 48, last_addr - 7 + %shr_const(8) + // stack: length >> 56, last_addr - 7 + DUP1 + // stack: length >> 56, length >> 56, last_addr - 7 + %and_const(0xff) + // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 + DUP3 + // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7 + %mstore_kernel_general + %pop2 + // stack: (empty) +%endmacro diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 13965e39..8ac92258 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -1,46 +1,46 @@ %macro jump(dst) - push $dst + PUSH $dst jump %endmacro %macro jumpi(dst) - push $dst + PUSH $dst jumpi %endmacro %macro pop2 %rep 2 - pop + POP %endrep %endmacro %macro pop3 %rep 3 - pop + POP %endrep %endmacro %macro pop4 %rep 4 - pop + POP %endrep %endmacro %macro pop5 %rep 5 - pop + POP %endrep %endmacro %macro pop6 %rep 6 - pop + POP %endrep %endmacro %macro pop7 %rep 7 - pop + POP %endrep %endmacro @@ -162,21 +162,21 @@ // If pred is zero, yields z; otherwise, yields nz %macro select // stack: pred, nz, z - iszero + ISZERO // stack: pred == 0, nz, z - dup1 + DUP1 // stack: pred == 0, pred == 0, nz, z - iszero + ISZERO // stack: pred != 0, pred == 0, nz, z - swap3 + SWAP3 // stack: z, pred == 0, nz, pred != 0 - mul + MUL // stack: (pred == 0) * z, nz, pred != 0 - swap2 + SWAP2 // stack: pred != 0, nz, (pred == 0) * z - mul + MUL // stack: (pred != 0) * nz, (pred == 0) * z - add + ADD // stack: (pred != 0) * nz + (pred == 0) * z %endmacro @@ -184,27 +184,27 @@ // Assumes pred is boolean (either 0 or 1). 
%macro select_bool // stack: pred, nz, z - dup1 + DUP1 // stack: pred, pred, nz, z - iszero + ISZERO // stack: notpred, pred, nz, z - swap3 + SWAP3 // stack: z, pred, nz, notpred - mul + MUL // stack: pred * z, nz, notpred - swap2 + SWAP2 // stack: notpred, nz, pred * z - mul + MUL // stack: notpred * nz, pred * z - add + ADD // stack: notpred * nz + pred * z %endmacro %macro square // stack: x - dup1 + DUP1 // stack: x, x - mul + MUL // stack: x^2 %endmacro @@ -229,3 +229,33 @@ %select_bool // stack: max %endmacro + +%macro increment + %add_const(1) +%endmacro + +%macro decrement + %sub_const(1) +%endmacro + +%macro div2 + %div_const(2) +%endmacro + +%macro iseven + %mod_const(2) + ISZERO +%endmacro + +%macro as_u32 + %and_const(0xFFFFFFFF) +%endmacro + +// u32 addition (discarding 2^32 bit) +%macro add_u32 + // stack: x, y + ADD + // stack: x + y + %as_u32 + // stack: (x + y) & u32::MAX +%endmacro diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs index ede60a29..5980e460 100644 --- a/evm/src/cpu/kernel/assembler.rs +++ b/evm/src/cpu/kernel/assembler.rs @@ -579,7 +579,7 @@ mod tests { ); let kernel = parse_and_assemble(&["%stack (a) -> (a)"]); - assert_eq!(kernel.code, vec![]); + assert_eq!(kernel.code, vec![] as Vec); let kernel = parse_and_assemble(&["%stack (a, b, c) -> (c, b, a)"]); assert_eq!(kernel.code, vec![swap2]); diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs index 3728aa35..6180b1c8 100644 --- a/evm/src/cpu/kernel/ast.rs +++ b/evm/src/cpu/kernel/ast.rs @@ -38,10 +38,7 @@ pub(crate) enum Item { /// The left hand side of a %stack stack-manipulation macro. #[derive(Eq, PartialEq, Clone, Debug)] -pub(crate) enum StackPlaceholder { - Identifier(String), - Block(String, usize), -} +pub(crate) struct StackPlaceholder(pub String, pub usize); /// The right hand side of a %stack stack-manipulation macro. 
#[derive(Eq, PartialEq, Clone, Debug)] diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 343f9773..589ba6b3 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -20,7 +20,7 @@ type F = GoldilocksField; /// Halt interpreter execution whenever a jump to this offset is done. const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef; -#[derive(Debug)] +#[derive(Clone, Debug)] pub(crate) struct InterpreterMemory { pub(crate) context_memory: Vec, } @@ -435,14 +435,14 @@ impl<'a> Interpreter<'a> { fn run_shl(&mut self) { let shift = self.pop(); - let x = self.pop(); - self.push(x << shift); + let value = self.pop(); + self.push(value << shift); } fn run_shr(&mut self) { let shift = self.pop(); - let x = self.pop(); - self.push(x >> shift); + let value = self.pop(); + self.push(value >> shift); } fn run_keccak256(&mut self) { @@ -591,6 +591,7 @@ impl<'a> Interpreter<'a> { let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.memory.mload_general(context, segment, offset); + assert!(value.bits() <= segment.bit_range()); self.push(value); } @@ -599,6 +600,7 @@ impl<'a> Interpreter<'a> { let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.pop(); + assert!(value.bits() <= segment.bit_range()); self.memory.mstore_general(context, segment, offset, value); } } diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs index fd762eae..b7a8124b 100644 --- a/evm/src/cpu/kernel/parser.rs +++ b/evm/src/cpu/kernel/parser.rs @@ -119,12 +119,12 @@ fn parse_stack_placeholder(target: Pair) -> StackPlaceholder { assert_eq!(target.as_rule(), Rule::stack_placeholder); let inner = target.into_inner().next().unwrap(); match inner.as_rule() { - Rule::identifier => StackPlaceholder::Identifier(inner.as_str().into()), + Rule::identifier => StackPlaceholder(inner.as_str().into(), 1), Rule::stack_block => { let mut 
block = inner.into_inner(); let identifier = block.next().unwrap().as_str(); let length = block.next().unwrap().as_str().parse().unwrap(); - StackPlaceholder::Block(identifier.to_string(), length) + StackPlaceholder(identifier.to_string(), length) } _ => panic!("Unexpected {:?}", inner.as_rule()), } diff --git a/evm/src/cpu/kernel/stack/stack_manipulation.rs b/evm/src/cpu/kernel/stack/stack_manipulation.rs index ebc54af1..36e4b83a 100644 --- a/evm/src/cpu/kernel/stack/stack_manipulation.rs +++ b/evm/src/cpu/kernel/stack/stack_manipulation.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::collections::hash_map::Entry::{Occupied, Vacant}; -use std::collections::{BinaryHeap, HashMap, HashSet}; +use std::collections::{BinaryHeap, HashMap}; use std::hash::Hash; use itertools::Itertools; @@ -27,25 +27,18 @@ pub(crate) fn expand_stack_manipulation(body: Vec) -> Vec { fn expand(names: Vec, replacements: Vec) -> Vec { let mut stack_blocks = HashMap::new(); - let mut stack_names = HashSet::new(); let mut src = names .iter() .cloned() - .flat_map(|item| match item { - StackPlaceholder::Identifier(name) => { - stack_names.insert(name.clone()); - vec![StackItem::NamedItem(name)] - } - StackPlaceholder::Block(name, n) => { - stack_blocks.insert(name.clone(), n); - (0..n) - .map(|i| { - let literal_name = format!("block_{}_{}", name, i); - StackItem::NamedItem(literal_name) - }) - .collect_vec() - } + .flat_map(|StackPlaceholder(name, n)| { + stack_blocks.insert(name.clone(), n); + (0..n) + .map(|i| { + let literal_name = format!("@{}.{}", name, i); + StackItem::NamedItem(literal_name) + }) + .collect_vec() }) .collect_vec(); @@ -59,12 +52,10 @@ fn expand(names: Vec, replacements: Vec) -> let n = *stack_blocks.get(&name).unwrap(); (0..n) .map(|i| { - let literal_name = format!("block_{}_{}", name, i); + let literal_name = format!("@{}.{}", name, i); StackItem::NamedItem(literal_name) }) .collect_vec() - } else if stack_names.contains(&name) { - vec![StackItem::NamedItem(name)] 
} else { vec![StackItem::PushTarget(PushTarget::Label(name))] } diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs new file mode 100644 index 00000000..3acdce2b --- /dev/null +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -0,0 +1,50 @@ +use std::str::FromStr; + +use anyhow::Result; +use ethereum_types::U256; +use rand::{thread_rng, Rng}; +use sha2::{Digest, Sha256}; + +use crate::cpu::kernel::aggregator::combined_kernel; +use crate::cpu::kernel::interpreter::run_with_kernel; + +/// Standard Sha2 implementation. +fn sha2(input: Vec) -> U256 { + let mut hasher = Sha256::new(); + hasher.update(input); + U256::from(&hasher.finalize()[..]) +} + +fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { + let kernel = combined_kernel(); + let mut rng = thread_rng(); + + // Generate a random message, between 0 and 9999 bytes. + let num_bytes = rng.gen_range(0..10000); + let message: Vec = (0..num_bytes).map(|_| rng.gen()).collect(); + + // Hash the message using a standard implementation. + let expected = standard_implementation(message.clone()); + + // Load the message onto the stack. + let mut initial_stack = vec![U256::from(num_bytes)]; + let bytes: Vec = message.iter().map(|&x| U256::from(x as u32)).collect(); + initial_stack.extend(bytes); + initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); + initial_stack.reverse(); + + // Run the kernel code. + let kernel_function = kernel.global_labels[hash_fn_label]; + let result = run_with_kernel(&kernel, kernel_function, initial_stack)?; + let actual = result.stack()[0]; + + // Check that the result is correct. 
+ assert_eq!(expected, actual); + + Ok(()) +} + +#[test] +fn test_sha2() -> Result<()> { + test_hash("sha2", &sha2) +} diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs index a9c8c08c..45feb238 100644 --- a/evm/src/cpu/kernel/tests/mod.rs +++ b/evm/src/cpu/kernel/tests/mod.rs @@ -2,6 +2,7 @@ mod core; mod curve_ops; mod ecrecover; mod exp; +mod hash; mod mpt; mod packing; mod rlp; diff --git a/evm/src/generation/memory.rs b/evm/src/generation/memory.rs index 5e2919a4..944b42a6 100644 --- a/evm/src/generation/memory.rs +++ b/evm/src/generation/memory.rs @@ -22,13 +22,13 @@ impl Default for MemoryState { } } -#[derive(Default, Debug)] +#[derive(Clone, Default, Debug)] pub(crate) struct MemoryContextState { /// The content of each memory segment. pub segments: [MemorySegmentState; Segment::COUNT], } -#[derive(Default, Debug)] +#[derive(Clone, Default, Debug)] pub(crate) struct MemorySegmentState { pub content: Vec, }