diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 002a84fb..a0516307 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -11,6 +11,8 @@ pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel); pub(crate) fn combined_kernel() -> Kernel { let files = vec![ + include_str!("asm/assertions.asm"), + include_str!("asm/basic_macros.asm"), include_str!("asm/core/bootloader.asm"), include_str!("asm/core/create.asm"), include_str!("asm/core/create_addresses.asm"), @@ -31,7 +33,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/inverse_scalar.asm"), include_str!("asm/curve/secp256k1/lift_x.asm"), include_str!("asm/curve/secp256k1/moddiv.asm"), - include_str!("asm/exp.asm"), include_str!("asm/halt.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), @@ -39,6 +40,15 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/metadata.asm"), include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), + include_str!("asm/exp.asm"), + include_str!("asm/helper_functions.asm"), + include_str!("asm/moddiv.asm"), + include_str!("asm/secp256k1/curve_mul.asm"), + include_str!("asm/secp256k1/curve_add.asm"), + include_str!("asm/secp256k1/moddiv.asm"), + include_str!("asm/secp256k1/lift_x.asm"), + include_str!("asm/secp256k1/inverse_scalar.asm"), + include_str!("asm/ecrecover.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), include_str!("asm/rlp/read_to_memory.asm"), @@ -78,4 +88,195 @@ mod tests { let kernel = combined_kernel(); debug!("Total kernel size: {} bytes", kernel.code.len()); } + + fn u256ify<'a>(hexes: impl IntoIterator<Item = &'a str>) -> Result<Vec<U256>> { + Ok(hexes + .into_iter() + .map(U256::from_str) + .collect::<Result<Vec<_>, _>>()?) + } + + #[test] + fn test_insert() -> Result<()> { + // Run the kernel's `swapn` routine on a randomly generated stack.
+ let kernel = combined_kernel(); + let exp = kernel.global_labels["swapn"]; + let mut rng = thread_rng(); + let a = U256([0; 4].map(|_| rng.gen())); + let b = U256([0; 4].map(|_| rng.gen())); + let n = rng.gen_range(0..16); + let n_u256 = U256([n, 0, 0, 0]); + + let mut initial_stack = vec![U256::from_str("0xdeadbeef")?, n_u256, b]; + initial_stack.extend([a; 16]); + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + + dbg!(stack_with_kernel); + let expected_stack = todo!(); + + // assert_eq!(stack_with_kernel, expected_stack); + + Ok(()) + } + + #[test] + fn test_exp() -> Result<()> { + // Check that the kernel's `exp` routine agrees with the native EXP opcode. + let kernel = combined_kernel(); + let exp = kernel.global_labels["exp"]; + let mut rng = thread_rng(); + let a = U256([0; 4].map(|_| rng.gen())); + let b = U256([0; 4].map(|_| rng.gen())); + + // Random input + let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![b, a]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + assert_eq!(stack_with_kernel, stack_with_opcode); + + // 0 base + let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![b, U256::zero()]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + assert_eq!(stack_with_kernel, stack_with_opcode); + + // 0 exponent + let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![U256::zero(), a]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + 
assert_eq!(stack_with_kernel, stack_with_opcode); + + Ok(()) + } + + #[test] + fn test_ec_ops() -> Result<()> { + // Exercise the kernel's `ec_add`, `ec_double` and `ec_mul` routines on known points. + let kernel = combined_kernel(); + let ec_add = kernel.global_labels["ec_add"]; + let ec_double = kernel.global_labels["ec_double"]; + let ec_mul = kernel.global_labels["ec_mul"]; + let identity = ("0x0", "0x0"); + let invalid = ("0x0", "0x3"); // Not on curve + let point0 = ( + "0x1feee7ec986e198890cb83be8b8ba09ee953b3f149db6d9bfdaa5c308a33e58d", + "0x2051cc9a9edd46231604fd88f351e95ec72a285be93e289ac59cb48561efb2c6", + ); + let point1 = ( + "0x15b64d0a5f329fb672029298be8050f444626e6de11903caffa74b388075be1b", + "0x2d9e07340bd5cd7b70687b98f2500ff930a89a30d7b6a3e04b1b4d345319d234", + ); + // point2 = point0 + point1 + let point2 = ( + "0x18659c0e0a8fedcb8747cf463fc7cfa05f667d84e771d0a9521fc1a550688f0c", + "0x283ed10b42703e187e7a808aeb45c6b457bc4cc7d704e53b3348a1e3b0bfa55b", + ); + // point3 = 2 * point0 + let point3 = ( + "0x17da2b7b1a01c8dfdf0f5a6415833c7d755d219aa7e2c4cd0ac83d87d0ca4217", + "0xc9ace9de14aac8114541b50c19320eb40f0eeac3621526d9e34dbcf4c3a6c0f", + ); + let s = "0xabb2a34c0e7956cfe6cef9ddb7e810c45ea19a6ebadd79c21959af09f5ba480a"; + // point4 = s * point0 + let point4 = ( + "0xe519344959cc17021fe98878f947f5c1b1675325533a620c1684cfa6367e6c0", + "0x7496a7575b0b6a821e19ce780ecc3e0b156e605327798693defeb9f265b7a6f", + ); + + // Standard addition #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point2.1, point2.0])?); + // Standard addition #2 + let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point2.1, point2.0])?); + + // Standard doubling #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; + let stack = 
run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + // Standard doubling #2 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_double, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + // Standard doubling #3 + let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + + // Addition with identity #1 + let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point1.1, point1.0])?); + // Addition with identity #2 + let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point1.1, point1.0])?); + // Addition with identity #3 + let initial_stack = + u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + + // Addition with invalid point(s) #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #2 + let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #3 + let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #4 + let initial_stack = 
u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + + // Scalar multiplication #1 + let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point4.1, point4.0])?); + // Scalar multiplication #2 + let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + // Scalar multiplication #3 + let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point0.1, point0.0])?); + // Scalar multiplication #4 + let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + // Scalar multiplication #5 + let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + + // Multiple calls + let ec_mul_hex = format!("0x{:x}", ec_mul); + let initial_stack = u256ify([ + "0xdeadbeef", + s, + &ec_mul_hex, + identity.1, + identity.0, + point0.1, + point0.0, + ])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point4.1, point4.0])?); + + Ok(()) + } } diff --git a/evm/src/cpu/kernel/asm/helper_functions.asm b/evm/src/cpu/kernel/asm/helper_functions.asm index 87627269..8acbbe3f 100644 --- a/evm/src/cpu/kernel/asm/helper_functions.asm +++ b/evm/src/cpu/kernel/asm/helper_functions.asm @@ -1,4 +1,6 @@ -global swapn +global swapn: + JUMPDEST + // stack: n, ... 
%eq(1) %jumpi(case1) @@ -33,61 +35,81 @@ global swapn %eq(16) %jumpi(case16) case1: + JUMPDEST swap1 + %jump(swapn_end) case2: + JUMPDEST swap2 case3: + JUMPDEST swap3 case4: + JUMPDEST swap4 case5: + JUMPDEST swap5 case6: + JUMPDEST swap6 case7: + JUMPDEST swap7 case8: + JUMPDEST swap8 case9: + JUMPDEST swap9 case10: + JUMPDEST swap10 case11: + JUMPDEST swap11 case12: + JUMPDEST swap12 case13: + JUMPDEST swap13 case14: + JUMPDEST swap14 case15: + JUMPDEST swap15 case16: + JUMPDEST swap16 swapn_end: + JUMPDEST global insertn: - // stack: n, val, ... - dup + JUMPDEST + + // stack: n, val, ... + dup1 // stack: n, n, val, ... swap2 // stack: val, n, n, ... swap1 // stack: n, val, n, ... - %swapn + %jump(swapn) // stack: [nth], n, ..., val swap1 // stack: n, [nth], ..., val swap_back_loop: - // stack: k, k, [kth], ..., [k-1st] - dup + // stack: k, [kth], ..., [k-1st] + dup1 // stack: k, k, [kth], ..., [k-1st] swap2 // stack: [kth], k, k, ..., [k-1st] swap1 // stack: k, [kth], k, ..., [k-1st] - %swapn + %jump(swapn) // stack: [k-1st], k, ..., [k-2nd], [kth] swap1 // stack: k, [k-1st], ..., [k-2nd], [kth] diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm new file mode 100644 index 00000000..ef287cc0 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -0,0 +1,377 @@ +global sha2_compression: + JUMPDEST + // stack: message_schedule_addr, retdest + push 0 + // stack: i=0, message_schedule_addr, retdest + swap1 + // stack: message_schedule_addr, i=0, retdest + push 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest + dup1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + swap1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + 
%add_const(28) + %mload_kernel_code_u32 + // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(24) + %mload_kernel_code_u32 + // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(20) + %mload_kernel_code_u32 + // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(8) + %mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +sha2_compression_start_block: + // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. 
+ JUMPDEST + dup10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup2 + dup2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup3 + dup2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup4 + dup2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup5 + dup2 + // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + 
%mstore_kernel_general_u32 + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup6 + dup2 + // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup7 + dup2 + // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup8 + dup2 + // stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup9 + dup2 + // stack: 
scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + pop + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +sha2_compression_loop: + // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. + JUMPDEST + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup11 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], 
h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup3 + // stack: T1[i], d[i], T2[i], 
T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: T1[i], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + swap7 + swap1 + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + swap7 + swap2 + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + swap7 + swap3 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], 
g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + swap7 + swap4 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + swap7 + swap5 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + swap7 + swap6 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + sub + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap1 + // stack: i+1==64, message_schedule_addr, 
i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + push 256 + mul + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + add + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + swap10 + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + pop + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + push 64 + swap1 + mod + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + swap12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + dup12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + //dup10 + //iszero + //dup2 + //iszero + //and + //%jumpi(sha2_stop_lol) + iszero + %jumpi(sha2_compression_end_block) + %jump(sha2_compression_loop) +sha2_compression_end_block: + // Add the initial values of the eight working variables (from the start of this block's compression) back 
into them. + JUMPDEST + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(4) + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(8) + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(12) + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(16) + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(20) + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(24) + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 
num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(28) + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + iszero + // In this case, we've finished all the blocks. + %jumpi(sha2_compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // TODO: "insertion" macro for the below + // Move num_blocks to the ninth spot on the stack, past the working variables. 
+ swap1 + swap2 + swap1 + swap2 + swap3 + swap2 + swap3 + swap4 + swap3 + swap4 + swap5 + swap4 + swap5 + swap6 + swap5 + swap6 + swap7 + swap6 + swap7 + swap8 + swap7 + swap8 + %jump(sha2_compression_start_block) +sha2_compression_end: + JUMPDEST + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + STOP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm new file mode 100644 index 00000000..9d49e06f --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -0,0 +1,275 @@ +// Precondition: stack contains address of one message block, followed by output address +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks +// of message schedule (in four-byte increments) +global sha2_gen_message_schedule_from_block: + JUMPDEST + // stack: block_addr, output_addr, retdest + dup1 + // stack: block_addr, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest +
swap1 + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest + swap1 + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 + // stack: block[1], block[0], output_addr, retdest + swap2 + // stack: output_addr, block[0], block[1], retdest + %add_const(28) + push 8 + // stack: counter=8, output_addr + 28, block[0], block[1], retdest + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_loop: + // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule. + JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest + dup2 + dup2 + // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest + swap1 + // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest + mod + // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest + swap2 + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + div + // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + swap1 + // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + dup3 + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + %mstore_kernel_general_u32 + // stack: block[0] >> 32, output_addr, counter, block[1], retdest + swap1 + // stack: output_addr, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest + swap1 + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + swap2 + // stack: counter,
output_addr - 4, block[0] >> 32, block[1], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_0_end) + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_end: + JUMPDEST + // stack: old counter=0, output_addr, block[0], block[1], retdest + pop + push 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + swap3 + // stack: block[1], output_addr, counter, block[0], retdest + swap2 + // stack: counter, output_addr, block[1], block[0], retdest + swap1 + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + swap1 + // stack: counter, output_addr + 64, block[1], block[0], retdest +sha2_gen_message_schedule_from_block_1_loop: + // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule. + JUMPDEST + // stack: counter, output_addr, block[1], block[0], retdest + swap2 + // stack: block[1], output_addr, counter, block[0], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest + dup2 + dup2 + // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest + swap1 + // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest + mod + // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest + swap2 + // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + div + // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + swap1 + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + dup3 + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + %mstore_kernel_general_u32 + // stack: block[1] >> 32,
output_addr, counter, block[0], retdest + swap1 + // stack: output_addr, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest + swap1 + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + swap2 + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_1_end) + %jump(sha2_gen_message_schedule_from_block_1_loop) +sha2_gen_message_schedule_from_block_1_end: + JUMPDEST + // stack: old counter=0, output_addr, block[1], block[0], retdest + pop + // stack: output_addr, block[0], block[1], retdest (both block words have been shifted right eight times by 32 bits by now, so the two labels are nominal from here on) + push 48 + // stack: counter=48, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 36, block[0], block[1], retdest +sha2_gen_message_schedule_remaining_loop: + // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks.
+ JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, counter, block[0], block[1], retdest + push 2 + push 4 + mul + swap1 + sub + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest + %sha2_sigma_1 + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 7 + push 4 + mul + swap1 + sub + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 15 + push 4 + mul + swap1 + sub + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %sha2_sigma_0 + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + //
stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 16 + push 4 + mul + swap1 + sub + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap4 + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %add_u32 + %add_u32 + %add_u32 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + swap2 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(4) + //
stack: output_addr + 4, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 4, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + dup1 + iszero + %jumpi(sha2_gen_message_schedule_remaining_end) + %jump(sha2_gen_message_schedule_remaining_loop) +sha2_gen_message_schedule_remaining_end: + JUMPDEST + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + JUMP + +// Precondition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// stack contains output_addr +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) +global sha2_gen_all_message_schedules: + JUMPDEST + // stack: output_addr, retdest + dup1 + // stack: output_addr, output_addr, retdest + push 0 + // stack: 0, output_addr, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, output_addr, retdest + push 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest +sha2_gen_all_message_schedules_loop: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + push sha2_gen_all_message_schedules_loop_end + // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + dup4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + dup3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + %jump(sha2_gen_message_schedule_from_block) +sha2_gen_all_message_schedules_loop_end: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest + swap1 + %decrement + swap1 + // stack: cur_addr + 64, counter - 1,
cur_output_addr, output_addr, retdest + swap2 + %add_const(256) + swap2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + dup2 + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + iszero + %jumpi(sha2_gen_all_message_schedules_end) + %jump(sha2_gen_all_message_schedules_loop) + JUMPDEST +sha2_gen_all_message_schedules_end: + JUMPDEST + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + %pop3 + // stack: output_addr, retdest + %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm new file mode 100644 index 00000000..d27ebaf8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -0,0 +1,105 @@ +// Precondition: stack contains num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest +// Postcondition: kernel general memory, starting at 0, is of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// and execution continues at sha2_pad with only retdest left on the stack. +// num_bytes may be 0: the loop tests the counter before each store, so an empty message stores nothing. +global sha2_store: + JUMPDEST + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + dup1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +sha2_store_loop: + JUMPDEST + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + dup2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + iszero + %jumpi(sha2_store_end) + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest (counter > 0 here) + dup1 + // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + swap3 + // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest + swap1 + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... , x[num_bytes-1], retdest + swap1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest + %jump(sha2_store_loop) +sha2_store_end: + JUMPDEST + // stack: addr, counter=0, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) +
+// Precondition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcondition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// NOTE(review): nothing visible here writes the bytes between the 0x80 marker and the length field, so this appears to rely on that memory already being zero -- confirm. +global sha2_pad: + JUMPDEST + // stack: retdest + push 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + push 1 + push 7 + shl + // stack: 128, num_bytes, retdest + dup2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + dup1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + swap1 + // stack: num_bytes, num_blocks, retdest + push 8 + mul + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + dup2 + // stack: num_blocks, length, num_blocks, retdest + push 64 + mul + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + dup1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + push 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) + +// Entry point: forwards to sha2_store, so the stack must be num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest +global sha2: + JUMPDEST + %jump(sha2_store)