From 042da0c8e9106cff0c1fde8b2003960e636471fa Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 14 Jul 2022 09:29:29 -0700 Subject: [PATCH 001/104] starting on sha2 --- evm/src/cpu/kernel/asm/helper_functions.asm | 98 ++++ evm/src/cpu/kernel/asm/sha2.asm | 459 +++++++++++++++++++ evm/src/cpu/kernel/asm/util/basic_macros.asm | 24 + 3 files changed, 581 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/helper_functions.asm create mode 100644 evm/src/cpu/kernel/asm/sha2.asm diff --git a/evm/src/cpu/kernel/asm/helper_functions.asm b/evm/src/cpu/kernel/asm/helper_functions.asm new file mode 100644 index 00000000..87627269 --- /dev/null +++ b/evm/src/cpu/kernel/asm/helper_functions.asm @@ -0,0 +1,98 @@ +global swapn + // stack: n, ... + %eq(1) + %jumpi(case1) + %eq(2) + %jumpi(case2) + %eq(3) + %jumpi(case3) + %eq(4) + %jumpi(case4) + %eq(5) + %jumpi(case5) + %eq(6) + %jumpi(case6) + %eq(7) + %jumpi(case7) + %eq(8) + %jumpi(case8) + %eq(9) + %jumpi(case9) + %eq(10) + %jumpi(case10) + %eq(11) + %jumpi(case11) + %eq(12) + %jumpi(case12) + %eq(13) + %jumpi(case13) + %eq(14) + %jumpi(case14) + %eq(15) + %jumpi(case15) + %eq(16) + %jumpi(case16) +case1: + swap1 +case2: + swap2 +case3: + swap3 +case4: + swap4 +case5: + swap5 +case6: + swap6 +case7: + swap7 +case8: + swap8 +case9: + swap9 +case10: + swap10 +case11: + swap11 +case12: + swap12 +case13: + swap13 +case14: + swap14 +case15: + swap15 +case16: + swap16 +swapn_end: + + +global insertn: + // stack: n, val, ... + dup + // stack: n, n, val, ... + swap2 + // stack: val, n, n, ... + swap1 + // stack: n, val, n, ... 
+ %swapn + // stack: [nth], n, ..., val + swap1 + // stack: n, [nth], ..., val +swap_back_loop: + // stack: k, k, [kth], ..., [k-1st] + dup + // stack: k, k, [kth], ..., [k-1st] + swap2 + // stack: [kth], k, k, ..., [k-1st] + swap1 + // stack: k, [kth], k, ..., [k-1st] + %swapn + // stack: [k-1st], k, ..., [k-2nd], [kth] + swap1 + // stack: k, [k-1st], ..., [k-2nd], [kth] + %decrement + // stack: k-1, [k-1st], ..., [k-2nd], [kth] + iszero + not + %jumpi(swap_back_loop) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm new file mode 100644 index 00000000..f33247dd --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -0,0 +1,459 @@ +global count_bits: + // stack: n (assumed to be > 0) + push 0 + // stack: 0, n + swap1 + // stack: n, 0 +count_bits_loop: + // stack: k, bits + %div2 + // stack: k//2, bits + swap1 + // stack: bits, k//2 + %increment + // stack: bits+1, k//2 + swap1 + // stack: k//2, bits+1 + %jumpi(count_bits_loop) + // stack: 0, bits + pop + // stack: bits + +// Appends a 1 to the end of the u256 at the top of the stack. +global append_1: + dup + count_bits + %eq(256) + %jumpi(append_if256) + %jump(append_else) +append_if256: + push 1 + swap1 + %jump(append_end) +append_else: + push 2 + mul + push 1 + add +append_end: + + +global sha2_append_1: + // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + // (assume num_u256s <= 16) + dup + // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + dup + // stack: num_u256s, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + %increment + // stack: num_u256s+1, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + swapn + // stack: x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s + dup + // stack: x[num_u256s-1], x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s + %count_bits + // stack: num_bits, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... 
, x[num_u256s-2], num_u256s + %eq(256) + %jumpi(append_if256) + %jump(append_else) +append_if256: + push 1 + // stack: 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s + dup2 + // stack: num_u256s, 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s + push 3 + add + // stack: num_u256s+3, 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s, [] + swapn + // stack: [], x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s, + %jump(append_continue) +append_else: +append_continue: + + dup + + + + +global sha2_pad: + // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + %jumpi() + + + append 1: + if length of last = 256 + increment length; add new value of 10000000..0 + + stick 1 on the end (last one:) + + if length even: + pad last one to + + if length odd: + + + // stack: num_blocks, block[0][0], block[0][1], ..., block[num_blocks-1][3] + + +// #define K0 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 + + +// BN254 elliptic curve addition. +// Uses the standard affine addition formula. +global ec_add: + // Uncomment for test inputs. + // PUSH 0xdeadbeef + // PUSH 2 + // PUSH 1 + // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121 + // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770 + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Check if points are valid BN254 points. 
+ DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, y0, x0, y0, x1, y1, retdest + %ec_check + // stack: isValid(x0, y0), x0, y0, x1, y1, retdest + DUP5 + // stack: y1, isValid(x0, y0), x0, y0, x1, y1, retdest + DUP5 + // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest + %ec_check + // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest + AND + // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest + %jumpi(ec_add_valid_points) + // stack: x0, y0, x1, y1, retdest + + // Otherwise return + %pop4 + // stack: retdest + %ec_invalid_input + +// BN254 elliptic curve addition. +// Assumption: (x0,y0) and (x1,y1) are valid points. +global ec_add_valid_points: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Check if the first point is the identity. + DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, y0, x0, y0, x1, y1, retdest + %ec_isidentity + // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest + %jumpi(ec_add_first_zero) + // stack: x0, y0, x1, y1, retdest + + // Check if the second point is the identity. + DUP4 + // stack: y1, x0, y0, x1, y1, retdest + DUP4 + // stack: x1, y1, x0, y0, x1, y1, retdest + %ec_isidentity + // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest + %jumpi(ec_add_snd_zero) + // stack: x0, y0, x1, y1, retdest + + // Check if both points have the same x-coordinate. + DUP3 + // stack: x1, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, x1, x0, y0, x1, y1, retdest + EQ + // stack: x0 == x1, x0, y0, x1, y1, retdest + %jumpi(ec_add_equal_first_coord) + // stack: x0, y0, x1, y1, retdest + + // Otherwise, we can use the standard formula. 
+ // Compute lambda = (y0 - y1)/(x0 - x1) + DUP4 + // stack: y1, x0, y0, x1, y1, retdest + DUP3 + // stack: y0, y1, x0, y0, x1, y1, retdest + %submod + // stack: y0 - y1, x0, y0, x1, y1, retdest + DUP4 + // stack: x1, y0 - y1, x0, y0, x1, y1, retdest + DUP3 + // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest + %submod + // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest + %moddiv + // stack: lambda, x0, y0, x1, y1, retdest + %jump(ec_add_valid_points_with_lambda) + +// BN254 elliptic curve addition. +// Assumption: (x0,y0) == (0,0) +ec_add_first_zero: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Just return (x1,y1) + %pop2 + // stack: x1, y1, retdest + SWAP1 + // stack: y1, x1, retdest + SWAP2 + // stack: retdest, x1, y1 + JUMP + +// BN254 elliptic curve addition. +// Assumption: (x1,y1) == (0,0) +ec_add_snd_zero: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Just return (x0,y0) + SWAP2 + // stack: x1, y0, x0, y1, retdest + POP + // stack: y0, x0, y1, retdest + SWAP2 + // stack: y1, x0, y0, retdest + POP + // stack: x0, y0, retdest + SWAP1 + // stack: y0, x0, retdest + SWAP2 + // stack: retdest, x0, y0 + JUMP + +// BN254 elliptic curve addition. 
+// Assumption: lambda = (y0 - y1)/(x0 - x1) +ec_add_valid_points_with_lambda: + JUMPDEST + // stack: lambda, x0, y0, x1, y1, retdest + + // Compute x2 = lambda^2 - x1 - x0 + DUP2 + // stack: x0, lambda, x0, y0, x1, y1, retdest + DUP5 + // stack: x1, x0, lambda, x0, y0, x1, y1, retdest + %bn_base + // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest + DUP4 + // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest + DUP1 + // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest + MULMOD + // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest + %submod + // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest + %submod + // stack: x2, lambda, x0, y0, x1, y1, retdest + + // Compute y2 = lambda*(x1 - x2) - y1 + %bn_base + // stack: N, x2, lambda, x0, y0, x1, y1, retdest + DUP2 + // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest + DUP7 + // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest + %submod + // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest + DUP4 + // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest + MULMOD + // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest + DUP7 + // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest + SWAP1 + // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest + %submod + // stack: y2, x2, lambda, x0, y0, x1, y1, retdest + + // Return x2,y2 + SWAP5 + // stack: x1, x2, lambda, x0, y0, y2, y1, retdest + POP + // stack: x2, lambda, x0, y0, y2, y1, retdest + SWAP5 + // stack: y1, lambda, x0, y0, y2, x2, retdest + %pop4 + // stack: y2, x2, retdest + SWAP2 + // stack: retdest, x2, y2 + JUMP + +// BN254 elliptic curve addition. 
+// Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1 +ec_add_equal_first_coord: + JUMPDEST + // stack: x0, y0, x1, y1, retdest with x0 == x1 + + // Check if the points are equal + DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP5 + // stack: y1, y0, x0, y0, x1, y1, retdest + EQ + // stack: y1 == y0, x0, y0, x1, y1, retdest + %jumpi(ec_add_equal_points) + // stack: x0, y0, x1, y1, retdest + + // Otherwise, one is the negation of the other so we can return (0,0). + %pop4 + // stack: retdest + PUSH 0 + // stack: 0, retdest + PUSH 0 + // stack: 0, 0, retdest + SWAP2 + // stack: retdest, 0, 0 + JUMP + + +// BN254 elliptic curve addition. +// Assumption: x0 == x1 and y0 == y1 +// Standard doubling formula. +ec_add_equal_points: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Compute lambda = 3/2 * x0^2 / y0 + %bn_base + // stack: N, x0, y0, x1, y1, retdest + %bn_base + // stack: N, N, x0, y0, x1, y1, retdest + DUP3 + // stack: x0, N, N, x0, y0, x1, y1, retdest + DUP1 + // stack: x0, x0, N, N, x0, y0, x1, y1, retdest + MULMOD + // stack: x0^2, N, x0, y0, x1, y1, retdest + PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field + // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest + MULMOD + // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest + DUP3 + // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest + %moddiv + // stack: lambda, x0, y0, x1, y1, retdest + %jump(ec_add_valid_points_with_lambda) + +// BN254 elliptic curve doubling. +// Assumption: (x0,y0) is a valid point. +// Standard doubling formula. +global ec_double: + JUMPDEST + // stack: x0, y0, retdest + DUP2 + // stack: y0, x0, y0, retdest + DUP2 + // stack: x0, y0, x0, y0, retdest + %jump(ec_add_equal_points) + +// Push the order of the BN254 base field. +%macro bn_base + PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 +%endmacro + +// Assumption: x, y < N and 2N < 2^256. +// Note: Doesn't hold for Secp256k1 base field. 
+%macro submod + // stack: x, y + %bn_base + // stack: N, x, y + ADD + // stack: N + x, y // Doesn't overflow since 2N < 2^256 + SUB + // stack: N + x - y // Doesn't underflow since y < N + %bn_base + // stack: N, N + x - y + SWAP1 + // stack: N + x - y, N + MOD + // stack: (N + x - y) % N = (x-y) % N +%endmacro + +// Check if (x,y) is a valid curve point. +// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack. +%macro ec_check + // stack: x, y + %bn_base + // stack: N, x, y + DUP2 + // stack: x, N, x, y + LT + // stack: x < N, x, y + %bn_base + // stack: N, x < N, x, y + DUP4 + // stack: y, N, x < N, x, y + LT + // stack: y < N, x < N, x, y + AND + // stack: (y < N) & (x < N), x, y + SWAP2 + // stack: y, x, (y < N) & (x < N) + SWAP1 + // stack: x, y, (y < N) & (x < N) + %bn_base + // stack: N, x, y, b + %bn_base + // stack: N, N, x, y, b + DUP3 + // stack: x, N, N, x, y, b + %bn_base + // stack: N, x, N, N, x, y, b + DUP2 + // stack: x, N, x, N, N, x, y, b + DUP1 + // stack: x, x, N, x, N, N, x, y, b + MULMOD + // stack: x^2 % N, x, N, N, x, y, b + MULMOD + // stack: x^3 % N, N, x, y, b + PUSH 3 + // stack: 3, x^3 % N, N, x, y, b + ADDMOD + // stack: (x^3 + 3) % N, x, y, b + DUP3 + // stack: y, (x^3 + 3) % N, x, y, b + %bn_base + // stack: N, y, (x^3 + 3) % N, x, y, b + SWAP1 + // stack: y, N, (x^3 + 3) % N, x, y, b + DUP1 + // stack: y, y, N, (x^3 + 3) % N, x, y, b + MULMOD + // stack: y^2 % N, (x^3 + 3) % N, x, y, b + EQ + // stack: y^2 % N == (x^3 + 3) % N, x, y, b + SWAP2 + // stack: y, x, y^2 % N == (x^3 + 3) % N, b + %ec_isidentity + // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b + SWAP2 + // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0) + AND + // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0) + OR + // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) +%endmacro + +// Check if (x,y)==(0,0) +%macro ec_isidentity + // stack: x, y + OR + // stack: x | y + ISZERO + // stack: 
(x,y) == (0,0) +%endmacro + +// Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid. +%macro ec_invalid_input + // stack: retdest + PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + // stack: u256::MAX, retdest + PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + // stack: u256::MAX, u256::MAX, retdest + SWAP2 + // stack: retdest, u256::MAX, u256::MAX + JUMP +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 13965e39..d64ee513 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -229,3 +229,27 @@ %select_bool // stack: max %endmacro + +%macro increment + push 1 + swap1 + sub +%endmacro + +%macro decrement + push 1 + swap1 + sub +%endmacro + +%macro div2 + push 2 + swap1 + div +%endmacro + +%macro eq(x) + dup1 + push $x + eq +%endmacro From 4378ff0fc2ea72c72ce8f934c147f99c07541515 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 08:41:28 -0700 Subject: [PATCH 002/104] progress --- evm/src/cpu/kernel/aggregator.rs | 203 +++++++++- evm/src/cpu/kernel/asm/helper_functions.asm | 36 +- evm/src/cpu/kernel/asm/sha2/compression.asm | 377 ++++++++++++++++++ .../cpu/kernel/asm/sha2/message_schedule.asm | 275 +++++++++++++ evm/src/cpu/kernel/asm/sha2/store_pad.asm | 98 +++++ 5 files changed, 981 insertions(+), 8 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/sha2/compression.asm create mode 100644 evm/src/cpu/kernel/asm/sha2/message_schedule.asm create mode 100644 evm/src/cpu/kernel/asm/sha2/store_pad.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 002a84fb..a0516307 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -11,6 +11,8 @@ pub static KERNEL: Lazy = Lazy::new(combined_kernel); pub(crate) fn combined_kernel() -> Kernel { let files = vec![ 
+ include_str!("asm/assertions.asm"), + include_str!("asm/basic_macros.asm"), include_str!("asm/core/bootloader.asm"), include_str!("asm/core/create.asm"), include_str!("asm/core/create_addresses.asm"), @@ -31,7 +33,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/inverse_scalar.asm"), include_str!("asm/curve/secp256k1/lift_x.asm"), include_str!("asm/curve/secp256k1/moddiv.asm"), - include_str!("asm/exp.asm"), include_str!("asm/halt.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), @@ -39,6 +40,15 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/metadata.asm"), include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), + include_str!("asm/exp.asm"), + include_str!("asm/helper_functions.asm"), + include_str!("asm/moddiv.asm"), + include_str!("asm/secp256k1/curve_mul.asm"), + include_str!("asm/secp256k1/curve_add.asm"), + include_str!("asm/secp256k1/moddiv.asm"), + include_str!("asm/secp256k1/lift_x.asm"), + include_str!("asm/secp256k1/inverse_scalar.asm"), + include_str!("asm/ecrecover.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), include_str!("asm/rlp/read_to_memory.asm"), @@ -78,4 +88,195 @@ mod tests { let kernel = combined_kernel(); debug!("Total kernel size: {} bytes", kernel.code.len()); } + + fn u256ify<'a>(hexes: impl IntoIterator) -> Result> { + Ok(hexes + .into_iter() + .map(U256::from_str) + .collect::, _>>()?) + } + + #[test] + fn test_insert() -> Result<()> { + // Make sure we can parse and assemble the entire kernel. 
+ let kernel = combined_kernel(); + let exp = kernel.global_labels["swapn"]; + let mut rng = thread_rng(); + let a = U256([0; 4].map(|_| rng.gen())); + let b = U256([0; 4].map(|_| rng.gen())); + let n = rng.gen_range(0..16); + let n_u256 = U256([n, 0, 0, 0]); + + let mut initial_stack = vec![U256::from_str("0xdeadbeef")?, n_u256, b]; + initial_stack.extend([a; 16]); + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + + dbg!(stack_with_kernel); + let expected_stack = todo!(); + + // assert_eq!(stack_with_kernel, expected_stack); + + Ok(()) + } + + #[test] + fn test_exp() -> Result<()> { + // Make sure we can parse and assemble the entire kernel. + let kernel = combined_kernel(); + let exp = kernel.global_labels["exp"]; + let mut rng = thread_rng(); + let a = U256([0; 4].map(|_| rng.gen())); + let b = U256([0; 4].map(|_| rng.gen())); + + // Random input + let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![b, a]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + assert_eq!(stack_with_kernel, stack_with_opcode); + + // 0 base + let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![b, U256::zero()]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + assert_eq!(stack_with_kernel, stack_with_opcode); + + // 0 exponent + let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack); + let initial_stack = vec![U256::zero(), a]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + let stack_with_opcode = run(&code, 0, initial_stack); + 
assert_eq!(stack_with_kernel, stack_with_opcode); + + Ok(()) + } + + #[test] + fn test_ec_ops() -> Result<()> { + // Make sure we can parse and assemble the entire kernel. + let kernel = combined_kernel(); + let ec_add = kernel.global_labels["ec_add"]; + let ec_double = kernel.global_labels["ec_double"]; + let ec_mul = kernel.global_labels["ec_mul"]; + let identity = ("0x0", "0x0"); + let invalid = ("0x0", "0x3"); // Not on curve + let point0 = ( + "0x1feee7ec986e198890cb83be8b8ba09ee953b3f149db6d9bfdaa5c308a33e58d", + "0x2051cc9a9edd46231604fd88f351e95ec72a285be93e289ac59cb48561efb2c6", + ); + let point1 = ( + "0x15b64d0a5f329fb672029298be8050f444626e6de11903caffa74b388075be1b", + "0x2d9e07340bd5cd7b70687b98f2500ff930a89a30d7b6a3e04b1b4d345319d234", + ); + // point2 = point0 + point1 + let point2 = ( + "0x18659c0e0a8fedcb8747cf463fc7cfa05f667d84e771d0a9521fc1a550688f0c", + "0x283ed10b42703e187e7a808aeb45c6b457bc4cc7d704e53b3348a1e3b0bfa55b", + ); + // point3 = 2 * point0 + let point3 = ( + "0x17da2b7b1a01c8dfdf0f5a6415833c7d755d219aa7e2c4cd0ac83d87d0ca4217", + "0xc9ace9de14aac8114541b50c19320eb40f0eeac3621526d9e34dbcf4c3a6c0f", + ); + let s = "0xabb2a34c0e7956cfe6cef9ddb7e810c45ea19a6ebadd79c21959af09f5ba480a"; + // point4 = s * point0 + let point4 = ( + "0xe519344959cc17021fe98878f947f5c1b1675325533a620c1684cfa6367e6c0", + "0x7496a7575b0b6a821e19ce780ecc3e0b156e605327798693defeb9f265b7a6f", + ); + + // Standard addition #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point2.1, point2.0])?); + // Standard addition #2 + let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point2.1, point2.0])?); + + // Standard doubling #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; + let stack = 
run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + // Standard doubling #2 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_double, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + // Standard doubling #3 + let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point3.1, point3.0])?); + + // Addition with identity #1 + let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point1.1, point1.0])?); + // Addition with identity #2 + let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point1.1, point1.0])?); + // Addition with identity #3 + let initial_stack = + u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + + // Addition with invalid point(s) #1 + let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #2 + let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #3 + let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + // Addition with invalid point(s) #4 + let initial_stack = 
u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + + // Scalar multiplication #1 + let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point4.1, point4.0])?); + // Scalar multiplication #2 + let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + // Scalar multiplication #3 + let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([point0.1, point0.0])?); + // Scalar multiplication #4 + let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, u256ify([identity.1, identity.0])?); + // Scalar multiplication #5 + let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?; + let stack = run(&kernel.code, ec_mul, initial_stack); + assert_eq!(stack, vec![U256::MAX, U256::MAX]); + + // Multiple calls + let ec_mul_hex = format!("0x{:x}", ec_mul); + let initial_stack = u256ify([ + "0xdeadbeef", + s, + &ec_mul_hex, + identity.1, + identity.0, + point0.1, + point0.0, + ])?; + let stack = run(&kernel.code, ec_add, initial_stack); + assert_eq!(stack, u256ify([point4.1, point4.0])?); + + Ok(()) + } } diff --git a/evm/src/cpu/kernel/asm/helper_functions.asm b/evm/src/cpu/kernel/asm/helper_functions.asm index 87627269..8acbbe3f 100644 --- a/evm/src/cpu/kernel/asm/helper_functions.asm +++ b/evm/src/cpu/kernel/asm/helper_functions.asm @@ -1,4 +1,6 @@ -global swapn +global swapn: + JUMPDEST + // stack: n, ... 
%eq(1) %jumpi(case1) @@ -33,61 +35,81 @@ global swapn %eq(16) %jumpi(case16) case1: + JUMPDEST swap1 + %jump(swapn_end) case2: + JUMPDEST swap2 case3: + JUMPDEST swap3 case4: + JUMPDEST swap4 case5: + JUMPDEST swap5 case6: + JUMPDEST swap6 case7: + JUMPDEST swap7 case8: + JUMPDEST swap8 case9: + JUMPDEST swap9 case10: + JUMPDEST swap10 case11: + JUMPDEST swap11 case12: + JUMPDEST swap12 case13: + JUMPDEST swap13 case14: + JUMPDEST swap14 case15: + JUMPDEST swap15 case16: + JUMPDEST swap16 swapn_end: + JUMPDEST global insertn: - // stack: n, val, ... - dup + JUMPDEST + + // stack: n, val, ... + dup1 // stack: n, n, val, ... swap2 // stack: val, n, n, ... swap1 // stack: n, val, n, ... - %swapn + %jump(swapn) // stack: [nth], n, ..., val swap1 // stack: n, [nth], ..., val swap_back_loop: - // stack: k, k, [kth], ..., [k-1st] - dup + // stack: k, [kth], ..., [k-1st] + dup1 // stack: k, k, [kth], ..., [k-1st] swap2 // stack: [kth], k, k, ..., [k-1st] swap1 // stack: k, [kth], k, ..., [k-1st] - %swapn + %jump(swapn) // stack: [k-1st], k, ..., [k-2nd], [kth] swap1 // stack: k, [k-1st], ..., [k-2nd], [kth] diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm new file mode 100644 index 00000000..ef287cc0 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -0,0 +1,377 @@ +global sha2_compression: + JUMPDEST + // stack: message_schedule_addr, retdest + push 0 + // stack: i=0, message_schedule_addr, retdest + swap1 + // stack: message_schedule_addr, i=0, retdest + push 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest + dup1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + swap1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + 
%add_const(28) + %mload_kernel_code_u32 + // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(24) + %mload_kernel_code_u32 + // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(20) + %mload_kernel_code_u32 + // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(8) + %mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +sha2_compression_start_block: + // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. 
+ JUMPDEST + dup10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup2 + dup2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup3 + dup2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup4 + dup2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup5 + dup2 + // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + 
%mstore_kernel_general_u32 + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup6 + dup2 + // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup7 + dup2 + // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup8 + dup2 + // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup9 + dup2 + // stack: 
scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + pop + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +sha2_compression_loop: + // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. + JUMPDEST + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup11 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], 
h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup3 + // stack: T1[i], d[i], T2[i], 
T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + swap7 + swap1 + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + swap7 + swap2 + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + swap7 + swap3 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], 
g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + swap7 + swap4 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + swap7 + swap5 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + swap7 + swap6 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + sub + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap1 + // stack: i+1==64, message_schedule_addr, 
i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + push 256 + mul + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + add + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + swap10 + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + pop + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + push 64 + swap1 + mod + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + swap12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + dup12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + //dup10 + //iszero + //dup2 + //iszero + //and + //%jumpi(sha2_stop_lol) + iszero + %jumpi(sha2_compression_end_block) + %jump(sha2_compression_loop) +sha2_compression_end_block: + // Add the initial values of the eight working variables (from the start of this block's compression) back 
into them. + JUMPDEST + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(4) + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(8) + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(12) + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(16) + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(20) + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(24) + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 
num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(28) + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + iszero + // In this case, we've finished all the blocks. + %jumpi(sha2_compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // TODO: "insertion" macro for the below + // Move num_blocks to the ninth spot on the stack, past the working variables. 
+ swap1 + swap2 + swap1 + swap2 + swap3 + swap2 + swap3 + swap4 + swap3 + swap4 + swap5 + swap4 + swap5 + swap6 + swap5 + swap6 + swap7 + swap6 + swap7 + swap8 + swap7 + swap8 + %jump(sha2_compression_start_block) +sha2_compression_end: + JUMPDEST + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + STOP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm new file mode 100644 index 00000000..9d49e06f --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -0,0 +1,275 @@ +// Precodition: stack contains address of one message block, followed by output address +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks +// of message schedule (in four-byte increments) +global sha2_gen_message_schedule_from_block: + JUMPDEST + // stack: block_addr, output_addr, retdest + dup1 + // stack: block_addr, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest + 
swap1 + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest + swap1 + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 + // stack: block[1], block[0], output_addr, retdest + swap2 + // stack: output_addr, block[0], block[1], retdest + %add_const(28) + push 8 + // stack: counter=8, output_addr + 28, block[0], block[1], retdest + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_loop: + // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule. + JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest + dup2 + dup2 + // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest + swap1 + // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest + mod + // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest + swap2 + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + div + // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + swap1 + // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + dup3 + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + %mstore_kernel_general_u32 + // stack: block[0] >> 32, output_addr, counter, block[1], retdest + swap1 + // stack: output_addr, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest + swap1 + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + swap2 + // stack: counter, 
output_addr - 4, block[0] >> 32, block[1], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_0_end) + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_end: + JUMPDEST + // stack: old counter=0, output_addr, block[0], block[1], retdest + pop + push 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + swap3 + // stack: block[1], output_addr, counter, block[0], retdest + swap2 + // stack: counter, output_addr, block[1], block[0], retdest + swap1 + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + swap1 + // stack: counter, output_addr + 64, block[1], block[0], retdest +sha2_gen_message_schedule_from_block_1_loop: + // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. + JUMPDEST + // stack: counter, output_addr, block[1], block[0], retdest + swap2 + // stack: block[1], output_addr, counter, block[0], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest + dup2 + dup2 + // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest + swap1 + // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest + mod + // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest + swap2 + // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + div + // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + swap1 + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + dup3 + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + %mstore_kernel_general_u32 + // stack: block[1] >> 32, 
output_addr, counter, block[0], retdest + swap1 + // stack: output_addr, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest + swap1 + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + swap2 + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_1_end) + %jump(sha2_gen_message_schedule_from_block_1_loop) +sha2_gen_message_schedule_from_block_1_end: + JUMPDEST + // stack: old counter=0, output_addr, block[1], block[0], retdest + pop + // stack: output_addr, block[0], block[1], retdest + push 48 + // stack: counter=48, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 36, block[0], block[1], retdest +sha2_gen_message_schedule_remaining_loop: + // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
+ JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, counter, block[0], block[1], retdest + push 2 + push 4 + mul + swap1 + sub + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest + %sha2_sigma_1 + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 7 + push 4 + mul + swap1 + sub + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 15 + push 4 + mul + swap1 + sub + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %sha2_sigma_0 + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // 
stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 16 + push 4 + mul + swap1 + sub + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap4 + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %add_u32 + %add_u32 + %add_u32 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + swap2 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(4) + // 
stack: output_addr + 4, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 4, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + dup1 + iszero + %jumpi(sha2_gen_message_schedule_remaining_end) + %jump(sha2_gen_message_schedule_remaining_loop) +sha2_gen_message_schedule_remaining_end: + JUMPDEST + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + JUMP + +// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// stack contains output_addr +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) +global sha2_gen_all_message_schedules: + JUMPDEST + // stack: output_addr, retdest + dup1 + // stack: output_addr, output_addr, retdest + push 0 + // stack: 0, output_addr, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, output_addr, retdest + push 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest +sha2_gen_all_message_schedules_loop: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + push sha2_gen_all_message_schedules_loop_end + // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + dup4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + dup3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + %jump(sha2_gen_message_schedule_from_block) +sha2_gen_all_message_schedules_loop_end: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest + swap1 + %decrement + swap1 + // stack: cur_addr + 64, counter - 1, 
cur_output_addr, output_addr, retdest + swap2 + %add_const(256) + swap2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + dup2 + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + iszero + %jumpi(sha2_gen_all_message_schedules_end) + %jump(sha2_gen_all_message_schedules_loop) + JUMPDEST +sha2_gen_all_message_schedules_end: + JUMPDEST + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + %pop3 + // stack: output_addr, retdest + %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm new file mode 100644 index 00000000..d27ebaf8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -0,0 +1,98 @@ +global sha2_store: + JUMPDEST + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + dup1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +sha2_store_loop: + JUMPDEST + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + dup1 + // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + swap3 + // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest + swap1 + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... , x[num_bytes-1], retdest + dup1 + // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest + iszero + %jumpi(sha2_store_end) + // stack: counter-1, addr, ... 
, x[num_bytes-1], retdest + swap1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest + %jump(sha2_store_loop) +sha2_store_end: + JUMPDEST + // stack: counter=0, addr, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) + +// Precodition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcodition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +global sha2_pad: + JUMPDEST + // stack: retdest + push 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + push 1 + push 7 + shl + // stack: 128, num_bytes, retdest + dup2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + dup1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + swap1 + // stack: num_bytes, num_blocks, retdest + push 8 + mul + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + dup2 + // stack: num_blocks, length, num_blocks, retdest + push 64 + mul + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + dup1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + push 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) + +global sha2: + JUMPDEST + %jump(sha2_store) From 
2c7b60e2867d47c2fb77bd91b1803d5c2bcc47d0 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 14 Jul 2022 14:58:28 -0700 Subject: [PATCH 003/104] using memory --- evm/src/cpu/kernel/asm/helper_functions.asm | 15 + evm/src/cpu/kernel/asm/sha2.asm | 501 +++----------------- 2 files changed, 90 insertions(+), 426 deletions(-) diff --git a/evm/src/cpu/kernel/asm/helper_functions.asm b/evm/src/cpu/kernel/asm/helper_functions.asm index 8acbbe3f..c628916c 100644 --- a/evm/src/cpu/kernel/asm/helper_functions.asm +++ b/evm/src/cpu/kernel/asm/helper_functions.asm @@ -41,48 +41,63 @@ case1: case2: JUMPDEST swap2 + %jump(swapn_end) case3: JUMPDEST swap3 + %jump(swapn_end) case4: JUMPDEST swap4 + %jump(swapn_end) case5: JUMPDEST swap5 + %jump(swapn_end) case6: JUMPDEST swap6 + %jump(swapn_end) case7: JUMPDEST swap7 + %jump(swapn_end) case8: JUMPDEST swap8 + %jump(swapn_end) case9: JUMPDEST swap9 + %jump(swapn_end) case10: JUMPDEST swap10 + %jump(swapn_end) case11: JUMPDEST swap11 + %jump(swapn_end) case12: JUMPDEST swap12 + %jump(swapn_end) case13: JUMPDEST swap13 + %jump(swapn_end) case14: JUMPDEST swap14 + %jump(swapn_end) case15: JUMPDEST swap15 + %jump(swapn_end) case16: JUMPDEST swap16 + %jump(swapn_end) swapn_end: JUMPDEST diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index f33247dd..9f54e1bf 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -19,441 +19,90 @@ count_bits_loop: pop // stack: bits -// Appends a 1 to the end of the u256 at the top of the stack. -global append_1: - dup - count_bits - %eq(256) - %jumpi(append_if256) - %jump(append_else) -append_if256: - push 1 - swap1 - %jump(append_end) -append_else: - push 2 - mul - push 1 - add -append_end: - - -global sha2_append_1: +global sha2_store: // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - // (assume num_u256s <= 16) - dup + dup1 // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... 
, x[num_u256s-1] - dup - // stack: num_u256s, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - %increment - // stack: num_u256s+1, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - swapn - // stack: x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s - dup - // stack: x[num_u256s-1], x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s - %count_bits - // stack: num_bits, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s - %eq(256) - %jumpi(append_if256) - %jump(append_else) -append_if256: + // TODO: use kernel memory, and start address not at 0 + push 0 + // stack: addr=0, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] + mstore + // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] push 1 - // stack: 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s - dup2 - // stack: num_u256s, 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s - push 3 - add - // stack: num_u256s+3, 1, x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s, [] - swapn - // stack: [], x[num_u256s-1], num_u256s, x[0], x[1], x[2], ... , x[num_u256s-2], num_u256s, - %jump(append_continue) -append_else: -append_continue: - - dup + // stack: addr=1, counter=num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] +sha2_store_loop: + JUMPDEST + // stack: addr, counter, x[num_u256s-counter], ... , x[num_u256s-1] + dup1 + // stack: addr, addr, counter, x[num_u256s-counter], ... , x[num_u256s-1] + swap3 + // stack: x[num_u256s-counter], addr, counter, addr, ... , x[num_u256s-1] + swap1 + // stack: addr, x[num_u256s-counter], counter, addr, ... , x[num_u256s-1] + mstore + // stack: counter, addr, ... , x[num_u256s-1] + %decrement + // stack: counter-1, addr, ... , x[num_u256s-1] + iszero + %jumpi(sha2_store_end) + swap1 + // stack: addr, counter-1, ... , x[num_u256s-1] + %increment + // stack: addr+1, counter-1, ... 
, x[num_u256s-1] + %jump(sha2_store_loop) +sha2_store_end: + JUMPDEST global sha2_pad: - // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - %jumpi() - - - append 1: - if length of last = 256 - increment length; add new value of 10000000..0 - - stick 1 on the end (last one:) - - if length even: - pad last one to - - if length odd: - - - // stack: num_blocks, block[0][0], block[0][1], ..., block[num_blocks-1][3] - - -// #define K0 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 - - -// BN254 elliptic curve addition. -// Uses the standard affine addition formula. -global ec_add: - // Uncomment for test inputs. - // PUSH 0xdeadbeef - // PUSH 2 - // PUSH 1 - // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121 - // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770 + // TODO: use kernel memory, and start address not at 0 + push 0 + mload + // stack: num_u256s + mload + // stack: x[num_u256s-1] + dup1 + // stack: x[num_u256s-1], x[num_u256s-1] + %count_bits + // stack: num_bits, x[num_u256s-1] + %eq(256) + %jumpi(pad_if256) + %jump(pad_else) +pad_if256: JUMPDEST - // stack: x0, y0, x1, y1, retdest - - // Check if points are valid BN254 points. - DUP2 - // stack: y0, x0, y0, x1, y1, retdest - DUP2 - // stack: x0, y0, x0, y0, x1, y1, retdest - %ec_check - // stack: isValid(x0, y0), x0, y0, x1, y1, retdest - DUP5 - // stack: x1, isValid(x0, y0), x0, y0, x1, y1, retdest - DUP5 - // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest - %ec_check - // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest - AND - // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest - %jumpi(ec_add_valid_points) - // stack: x0, y0, x1, y1, retdest - - // Otherwise return - %pop4 - // stack: retdest - %ec_invalid_input - -// BN254 elliptic curve addition. -// Assumption: (x0,y0) and (x1,y1) are valid points. 
-global ec_add_valid_points: - JUMPDEST - // stack: x0, y0, x1, y1, retdest - - // Check if the first point is the identity. - DUP2 - // stack: y0, x0, y0, x1, y1, retdest - DUP2 - // stack: x0, y0, x0, y0, x1, y1, retdest - %ec_isidentity - // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest - %jumpi(ec_add_first_zero) - // stack: x0, y0, x1, y1, retdest - - // Check if the first point is the identity. - DUP4 - // stack: y1, x0, y0, x1, y1, retdest - DUP4 - // stack: x1, y1, x0, y0, x1, y1, retdest - %ec_isidentity - // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest - %jumpi(ec_add_snd_zero) - // stack: x0, y0, x1, y1, retdest - - // Check if both points have the same x-coordinate. - DUP3 - // stack: x1, x0, y0, x1, y1, retdest - DUP2 - // stack: x0, x1, x0, y0, x1, y1, retdest - EQ - // stack: x0 == x1, x0, y0, x1, y1, retdest - %jumpi(ec_add_equal_first_coord) - // stack: x0, y0, x1, y1, retdest - - // Otherwise, we can use the standard formula. - // Compute lambda = (y0 - y1)/(x0 - x1) - DUP4 - // stack: y1, x0, y0, x1, y1, retdest - DUP3 - // stack: y0, y1, x0, y0, x1, y1, retdest - %submod - // stack: y0 - y1, x0, y0, x1, y1, retdest - DUP4 - // stack: x1, y0 - y1, x0, y0, x1, y1, retdest - DUP3 - // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest - %submod - // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest - %moddiv - // stack: lambda, x0, y0, x1, y1, retdest - %jump(ec_add_valid_points_with_lambda) - -// BN254 elliptic curve addition. -// Assumption: (x0,y0) == (0,0) -ec_add_first_zero: - JUMPDEST - // stack: x0, y0, x1, y1, retdest - - // Just return (x1,y1) + // stack: num_bits, x[num_u256s-1] %pop2 - // stack: x1, y1, retdest - SWAP1 - // stack: y1, x1, retdest - SWAP2 - // stack: retdest, x1, y1 - JUMP - -// BN254 elliptic curve addition. 
-// Assumption: (x1,y1) == (0,0) -ec_add_snd_zero: + push 0 + mload + // stack: num_u256s + %increment + // stack: num_u256s+1 + dup1 + // stack: num_u256s+1, num_u256s+1 + push 0 + mstore + // stack: num_u256s+1 + push 1 + // stack: 1, num_u256s+1 + swap1 + // stack: num_u256s+1, 1 + mstore + %jump(pad_end) +pad_else: JUMPDEST - // stack: x0, y0, x1, y1, retdest - - // Just return (x1,y1) - SWAP2 - // stack: x1, y0, x0, y1, retdest - POP - // stack: y0, x0, y1, retdest - SWAP2 - // stack: y1, x0, y0, retdest - POP - // stack: x0, y0, retdest - SWAP1 - // stack: y0, x0, retdest - SWAP2 - // stack: retdest, x0, y0 - JUMP - -// BN254 elliptic curve addition. -// Assumption: lambda = (y0 - y1)/(x0 - x1) -ec_add_valid_points_with_lambda: + // stack: num_bits, x[num_u256s-1] + pop + // stack: x[num_u256s-1] + push 2 + mul + // stack: 2*x[num_u256s-1] + %increment + // stack: 2*x[num_u256s-1]+1 + push 0 + mload + // stack: num_u256s, 2*x[num_u256s-1]+1 + mstore +pad_end: JUMPDEST - // stack: lambda, x0, y0, x1, y1, retdest - - // Compute x2 = lambda^2 - x1 - x0 - DUP2 - // stack: x0, lambda, x0, y0, x1, y1, retdest - DUP5 - // stack: x1, x0, lambda, x0, y0, x1, y1, retdest - %bn_base - // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest - DUP4 - // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest - DUP1 - // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest - MULMOD - // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest - %submod - // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest - %submod - // stack: x2, lambda, x0, y0, x1, y1, retdest - - // Compute y2 = lambda*(x1 - x2) - y1 - %bn_base - // stack: N, x2, lambda, x0, y0, x1, y1, retdest - DUP2 - // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest - DUP7 - // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest - %submod - // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest - DUP4 - // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest - MULMOD - // stack: 
lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest - DUP7 - // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest - SWAP1 - // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest - %submod - // stack: y2, x2, lambda, x0, y0, x1, y1, retdest - - // Return x2,y2 - SWAP5 - // stack: x1, x2, lambda, x0, y0, y2, y1, retdest - POP - // stack: x2, lambda, x0, y0, y2, y1, retdest - SWAP5 - // stack: y1, lambda, x0, y0, y2, x2, retdest - %pop4 - // stack: y2, x2, retdest - SWAP2 - // stack: retdest, x2, y2 - JUMP - -// BN254 elliptic curve addition. -// Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1 -ec_add_equal_first_coord: - JUMPDEST - // stack: x0, y0, x1, y1, retdest with x0 == x1 - - // Check if the points are equal - DUP2 - // stack: y0, x0, y0, x1, y1, retdest - DUP5 - // stack: y1, y0, x0, y0, x1, y1, retdest - EQ - // stack: y1 == y0, x0, y0, x1, y1, retdest - %jumpi(ec_add_equal_points) - // stack: x0, y0, x1, y1, retdest - - // Otherwise, one is the negation of the other so we can return (0,0). - %pop4 - // stack: retdest - PUSH 0 - // stack: 0, retdest - PUSH 0 - // stack: 0, 0, retdest - SWAP2 - // stack: retdest, 0, 0 - JUMP - - -// BN254 elliptic curve addition. -// Assumption: x0 == x1 and y0 == y1 -// Standard doubling formula. 
-ec_add_equal_points: - JUMPDEST - // stack: x0, y0, x1, y1, retdest - - // Compute lambda = 3/2 * x0^2 / y0 - %bn_base - // stack: N, x0, y0, x1, y1, retdest - %bn_base - // stack: N, N, x0, y0, x1, y1, retdest - DUP3 - // stack: x0, N, N, x0, y0, x1, y1, retdest - DUP1 - // stack: x0, x0, N, N, x0, y0, x1, y1, retdest - MULMOD - // stack: x0^2, N, x0, y0, x1, y1, retdest with - PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field - // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest - MULMOD - // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest - DUP3 - // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest - %moddiv - // stack: lambda, x0, y0, x1, y1, retdest - %jump(ec_add_valid_points_with_lambda) - -// BN254 elliptic curve doubling. -// Assumption: (x0,y0) is a valid point. -// Standard doubling formula. -global ec_double: - JUMPDEST - // stack: x0, y0, retdest - DUP2 - // stack: y0, x0, y0, retdest - DUP2 - // stack: x0, y0, x0, y0, retdest - %jump(ec_add_equal_points) - -// Push the order of the BN254 base field. -%macro bn_base - PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 -%endmacro - -// Assumption: x, y < N and 2N < 2^256. -// Note: Doesn't hold for Secp256k1 base field. -%macro submod - // stack: x, y - %bn_base - // stack: N, x, y - ADD - // stack: N + x, y // Doesn't overflow since 2N < 2^256 - SUB - // stack: N + x - y // Doesn't underflow since y < N - %bn_base - // stack: N, N + x - y - SWAP1 - // stack: N + x - y, N - MOD - // stack: (N + x - y) % N = (x-y) % N -%endmacro - -// Check if (x,y) is a valid curve point. -// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack. 
-%macro ec_check - // stack: x, y - %bn_base - // stack: N, x, y - DUP2 - // stack: x, N, x, y - LT - // stack: x < N, x, y - %bn_base - // stack: N, x < N, x, y - DUP4 - // stack: y, N, x < N, x, y - LT - // stack: y < N, x < N, x, y - AND - // stack: (y < N) & (x < N), x, y - SWAP2 - // stack: y, x, (y < N) & (x < N), x - SWAP1 - // stack: x, y, (y < N) & (x < N) - %bn_base - // stack: N, x, y, b - %bn_base - // stack: N, N, x, y, b - DUP3 - // stack: x, N, N, x, y, b - %bn_base - // stack: N, x, N, N, x, y, b - DUP2 - // stack: x, N, x, N, N, x, y, b - DUP1 - // stack: x, x, N, x, N, N, x, y, b - MULMOD - // stack: x^2 % N, x, N, N, x, y, b - MULMOD - // stack: x^3 % N, N, x, y, b - PUSH 3 - // stack: 3, x^3 % N, N, x, y, b - ADDMOD - // stack: (x^3 + 3) % N, x, y, b - DUP3 - // stack: y, (x^3 + 3) % N, x, y, b - %bn_base - // stack: N, y, (x^3 + 3) % N, x, y, b - SWAP1 - // stack: y, N, (x^3 + 3) % N, x, y, b - DUP1 - // stack: y, y, N, (x^3 + 3) % N, x, y, b - MULMOD - // stack: y^2 % N, (x^3 + 3) % N, x, y, b - EQ - // stack: y^2 % N == (x^3 + 3) % N, x, y, b - SWAP2 - // stack: y, x, y^2 % N == (x^3 + 3) % N, b - %ec_isidentity - // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b - SWAP2 - // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0) - AND - // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0) - OR - // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) -%endmacro - -// Check if (x,y)==(0,0) -%macro ec_isidentity - // stack: x, y - OR - // stack: x | y - ISZERO - // stack: (x,y) == (0,0) -%endmacro - -// Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid. 
-%macro ec_invalid_input - // stack: retdest - PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff - // stack: u256::MAX, retdest - PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff - // stack: u256::MAX, u256::MAX, retdest - SWAP2 - // stack: retdest, u256::MAX, u256::MAX - JUMP -%endmacro \ No newline at end of file From dd2cbf604b61a4d7f68141e2999e68695432a665 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 14 Jul 2022 16:26:56 -0700 Subject: [PATCH 004/104] updates --- evm/src/cpu/kernel/asm/sha2.asm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 9f54e1bf..62ce56c7 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -56,7 +56,7 @@ sha2_store_end: -global sha2_pad: +global sha2_append1: // TODO: use kernel memory, and start address not at 0 push 0 mload @@ -70,7 +70,7 @@ global sha2_pad: %eq(256) %jumpi(pad_if256) %jump(pad_else) -pad_if256: +append_if256: JUMPDEST // stack: num_bits, x[num_u256s-1] %pop2 @@ -90,7 +90,7 @@ pad_if256: // stack: num_u256s+1, 1 mstore %jump(pad_end) -pad_else: +append_else: JUMPDEST // stack: num_bits, x[num_u256s-1] pop @@ -104,5 +104,6 @@ pad_else: mload // stack: num_u256s, 2*x[num_u256s-1]+1 mstore -pad_end: +append_end: JUMPDEST + From 7045772ca4ebdee154e104fc960e40ee3ec6bd05 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 14 Jul 2022 16:28:14 -0700 Subject: [PATCH 005/104] fixes --- evm/src/cpu/kernel/asm/sha2.asm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 62ce56c7..414a2604 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -68,8 +68,8 @@ global sha2_append1: %count_bits // stack: num_bits, x[num_u256s-1] %eq(256) - %jumpi(pad_if256) - %jump(pad_else) + %jumpi(append_if256) + %jump(append_else) 
append_if256: JUMPDEST // stack: num_bits, x[num_u256s-1] @@ -89,7 +89,7 @@ append_if256: swap1 // stack: num_u256s+1, 1 mstore - %jump(pad_end) + %jump(append_end) append_else: JUMPDEST // stack: num_bits, x[num_u256s-1] From 0d22b3f8da6d967985abe327444b9d7e78d93710 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 15 Jul 2022 10:23:44 -0700 Subject: [PATCH 006/104] fix --- evm/src/cpu/kernel/asm/sha2.asm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 414a2604..f779e1ff 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,10 +1,13 @@ global count_bits: + JUMPDEST // stack: n (assumed to be > 0) push 0 // stack: 0, n swap1 // stack: n, 0 + %jump(count_bits_loop) count_bits_loop: + JUMPDEST // stack: k, bits %div2 // stack: k//2, bits @@ -20,6 +23,7 @@ count_bits_loop: // stack: bits global sha2_store: + JUMPDEST // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] dup1 // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] From aba45e2d2c856ac4d3e63d0ee7917dbfbf3e10e2 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 15 Jul 2022 14:30:15 -0700 Subject: [PATCH 007/104] finished pad --- evm/src/cpu/kernel/asm/sha2.asm | 336 +++++++++++++++++-- evm/src/cpu/kernel/asm/util/basic_macros.asm | 24 +- 2 files changed, 330 insertions(+), 30 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index f779e1ff..f8e5e673 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -34,7 +34,6 @@ global sha2_store: // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] push 1 // stack: addr=1, counter=num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - sha2_store_loop: JUMPDEST // stack: addr, counter, x[num_u256s-counter], ... 
, x[num_u256s-1] @@ -58,56 +57,335 @@ sha2_store_loop: sha2_store_end: JUMPDEST - - -global sha2_append1: +// Precodition: input is in memory, starting at [TODO: fix] 0, of the form +// num_u256s, x[0], x[1], ..., x[num_u256s-1] +// Postcodition: input is in memory, starting at [TODO: fix] 0, of the form +// num_blocks, block0[0], block0[1], block1[0], ..., blocklast[1] +global sha2_pad: // TODO: use kernel memory, and start address not at 0 push 0 mload // stack: num_u256s - mload - // stack: x[num_u256s-1] dup1 - // stack: x[num_u256s-1], x[num_u256s-1] + // stack: num_u256s, num_u256s + %iseven + // stack: is_even, num_u256s + swap1 + // stack: num_u256s, is_even + dup1 + // stack: num_u256s, num_u256s, is_even + mload + // stack: x[num_u256s-1], num_u256s, is_even + dup1 + // stack: x[num_u256s-1], x[num_u256s-1], num_u256s, is_even %count_bits - // stack: num_bits, x[num_u256s-1] + // stack: num_bits, x[num_u256s-1], num_u256s, is_even + dup1 + // stack: num_bits, num_bits, x[num_u256s-1], num_u256s, is_even + swap3 + // stack: num_u256s, num_bits, x[num_u256s-1], num_bits, is_even + %decrement + // stack: num_u256s-1, num_bits, x[num_u256s-1], num_bits, is_even + push 256 + mul + // stack: (num_u256s-1)*256, num_bits, x[num_u256s-1], num_bits, is_even + add + // stack: message_bits, x[num_u256s-1], num_bits, is_even + swap2 + // stack: num_bits, x[num_u256s-1], message_bits, is_even + dup1 + // stack: num_bits, num_bits, x[num_u256s-1], message_bits, is_even + dup1 + // stack: num_bits, num_bits, num_bits, x[num_u256s-1], message_bits, is_even + dup1 + %lt(191) + // stack: num_bits<191, num_bits, num_bits,x[num_u256s-1], message_bits, is_even + swap1 + // stack: num_bits, num_bits<191, num_bits, x[num_u256s-1], message_bits, is_even + dup1 %eq(256) - %jumpi(append_if256) - %jump(append_else) -append_if256: + // stack: num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits, is_even + push 0 + // stack: 0, num_bits==256, num_bits<191, num_bits, 
x[num_u256s-1], message_bits, is_even + swap6 + // stack: is_even, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits + dup2 + dup2 + and + %jumpi(pad_case1) + not + // stack: is_odd, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits + dup2 + dup2 + and + %jumpi(pad_case2) + swap1 + // stack: num_bits==256, is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits + pop + // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits + not + // stack: is_even, num_bits<191, num_bits, x[num_u256s-1], message_bits + dup2 + dup2 + and + %jumpi(pad_case3) + not + // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits + dup2 + dup2 + and + %jumpi(pad_case4) + swap1 + // stack: num_bits<191, is_odd, num_bits, x[num_u256s-1], message_bits + pop + // stack: is_odd, num_bits, x[num_u256s-1], message_bits + not + // stack: is_even, num_bits, x[num_u256s-1], message_bits + %jumpi(pad_case5) + %jump(pad_case6) +pad_case1: + // CASE 1: num_u256s is even; num_bits == 256 JUMPDEST - // stack: num_bits, x[num_u256s-1] + // stack: is_odd, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits + %pop5 + // stack: message_bits + push 0 + mload + // stack: num_u256s, message_bits + %increment + // stack: num_u256s+1, message_bits + dup1 + // stack: num_u256s+1, num_u256s+1, message_bits + push 2 + push 255 + %jump(exp) + // stack: 2^255, num_u256s+1, num_u256s+1, message_bits + swap + // stack: num_u256s+1, 2^255, num_u256s+1, message_bits + mstore + // stack: num_u256s+1, message_bits + %increment + // stack: num_u256s+2, message_bits + dup1 + // stack: num_u256s+2, num_u256s+2, message_bits + swap2 + // stack: message_bits, num_u256s+2, num_u256s+2 + swap1 + // stack: num_u256s+2, message_bits, num_u256s+2 + mstore + // stack: num_u256s+2 + %div2 + // stack: num_blocks=(num_u256s+2)//2 + push 0 + mstore + %jump(pad_end) +pad_case2: + // CASE 2: num_u256s is odd; num_bits == 256 + JUMPDEST + // stack: is_even, 
num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits + %pop5 + // stack: message_bits + push 0 + mload + // stack: num_u256s, message_bits + %increment + // stack: num_u256s+1, message_bits + swap + // stack: message_bits, num_u256s+1 + push 2 + push 255 + %jump(exp) + add + // stack: 2^255 + message_bits, num_u256s+1 + swap1 + // stack: num_u256s+1, 2^255 + message_bits + dup1 + // stack: num_u256s+1, num_u256s+1, 2^255 + message_bits + swap2 + // stack: 2^255 + message_bits, num_u256s+1, num_u256s+1 + swap1 + // stack: num_u256s+1, 2^255 + message_bits, num_u256s+1 + mstore + // stack: num_u256s+1 + div2 + // stack: num_blocks=(num_u256s+1)//2 + push 0 + mstore + %jump(pad_end) +pad_case3: + // CASE 3: num_u256s is even; num_bits < 191 + JUMPDEST + // stack: is_even, num_bits<191, num_bits, x[num_u256s-1], message_bits %pop2 + // stack: num_bits, x[num_u256s-1], message_bits + swap1 + // stack: x[num_u256s-1], num_bits, message_bits + push 2 + mul + %increment + // stack: 2*x[num_u256s-1]+1, num_bits, message_bits + swap1 + // stack: num_bits, 2*x[num_u256s-1]+1, message_bits + push 255 + sub + // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits + push 2 + %jump(exp) + // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits + mul + // stack: [x[num_u256s-1] || 1 || 0s], message_bits + add + // stack: [x[num_u256s-1] || 1 || 0s | message_bits] + push 0 + mload + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s | message_bits] + mstore + push 0 + mload + // stack: num_u256s + %div2 + // stack: num_blocks=num_u256s//2 + push 0 + mstore + %jump(pad_end) +pad_case4: + // CASE 4: num_u256s is odd; num_bits < 191 + JUMPDEST + // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits + %pop2 + // stack: num_bits, x[num_u256s-1], message_bits + swap1 + // stack: x[num_u256s-1], num_bits, message_bits + push 2 + mul + %increment + // stack: 2*x[num_u256s-1]+1, num_bits, message_bits + swap1 + // stack: num_bits, 
2*x[num_u256s-1]+1, message_bits + push 255 + sub + // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits + push 2 + %jump(exp) + // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits + mul + // stack: [x[num_u256s-1] || 1 || 0s], message_bits + push 0 + mload + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits + mstore + // stack: message_bits + push 0 + mload + // stack: num_u256s, message_bits + %increment + // stack: num_u256s+1, message_bits + mstore push 0 mload // stack: num_u256s %increment // stack: num_u256s+1 - dup1 - // stack: num_u256s+1, num_u256s+1 + %div2 + // stack: num_blocks=(num_u256s+1)//2 push 0 mstore - // stack: num_u256s+1 - push 1 - // stack: 1, num_u256s+1 - swap1 - // stack: num_u256s+1, 1 - mstore - %jump(append_end) -append_else: + %jump(pad_end) +pad_case5: + // CASE 5: num_u256s is even; 191 <= num_bits < 256 JUMPDEST - // stack: num_bits, x[num_u256s-1] + // stack: is_even, num_bits, x[num_u256s-1], message_bits pop - // stack: x[num_u256s-1] + // stack: num_bits, x[num_u256s-1], message_bits + swap1 + // stack: x[num_u256s-1], num_bits, message_bits push 2 mul - // stack: 2*x[num_u256s-1] %increment - // stack: 2*x[num_u256s-1]+1 + // stack: 2*x[num_u256s-1]+1, num_bits, message_bits + swap1 + // stack: num_bits, 2*x[num_u256s-1]+1, message_bits + push 255 + sub + // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits + push 2 + %jump(exp) + // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits + mul + // stack: [x[num_u256s-1] || 1 || 0s], message_bits push 0 mload - // stack: num_u256s, 2*x[num_u256s-1]+1 + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits + dup1 + // stack: num_u256s, num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits + swap2 + // stack: [x[num_u256s-1] || 1 || 0s], num_u256s, num_u256s, message_bits + swap1 + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], num_u256s, message_bits mstore -append_end: + // stack: num_u256s, 
message_bits + push 2 + add + // stack: num_u256s+2, message_bits + dup1 + // stack: num_u256s+2, num_u256s+2, message_bits + swap2 + // stack: message_bits, num_u256s+2, num_u256s+2 + swap1 + // stack: num_u256s+2, message_bits, num_u256s+2 + mstore + // stack: num_u256s+2 + div2 + // stack: num_blocks=(num_u256s+2)//2 + push 0 + mstore + %jump(pad_end) +pad_case6: + // CASE 6: num_u256s is odd; 191 <= num_bits < 256 + JUMPDEST + // stack: is_even, num_bits, x[num_u256s-1], message_bits + pop + // stack: num_bits, x[num_u256s-1], message_bits + swap1 + // stack: x[num_u256s-1], num_bits, message_bits + push 2 + mul + %increment + // stack: 2*x[num_u256s-1]+1, num_bits, message_bits + swap1 + // stack: num_bits, 2*x[num_u256s-1]+1, message_bits + push 255 + sub + // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits + push 2 + %jump(exp) + // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits + mul + // stack: [x[num_u256s-1] || 1 || 0s], message_bits + push 0 + mload + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits + dup1 + // stack: num_u256s, num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits + swap2 + // stack: [x[num_u256s-1] || 1 || 0s], num_u256s, num_u256s, message_bits + swap1 + // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], num_u256s, message_bits + mstore + // stack: num_u256s, message_bits + %increment + // stack: num_u256s+1, message_bits + dup1 + // stack: num_u256s+1, num_u256s+1, message_bits + swap2 + // stack: message_bits, num_u256s+1, num_u256s+1 + swap1 + // stack: num_u256s+1, message_bits, num_u256s+1 + mstore + // stack: num_u256s+1 + div2 + // stack: num_blocks=(num_u256s+1)//2 + push 0 + mstore +pad_end: JUMPDEST - diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index d64ee513..0718fae5 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -159,6 +159,16 @@ CONSUME_GAS %endmacro 
+%macro pop5 + %pop3 + %pop2 +%endmacro + +%macro pop6 + %pop4 + %pop2 +%endmacro + // If pred is zero, yields z; otherwise, yields nz %macro select // stack: pred, nz, z @@ -249,7 +259,19 @@ %endmacro %macro eq(x) - dup1 push $x eq %endmacro + +%macro lt(x) + push $x + swap1 + lt +%endmacro + +%macro iseven + push 2 + swap1 + mod + iszero +%endmacro From 723b047dc770341389088f2d80ace010e3f2a06f Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 18 Jul 2022 16:12:09 -0700 Subject: [PATCH 008/104] constants --- evm/src/cpu/kernel/asm/sha2_constants.asm | 647 ++++++++++++++++++++++ 1 file changed, 647 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/sha2_constants.asm diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2_constants.asm new file mode 100644 index 00000000..76f5aba6 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2_constants.asm @@ -0,0 +1,647 @@ +global sha2_const_0: + JUMPDEST + push 1116352408 + +global sha2_const_1: + JUMPDEST + push 1899447441 + +global sha2_const_2: + JUMPDEST + push 3049323471 + +global sha2_const_3: + JUMPDEST + push 3921009573 + +global sha2_const_4: + JUMPDEST + push 961987163 + +global sha2_const_5: + JUMPDEST + push 1508970993 + +global sha2_const_6: + JUMPDEST + push 2453635748 + +global sha2_const_7: + JUMPDEST + push 2870763221 + +global sha2_const_8: + JUMPDEST + push 3624381080 + +global sha2_const_9: + JUMPDEST + push 310598401 + +global sha2_const_10: + JUMPDEST + push 607225278 + +global sha2_const_11: + JUMPDEST + push 1426881987 + +global sha2_const_12: + JUMPDEST + push 1925078388 + +global sha2_const_13: + JUMPDEST + push 2162078206 + +global sha2_const_14: + JUMPDEST + push 2614888103 + +global sha2_const_15: + JUMPDEST + push 3248222580 + +global sha2_const_16: + JUMPDEST + push 3835390401 + +global sha2_const_17: + JUMPDEST + push 4022224774 + +global sha2_const_18: + JUMPDEST + push 264347078 + +global sha2_const_19: + JUMPDEST + push 604807628 + +global sha2_const_20: 
+ JUMPDEST + push 770255983 + +global sha2_const_21: + JUMPDEST + push 1249150122 + +global sha2_const_22: + JUMPDEST + push 1555081692 + +global sha2_const_23: + JUMPDEST + push 1996064986 + +global sha2_const_24: + JUMPDEST + push 2554220882 + +global sha2_const_25: + JUMPDEST + push 2821834349 + +global sha2_const_26: + JUMPDEST + push 2952996808 + +global sha2_const_27: + JUMPDEST + push 3210313671 + +global sha2_const_28: + JUMPDEST + push 3336571891 + +global sha2_const_29: + JUMPDEST + push 3584528711 + +global sha2_const_30: + JUMPDEST + push 113926993 + +global sha2_const_31: + JUMPDEST + push 338241895 + +global sha2_const_32: + JUMPDEST + push 666307205 + +global sha2_const_33: + JUMPDEST + push 773529912 + +global sha2_const_34: + JUMPDEST + push 1294757372 + +global sha2_const_35: + JUMPDEST + push 1396182291 + +global sha2_const_36: + JUMPDEST + push 1695183700 + +global sha2_const_37: + JUMPDEST + push 1986661051 + +global sha2_const_38: + JUMPDEST + push 2177026350 + +global sha2_const_39: + JUMPDEST + push 2456956037 + +global sha2_const_40: + JUMPDEST + push 2730485921 + +global sha2_const_41: + JUMPDEST + push 2820302411 + +global sha2_const_42: + JUMPDEST + push 3259730800 + +global sha2_const_43: + JUMPDEST + push 3345764771 + +global sha2_const_44: + JUMPDEST + push 3516065817 + +global sha2_const_45: + JUMPDEST + push 3600352804 + +global sha2_const_46: + JUMPDEST + push 4094571909 + +global sha2_const_47: + JUMPDEST + push 275423344 + +global sha2_const_48: + JUMPDEST + push 430227734 + +global sha2_const_49: + JUMPDEST + push 506948616 + +global sha2_const_50: + JUMPDEST + push 659060556 + +global sha2_const_51: + JUMPDEST + push 883997877 + +global sha2_const_52: + JUMPDEST + push 958139571 + +global sha2_const_53: + JUMPDEST + push 1322822218 + +global sha2_const_54: + JUMPDEST + push 1537002063 + +global sha2_const_55: + JUMPDEST + push 1747873779 + +global sha2_const_56: + JUMPDEST + push 1955562222 + +global sha2_const_57: + JUMPDEST + 
push 2024104815 + +global sha2_const_58: + JUMPDEST + push 2227730452 + +global sha2_const_59: + JUMPDEST + push 2361852424 + +global sha2_const_60: + JUMPDEST + push 2428436474 + +global sha2_const_61: + JUMPDEST + push 2756734187 + +global sha2_const_62: + JUMPDEST + push 3204031479 + +global sha2_const_63: + JUMPDEST + push 3329325298 + +global sha2_const: + JUMPDEST + // stack: i + + // case: 0 + dup1 + %eq(0) + %jumpi(sha2_const_0) + swap1 + + // case: 1 + dup1 + %eq(1) + %jumpi(sha2_const_1) + swap1 + + // case: 2 + dup1 + %eq(2) + %jumpi(sha2_const_2) + swap1 + + // case: 3 + dup1 + %eq(3) + %jumpi(sha2_const_3) + swap1 + + // case: 4 + dup1 + %eq(4) + %jumpi(sha2_const_4) + swap1 + + // case: 5 + dup1 + %eq(5) + %jumpi(sha2_const_5) + swap1 + + // case: 6 + dup1 + %eq(6) + %jumpi(sha2_const_6) + swap1 + + // case: 7 + dup1 + %eq(7) + %jumpi(sha2_const_7) + swap1 + + // case: 8 + dup1 + %eq(8) + %jumpi(sha2_const_8) + swap1 + + // case: 9 + dup1 + %eq(9) + %jumpi(sha2_const_9) + swap1 + + // case: 10 + dup1 + %eq(10) + %jumpi(sha2_const_10) + swap1 + + // case: 11 + dup1 + %eq(11) + %jumpi(sha2_const_11) + swap1 + + // case: 12 + dup1 + %eq(12) + %jumpi(sha2_const_12) + swap1 + + // case: 13 + dup1 + %eq(13) + %jumpi(sha2_const_13) + swap1 + + // case: 14 + dup1 + %eq(14) + %jumpi(sha2_const_14) + swap1 + + // case: 15 + dup1 + %eq(15) + %jumpi(sha2_const_15) + swap1 + + // case: 16 + dup1 + %eq(16) + %jumpi(sha2_const_16) + swap1 + + // case: 17 + dup1 + %eq(17) + %jumpi(sha2_const_17) + swap1 + + // case: 18 + dup1 + %eq(18) + %jumpi(sha2_const_18) + swap1 + + // case: 19 + dup1 + %eq(19) + %jumpi(sha2_const_19) + swap1 + + // case: 20 + dup1 + %eq(20) + %jumpi(sha2_const_20) + swap1 + + // case: 21 + dup1 + %eq(21) + %jumpi(sha2_const_21) + swap1 + + // case: 22 + dup1 + %eq(22) + %jumpi(sha2_const_22) + swap1 + + // case: 23 + dup1 + %eq(23) + %jumpi(sha2_const_23) + swap1 + + // case: 24 + dup1 + %eq(24) + %jumpi(sha2_const_24) + swap1 + + // case: 25 + 
dup1 + %eq(25) + %jumpi(sha2_const_25) + swap1 + + // case: 26 + dup1 + %eq(26) + %jumpi(sha2_const_26) + swap1 + + // case: 27 + dup1 + %eq(27) + %jumpi(sha2_const_27) + swap1 + + // case: 28 + dup1 + %eq(28) + %jumpi(sha2_const_28) + swap1 + + // case: 29 + dup1 + %eq(29) + %jumpi(sha2_const_29) + swap1 + + // case: 30 + dup1 + %eq(30) + %jumpi(sha2_const_30) + swap1 + + // case: 31 + dup1 + %eq(31) + %jumpi(sha2_const_31) + swap1 + + // case: 32 + dup1 + %eq(32) + %jumpi(sha2_const_32) + swap1 + + // case: 33 + dup1 + %eq(33) + %jumpi(sha2_const_33) + swap1 + + // case: 34 + dup1 + %eq(34) + %jumpi(sha2_const_34) + swap1 + + // case: 35 + dup1 + %eq(35) + %jumpi(sha2_const_35) + swap1 + + // case: 36 + dup1 + %eq(36) + %jumpi(sha2_const_36) + swap1 + + // case: 37 + dup1 + %eq(37) + %jumpi(sha2_const_37) + swap1 + + // case: 38 + dup1 + %eq(38) + %jumpi(sha2_const_38) + swap1 + + // case: 39 + dup1 + %eq(39) + %jumpi(sha2_const_39) + swap1 + + // case: 40 + dup1 + %eq(40) + %jumpi(sha2_const_40) + swap1 + + // case: 41 + dup1 + %eq(41) + %jumpi(sha2_const_41) + swap1 + + // case: 42 + dup1 + %eq(42) + %jumpi(sha2_const_42) + swap1 + + // case: 43 + dup1 + %eq(43) + %jumpi(sha2_const_43) + swap1 + + // case: 44 + dup1 + %eq(44) + %jumpi(sha2_const_44) + swap1 + + // case: 45 + dup1 + %eq(45) + %jumpi(sha2_const_45) + swap1 + + // case: 46 + dup1 + %eq(46) + %jumpi(sha2_const_46) + swap1 + + // case: 47 + dup1 + %eq(47) + %jumpi(sha2_const_47) + swap1 + + // case: 48 + dup1 + %eq(48) + %jumpi(sha2_const_48) + swap1 + + // case: 49 + dup1 + %eq(49) + %jumpi(sha2_const_49) + swap1 + + // case: 50 + dup1 + %eq(50) + %jumpi(sha2_const_50) + swap1 + + // case: 51 + dup1 + %eq(51) + %jumpi(sha2_const_51) + swap1 + + // case: 52 + dup1 + %eq(52) + %jumpi(sha2_const_52) + swap1 + + // case: 53 + dup1 + %eq(53) + %jumpi(sha2_const_53) + swap1 + + // case: 54 + dup1 + %eq(54) + %jumpi(sha2_const_54) + swap1 + + // case: 55 + dup1 + %eq(55) + %jumpi(sha2_const_55) + swap1 + 
+ // case: 56 + dup1 + %eq(56) + %jumpi(sha2_const_56) + swap1 + + // case: 57 + dup1 + %eq(57) + %jumpi(sha2_const_57) + swap1 + + // case: 58 + dup1 + %eq(58) + %jumpi(sha2_const_58) + swap1 + + // case: 59 + dup1 + %eq(59) + %jumpi(sha2_const_59) + swap1 + + // case: 60 + dup1 + %eq(60) + %jumpi(sha2_const_60) + swap1 + + // case: 61 + dup1 + %eq(61) + %jumpi(sha2_const_61) + swap1 + + // case: 62 + dup1 + %eq(62) + %jumpi(sha2_const_62) + swap1 + + // case: 63 + dup1 + %eq(63) + %jumpi(sha2_const_63) + swap1 + + // stack: i, k[i] + pop + // stack: k[i] \ No newline at end of file From 476e769153bc3a0c80cf8068fb7088adba622517 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 18 Jul 2022 16:12:19 -0700 Subject: [PATCH 009/104] ops --- evm/src/cpu/kernel/asm/sha2_ops.asm | 196 ++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/sha2_ops.asm diff --git a/evm/src/cpu/kernel/asm/sha2_ops.asm b/evm/src/cpu/kernel/asm/sha2_ops.asm new file mode 100644 index 00000000..31b03d4b --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2_ops.asm @@ -0,0 +1,196 @@ +// 32-bit right rotation +%macro rotr + // stack: rot, value + dup2 + dup2 + // stack: rot, value, rot, value + shr + // stack: value >> rot, rot, value + swap2 + // stack: value, rot, value >> rot + swap1 + // stack: rot, value, value >> rot + push 32 + sub + // stack: 32 - rot, value, value >> rot + shl + // stack: value << (32 - rot), value >> rot + push 32 + push 1 + swap1 + shl + // stack: 1 << 32, value << (32 - rot), value >> rot + swap1 + mod + // stack: (value << (32 - rot)) % (1 << 32), value >> rot + add +%endmacro + +// 32-bit left rotation +%macro rot, + // stack: rot, value + dup2 + dup2 + // stack: rot, value, rot, value + push 32 + sub + // stack: 32 - rot, value, rot, value + shr + // stack: value >> (32 - rot), rot, value + swap2 + // stack: value, rot, value >> (32 - rot) + swap1 + // stack: rot, value, value >> (32 - rot) + shl + // stack: value 
<< rot, value >> (32 - rot) + push 32 + push 1 + swap1 + shl + // stack: 1 << 32, value << rot, value >> (32 - rot) + swap1 + mod + // stack: (value << rot) % (1 << 32), value >> (32 - rot) + add +%endmacro + +global sha2_sigma_0: + JUMPDEST + // stack: x + dup1 + // stack: x, x + push 7 + %rotr + // stack: rotr(x, 7), x + swap1 + // stack: x, rotr(x, 7) + dup1 + // stack: x, x, rotr(x, 7) + push 18 + %rotr + // stack: rotr(x, 18), x, rotr(x, 7) + swap1 + // stack: x, rotr(x, 18), rotr(x, 7) + push 3 + shr + // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) + xor + xor + +global sha2_sigma_1: + JUMPDEST + // stack: x + dup1 + // stack: x, x + push 17 + %rotr + // stack: rotr(x, 17), x + swap1 + // stack: x, rotr(x, 17) + dup1 + // stack: x, x, rotr(x, 17) + push 19 + %rotr + // stack: rotr(x, 19), x, rotr(x, 17) + swap1 + // stack: x, rotr(x, 19), rotr(x, 17) + push 10 + shr + // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) + xor + xor + +global sha2_bigsigma_0: + JUMPDEST + // stack: x + dup1 + // stack: x, x + push 2 + %rotr + // stack: rotr(x, 2), x + swap1 + // stack: x, rotr(x, 2) + dup1 + // stack: x, x, rotr(x, 2) + push 13 + %rotr + // stack: rotr(x, 13), x, rotr(x, 2) + swap1 + // stack: x, rotr(x, 13), rotr(x, 2) + push 22 + %rotr + // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) + xor + xor + +global sha2_bigsigma_1: + JUMPDEST + // stack: x + dup1 + // stack: x, x + push 6 + %rotr + // stack: rotr(x, 6), x + swap1 + // stack: x, rotr(x, 6) + dup1 + // stack: x, x, rotr(x, 6) + push 11 + %rotr + // stack: rotr(x, 11), x, rotr(x, 6) + swap1 + // stack: x, rotr(x, 11), rotr(x, 6) + push 25 + %rotr + // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) + xor + xor + +global sha2_choice: + JUMPDEST + // stack: x, y, z + dup1 + // stack: x, x, y, z + swap2 + // stack: y, x, x, z + and + // stack: x and y, x, z + swap2 + // stack: z, x, x and y + swap1 + // stack: x, z, x and y + not + // stack: not x, z, x and y + and + // stack: (not x) and z, x and y + or + +global 
sha2_majority: + JUMPDEST + // stack: x, y, z + dup3 + dup3 + dup3 + // stack: x, y, z, x, y, z + and + // stack: x and y, z, x, y, z + swap2 + // stack: x, x and y, z, y, z + swap1 + // stack: x and y, x, z, y, z + swap2 + // stack: z, x, x and y, y, z + and + // stack: x and z, x and y, y, z + swap2 + // stack: y, x and z, x and y, z + swap1 + // stack: x and z, y, x and y, z + swap3 + // stack: z, y, x and z, x and y + and + // stack: y and z, x and z, x and y + or + or + + \ No newline at end of file From 89c79208fe8677ce15a8907e3d4eaa47c8601e12 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 18 Jul 2022 16:12:28 -0700 Subject: [PATCH 010/104] new padding --- evm/src/cpu/kernel/asm/sha2.asm | 463 +++++++------------------------- 1 file changed, 95 insertions(+), 368 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index f8e5e673..5788da0a 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,391 +1,118 @@ -global count_bits: - JUMPDEST - // stack: n (assumed to be > 0) - push 0 - // stack: 0, n - swap1 - // stack: n, 0 - %jump(count_bits_loop) -count_bits_loop: - JUMPDEST - // stack: k, bits - %div2 - // stack: k//2, bits - swap1 - // stack: bits, k//2 - %increment - // stack: bits+1, k//2 - swap1 - // stack: k//2, bits+1 - %jumpi(count_bits_loop) - // stack: 0, bits - pop - // stack: bits -global sha2_store: - JUMPDEST - // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - dup1 - // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - // TODO: use kernel memory, and start address not at 0 - push 0 - // stack: addr=0, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - mstore - // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] - push 1 - // stack: addr=1, counter=num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1] -sha2_store_loop: - JUMPDEST - // stack: addr, counter, x[num_u256s-counter], ... 
, x[num_u256s-1] - dup1 - // stack: addr, addr, counter, x[num_u256s-counter], ... , x[num_u256s-1] - swap3 - // stack: x[num_u256s-counter], addr, counter, addr, ... , x[num_u256s-1] - swap1 - // stack: addr, x[num_u256s-counter], counter, addr, ... , x[num_u256s-1] - mstore - // stack: counter, addr, ... , x[num_u256s-1] - %decrement - // stack: counter-1, addr, ... , x[num_u256s-1] - iszero - %jumpi(sha2_store_end) - swap1 - // stack: addr, counter-1, ... , x[num_u256s-1] - %increment - // stack: addr+1, counter-1, ... , x[num_u256s-1] - %jump(sha2_store_loop) -sha2_store_end: - JUMPDEST // Precodition: input is in memory, starting at [TODO: fix] 0, of the form -// num_u256s, x[0], x[1], ..., x[num_u256s-1] -// Postcodition: input is in memory, starting at [TODO: fix] 0, of the form +// num_bytes, x[0], x[1], ..., x[(num_bytes+31)/32-1] +// Postcodition: output is in memory, starting at [TODO: fix] 0, of the form // num_blocks, block0[0], block0[1], block1[0], ..., blocklast[1] global sha2_pad: // TODO: use kernel memory, and start address not at 0 push 0 mload - // stack: num_u256s + // stack: num_bytes + // STEP 1: append 1 + // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 dup1 - // stack: num_u256s, num_u256s - %iseven - // stack: is_even, num_u256s + // stack: num_bytes, num_bytes + dup1 + // stack: num_bytes, num_bytes, num_bytes + push 32 + // stack: 32, num_bytes, num_bytes, num_bytes swap1 - // stack: num_u256s, is_even - dup1 - // stack: num_u256s, num_u256s, is_even - mload - // stack: x[num_u256s-1], num_u256s, is_even - dup1 - // stack: x[num_u256s-1], x[num_u256s-1], num_u256s, is_even - %count_bits - // stack: num_bits, x[num_u256s-1], num_u256s, is_even - dup1 - // stack: num_bits, num_bits, x[num_u256s-1], num_u256s, is_even - swap3 - // stack: num_u256s, num_bits, x[num_u256s-1], num_bits, is_even + // stack: num_bytes, 32, num_bytes, num_bytes + mod + // stack: k := num_bytes % 32, num_bytes, num_bytes + push 32 + sub + // 
stack: 32 - k, num_bytes, num_bytes + push 8 + mul + // stack: 8 * (32 - k), num_bytes, num_bytes %decrement - // stack: num_u256s-1, num_bits, x[num_u256s-1], num_bits, is_even - push 256 - mul - // stack: (num_u256s-1)*256, num_bits, x[num_u256s-1], num_bits, is_even - add - // stack: message_bits, x[num_u256s-1], num_bits, is_even - swap2 - // stack: num_bits, x[num_u256s-1], message_bits, is_even - dup1 - // stack: num_bits, num_bits, x[num_u256s-1], message_bits, is_even - dup1 - // stack: num_bits, num_bits, num_bits, x[num_u256s-1], message_bits, is_even - dup1 - %lt(191) - // stack: num_bits<191, num_bits, num_bits,x[num_u256s-1], message_bits, is_even + // stack: 8 * (32 - k) - 1, num_bytes, num_bytes + push 1 swap1 - // stack: num_bits, num_bits<191, num_bits, x[num_u256s-1], message_bits, is_even + shl + // stack: 1 << (8 * (32 - k) - 1), num_bytes, num_bytes + swap1 + // stack: num_bytes, 1 << (8 * (32 - k) - 1), num_bytes + push 32 + swap1 + div + // stack: num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes dup1 - %eq(256) - // stack: num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits, is_even - push 0 - // stack: 0, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits, is_even - swap6 - // stack: is_even, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits - dup2 - dup2 - and - %jumpi(pad_case1) - not - // stack: is_odd, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits - dup2 - dup2 - and - %jumpi(pad_case2) - swap1 - // stack: num_bits==256, is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits - pop - // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits - not - // stack: is_even, num_bits<191, num_bits, x[num_u256s-1], message_bits - dup2 - dup2 - and - %jumpi(pad_case3) - not - // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits - dup2 - dup2 - and - %jumpi(pad_case4) - swap1 - // stack: num_bits<191, is_odd, num_bits, x[num_u256s-1], 
message_bits - pop - // stack: is_odd, num_bits, x[num_u256s-1], message_bits - not - // stack: is_even, num_bits, x[num_u256s-1], message_bits - %jumpi(pad_case5) - %jump(pad_case6) -pad_case1: - // CASE 1: num_u256s is even; num_bits == 256 - JUMPDEST - // stack: is_odd, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits - %pop5 - // stack: message_bits - push 0 + // stack: num_bytes // 32, num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes mload - // stack: num_u256s, message_bits - %increment - // stack: num_u256s+1, message_bits + // stack: x[num_bytes // 32], num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes + swap1 + // stack: num_bytes // 32, x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes + swap2 + // stack: x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes + add + // stack: x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes + swap1 + // stack: num_bytes // 32, x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes + mstore + // stack: num_bytes + // STEP 2: insert length + // (add length := num_bytes*8+1 to x[(num_bytes//64)*2-1]) dup1 - // stack: num_u256s+1, num_u256s+1, message_bits + dup1 + // stack: num_bytes, num_bytes, num_bytes + push 8 + mul + %increment + // stack: length := num_bytes*8+1, num_bytes, num_bytes + swap1 + // stack: num_bytes, length := num_bytes*8+1, num_bytes + push 64 + swap1 + div + // stack: num_bytes // 64, length := num_bytes*8+1, num_bytes push 2 - push 255 - %jump(exp) - // stack: 2^255, num_u256s+1, num_u256s+1, message_bits + mul + %decrement + // stack: (num_bytes // 64) * 2 - 1, length := num_bytes*8+1, num_bytes + dup1 + // stack: (num_bytes // 64) * 2 - 1, (num_bytes // 64) * 2 - 1, length, num_bytes + mload + // stack: x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, length, num_bytes + swap1 + // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1], length, num_bytes + swap2 + // stack: length, x[(num_bytes // 64) * 2 - 1], 
(num_bytes // 64) * 2 - 1, num_bytes + add + // stack: x[(num_bytes // 64) * 2 - 1] + length, (num_bytes // 64) * 2 - 1, num_bytes + swap1 + // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1] + length, num_bytes + mstore + // stack: num_bytes + // STEP 3: insert num_blocks at start + push 64 swap - // stack: num_u256s+1, 2^255, num_u256s+1, message_bits - mstore - // stack: num_u256s+1, message_bits + div %increment - // stack: num_u256s+2, message_bits - dup1 - // stack: num_u256s+2, num_u256s+2, message_bits - swap2 - // stack: message_bits, num_u256s+2, num_u256s+2 - swap1 - // stack: num_u256s+2, message_bits, num_u256s+2 - mstore - // stack: num_u256s+2 - %div2 - // stack: num_blocks=(num_u256s+2)//2 + // stack: num_blocks := num_bytes // 64 + 1 push 0 mstore - %jump(pad_end) -pad_case2: - // CASE 2: num_u256s is odd; num_bits == 256 + +// Precodition: stack contains address of one message block, followed by output address +// Postcondition: 64 addresses starting at given output address contain 32-bit chunks of message schedule +global sha2_gen_message_schedule_from_block: JUMPDEST - // stack: is_even, num_bits==256, num_bits<191, num_bits, x[num_u256s-1], message_bits - %pop5 - // stack: message_bits - push 0 + // stack: block_addr, output_addr mload - // stack: num_u256s, message_bits - %increment - // stack: num_u256s+1, message_bits - swap - // stack: message_bits, num_u256s+1 - push 2 - push 255 - %jump(exp) - add - // stack: 2^255 + message_bits, num_u256s+1 - swap1 - // stack: num_u256s+1, 2^255 + message_bits - dup1 - // stack: num_u256s+1, num_u256s+1, 2^255 + message_bits - swap2 - // stack: 2^255 + message_bits, num_u256s+1, num_u256s+1 - swap1 - // stack: num_u256s+1, 2^255 + message_bits, num_u256s+1 - mstore - // stack: num_u256s+1 - div2 - // stack: num_blocks=(num_u256s+1)//2 - push 0 - mstore - %jump(pad_end) -pad_case3: - // CASE 3: num_u256s is even; num_bits < 191 - JUMPDEST - // stack: is_even, num_bits<191, num_bits, 
x[num_u256s-1], message_bits - %pop2 - // stack: num_bits, x[num_u256s-1], message_bits - swap1 - // stack: x[num_u256s-1], num_bits, message_bits - push 2 - mul - %increment - // stack: 2*x[num_u256s-1]+1, num_bits, message_bits - swap1 - // stack: num_bits, 2*x[num_u256s-1]+1, message_bits - push 255 - sub - // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits - push 2 - %jump(exp) - // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits - mul - // stack: [x[num_u256s-1] || 1 || 0s], message_bits - add - // stack: [x[num_u256s-1] || 1 || 0s | message_bits] - push 0 - mload - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s | message_bits] - mstore - push 0 - mload - // stack: num_u256s - %div2 - // stack: num_blocks=num_u256s//2 - push 0 - mstore - %jump(pad_end) -pad_case4: - // CASE 4: num_u256s is odd; num_bits < 191 - JUMPDEST - // stack: is_odd, num_bits<191, num_bits, x[num_u256s-1], message_bits - %pop2 - // stack: num_bits, x[num_u256s-1], message_bits - swap1 - // stack: x[num_u256s-1], num_bits, message_bits - push 2 - mul - %increment - // stack: 2*x[num_u256s-1]+1, num_bits, message_bits - swap1 - // stack: num_bits, 2*x[num_u256s-1]+1, message_bits - push 255 - sub - // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits - push 2 - %jump(exp) - // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits - mul - // stack: [x[num_u256s-1] || 1 || 0s], message_bits - push 0 - mload - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits - mstore - // stack: message_bits - push 0 - mload - // stack: num_u256s, message_bits - %increment - // stack: num_u256s+1, message_bits - mstore - push 0 - mload - // stack: num_u256s - %increment - // stack: num_u256s+1 - %div2 - // stack: num_blocks=(num_u256s+1)//2 - push 0 - mstore - %jump(pad_end) -pad_case5: - // CASE 5: num_u256s is even; 191 <= num_bits < 256 - JUMPDEST - // stack: is_even, num_bits, x[num_u256s-1], message_bits - pop - // stack: num_bits, 
x[num_u256s-1], message_bits - swap1 - // stack: x[num_u256s-1], num_bits, message_bits - push 2 - mul - %increment - // stack: 2*x[num_u256s-1]+1, num_bits, message_bits - swap1 - // stack: num_bits, 2*x[num_u256s-1]+1, message_bits - push 255 - sub - // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits - push 2 - %jump(exp) - // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits - mul - // stack: [x[num_u256s-1] || 1 || 0s], message_bits - push 0 - mload - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits - dup1 - // stack: num_u256s, num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits - swap2 - // stack: [x[num_u256s-1] || 1 || 0s], num_u256s, num_u256s, message_bits - swap1 - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], num_u256s, message_bits - mstore - // stack: num_u256s, message_bits - push 2 - add - // stack: num_u256s+2, message_bits - dup1 - // stack: num_u256s+2, num_u256s+2, message_bits - swap2 - // stack: message_bits, num_u256s+2, num_u256s+2 - swap1 - // stack: num_u256s+2, message_bits, num_u256s+2 - mstore - // stack: num_u256s+2 - div2 - // stack: num_blocks=(num_u256s+2)//2 - push 0 - mstore - %jump(pad_end) -pad_case6: - // CASE 6: num_u256s is odd; 191 <= num_bits < 256 - JUMPDEST - // stack: is_even, num_bits, x[num_u256s-1], message_bits - pop - // stack: num_bits, x[num_u256s-1], message_bits - swap1 - // stack: x[num_u256s-1], num_bits, message_bits - push 2 - mul - %increment - // stack: 2*x[num_u256s-1]+1, num_bits, message_bits - swap1 - // stack: num_bits, 2*x[num_u256s-1]+1, message_bits - push 255 - sub - // stack: 256 - (num_bits + 1), 2*x[num_u256s-1]+1, message_bits - push 2 - %jump(exp) - // stack: 2^(256 - (num_bits + 1)), 2*x[num_u256s-1]+1, message_bits - mul - // stack: [x[num_u256s-1] || 1 || 0s], message_bits - push 0 - mload - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits - dup1 - // stack: num_u256s, num_u256s, [x[num_u256s-1] || 1 || 0s], message_bits - 
swap2 - // stack: [x[num_u256s-1] || 1 || 0s], num_u256s, num_u256s, message_bits - swap1 - // stack: num_u256s, [x[num_u256s-1] || 1 || 0s], num_u256s, message_bits - mstore - // stack: num_u256s, message_bits - %increment - // stack: num_u256s+1, message_bits - dup1 - // stack: num_u256s+1, num_u256s+1, message_bits - swap2 - // stack: message_bits, num_u256s+1, num_u256s+1 - swap1 - // stack: num_u256s+1, message_bits, num_u256s+1 - mstore - // stack: num_u256s+1 - div2 - // stack: num_blocks=(num_u256s+1)//2 - push 0 - mstore -pad_end: + // stack: block, output_addr + push 16 + // stack: counter=16, block, output_addr + + +global sha2_message_schedule_next_word: JUMPDEST + // stack: address + + + + + +global sha2_gen_message_schedules: + JUMPDEST \ No newline at end of file From 3140c77cd16343e5c76e656efac4f221cace8452 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 18 Jul 2022 16:18:25 -0700 Subject: [PATCH 011/104] progress --- evm/src/cpu/kernel/asm/sha2.asm | 63 +++++++++++++++++++++++++---- evm/src/cpu/kernel/asm/sha2_ops.asm | 2 +- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 5788da0a..eb9a507f 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -106,13 +106,62 @@ global sha2_gen_message_schedule_from_block: // stack: counter=16, block, output_addr + global sha2_message_schedule_next_word: JUMPDEST - // stack: address + // stack: addr + dup1 + // stack: addr, addr + push 2 + swap1 + sub + // stack: addr - 2, addr + mload + // stack: x[addr - 2], addr + %jump(sha2_sigma_1) + // stack: sigma_1(x[addr - 2]), addr + swap1 + // stack: addr, sigma_1(x[addr - 2]) + dup1 + // stack: addr, addr, sigma_1(x[addr - 2]) + push 7 + swap1 + sub + // stack: addr - 7, addr, sigma_1(x[addr - 2]) + mload + // stack: x[addr - 7], addr, sigma_1(x[addr - 2]) + swap1 + // stack: addr, x[addr - 7], sigma_1(x[addr - 2]) + dup1 + // stack: addr, addr, 
x[addr - 7], sigma_1(x[addr - 2]) + push 15 + swap1 + sub + // stack: addr - 15, addr, x[addr - 7], sigma_1(x[addr - 2]) + mload + // stack: x[addr - 15], addr, x[addr - 7], sigma_1(x[addr - 2]) + %jump(sha2_sigma_0) + // stack: sigma_0(x[addr - 15]), addr, x[addr - 7], sigma_1(x[addr - 2]) + swap1 + // stack: addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + dup1 + // stack: addr, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + push 16 + swap1 + sub + // stack: addr - 16, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + mload + // stack: x[addr - 16], addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + swap1 + // stack: addr, x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + swap4 + // stack: sigma_1(x[addr - 2]), x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], addr + add + add + add + // stack: sigma_1(x[addr - 2]) + x[addr - 16] + sigma_0(x[addr - 15]) + x[addr - 7], addr + swap1 + mstore - - - - -global sha2_gen_message_schedules: - JUMPDEST \ No newline at end of file +global sha2_gen_all_message_schedules: + JUMPDEST diff --git a/evm/src/cpu/kernel/asm/sha2_ops.asm b/evm/src/cpu/kernel/asm/sha2_ops.asm index 31b03d4b..f6656954 100644 --- a/evm/src/cpu/kernel/asm/sha2_ops.asm +++ b/evm/src/cpu/kernel/asm/sha2_ops.asm @@ -27,7 +27,7 @@ %endmacro // 32-bit left rotation -%macro rot, +%macro rotl // stack: rot, value dup2 dup2 From caa1aeee84f992edf15261712bbb0712a3f7469e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 18 Jul 2022 16:50:40 -0700 Subject: [PATCH 012/104] progress --- evm/src/cpu/kernel/asm/sha2.asm | 63 +++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index eb9a507f..96e630d1 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -100,10 +100,69 @@ global sha2_pad: global sha2_gen_message_schedule_from_block: 
JUMPDEST // stack: block_addr, output_addr + dup1 + // stack: block_addr, block_addr, output_addr + %increment + // stack: block_addr + 1, block_addr, output_addr + swap1 + // stack: block_addr, block_addr + 1, output_addr mload - // stack: block, output_addr + // stack: block[0], block_addr + 1, output_addr + swap1 + // stack: block_addr + 1, block[0], output_addr + mload + // stack: block[1], block[0], output_addr + swap2 + // stack: output_addr, block[0], block[1] + // stack: output_addr, block[0], block[1] + push 16 + // stack: counter=16, output_addr, block[0], block[1] + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_loop: + JUMPDEST + // stack: counter, output_addr, block[0], block[1] + swap2 + // stack: block[0], output_addr, counter, block[1] + push 1 + push 32 + shl + // stack: 1 << 32, block[0], output_addr, counter, block[1] + dup2 + dup2 + // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1] + swap1 + // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1] + mod + // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1] + swap2 + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1] + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1] + div + // stack: block[0] // (1 << 32), block[0] % (1 << 32), output_addr, counter, block[1] + swap1 + // stack: block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1] + dup3 + // stack: output_addr, block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1] + mstore + // stack: block[0] // (1 << 32), output_addr, counter, block[1] + swap1 + // stack: output_addr, block[0] // (1 << 32), counter, block[1] + %increment + // stack: output_addr + 1, block[0] // (1 << 32), counter, block[1] + swap1 + // stack: block[0] // (1 << 32), output_addr + 1, counter, block[1] + swap2 + // stack: counter, output_addr + 1, 
block[0] // (1 << 32), block[1] + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_0_end) + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_end: + JUMPDEST + // stack: old counter=0, + pop push 16 - // stack: counter=16, block, output_addr From 94e2e98430b3bb3227f2cbb8976dcc9c0bd322e4 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 2022 16:36:00 -0700 Subject: [PATCH 013/104] fixes --- evm/src/cpu/kernel/aggregator.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index a0516307..65cfbe28 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -76,9 +76,14 @@ pub(crate) fn combined_kernel() -> Kernel { #[cfg(test)] mod tests { use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; - use log::debug; + use std::str::FromStr; - use crate::cpu::kernel::aggregator::combined_kernel; + use anyhow::Result; + use ethereum_types::U256; + use log::debug; + use rand::thread_rng; + + use crate::cpu::kernel::{aggregator::combined_kernel, interpreter::run}; #[test] fn make_kernel() { From 05837039eb280f160e131e464756914b0e2111a5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 19 Jul 2022 13:37:17 -0700 Subject: [PATCH 014/104] constants as macros instead of functions --- evm/src/cpu/kernel/asm/sha2_constants.asm | 392 +++++++++++----------- 1 file changed, 196 insertions(+), 196 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2_constants.asm index 76f5aba6..5870d961 100644 --- a/evm/src/cpu/kernel/asm/sha2_constants.asm +++ b/evm/src/cpu/kernel/asm/sha2_constants.asm @@ -1,647 +1,647 @@ -global sha2_const_0: - JUMPDEST +%macro sha2_const_0 push 1116352408 +%endmacro -global sha2_const_1: - JUMPDEST +%macro sha2_const_1 push 1899447441 +%endmacro -global sha2_const_2: - JUMPDEST +%macro sha2_const_2 push 
3049323471 +%endmacro -global sha2_const_3: - JUMPDEST +%macro sha2_const_3 push 3921009573 +%endmacro -global sha2_const_4: - JUMPDEST +%macro sha2_const_4 push 961987163 +%endmacro -global sha2_const_5: - JUMPDEST +%macro sha2_const_5 push 1508970993 +%endmacro -global sha2_const_6: - JUMPDEST +%macro sha2_const_6 push 2453635748 +%endmacro -global sha2_const_7: - JUMPDEST +%macro sha2_const_7 push 2870763221 +%endmacro -global sha2_const_8: - JUMPDEST +%macro sha2_const_8 push 3624381080 +%endmacro -global sha2_const_9: - JUMPDEST +%macro sha2_const_9 push 310598401 +%endmacro -global sha2_const_10: - JUMPDEST +%macro sha2_const_10 push 607225278 +%endmacro -global sha2_const_11: - JUMPDEST +%macro sha2_const_11 push 1426881987 +%endmacro -global sha2_const_12: - JUMPDEST +%macro sha2_const_12 push 1925078388 +%endmacro -global sha2_const_13: - JUMPDEST +%macro sha2_const_13 push 2162078206 +%endmacro -global sha2_const_14: - JUMPDEST +%macro sha2_const_14 push 2614888103 +%endmacro -global sha2_const_15: - JUMPDEST +%macro sha2_const_15 push 3248222580 +%endmacro -global sha2_const_16: - JUMPDEST +%macro sha2_const_16 push 3835390401 +%endmacro -global sha2_const_17: - JUMPDEST +%macro sha2_const_17 push 4022224774 +%endmacro -global sha2_const_18: - JUMPDEST +%macro sha2_const_18 push 264347078 +%endmacro -global sha2_const_19: - JUMPDEST +%macro sha2_const_19 push 604807628 +%endmacro -global sha2_const_20: - JUMPDEST +%macro sha2_const_20 push 770255983 +%endmacro -global sha2_const_21: - JUMPDEST +%macro sha2_const_21 push 1249150122 +%endmacro -global sha2_const_22: - JUMPDEST +%macro sha2_const_22 push 1555081692 +%endmacro -global sha2_const_23: - JUMPDEST +%macro sha2_const_23 push 1996064986 +%endmacro -global sha2_const_24: - JUMPDEST +%macro sha2_const_24 push 2554220882 +%endmacro -global sha2_const_25: - JUMPDEST +%macro sha2_const_25 push 2821834349 +%endmacro -global sha2_const_26: - JUMPDEST +%macro sha2_const_26 push 2952996808 +%endmacro 
-global sha2_const_27: - JUMPDEST +%macro sha2_const_27 push 3210313671 +%endmacro -global sha2_const_28: - JUMPDEST +%macro sha2_const_28 push 3336571891 +%endmacro -global sha2_const_29: - JUMPDEST +%macro sha2_const_29 push 3584528711 +%endmacro -global sha2_const_30: - JUMPDEST +%macro sha2_const_30 push 113926993 +%endmacro -global sha2_const_31: - JUMPDEST +%macro sha2_const_31 push 338241895 +%endmacro -global sha2_const_32: - JUMPDEST +%macro sha2_const_32 push 666307205 +%endmacro -global sha2_const_33: - JUMPDEST +%macro sha2_const_33 push 773529912 +%endmacro -global sha2_const_34: - JUMPDEST +%macro sha2_const_34 push 1294757372 +%endmacro -global sha2_const_35: - JUMPDEST +%macro sha2_const_35 push 1396182291 +%endmacro -global sha2_const_36: - JUMPDEST +%macro sha2_const_36 push 1695183700 +%endmacro -global sha2_const_37: - JUMPDEST +%macro sha2_const_37 push 1986661051 +%endmacro -global sha2_const_38: - JUMPDEST +%macro sha2_const_38 push 2177026350 +%endmacro -global sha2_const_39: - JUMPDEST +%macro sha2_const_39 push 2456956037 +%endmacro -global sha2_const_40: - JUMPDEST +%macro sha2_const_40 push 2730485921 +%endmacro -global sha2_const_41: - JUMPDEST +%macro sha2_const_41 push 2820302411 +%endmacro -global sha2_const_42: - JUMPDEST +%macro sha2_const_42 push 3259730800 +%endmacro -global sha2_const_43: - JUMPDEST +%macro sha2_const_43 push 3345764771 +%endmacro -global sha2_const_44: - JUMPDEST +%macro sha2_const_44 push 3516065817 +%endmacro -global sha2_const_45: - JUMPDEST +%macro sha2_const_45 push 3600352804 +%endmacro -global sha2_const_46: - JUMPDEST +%macro sha2_const_46 push 4094571909 +%endmacro -global sha2_const_47: - JUMPDEST +%macro sha2_const_47 push 275423344 +%endmacro -global sha2_const_48: - JUMPDEST +%macro sha2_const_48 push 430227734 +%endmacro -global sha2_const_49: - JUMPDEST +%macro sha2_const_49 push 506948616 +%endmacro -global sha2_const_50: - JUMPDEST +%macro sha2_const_50 push 659060556 +%endmacro -global 
sha2_const_51: - JUMPDEST +%macro sha2_const_51 push 883997877 +%endmacro -global sha2_const_52: - JUMPDEST +%macro sha2_const_52 push 958139571 +%endmacro -global sha2_const_53: - JUMPDEST +%macro sha2_const_53 push 1322822218 +%endmacro -global sha2_const_54: - JUMPDEST +%macro sha2_const_54 push 1537002063 +%endmacro -global sha2_const_55: - JUMPDEST +%macro sha2_const_55 push 1747873779 +%endmacro -global sha2_const_56: - JUMPDEST +%macro sha2_const_56 push 1955562222 +%endmacro -global sha2_const_57: - JUMPDEST +%macro sha2_const_57 push 2024104815 +%endmacro -global sha2_const_58: - JUMPDEST +%macro sha2_const_58 push 2227730452 +%endmacro -global sha2_const_59: - JUMPDEST +%macro sha2_const_59 push 2361852424 +%endmacro -global sha2_const_60: - JUMPDEST +%macro sha2_const_60 push 2428436474 +%endmacro -global sha2_const_61: - JUMPDEST +%macro sha2_const_61 push 2756734187 +%endmacro -global sha2_const_62: - JUMPDEST +%macro sha2_const_62 push 3204031479 +%endmacro -global sha2_const_63: - JUMPDEST +%macro sha2_const_63 push 3329325298 +%endmacro -global sha2_const: - JUMPDEST +%macro sha2_const // stack: i - // case: 0 + // case 0 dup1 %eq(0) %jumpi(sha2_const_0) swap1 - // case: 1 + // case 1 dup1 %eq(1) %jumpi(sha2_const_1) swap1 - // case: 2 + // case 2 dup1 %eq(2) %jumpi(sha2_const_2) swap1 - // case: 3 + // case 3 dup1 %eq(3) %jumpi(sha2_const_3) swap1 - // case: 4 + // case 4 dup1 %eq(4) %jumpi(sha2_const_4) swap1 - // case: 5 + // case 5 dup1 %eq(5) %jumpi(sha2_const_5) swap1 - // case: 6 + // case 6 dup1 %eq(6) %jumpi(sha2_const_6) swap1 - // case: 7 + // case 7 dup1 %eq(7) %jumpi(sha2_const_7) swap1 - // case: 8 + // case 8 dup1 %eq(8) %jumpi(sha2_const_8) swap1 - // case: 9 + // case 9 dup1 %eq(9) %jumpi(sha2_const_9) swap1 - // case: 10 + // case 10 dup1 %eq(10) %jumpi(sha2_const_10) swap1 - // case: 11 + // case 11 dup1 %eq(11) %jumpi(sha2_const_11) swap1 - // case: 12 + // case 12 dup1 %eq(12) %jumpi(sha2_const_12) swap1 - // case: 13 + // case 
13 dup1 %eq(13) %jumpi(sha2_const_13) swap1 - // case: 14 + // case 14 dup1 %eq(14) %jumpi(sha2_const_14) swap1 - // case: 15 + // case 15 dup1 %eq(15) %jumpi(sha2_const_15) swap1 - // case: 16 + // case 16 dup1 %eq(16) %jumpi(sha2_const_16) swap1 - // case: 17 + // case 17 dup1 %eq(17) %jumpi(sha2_const_17) swap1 - // case: 18 + // case 18 dup1 %eq(18) %jumpi(sha2_const_18) swap1 - // case: 19 + // case 19 dup1 %eq(19) %jumpi(sha2_const_19) swap1 - // case: 20 + // case 20 dup1 %eq(20) %jumpi(sha2_const_20) swap1 - // case: 21 + // case 21 dup1 %eq(21) %jumpi(sha2_const_21) swap1 - // case: 22 + // case 22 dup1 %eq(22) %jumpi(sha2_const_22) swap1 - // case: 23 + // case 23 dup1 %eq(23) %jumpi(sha2_const_23) swap1 - // case: 24 + // case 24 dup1 %eq(24) %jumpi(sha2_const_24) swap1 - // case: 25 + // case 25 dup1 %eq(25) %jumpi(sha2_const_25) swap1 - // case: 26 + // case 26 dup1 %eq(26) %jumpi(sha2_const_26) swap1 - // case: 27 + // case 27 dup1 %eq(27) %jumpi(sha2_const_27) swap1 - // case: 28 + // case 28 dup1 %eq(28) %jumpi(sha2_const_28) swap1 - // case: 29 + // case 29 dup1 %eq(29) %jumpi(sha2_const_29) swap1 - // case: 30 + // case 30 dup1 %eq(30) %jumpi(sha2_const_30) swap1 - // case: 31 + // case 31 dup1 %eq(31) %jumpi(sha2_const_31) swap1 - // case: 32 + // case 32 dup1 %eq(32) %jumpi(sha2_const_32) swap1 - // case: 33 + // case 33 dup1 %eq(33) %jumpi(sha2_const_33) swap1 - // case: 34 + // case 34 dup1 %eq(34) %jumpi(sha2_const_34) swap1 - // case: 35 + // case 35 dup1 %eq(35) %jumpi(sha2_const_35) swap1 - // case: 36 + // case 36 dup1 %eq(36) %jumpi(sha2_const_36) swap1 - // case: 37 + // case 37 dup1 %eq(37) %jumpi(sha2_const_37) swap1 - // case: 38 + // case 38 dup1 %eq(38) %jumpi(sha2_const_38) swap1 - // case: 39 + // case 39 dup1 %eq(39) %jumpi(sha2_const_39) swap1 - // case: 40 + // case 40 dup1 %eq(40) %jumpi(sha2_const_40) swap1 - // case: 41 + // case 41 dup1 %eq(41) %jumpi(sha2_const_41) swap1 - // case: 42 + // case 42 dup1 %eq(42) 
%jumpi(sha2_const_42) swap1 - // case: 43 + // case 43 dup1 %eq(43) %jumpi(sha2_const_43) swap1 - // case: 44 + // case 44 dup1 %eq(44) %jumpi(sha2_const_44) swap1 - // case: 45 + // case 45 dup1 %eq(45) %jumpi(sha2_const_45) swap1 - // case: 46 + // case 46 dup1 %eq(46) %jumpi(sha2_const_46) swap1 - // case: 47 + // case 47 dup1 %eq(47) %jumpi(sha2_const_47) swap1 - // case: 48 + // case 48 dup1 %eq(48) %jumpi(sha2_const_48) swap1 - // case: 49 + // case 49 dup1 %eq(49) %jumpi(sha2_const_49) swap1 - // case: 50 + // case 50 dup1 %eq(50) %jumpi(sha2_const_50) swap1 - // case: 51 + // case 51 dup1 %eq(51) %jumpi(sha2_const_51) swap1 - // case: 52 + // case 52 dup1 %eq(52) %jumpi(sha2_const_52) swap1 - // case: 53 + // case 53 dup1 %eq(53) %jumpi(sha2_const_53) swap1 - // case: 54 + // case 54 dup1 %eq(54) %jumpi(sha2_const_54) swap1 - // case: 55 + // case 55 dup1 %eq(55) %jumpi(sha2_const_55) swap1 - // case: 56 + // case 56 dup1 %eq(56) %jumpi(sha2_const_56) swap1 - // case: 57 + // case 57 dup1 %eq(57) %jumpi(sha2_const_57) swap1 - // case: 58 + // case 58 dup1 %eq(58) %jumpi(sha2_const_58) swap1 - // case: 59 + // case 59 dup1 %eq(59) %jumpi(sha2_const_59) swap1 - // case: 60 + // case 60 dup1 %eq(60) %jumpi(sha2_const_60) swap1 - // case: 61 + // case 61 dup1 %eq(61) %jumpi(sha2_const_61) swap1 - // case: 62 + // case 62 dup1 %eq(62) %jumpi(sha2_const_62) swap1 - // case: 63 + // case 63 dup1 %eq(63) %jumpi(sha2_const_63) swap1 - // stack: i, k[i] + // stack i, k[i] pop - // stack: k[i] \ No newline at end of file + // stack k[i] +%endmacro \ No newline at end of file From a357a34be9b18c7254ecb6563f100be0042e77d0 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 19 Jul 2022 13:56:35 -0700 Subject: [PATCH 015/104] redest, and progress --- evm/src/cpu/kernel/asm/sha2.asm | 218 ++++++++++++++-------- evm/src/cpu/kernel/asm/sha2_constants.asm | 2 +- 2 files changed, 137 insertions(+), 83 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm 
b/evm/src/cpu/kernel/asm/sha2.asm index 96e630d1..25261cce 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -6,153 +6,154 @@ // num_blocks, block0[0], block0[1], block1[0], ..., blocklast[1] global sha2_pad: // TODO: use kernel memory, and start address not at 0 + // stack: retdest push 0 mload - // stack: num_bytes + // stack: num_bytes, retdest // STEP 1: append 1 // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 dup1 - // stack: num_bytes, num_bytes + // stack: num_bytes, num_bytes, retdest dup1 - // stack: num_bytes, num_bytes, num_bytes + // stack: num_bytes, num_bytes, num_bytes, retdest push 32 - // stack: 32, num_bytes, num_bytes, num_bytes + // stack: 32, num_bytes, num_bytes, num_bytes, retdest swap1 - // stack: num_bytes, 32, num_bytes, num_bytes + // stack: num_bytes, 32, num_bytes, num_bytes, retdest mod - // stack: k := num_bytes % 32, num_bytes, num_bytes + // stack: k := num_bytes % 32, num_bytes, num_bytes, retdest push 32 sub - // stack: 32 - k, num_bytes, num_bytes + // stack: 32 - k, num_bytes, num_bytes, retdest push 8 mul - // stack: 8 * (32 - k), num_bytes, num_bytes + // stack: 8 * (32 - k), num_bytes, num_bytes, retdest %decrement - // stack: 8 * (32 - k) - 1, num_bytes, num_bytes + // stack: 8 * (32 - k) - 1, num_bytes, num_bytes, retdest push 1 swap1 shl - // stack: 1 << (8 * (32 - k) - 1), num_bytes, num_bytes + // stack: 1 << (8 * (32 - k) - 1), num_bytes, num_bytes, retdest swap1 - // stack: num_bytes, 1 << (8 * (32 - k) - 1), num_bytes + // stack: num_bytes, 1 << (8 * (32 - k) - 1), num_bytes, retdest push 32 swap1 div - // stack: num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes + // stack: num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, retdest dup1 - // stack: num_bytes // 32, num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes + // stack: num_bytes // 32, num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, retdest mload - // stack: x[num_bytes // 32], num_bytes // 
32, 1 << (8 * (32 - k) - 1), num_bytes + // stack: x[num_bytes // 32], num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, retdest swap1 - // stack: num_bytes // 32, x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes + // stack: num_bytes // 32, x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes, retdest swap2 - // stack: x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes + // stack: x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes, retdest add - // stack: x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes + // stack: x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes, retdest swap1 - // stack: num_bytes // 32, x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes + // stack: num_bytes // 32, x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes, retdest mstore - // stack: num_bytes + // stack: num_bytes, retdest // STEP 2: insert length // (add length := num_bytes*8+1 to x[(num_bytes//64)*2-1]) dup1 dup1 - // stack: num_bytes, num_bytes, num_bytes + // stack: num_bytes, num_bytes, num_bytes, retdest push 8 mul %increment - // stack: length := num_bytes*8+1, num_bytes, num_bytes + // stack: length := num_bytes*8+1, num_bytes, num_bytes, retdest swap1 - // stack: num_bytes, length := num_bytes*8+1, num_bytes + // stack: num_bytes, length := num_bytes*8+1, num_bytes, retdest push 64 swap1 div - // stack: num_bytes // 64, length := num_bytes*8+1, num_bytes + // stack: num_bytes // 64, length := num_bytes*8+1, num_bytes, retdest push 2 mul %decrement - // stack: (num_bytes // 64) * 2 - 1, length := num_bytes*8+1, num_bytes + // stack: (num_bytes // 64) * 2 - 1, length := num_bytes*8+1, num_bytes, retdest dup1 - // stack: (num_bytes // 64) * 2 - 1, (num_bytes // 64) * 2 - 1, length, num_bytes + // stack: (num_bytes // 64) * 2 - 1, (num_bytes // 64) * 2 - 1, length, num_bytes, retdest mload - // stack: x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, length, 
num_bytes + // stack: x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, length, num_bytes, retdest swap1 - // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1], length, num_bytes + // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1], length, num_bytes, retdest swap2 - // stack: length, x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, num_bytes + // stack: length, x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, num_bytes, retdest add // stack: x[(num_bytes // 64) * 2 - 1] + length, (num_bytes // 64) * 2 - 1, num_bytes swap1 - // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1] + length, num_bytes + // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1] + length, num_bytes, retdest mstore - // stack: num_bytes + // stack: num_bytes, retdest // STEP 3: insert num_blocks at start push 64 swap div %increment - // stack: num_blocks := num_bytes // 64 + 1 + // stack: num_blocks := num_bytes // 64 + 1, retdest push 0 mstore + // stack: retdest + JUMP // Precodition: stack contains address of one message block, followed by output address // Postcondition: 64 addresses starting at given output address contain 32-bit chunks of message schedule global sha2_gen_message_schedule_from_block: JUMPDEST - // stack: block_addr, output_addr + // stack: block_addr, output_addr, retdest dup1 - // stack: block_addr, block_addr, output_addr + // stack: block_addr, block_addr, output_addr, retdest %increment - // stack: block_addr + 1, block_addr, output_addr + // stack: block_addr + 1, block_addr, output_addr, retdest swap1 - // stack: block_addr, block_addr + 1, output_addr + // stack: block_addr, block_addr + 1, output_addr, retdest mload - // stack: block[0], block_addr + 1, output_addr + // stack: block[0], block_addr + 1, output_addr, retdest swap1 - // stack: block_addr + 1, block[0], output_addr + // stack: block_addr + 1, block[0], output_addr, retdest mload - // stack: block[1], block[0], output_addr + // 
stack: block[1], block[0], output_addr, retdest swap2 - // stack: output_addr, block[0], block[1] - // stack: output_addr, block[0], block[1] - push 16 - // stack: counter=16, output_addr, block[0], block[1] + // stack: output_addr, block[0], block[1], retdest + push 8 + // stack: counter=8, output_addr, block[0], block[1], retdest %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_loop: JUMPDEST - // stack: counter, output_addr, block[0], block[1] + // stack: counter, output_addr, block[0], block[1], retdest swap2 - // stack: block[0], output_addr, counter, block[1] + // stack: block[0], output_addr, counter, block[1], retdest push 1 push 32 shl - // stack: 1 << 32, block[0], output_addr, counter, block[1] + // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest dup2 dup2 - // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1] + // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest swap1 - // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1] + // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest mod - // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1] + // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest swap2 - // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1] - // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1] + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest div - // stack: block[0] // (1 << 32), block[0] % (1 << 32), output_addr, counter, block[1] + // stack: block[0] // (1 << 32), block[0] % (1 << 32), output_addr, counter, block[1], retdest swap1 - // stack: block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1] + // stack: block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, 
block[1], retdest dup3 - // stack: output_addr, block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1] + // stack: output_addr, block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1], retdest mstore - // stack: block[0] // (1 << 32), output_addr, counter, block[1] + // stack: block[0] // (1 << 32), output_addr, counter, block[1], retdest swap1 - // stack: output_addr, block[0] // (1 << 32), counter, block[1] + // stack: output_addr, block[0] // (1 << 32), counter, block[1], retdest %increment - // stack: output_addr + 1, block[0] // (1 << 32), counter, block[1] + // stack: output_addr + 1, block[0] // (1 << 32), counter, block[1], retdest swap1 - // stack: block[0] // (1 << 32), output_addr + 1, counter, block[1] + // stack: block[0] // (1 << 32), output_addr + 1, counter, block[1], retdest swap2 - // stack: counter, output_addr + 1, block[0] // (1 << 32), block[1] + // stack: counter, output_addr + 1, block[0] // (1 << 32), block[1], retdest %decrement dup1 iszero @@ -160,67 +161,120 @@ sha2_gen_message_schedule_from_block_0_loop: %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_end: JUMPDEST - // stack: old counter=0, + // stack: old counter=0, output_addr, block[0], block[1], retdest pop - push 16 - + push 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + swap3 + // stack: block[1], output_addr, counter, block[0], retdest + swap2 + // stack: counter, output_addr, block[1], block[0], retdest +sha2_gen_message_schedule_from_block_1_loop: + JUMPDEST + // stack: counter, output_addr, block[1], block[0], retdest + swap2 + // stack: block[1], output_addr, counter, block[0], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest + dup2 + dup2 + // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest + swap1 + // stack: 
block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest + mod + // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest + swap2 + // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + div + // stack: block[1] // (1 << 32), block[1] % (1 << 32), output_addr, counter, block[0], retdest + swap1 + // stack: block[1] % (1 << 32), block[1] // (1 << 32), output_addr, counter, block[0], retdest + dup3 + // stack: output_addr, block[1] % (1 << 32), block[1] // (1 << 32), output_addr, counter, block[0], retdest + mstore + // stack: block[1] // (1 << 32), output_addr, counter, block[0], retdest + swap1 + // stack: output_addr, block[1] // (1 << 32), counter, block[0], retdest + %increment + // stack: output_addr + 1, block[1] // (1 << 32), counter, block[0], retdest + swap1 + // stack: block[1] // (1 << 32), output_addr + 1, counter, block[0], retdest + swap2 + // stack: counter, output_addr + 1, block[1] // (1 << 32), block[0], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_1_end) + %jump(sha2_gen_message_schedule_from_block_1_loop) +sha2_gen_message_schedule_from_block_1_end: + JUMPDEST + // stack: old counter=0, output_addr, block[1], block[0], retdest + pop + // stack: output_addr, block[0], block[1], retdest + global sha2_message_schedule_next_word: JUMPDEST - // stack: addr + // stack: addr, retdest dup1 - // stack: addr, addr + // stack: addr, addr, retdest push 2 swap1 sub - // stack: addr - 2, addr + // stack: addr - 2, addr, retdest mload - // stack: x[addr - 2], addr + // stack: x[addr - 2], addr, retdest %jump(sha2_sigma_1) - // stack: sigma_1(x[addr - 2]), addr + // stack: sigma_1(x[addr - 2]), addr, retdest swap1 - // stack: addr, sigma_1(x[addr - 2]) + // stack: addr, sigma_1(x[addr - 2]), retdest dup1 - // stack: addr, addr, sigma_1(x[addr - 2]) + // stack: addr, addr, sigma_1(x[addr - 2]), retdest push 7 swap1 sub - // stack: 
addr - 7, addr, sigma_1(x[addr - 2]) + // stack: addr - 7, addr, sigma_1(x[addr - 2]), retdest mload - // stack: x[addr - 7], addr, sigma_1(x[addr - 2]) + // stack: x[addr - 7], addr, sigma_1(x[addr - 2]), retdest swap1 - // stack: addr, x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr, x[addr - 7], sigma_1(x[addr - 2]), retdest dup1 - // stack: addr, addr, x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr, addr, x[addr - 7], sigma_1(x[addr - 2]), retdest push 15 swap1 sub - // stack: addr - 15, addr, x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr - 15, addr, x[addr - 7], sigma_1(x[addr - 2]), retdest mload - // stack: x[addr - 15], addr, x[addr - 7], sigma_1(x[addr - 2]) + // stack: x[addr - 15], addr, x[addr - 7], sigma_1(x[addr - 2]), retdest %jump(sha2_sigma_0) - // stack: sigma_0(x[addr - 15]), addr, x[addr - 7], sigma_1(x[addr - 2]) + // stack: sigma_0(x[addr - 15]), addr, x[addr - 7], sigma_1(x[addr - 2]), retdest swap1 - // stack: addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest dup1 - // stack: addr, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest push 16 swap1 sub - // stack: addr - 16, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr - 16, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest mload - // stack: x[addr - 16], addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + // stack: x[addr - 16], addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest swap1 - // stack: addr, x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]) + // stack: addr, x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest swap4 - // stack: sigma_1(x[addr - 2]), x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], addr + // stack: sigma_1(x[addr - 2]), 
x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], addr, retdest add add add - // stack: sigma_1(x[addr - 2]) + x[addr - 16] + sigma_0(x[addr - 15]) + x[addr - 7], addr + // stack: sigma_1(x[addr - 2]) + x[addr - 16] + sigma_0(x[addr - 15]) + x[addr - 7], addr, retdest swap1 mstore + // stack: retdest + JUMP global sha2_gen_all_message_schedules: JUMPDEST diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2_constants.asm index 5870d961..62091bdb 100644 --- a/evm/src/cpu/kernel/asm/sha2_constants.asm +++ b/evm/src/cpu/kernel/asm/sha2_constants.asm @@ -644,4 +644,4 @@ // stack i, k[i] pop // stack k[i] -%endmacro \ No newline at end of file +%endmacro From 92b14fe7f2c5d664d11153424ba83b174bc0a5a3 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 19 Jul 2022 14:19:19 -0700 Subject: [PATCH 016/104] functions --> macros --- evm/src/cpu/kernel/asm/sha2.asm | 3 ++- evm/src/cpu/kernel/asm/sha2_ops.asm | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 25261cce..054e0a97 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -215,7 +215,8 @@ sha2_gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest pop // stack: output_addr, block[0], block[1], retdest - + push 48 + // stack: counter=48, output_addr, block[0], block[1], retdest global sha2_message_schedule_next_word: diff --git a/evm/src/cpu/kernel/asm/sha2_ops.asm b/evm/src/cpu/kernel/asm/sha2_ops.asm index f6656954..cfb24d6a 100644 --- a/evm/src/cpu/kernel/asm/sha2_ops.asm +++ b/evm/src/cpu/kernel/asm/sha2_ops.asm @@ -77,8 +77,7 @@ global sha2_sigma_0: xor xor -global sha2_sigma_1: - JUMPDEST +%macro sha2_sigma_1 // stack: x dup1 // stack: x, x @@ -99,9 +98,9 @@ global sha2_sigma_1: // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) xor xor +%endmacro -global sha2_bigsigma_0: - JUMPDEST +%macro 
sha2_bigsigma_0 // stack: x dup1 // stack: x, x @@ -122,9 +121,9 @@ global sha2_bigsigma_0: // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) xor xor +%endmacro -global sha2_bigsigma_1: - JUMPDEST +%macro sha2_bigsigma_1 // stack: x dup1 // stack: x, x @@ -145,9 +144,9 @@ global sha2_bigsigma_1: // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) xor xor +%endmacro -global sha2_choice: - JUMPDEST +%macro sha2_choice // stack: x, y, z dup1 // stack: x, x, y, z @@ -164,9 +163,9 @@ global sha2_choice: and // stack: (not x) and z, x and y or +%endmacro -global sha2_majority: - JUMPDEST +%macro sha2_majority // stack: x, y, z dup3 dup3 @@ -192,5 +191,5 @@ global sha2_majority: // stack: y and z, x and z, x and y or or - +%endmacro \ No newline at end of file From 615ece2289f7f98a9808261ee31b4bd54979eae6 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 19 Jul 2022 14:19:35 -0700 Subject: [PATCH 017/104] progress --- evm/src/cpu/kernel/asm/sha2.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 054e0a97..b13abb38 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -230,7 +230,7 @@ global sha2_message_schedule_next_word: // stack: addr - 2, addr, retdest mload // stack: x[addr - 2], addr, retdest - %jump(sha2_sigma_1) + %sha2_sigma_1 // stack: sigma_1(x[addr - 2]), addr, retdest swap1 // stack: addr, sigma_1(x[addr - 2]), retdest @@ -252,7 +252,7 @@ global sha2_message_schedule_next_word: // stack: addr - 15, addr, x[addr - 7], sigma_1(x[addr - 2]), retdest mload // stack: x[addr - 15], addr, x[addr - 7], sigma_1(x[addr - 2]), retdest - %jump(sha2_sigma_0) + %sha2_sigma_0 // stack: sigma_0(x[addr - 15]), addr, x[addr - 7], sigma_1(x[addr - 2]), retdest swap1 // stack: addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest From eb6095cd047de85e6c24706aabae1a056a5a2ec0 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 
2022 16:36:36 -0700 Subject: [PATCH 018/104] message schedule progress --- evm/src/cpu/kernel/aggregator.rs | 196 +------------------- evm/src/cpu/kernel/asm/helper_functions.asm | 135 -------------- evm/src/cpu/kernel/asm/sha2.asm | 76 +++++--- evm/src/cpu/kernel/tests/mod.rs | 1 + 4 files changed, 52 insertions(+), 356 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/helper_functions.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 65cfbe28..85ebd11a 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -41,7 +41,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), include_str!("asm/exp.asm"), - include_str!("asm/helper_functions.asm"), + include_str!("asm/memory.asm"), include_str!("asm/moddiv.asm"), include_str!("asm/secp256k1/curve_mul.asm"), include_str!("asm/secp256k1/curve_add.asm"), @@ -81,9 +81,8 @@ mod tests { use anyhow::Result; use ethereum_types::U256; use log::debug; - use rand::thread_rng; - use crate::cpu::kernel::{aggregator::combined_kernel, interpreter::run}; + use crate::cpu::kernel::aggregator::combined_kernel; #[test] fn make_kernel() { @@ -93,195 +92,4 @@ mod tests { let kernel = combined_kernel(); debug!("Total kernel size: {} bytes", kernel.code.len()); } - - fn u256ify<'a>(hexes: impl IntoIterator) -> Result> { - Ok(hexes - .into_iter() - .map(U256::from_str) - .collect::, _>>()?) - } - - #[test] - fn test_insert() -> Result<()> { - // Make sure we can parse and assemble the entire kernel. 
- let kernel = combined_kernel(); - let exp = kernel.global_labels["swapn"]; - let mut rng = thread_rng(); - let a = U256([0; 4].map(|_| rng.gen())); - let b = U256([0; 4].map(|_| rng.gen())); - let n = rng.gen_range(0..16); - let n_u256 = U256([n, 0, 0, 0]); - - let mut initial_stack = vec![U256::from_str("0xdeadbeef")?, n_u256, b]; - initial_stack.extend([a; 16]); - let stack_with_kernel = run(&kernel.code, exp, initial_stack); - - dbg!(stack_with_kernel); - let expected_stack = todo!(); - - // assert_eq!(stack_with_kernel, expected_stack); - - Ok(()) - } - - #[test] - fn test_exp() -> Result<()> { - // Make sure we can parse and assemble the entire kernel. - let kernel = combined_kernel(); - let exp = kernel.global_labels["exp"]; - let mut rng = thread_rng(); - let a = U256([0; 4].map(|_| rng.gen())); - let b = U256([0; 4].map(|_| rng.gen())); - - // Random input - let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack); - let initial_stack = vec![b, a]; - let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack); - assert_eq!(stack_with_kernel, stack_with_opcode); - - // 0 base - let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack); - let initial_stack = vec![b, U256::zero()]; - let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack); - assert_eq!(stack_with_kernel, stack_with_opcode); - - // 0 exponent - let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack); - let initial_stack = vec![U256::zero(), a]; - let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack); - 
assert_eq!(stack_with_kernel, stack_with_opcode); - - Ok(()) - } - - #[test] - fn test_ec_ops() -> Result<()> { - // Make sure we can parse and assemble the entire kernel. - let kernel = combined_kernel(); - let ec_add = kernel.global_labels["ec_add"]; - let ec_double = kernel.global_labels["ec_double"]; - let ec_mul = kernel.global_labels["ec_mul"]; - let identity = ("0x0", "0x0"); - let invalid = ("0x0", "0x3"); // Not on curve - let point0 = ( - "0x1feee7ec986e198890cb83be8b8ba09ee953b3f149db6d9bfdaa5c308a33e58d", - "0x2051cc9a9edd46231604fd88f351e95ec72a285be93e289ac59cb48561efb2c6", - ); - let point1 = ( - "0x15b64d0a5f329fb672029298be8050f444626e6de11903caffa74b388075be1b", - "0x2d9e07340bd5cd7b70687b98f2500ff930a89a30d7b6a3e04b1b4d345319d234", - ); - // point2 = point0 + point1 - let point2 = ( - "0x18659c0e0a8fedcb8747cf463fc7cfa05f667d84e771d0a9521fc1a550688f0c", - "0x283ed10b42703e187e7a808aeb45c6b457bc4cc7d704e53b3348a1e3b0bfa55b", - ); - // point3 = 2 * point0 - let point3 = ( - "0x17da2b7b1a01c8dfdf0f5a6415833c7d755d219aa7e2c4cd0ac83d87d0ca4217", - "0xc9ace9de14aac8114541b50c19320eb40f0eeac3621526d9e34dbcf4c3a6c0f", - ); - let s = "0xabb2a34c0e7956cfe6cef9ddb7e810c45ea19a6ebadd79c21959af09f5ba480a"; - // point4 = s * point0 - let point4 = ( - "0xe519344959cc17021fe98878f947f5c1b1675325533a620c1684cfa6367e6c0", - "0x7496a7575b0b6a821e19ce780ecc3e0b156e605327798693defeb9f265b7a6f", - ); - - // Standard addition #1 - let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point2.1, point2.0])?); - // Standard addition #2 - let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point2.1, point2.0])?); - - // Standard doubling #1 - let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; - let stack = 
run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point3.1, point3.0])?); - // Standard doubling #2 - let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_double, initial_stack); - assert_eq!(stack, u256ify([point3.1, point3.0])?); - // Standard doubling #3 - let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, u256ify([point3.1, point3.0])?); - - // Addition with identity #1 - let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point1.1, point1.0])?); - // Addition with identity #2 - let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point1.1, point1.0])?); - // Addition with identity #3 - let initial_stack = - u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([identity.1, identity.0])?); - - // Addition with invalid point(s) #1 - let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, vec![U256::MAX, U256::MAX]); - // Addition with invalid point(s) #2 - let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, vec![U256::MAX, U256::MAX]); - // Addition with invalid point(s) #3 - let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, vec![U256::MAX, U256::MAX]); - // Addition with invalid point(s) #4 - let initial_stack = 
u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, vec![U256::MAX, U256::MAX]); - - // Scalar multiplication #1 - let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, u256ify([point4.1, point4.0])?); - // Scalar multiplication #2 - let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, u256ify([identity.1, identity.0])?); - // Scalar multiplication #3 - let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, u256ify([point0.1, point0.0])?); - // Scalar multiplication #4 - let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, u256ify([identity.1, identity.0])?); - // Scalar multiplication #5 - let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack); - assert_eq!(stack, vec![U256::MAX, U256::MAX]); - - // Multiple calls - let ec_mul_hex = format!("0x{:x}", ec_mul); - let initial_stack = u256ify([ - "0xdeadbeef", - s, - &ec_mul_hex, - identity.1, - identity.0, - point0.1, - point0.0, - ])?; - let stack = run(&kernel.code, ec_add, initial_stack); - assert_eq!(stack, u256ify([point4.1, point4.0])?); - - Ok(()) - } } diff --git a/evm/src/cpu/kernel/asm/helper_functions.asm b/evm/src/cpu/kernel/asm/helper_functions.asm deleted file mode 100644 index c628916c..00000000 --- a/evm/src/cpu/kernel/asm/helper_functions.asm +++ /dev/null @@ -1,135 +0,0 @@ -global swapn: - JUMPDEST - - // stack: n, ... 
- %eq(1) - %jumpi(case1) - %eq(2) - %jumpi(case2) - %eq(3) - %jumpi(case3) - %eq(4) - %jumpi(case4) - %eq(5) - %jumpi(case5) - %eq(6) - %jumpi(case6) - %eq(7) - %jumpi(case7) - %eq(8) - %jumpi(case8) - %eq(9) - %jumpi(case9) - %eq(10) - %jumpi(case10) - %eq(11) - %jumpi(case11) - %eq(12) - %jumpi(case12) - %eq(13) - %jumpi(case13) - %eq(14) - %jumpi(case14) - %eq(15) - %jumpi(case15) - %eq(16) - %jumpi(case16) -case1: - JUMPDEST - swap1 - %jump(swapn_end) -case2: - JUMPDEST - swap2 - %jump(swapn_end) -case3: - JUMPDEST - swap3 - %jump(swapn_end) -case4: - JUMPDEST - swap4 - %jump(swapn_end) -case5: - JUMPDEST - swap5 - %jump(swapn_end) -case6: - JUMPDEST - swap6 - %jump(swapn_end) -case7: - JUMPDEST - swap7 - %jump(swapn_end) -case8: - JUMPDEST - swap8 - %jump(swapn_end) -case9: - JUMPDEST - swap9 - %jump(swapn_end) -case10: - JUMPDEST - swap10 - %jump(swapn_end) -case11: - JUMPDEST - swap11 - %jump(swapn_end) -case12: - JUMPDEST - swap12 - %jump(swapn_end) -case13: - JUMPDEST - swap13 - %jump(swapn_end) -case14: - JUMPDEST - swap14 - %jump(swapn_end) -case15: - JUMPDEST - swap15 - %jump(swapn_end) -case16: - JUMPDEST - swap16 - %jump(swapn_end) -swapn_end: - JUMPDEST - - -global insertn: - JUMPDEST - - // stack: n, val, ... - dup1 - // stack: n, n, val, ... - swap2 - // stack: val, n, n, ... - swap1 - // stack: n, val, n, ... 
- %jump(swapn) - // stack: [nth], n, ..., val - swap1 - // stack: n, [nth], ..., val -swap_back_loop: - // stack: k, [kth], ..., [k-1st] - dup1 - // stack: k, k, [kth], ..., [k-1st] - swap2 - // stack: [kth], k, k, ..., [k-1st] - swap1 - // stack: k, [kth], k, ..., [k-1st] - %jump(swapn) - // stack: [k-1st], k, ..., [k-2nd], [kth] - swap1 - // stack: k, [k-1st], ..., [k-2nd], [kth] - %decrement - // stack: k-1, [k-1st], ..., [k-2nd], [kth] - iszero - not - %jumpi(swap_back_loop) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index b13abb38..33b4e11d 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,3 +1,8 @@ +sha2_test_input: + BYTES 0x4 + BYTES 0x1, 0x2, 0x3, 0x4 + + // Precodition: input is in memory, starting at [TODO: fix] 0, of the form @@ -5,7 +10,7 @@ // Postcodition: output is in memory, starting at [TODO: fix] 0, of the form // num_blocks, block0[0], block0[1], block1[0], ..., blocklast[1] global sha2_pad: - // TODO: use kernel memory, and start address not at 0 + // TODO: use kernel memory (SEGMENT_KERNEL_MISC or SEGMENT_KERNEL_SHA2), and instead of 0 // stack: retdest push 0 mload @@ -217,65 +222,82 @@ sha2_gen_message_schedule_from_block_1_end: // stack: output_addr, block[0], block[1], retdest push 48 // stack: counter=48, output_addr, block[0], block[1], retdest - - -global sha2_message_schedule_next_word: +sha2_gen_message_schedule_remaining_loop: JUMPDEST - // stack: addr, retdest + // stack: counter, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest dup1 - // stack: addr, addr, retdest + // stack: output_addr, output_addr, counter, block[0], block[1], retdest push 2 swap1 sub - // stack: addr - 2, addr, retdest + // stack: output_addr - 2, output_addr, counter, block[0], block[1], retdest mload - // stack: x[addr - 2], addr, retdest + // stack: x[output_addr - 2], output_addr, counter, block[0], block[1], retdest 
%sha2_sigma_1 - // stack: sigma_1(x[addr - 2]), addr, retdest + // stack: sigma_1(x[output_addr - 2]), output_addr, counter, block[0], block[1], retdest swap1 - // stack: addr, sigma_1(x[addr - 2]), retdest + // stack: output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest dup1 - // stack: addr, addr, sigma_1(x[addr - 2]), retdest + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest push 7 swap1 sub - // stack: addr - 7, addr, sigma_1(x[addr - 2]), retdest + // stack: output_addr - 7, output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest mload - // stack: x[addr - 7], addr, sigma_1(x[addr - 2]), retdest + // stack: x[output_addr - 7], output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest swap1 - // stack: addr, x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest dup1 - // stack: addr, addr, x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr, output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest push 15 swap1 sub - // stack: addr - 15, addr, x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr - 15, output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest mload - // stack: x[addr - 15], addr, x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: x[output_addr - 15], output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest %sha2_sigma_0 - // stack: sigma_0(x[addr - 15]), addr, x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: sigma_0(x[output_addr - 15]), output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest swap1 - // stack: addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr, 
sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest dup1 - // stack: addr, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr, output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest push 16 swap1 sub - // stack: addr - 16, addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr - 16, output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest mload - // stack: x[addr - 16], addr, sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: x[output_addr - 16], output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest swap1 - // stack: addr, x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], sigma_1(x[addr - 2]), retdest + // stack: output_addr, x[output_addr - 16], sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest swap4 - // stack: sigma_1(x[addr - 2]), x[addr - 16], sigma_0(x[addr - 15]), x[addr - 7], addr, retdest + // stack: sigma_1(x[output_addr - 2]), x[output_addr - 16], sigma_0(x[output_addr - 15]), x[output_addr - 7], output_addr, counter, block[0], block[1], retdest add add add - // stack: sigma_1(x[addr - 2]) + x[addr - 16] + sigma_0(x[addr - 15]) + x[addr - 7], addr, retdest + // stack: sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, counter, block[0], block[1], retdest swap1 + // stack: output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + 
sigma_0(x[output_addr - 15]) + x[output_addr - 7], counter, block[0], block[1], retdest + swap2 + // stack: sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, counter, block[0], block[1], retdest mstore - // stack: retdest - JUMP + // stack: output_addr, counter, block[0], block[1], retdest + %increment + // stack: output_addr + 1, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 1, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 1, block[0], block[1], retdest + iszero + %jumpi(sha2_gen_message_schedule_remaining_end) + %jump(sha2_gen_message_schedule_remaining_loop) +sha2_gen_message_schedule_remaining_end: + JUMPDEST global sha2_gen_all_message_schedules: JUMPDEST diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs index a9c8c08c..d1fa616b 100644 --- a/evm/src/cpu/kernel/tests/mod.rs +++ b/evm/src/cpu/kernel/tests/mod.rs @@ -6,6 +6,7 @@ mod mpt; mod packing; mod rlp; mod transaction_parsing; +mod sha2; use std::str::FromStr; From 8dbb653a54e7615eda8fd9d28b0e811cc6177310 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 21 Jul 2022 15:04:33 -0700 Subject: [PATCH 019/104] memory commands --- evm/src/cpu/kernel/asm/sha2.asm | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 33b4e11d..234ff9d3 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,19 +1,19 @@ -sha2_test_input: - BYTES 0x4 - BYTES 0x1, 0x2, 0x3, 0x4 +// sha2_test_input: +// BYTES 0x4 +// BYTES 0x1, 0x2, 0x3, 0x4 +// BYTES 0, 0, - -// Precodition: input is in memory, starting at [TODO: fix] 0, of the form 
-// num_bytes, x[0], x[1], ..., x[(num_bytes+31)/32-1] -// Postcodition: output is in memory, starting at [TODO: fix] 0, of the form -// num_blocks, block0[0], block0[1], block1[0], ..., blocklast[1] +// Precodition: input is in memory, starting at 0 of kernel SHA2 segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcodition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: - // TODO: use kernel memory (SEGMENT_KERNEL_MISC or SEGMENT_KERNEL_SHA2), and instead of 0 + // TODO: use kernel memory (SEGMENT_KERNEL_GENERAL), and instead of 0 // stack: retdest push 0 - mload + %mload_kernel_sha2 // stack: num_bytes, retdest // STEP 1: append 1 // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 From 89e5a04029b03e5ebe8092fd1b2ed17f6301a82a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 21 Jul 2022 15:14:18 -0700 Subject: [PATCH 020/104] constants --- evm/src/cpu/kernel/asm/sha2_constants.asm | 711 ++-------------------- 1 file changed, 65 insertions(+), 646 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2_constants.asm index 62091bdb..6f5e9083 100644 --- a/evm/src/cpu/kernel/asm/sha2_constants.asm +++ b/evm/src/cpu/kernel/asm/sha2_constants.asm @@ -1,647 +1,66 @@ -%macro sha2_const_0 - push 1116352408 -%endmacro +sha2_constants_k: + BYTES 66, 138, 47, 152 + BYTES 113, 55, 68, 145 + BYTES 181, 192, 251, 207 + BYTES 233, 181, 219, 165 + BYTES 57, 86, 194, 91 + BYTES 89, 241, 17, 241 + BYTES 146, 63, 130, 164 + BYTES 171, 28, 94, 213 + BYTES 216, 7, 170, 152 + BYTES 18, 131, 91, 1 + BYTES 36, 49, 133, 190 + BYTES 85, 12, 125, 195 + BYTES 114, 190, 93, 116 + BYTES 128, 222, 177, 254 + BYTES 155, 220, 6, 167 + BYTES 193, 155, 241, 116 + BYTES 228, 155, 105, 193 + BYTES 239, 190, 71, 134 + BYTES 15, 193, 157, 198 + BYTES 36, 12, 161, 204 + BYTES 45, 233, 44, 111 + BYTES 74, 116, 132, 170 + 
BYTES 92, 176, 169, 220 + BYTES 118, 249, 136, 218 + BYTES 152, 62, 81, 82 + BYTES 168, 49, 198, 109 + BYTES 176, 3, 39, 200 + BYTES 191, 89, 127, 199 + BYTES 198, 224, 11, 243 + BYTES 213, 167, 145, 71 + BYTES 6, 202, 99, 81 + BYTES 20, 41, 41, 103 + BYTES 39, 183, 10, 133 + BYTES 46, 27, 33, 56 + BYTES 77, 44, 109, 252 + BYTES 83, 56, 13, 19 + BYTES 101, 10, 115, 84 + BYTES 118, 106, 10, 187 + BYTES 129, 194, 201, 46 + BYTES 146, 114, 44, 133 + BYTES 162, 191, 232, 161 + BYTES 168, 26, 102, 75 + BYTES 194, 75, 139, 112 + BYTES 199, 108, 81, 163 + BYTES 209, 146, 232, 25 + BYTES 214, 153, 6, 36 + BYTES 244, 14, 53, 133 + BYTES 16, 106, 160, 112 + BYTES 25, 164, 193, 22 + BYTES 30, 55, 108, 8 + BYTES 39, 72, 119, 76 + BYTES 52, 176, 188, 181 + BYTES 57, 28, 12, 179 + BYTES 78, 216, 170, 74 + BYTES 91, 156, 202, 79 + BYTES 104, 46, 111, 243 + BYTES 116, 143, 130, 238 + BYTES 120, 165, 99, 111 + BYTES 132, 200, 120, 20 + BYTES 140, 199, 2, 8 + BYTES 144, 190, 255, 250 + BYTES 164, 80, 108, 235 + BYTES 190, 249, 163, 247 + BYTES 198, 113, 120, 242 -%macro sha2_const_1 - push 1899447441 -%endmacro - -%macro sha2_const_2 - push 3049323471 -%endmacro - -%macro sha2_const_3 - push 3921009573 -%endmacro - -%macro sha2_const_4 - push 961987163 -%endmacro - -%macro sha2_const_5 - push 1508970993 -%endmacro - -%macro sha2_const_6 - push 2453635748 -%endmacro - -%macro sha2_const_7 - push 2870763221 -%endmacro - -%macro sha2_const_8 - push 3624381080 -%endmacro - -%macro sha2_const_9 - push 310598401 -%endmacro - -%macro sha2_const_10 - push 607225278 -%endmacro - -%macro sha2_const_11 - push 1426881987 -%endmacro - -%macro sha2_const_12 - push 1925078388 -%endmacro - -%macro sha2_const_13 - push 2162078206 -%endmacro - -%macro sha2_const_14 - push 2614888103 -%endmacro - -%macro sha2_const_15 - push 3248222580 -%endmacro - -%macro sha2_const_16 - push 3835390401 -%endmacro - -%macro sha2_const_17 - push 4022224774 -%endmacro - -%macro sha2_const_18 - push 264347078 -%endmacro 
- -%macro sha2_const_19 - push 604807628 -%endmacro - -%macro sha2_const_20 - push 770255983 -%endmacro - -%macro sha2_const_21 - push 1249150122 -%endmacro - -%macro sha2_const_22 - push 1555081692 -%endmacro - -%macro sha2_const_23 - push 1996064986 -%endmacro - -%macro sha2_const_24 - push 2554220882 -%endmacro - -%macro sha2_const_25 - push 2821834349 -%endmacro - -%macro sha2_const_26 - push 2952996808 -%endmacro - -%macro sha2_const_27 - push 3210313671 -%endmacro - -%macro sha2_const_28 - push 3336571891 -%endmacro - -%macro sha2_const_29 - push 3584528711 -%endmacro - -%macro sha2_const_30 - push 113926993 -%endmacro - -%macro sha2_const_31 - push 338241895 -%endmacro - -%macro sha2_const_32 - push 666307205 -%endmacro - -%macro sha2_const_33 - push 773529912 -%endmacro - -%macro sha2_const_34 - push 1294757372 -%endmacro - -%macro sha2_const_35 - push 1396182291 -%endmacro - -%macro sha2_const_36 - push 1695183700 -%endmacro - -%macro sha2_const_37 - push 1986661051 -%endmacro - -%macro sha2_const_38 - push 2177026350 -%endmacro - -%macro sha2_const_39 - push 2456956037 -%endmacro - -%macro sha2_const_40 - push 2730485921 -%endmacro - -%macro sha2_const_41 - push 2820302411 -%endmacro - -%macro sha2_const_42 - push 3259730800 -%endmacro - -%macro sha2_const_43 - push 3345764771 -%endmacro - -%macro sha2_const_44 - push 3516065817 -%endmacro - -%macro sha2_const_45 - push 3600352804 -%endmacro - -%macro sha2_const_46 - push 4094571909 -%endmacro - -%macro sha2_const_47 - push 275423344 -%endmacro - -%macro sha2_const_48 - push 430227734 -%endmacro - -%macro sha2_const_49 - push 506948616 -%endmacro - -%macro sha2_const_50 - push 659060556 -%endmacro - -%macro sha2_const_51 - push 883997877 -%endmacro - -%macro sha2_const_52 - push 958139571 -%endmacro - -%macro sha2_const_53 - push 1322822218 -%endmacro - -%macro sha2_const_54 - push 1537002063 -%endmacro - -%macro sha2_const_55 - push 1747873779 -%endmacro - -%macro sha2_const_56 - push 1955562222 
-%endmacro - -%macro sha2_const_57 - push 2024104815 -%endmacro - -%macro sha2_const_58 - push 2227730452 -%endmacro - -%macro sha2_const_59 - push 2361852424 -%endmacro - -%macro sha2_const_60 - push 2428436474 -%endmacro - -%macro sha2_const_61 - push 2756734187 -%endmacro - -%macro sha2_const_62 - push 3204031479 -%endmacro - -%macro sha2_const_63 - push 3329325298 -%endmacro - -%macro sha2_const - // stack: i - - // case 0 - dup1 - %eq(0) - %jumpi(sha2_const_0) - swap1 - - // case 1 - dup1 - %eq(1) - %jumpi(sha2_const_1) - swap1 - - // case 2 - dup1 - %eq(2) - %jumpi(sha2_const_2) - swap1 - - // case 3 - dup1 - %eq(3) - %jumpi(sha2_const_3) - swap1 - - // case 4 - dup1 - %eq(4) - %jumpi(sha2_const_4) - swap1 - - // case 5 - dup1 - %eq(5) - %jumpi(sha2_const_5) - swap1 - - // case 6 - dup1 - %eq(6) - %jumpi(sha2_const_6) - swap1 - - // case 7 - dup1 - %eq(7) - %jumpi(sha2_const_7) - swap1 - - // case 8 - dup1 - %eq(8) - %jumpi(sha2_const_8) - swap1 - - // case 9 - dup1 - %eq(9) - %jumpi(sha2_const_9) - swap1 - - // case 10 - dup1 - %eq(10) - %jumpi(sha2_const_10) - swap1 - - // case 11 - dup1 - %eq(11) - %jumpi(sha2_const_11) - swap1 - - // case 12 - dup1 - %eq(12) - %jumpi(sha2_const_12) - swap1 - - // case 13 - dup1 - %eq(13) - %jumpi(sha2_const_13) - swap1 - - // case 14 - dup1 - %eq(14) - %jumpi(sha2_const_14) - swap1 - - // case 15 - dup1 - %eq(15) - %jumpi(sha2_const_15) - swap1 - - // case 16 - dup1 - %eq(16) - %jumpi(sha2_const_16) - swap1 - - // case 17 - dup1 - %eq(17) - %jumpi(sha2_const_17) - swap1 - - // case 18 - dup1 - %eq(18) - %jumpi(sha2_const_18) - swap1 - - // case 19 - dup1 - %eq(19) - %jumpi(sha2_const_19) - swap1 - - // case 20 - dup1 - %eq(20) - %jumpi(sha2_const_20) - swap1 - - // case 21 - dup1 - %eq(21) - %jumpi(sha2_const_21) - swap1 - - // case 22 - dup1 - %eq(22) - %jumpi(sha2_const_22) - swap1 - - // case 23 - dup1 - %eq(23) - %jumpi(sha2_const_23) - swap1 - - // case 24 - dup1 - %eq(24) - %jumpi(sha2_const_24) - swap1 - - // case 
25 - dup1 - %eq(25) - %jumpi(sha2_const_25) - swap1 - - // case 26 - dup1 - %eq(26) - %jumpi(sha2_const_26) - swap1 - - // case 27 - dup1 - %eq(27) - %jumpi(sha2_const_27) - swap1 - - // case 28 - dup1 - %eq(28) - %jumpi(sha2_const_28) - swap1 - - // case 29 - dup1 - %eq(29) - %jumpi(sha2_const_29) - swap1 - - // case 30 - dup1 - %eq(30) - %jumpi(sha2_const_30) - swap1 - - // case 31 - dup1 - %eq(31) - %jumpi(sha2_const_31) - swap1 - - // case 32 - dup1 - %eq(32) - %jumpi(sha2_const_32) - swap1 - - // case 33 - dup1 - %eq(33) - %jumpi(sha2_const_33) - swap1 - - // case 34 - dup1 - %eq(34) - %jumpi(sha2_const_34) - swap1 - - // case 35 - dup1 - %eq(35) - %jumpi(sha2_const_35) - swap1 - - // case 36 - dup1 - %eq(36) - %jumpi(sha2_const_36) - swap1 - - // case 37 - dup1 - %eq(37) - %jumpi(sha2_const_37) - swap1 - - // case 38 - dup1 - %eq(38) - %jumpi(sha2_const_38) - swap1 - - // case 39 - dup1 - %eq(39) - %jumpi(sha2_const_39) - swap1 - - // case 40 - dup1 - %eq(40) - %jumpi(sha2_const_40) - swap1 - - // case 41 - dup1 - %eq(41) - %jumpi(sha2_const_41) - swap1 - - // case 42 - dup1 - %eq(42) - %jumpi(sha2_const_42) - swap1 - - // case 43 - dup1 - %eq(43) - %jumpi(sha2_const_43) - swap1 - - // case 44 - dup1 - %eq(44) - %jumpi(sha2_const_44) - swap1 - - // case 45 - dup1 - %eq(45) - %jumpi(sha2_const_45) - swap1 - - // case 46 - dup1 - %eq(46) - %jumpi(sha2_const_46) - swap1 - - // case 47 - dup1 - %eq(47) - %jumpi(sha2_const_47) - swap1 - - // case 48 - dup1 - %eq(48) - %jumpi(sha2_const_48) - swap1 - - // case 49 - dup1 - %eq(49) - %jumpi(sha2_const_49) - swap1 - - // case 50 - dup1 - %eq(50) - %jumpi(sha2_const_50) - swap1 - - // case 51 - dup1 - %eq(51) - %jumpi(sha2_const_51) - swap1 - - // case 52 - dup1 - %eq(52) - %jumpi(sha2_const_52) - swap1 - - // case 53 - dup1 - %eq(53) - %jumpi(sha2_const_53) - swap1 - - // case 54 - dup1 - %eq(54) - %jumpi(sha2_const_54) - swap1 - - // case 55 - dup1 - %eq(55) - %jumpi(sha2_const_55) - swap1 - - // case 56 - dup1 - 
%eq(56) - %jumpi(sha2_const_56) - swap1 - - // case 57 - dup1 - %eq(57) - %jumpi(sha2_const_57) - swap1 - - // case 58 - dup1 - %eq(58) - %jumpi(sha2_const_58) - swap1 - - // case 59 - dup1 - %eq(59) - %jumpi(sha2_const_59) - swap1 - - // case 60 - dup1 - %eq(60) - %jumpi(sha2_const_60) - swap1 - - // case 61 - dup1 - %eq(61) - %jumpi(sha2_const_61) - swap1 - - // case 62 - dup1 - %eq(62) - %jumpi(sha2_const_62) - swap1 - - // case 63 - dup1 - %eq(63) - %jumpi(sha2_const_63) - swap1 - - // stack i, k[i] - pop - // stack k[i] -%endmacro From 924880390f7979ba2aa44d5324548ec1a070bb0e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 21 Jul 2022 16:05:34 -0700 Subject: [PATCH 021/104] h constants --- evm/src/cpu/kernel/asm/sha2_constants.asm | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2_constants.asm index 6f5e9083..22712a98 100644 --- a/evm/src/cpu/kernel/asm/sha2_constants.asm +++ b/evm/src/cpu/kernel/asm/sha2_constants.asm @@ -64,3 +64,12 @@ sha2_constants_k: BYTES 190, 249, 163, 247 BYTES 198, 113, 120, 242 +sha2_constants_h: + BYTES 106, 9, 230, 103 + BYTES 187, 103, 174, 133 + BYTES 60, 110, 243, 114 + BYTES 165, 79, 245, 58 + BYTES 81, 14, 82, 127 + BYTES 155, 5, 104, 140 + BYTES 31, 131, 217, 171 + BYTES 91, 224, 205, 25 From f0dd1fd3f4ef7c21c20a1a8a54a1d315f58fb30c Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 22 Jul 2022 14:35:41 -0700 Subject: [PATCH 022/104] updates --- evm/src/cpu/kernel/asm/sha2.asm | 46 +++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 234ff9d3..bc44c4a7 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,19 +1,49 @@ -// sha2_test_input: -// BYTES 0x4 -// BYTES 0x1, 0x2, 0x3, 0x4 -// BYTES 0, 0, +global sha2_store: + JUMPDEST + // stack: num_u256s, x[0], x[1], x[2], ... 
, x[num_u256s-1], retdest + dup1 + // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + // TODO: use kernel memory, and start address not at 0 + push 0 + // stack: addr=0, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + %mstore_kernel_general + // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + push 1 + // stack: addr=1, counter=num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest +sha2_store_loop: + JUMPDEST + // stack: addr, counter, x[num_u256s-counter], ... , x[num_u256s-1], retdest + dup1 + // stack: addr, addr, counter, x[num_u256s-counter], ... , x[num_u256s-1], retdest + swap3 + // stack: x[num_u256s-counter], addr, counter, addr, ... , x[num_u256s-1], retdest + swap1 + // stack: addr, x[num_u256s-counter], counter, addr, ... , x[num_u256s-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_u256s-1], retdest + %decrement + // stack: counter-1, addr, ... , x[num_u256s-1], retdest + iszero + %jumpi(sha2_store_end) + swap1 + // stack: addr, counter-1, ... , x[num_u256s-1], retdest + %increment + // stack: addr+1, counter-1, ... 
, x[num_u256s-1], retdest + %jump(sha2_store_loop) +sha2_store_end: + // stack: counter=0, addr, retdest + %pop2 + JUMP - -// Precodition: input is in memory, starting at 0 of kernel SHA2 segment, of the form +// Precodition: input is in memory, starting at 0 of kernel general segment, of the form // num_bytes, x[0], x[1], ..., x[num_bytes - 1] // Postcodition: output is in memory, starting at 0, of the form // num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: - // TODO: use kernel memory (SEGMENT_KERNEL_GENERAL), and instead of 0 // stack: retdest push 0 - %mload_kernel_sha2 + %mload_kernel_gemeral // stack: num_bytes, retdest // STEP 1: append 1 // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 From c29190328c4c46ca8d030ddfbb61359aee89e874 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 25 Jul 2022 15:09:36 -0700 Subject: [PATCH 023/104] updates --- evm/src/cpu/kernel/asm/sha2.asm | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index bc44c4a7..3e7165af 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -1,34 +1,33 @@ global sha2_store: JUMPDEST - // stack: num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest dup1 - // stack: num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest - // TODO: use kernel memory, and start address not at 0 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest push 0 - // stack: addr=0, num_u256s, num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general - // stack: num_u256s, x[0], x[1], x[2], ... 
, x[num_u256s-1], retdest + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest push 1 - // stack: addr=1, counter=num_u256s, x[0], x[1], x[2], ... , x[num_u256s-1], retdest + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: JUMPDEST - // stack: addr, counter, x[num_u256s-counter], ... , x[num_u256s-1], retdest + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest dup1 - // stack: addr, addr, counter, x[num_u256s-counter], ... , x[num_u256s-1], retdest + // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest swap3 - // stack: x[num_u256s-counter], addr, counter, addr, ... , x[num_u256s-1], retdest + // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest swap1 - // stack: addr, x[num_u256s-counter], counter, addr, ... , x[num_u256s-1], retdest + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest %mstore_kernel_general - // stack: counter, addr, ... , x[num_u256s-1], retdest + // stack: counter, addr, ... , x[num_bytes-1], retdest %decrement - // stack: counter-1, addr, ... , x[num_u256s-1], retdest + // stack: counter-1, addr, ... , x[num_bytes-1], retdest iszero %jumpi(sha2_store_end) swap1 - // stack: addr, counter-1, ... , x[num_u256s-1], retdest + // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment - // stack: addr+1, counter-1, ... , x[num_u256s-1], retdest + // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest %jump(sha2_store_loop) sha2_store_end: // stack: counter=0, addr, retdest From 268c6a115de8d1d5829e98b1cc98cf53a8f8c553 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 25 Jul 2022 15:49:27 -0700 Subject: [PATCH 024/104] mstore_kernel_general_u32 macro --- evm/src/cpu/kernel/asm/sha2.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 3e7165af..37594f8d 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -42,7 +42,7 @@ sha2_store_end: global sha2_pad: // stack: retdest push 0 - %mload_kernel_gemeral + %mload_kernel_general // stack: num_bytes, retdest // STEP 1: append 1 // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 From 87e06946b9c5a86165bb2a89b542435774cb56be Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 26 Jul 2022 11:01:57 -0700 Subject: [PATCH 025/104] first test, and fixes --- evm/src/cpu/kernel/aggregator.rs | 4 ++++ evm/src/cpu/kernel/asm/sha2.asm | 2 +- evm/src/cpu/kernel/asm/sha2_ops.asm | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 85ebd11a..d8466894 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -48,6 +48,10 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/secp256k1/moddiv.asm"), include_str!("asm/secp256k1/lift_x.asm"), include_str!("asm/secp256k1/inverse_scalar.asm"), + include_str!("asm/sha2.asm"), + include_str!("asm/sha2_constants.asm"), + include_str!("asm/sha2_memory.asm"), + include_str!("asm/sha2_ops.asm"), include_str!("asm/ecrecover.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 37594f8d..10a2d815 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -123,7 
+123,7 @@ global sha2_pad: // stack: num_bytes, retdest // STEP 3: insert num_blocks at start push 64 - swap + swap1 div %increment // stack: num_blocks := num_bytes // 64 + 1, retdest diff --git a/evm/src/cpu/kernel/asm/sha2_ops.asm b/evm/src/cpu/kernel/asm/sha2_ops.asm index cfb24d6a..c600afe9 100644 --- a/evm/src/cpu/kernel/asm/sha2_ops.asm +++ b/evm/src/cpu/kernel/asm/sha2_ops.asm @@ -54,7 +54,7 @@ add %endmacro -global sha2_sigma_0: +%macro sha2_sigma_0 JUMPDEST // stack: x dup1 @@ -76,6 +76,7 @@ global sha2_sigma_0: // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) xor xor +%endmacro %macro sha2_sigma_1 // stack: x From baa4bd4ea10eafafac8b72755a020acfd33228aa Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 26 Jul 2022 11:02:12 -0700 Subject: [PATCH 026/104] files --- evm/src/cpu/kernel/asm/sha2_memory.asm | 171 +++++++++++++++++++++++++ evm/src/cpu/kernel/tests/sha2.rs | 53 ++++++++ 2 files changed, 224 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/sha2_memory.asm create mode 100644 evm/src/cpu/kernel/tests/sha2.rs diff --git a/evm/src/cpu/kernel/asm/sha2_memory.asm b/evm/src/cpu/kernel/asm/sha2_memory.asm new file mode 100644 index 00000000..42fb5849 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2_memory.asm @@ -0,0 +1,171 @@ +// Load a single byte from kernel general memory. +%macro mload_kernel_general + // stack: offset + PUSH @SEGMENT_KERNEL_GENERAL + // stack: segment, offset + PUSH 0 // kernel has context 0 + // stack: context, segment, offset + MLOAD_GENERAL + // stack: value +%endmacro + +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from kernel general memory. 
+%macro mload_kernel_general_u32 + // stack: offset + DUP1 + %mload_kernel_general + // stack: c_3, offset + %shl_const(8) + // stack: c_3 << 8, offset + DUP2 + %add_const(1) + %mload_kernel_general + OR + // stack: (c_3 << 8) | c_2, offset + %shl_const(8) + // stack: ((c_3 << 8) | c_2) << 8, offset + DUP2 + %add_const(2) + %mload_kernel_general + OR + // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset + %shl_const(8) + // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset + SWAP1 + %add_const(3) + %mload_kernel_general + OR + // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 +%endmacro + +// Load 256 bits (half of a 512-bit SHA-2 block) from general kernel memory. +%macro mload_kernel_general_u256 + // stack: offset + DUP1 + %mload_kernel_code_u32 + // stack: c_7, offset + %shl_const(32) + // stack: c7 << 32, offset + DUP2 + %add_const(1) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 32) | c_6, offset + %shl_const(32) + // stack: ((c_7 << 32) | c_6) << 32, offset + DUP2 + %add_const(2) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset + %shl_const(32) + // stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset + DUP2 + %add_const(3) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset + %shl_const(32) + // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset + DUP2 + %add_const(4) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset + %shl_const(32) + // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset + DUP2 + %add_const(5) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset + %shl_const(32) + // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset + DUP2 + %add_const(6) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 192) | (c_6 << 
160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset + %shl_const(32) + // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset + DUP2 + %add_const(7) + %mload_kernel_general_u32 + OR + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset +%endmacro + +// Store a single byte to kernel general memory. +%macro mstore_kernel_general + // stack: offset, value + PUSH @SEGMENT_KERNEL_GENERAL + // stack: segment, offset + PUSH 0 // kernel has context 0 + // stack: context, segment, offset, value + MSTORE_GENERAL +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to kernel general memory. +%macro mstore_kernel_general_u32 + // stack: offset, value + swap1 + // stack: value, offset + push 1 + push 8 + shl + // stack: 1 << 8, value, offset + swap1 + // stack: value, 1 << 8, offset + dup2 + dup2 + // stack: value, 1 << 8, value, 1 << 8, offset + mod + // stack: c_0 = value % (1 << 8), value, 1 << 8, offset + swap2 + swap1 + // stack: value, 1 << 8, c_0, offset + push 8 + shr + // stack: value >> 8, 1 << 8, c_0, offset + dup2 + dup2 + // stack: value >> 8, 1 << 8, value >> 8, 1 << 8, c_0, offset + mod + // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, 1 << 8, c_0, offset + swap2 + swap1 + // stack: value >> 8, 1 << 8, c_1, c_0, offset + push 8 + shr + // stack: value >> 16, 1 << 8, c_1, c_0, offset + dup2 + dup2 + // stack: value >> 16, 1 << 8, value >> 16, 1 << 8, c_1, c_0, offset + mod + // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, 1 << 8, c_1, c_0, offset + swap2 + swap1 + // stack: value >> 16, 1 << 8, c_2, c_1, c_0, offset + push 8 + shr + // stack: value >> 24, 1 << 8, c_2, c_1, c_0, offset + mod + // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset + dup5 + // stack: offset, c_3, c_2, c_1, c_0, offset + %mstore_kernel_general + // stack: c_2, c_1, c_0, 
offset + dup4 + // stack: offset, c_2, c_1, c_0, offset + %mstore_kernel_general + // stack: c_1, c_0, offset + dup3 + // stack: offset, c_1, c_0, offset + %mstore_kernel_general + // stack: c_0, offset + swap1 + // stack: offset, c_0 + %mstore_kernel_general +%endmacro diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs new file mode 100644 index 00000000..b46fd6cc --- /dev/null +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -0,0 +1,53 @@ +use std::str::FromStr; + +use anyhow::Result; +use ethereum_types::U256; +use rand::{thread_rng, Rng}; + +use crate::cpu::kernel::aggregator::combined_kernel; +use crate::cpu::kernel::interpreter::run; + +#[test] +fn test_sha2_store() -> Result<()> { + let kernel = combined_kernel(); + let sha2_store = kernel.global_labels["sha2_store"]; + let mut rng = thread_rng(); + let num_bytes = rng.gen_range(0..20); + let mut bytes: Vec = Vec::with_capacity(num_bytes); + for _ in 0..num_bytes { + let byte: u8 = rng.gen(); + let mut v = vec![0; 31]; + v.push(byte); + let v2: [u8; 32] = v.try_into().unwrap(); + bytes.push(U256::from(v2)); + } + + let mut initial_stack = vec![U256::from(num_bytes)]; + initial_stack.extend(bytes); + let stack_with_kernel = run(&kernel.code, sha2_store, initial_stack)?.stack; + + // let expected_stack = todo!(); + // assert_eq!(stack_with_kernel, expected_stack); + + Ok(()) +} + +/*#[test] +fn test_sha2() -> Result<()> { + let kernel = combined_kernel(); + let sha2_store = kernel.global_labels["sha2_store"]; + let sha2_pad = kernel.global_labels["sha2_pad"]; + let mut rng = thread_rng(); + let a = U256([0; 4].map(|_| rng.gen())); + let b = U256([0; 4].map(|_| rng.gen())); + + let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; + let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack; + let initial_stack = vec![b, a]; + let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP + + let expected_stack = todo!(); + 
assert_eq!(stack_with_kernel, expected_stack); + + Ok(()) +}*/ From c24af37288c848340298208ae5a1ded4285df2d0 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 28 Jul 2022 07:11:58 -0700 Subject: [PATCH 027/104] fixes --- evm/src/cpu/kernel/asm/sha2.asm | 8 ++++++++ evm/src/cpu/kernel/asm/util/basic_macros.asm | 5 ----- evm/src/cpu/kernel/tests/sha2.rs | 6 ++++++ evm/src/lib.rs | 1 + 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 10a2d815..cbd7407e 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -20,6 +20,9 @@ sha2_store_loop: // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest + dup1 + %eq_const(6) + %jumpi(sha2_stop) %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest iszero @@ -30,9 +33,13 @@ sha2_store_loop: // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest %jump(sha2_store_loop) sha2_store_end: + JUMPDEST // stack: counter=0, addr, retdest %pop2 JUMP +sha2_stop: + JUMPDEST + STOP // Precodition: input is in memory, starting at 0 of kernel general segment, of the form @@ -40,6 +47,7 @@ sha2_store_end: // Postcodition: output is in memory, starting at 0, of the form // num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: + JUMPDEST // stack: retdest push 0 %mload_kernel_general diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 0718fae5..9b8ce137 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -258,11 +258,6 @@ div %endmacro -%macro eq(x) - push $x - eq -%endmacro - %macro lt(x) push $x swap1 diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index b46fd6cc..411f2ec5 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,3 +1,4 @@ +use core::num; use std::str::FromStr; use anyhow::Result; @@ -22,9 +23,14 @@ fn test_sha2_store() -> Result<()> { bytes.push(U256::from(v2)); } + dbg!(num_bytes); + dbg!(bytes.clone()); + let mut initial_stack = vec![U256::from(num_bytes)]; initial_stack.extend(bytes); + dbg!(initial_stack.clone()); let stack_with_kernel = run(&kernel.code, sha2_store, initial_stack)?.stack; + dbg!(stack_with_kernel); // let expected_stack = todo!(); // assert_eq!(stack_with_kernel, expected_stack); diff --git a/evm/src/lib.rs b/evm/src/lib.rs index 6f332b59..923df55a 100644 --- a/evm/src/lib.rs +++ b/evm/src/lib.rs @@ -4,6 +4,7 @@ #![allow(clippy::type_complexity)] #![feature(let_chains)] #![feature(generic_const_exprs)] +#![feature(let_chains)] pub mod all_stark; pub mod arithmetic; From 6a31a4b7ecf24ef93b505f2e89cd648b0eb1eb3e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 28 Jul 2022 10:25:20 -0700 Subject: [PATCH 028/104] fixes --- 
evm/src/cpu/kernel/asm/sha2.asm | 5 +++-- evm/src/cpu/kernel/asm/util/basic_macros.asm | 3 +-- evm/src/cpu/kernel/tests/sha2.rs | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index cbd7407e..c5cdb212 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -6,6 +6,8 @@ global sha2_store: push 0 // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + dup1 // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest push 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest @@ -21,8 +23,6 @@ sha2_store_loop: %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest dup1 - %eq_const(6) - %jumpi(sha2_stop) %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest iszero @@ -36,6 +36,7 @@ sha2_store_end: JUMPDEST // stack: counter=0, addr, retdest %pop2 + STOP JUMP sha2_stop: JUMPDEST diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 9b8ce137..64a937e3 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -242,8 +242,7 @@ %macro increment push 1 - swap1 - sub + add %endmacro %macro decrement diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 411f2ec5..b90ba52c 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -13,7 +13,7 @@ fn test_sha2_store() -> Result<()> { let kernel = combined_kernel(); let sha2_store = kernel.global_labels["sha2_store"]; let mut rng = thread_rng(); - let num_bytes = rng.gen_range(0..20); + let num_bytes = rng.gen_range(1..17); let mut bytes: Vec = Vec::with_capacity(num_bytes); for _ in 0..num_bytes { let byte: u8 = rng.gen(); @@ -28,6 +28,7 @@ fn 
test_sha2_store() -> Result<()> { let mut initial_stack = vec![U256::from(num_bytes)]; initial_stack.extend(bytes); + initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); dbg!(initial_stack.clone()); let stack_with_kernel = run(&kernel.code, sha2_store, initial_stack)?.stack; dbg!(stack_with_kernel); From 4b3ce01f8eec86061912d3a6efdf21c6f832615b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 28 Jul 2022 10:34:51 -0700 Subject: [PATCH 029/104] fixes --- evm/src/cpu/kernel/asm/sha2.asm | 1 + evm/src/cpu/kernel/tests/sha2.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index c5cdb212..f9d83873 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -15,6 +15,7 @@ sha2_store_loop: JUMPDEST // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest dup1 + STOP // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest swap3 // stack: x[num_bytes-counter], addr, counter, addr, ... 
, x[num_bytes-1], retdest diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index b90ba52c..fde9c99e 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -29,6 +29,7 @@ fn test_sha2_store() -> Result<()> { let mut initial_stack = vec![U256::from(num_bytes)]; initial_stack.extend(bytes); initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); + initial_stack.reverse(); dbg!(initial_stack.clone()); let stack_with_kernel = run(&kernel.code, sha2_store, initial_stack)?.stack; dbg!(stack_with_kernel); From f90cfd0f56f8e46bc48dea4168284013a1e045a9 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Sun, 25 Sep 2022 20:13:04 -0700 Subject: [PATCH 030/104] fixes and updates --- evm/src/cpu/kernel/asm/sha2.asm | 45 ++++++++++++++++++++++++------- evm/src/cpu/kernel/interpreter.rs | 13 +++++++-- evm/src/cpu/kernel/tests/sha2.rs | 25 ++++++++++++----- 3 files changed, 65 insertions(+), 18 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index f9d83873..a45854ec 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -7,15 +7,12 @@ global sha2_store: // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - dup1 - // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest push 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: JUMPDEST // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest dup1 - STOP // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest swap3 // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest @@ -23,11 +20,13 @@ sha2_store_loop: // stack: addr, x[num_bytes-counter], counter, addr, ... 
, x[num_bytes-1], retdest %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest - dup1 %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest + dup1 + // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest iszero %jumpi(sha2_store_end) + // stack: counter-1, addr, ... , x[num_bytes-1], retdest swap1 // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment @@ -37,12 +36,40 @@ sha2_store_end: JUMPDEST // stack: counter=0, addr, retdest %pop2 - STOP - JUMP -sha2_stop: - JUMPDEST - STOP + // stack: retdest + //JUMP + %jump(sha2_pad) +global test_sha2_read: + JUMPDEST + // stack: retdest + push 0 + // stack: 0, retdest + %mload_kernel_general + // stack: counter=num_bytes, retdest +test_sha2_read_loop: + JUMPDEST + // stack: counter, retdest, [stack] + dup1 + // stack: addr=counter, counter, retdest, [stack] + %mload_kernel_general + // stack: value, counter, retdest, [stack] + swap2 + // stack: retdest, counter, value, [stack] + swap1 + // stack: counter, retdest, value, [stack] + %decrement + // stack: counter-1, retdest, value, [stack] + dup1 + iszero + %jumpi(test_sha2_read_end) + %jump(test_sha2_read_loop) +test_sha2_read_end: + // stack: counter=0, retdest, [stack] + JUMPDEST + pop + // stack: retdest, [stack] + JUMP // Precodition: input is in memory, starting at 0 of kernel general segment, of the form // num_bytes, x[0], x[1], ..., x[num_bytes - 1] diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 343f9773..090230f5 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -74,6 +74,9 @@ pub struct Interpreter<'a> { pub(crate) memory: InterpreterMemory, pub(crate) generation_state: GenerationState, prover_inputs_map: &'a HashMap, + /// Non-deterministic prover inputs, stored backwards so that popping the last item gives the + /// next prover input. 
+ prover_inputs: Vec, pub(crate) halt_offsets: Vec, running: bool, } @@ -435,8 +438,14 @@ impl<'a> Interpreter<'a> { fn run_shl(&mut self) { let shift = self.pop(); - let x = self.pop(); - self.push(x << shift); + let value = self.pop(); + self.push(value << shift); + } + + fn run_shr(&mut self) { + let shift = self.pop(); + let value = self.pop(); + self.push(value >> shift); } fn run_shr(&mut self) { diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index fde9c99e..efb0d3aa 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -12,6 +12,8 @@ use crate::cpu::kernel::interpreter::run; fn test_sha2_store() -> Result<()> { let kernel = combined_kernel(); let sha2_store = kernel.global_labels["sha2_store"]; + // let test_sha2_read = kernel.global_labels["test_sha2_read"]; + let mut rng = thread_rng(); let num_bytes = rng.gen_range(1..17); let mut bytes: Vec = Vec::with_capacity(num_bytes); @@ -26,13 +28,22 @@ fn test_sha2_store() -> Result<()> { dbg!(num_bytes); dbg!(bytes.clone()); - let mut initial_stack = vec![U256::from(num_bytes)]; - initial_stack.extend(bytes); - initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); - initial_stack.reverse(); - dbg!(initial_stack.clone()); - let stack_with_kernel = run(&kernel.code, sha2_store, initial_stack)?.stack; - dbg!(stack_with_kernel); + let mut store_initial_stack = vec![U256::from(num_bytes)]; + store_initial_stack.extend(bytes); + store_initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); + store_initial_stack.reverse(); + dbg!(store_initial_stack.clone()); + + let after_storing = run(&kernel.code, sha2_store, store_initial_stack)?; + let stack_after_storing = after_storing.stack; + dbg!(stack_after_storing.clone()); + let memory_after_storing = after_storing.memory; + dbg!(memory_after_storing); + + + // let load_initial_stack = vec![U256::from_str("0xdeadbeef").unwrap()]; + // let stack_after_loading = run(&kernel.code, test_sha2_read, 
load_initial_stack)?.stack; + // dbg!(stack_after_loading); // let expected_stack = todo!(); // assert_eq!(stack_with_kernel, expected_stack); From 2d34a9d25e43f8339fc59284155a687e56af2272 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 2022 10:59:06 -0700 Subject: [PATCH 031/104] finished sha2 pad --- evm/src/cpu/kernel/asm/sha2.asm | 105 ++++---------- evm/src/cpu/kernel/asm/sha2_write_length.asm | 143 +++++++++++++++++++ 2 files changed, 173 insertions(+), 75 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/sha2_write_length.asm diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index a45854ec..6a122063 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -82,90 +82,45 @@ global sha2_pad: %mload_kernel_general // stack: num_bytes, retdest // STEP 1: append 1 - // add 1 << (8*(32-k)-1) to x[num_bytes//32], where k := num_bytes%32 + // insert 128 (= 1 << 7) at x[num_bytes] + // stack: num_bytes, retdest + push 1 + push 7 + shl + // stack: 128, num_bytes, retdest + dup2 + // stack: num_bytes, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 dup1 // stack: num_bytes, num_bytes, retdest - dup1 - // stack: num_bytes, num_bytes, num_bytes, retdest - push 32 - // stack: 32, num_bytes, num_bytes, num_bytes, retdest - swap1 - // stack: num_bytes, 32, num_bytes, num_bytes, retdest - mod - // stack: k := num_bytes % 32, num_bytes, num_bytes, retdest - push 32 - sub - // stack: 32 - k, num_bytes, num_bytes, retdest push 8 - mul - // stack: 8 * (32 - k), num_bytes, num_bytes, retdest - %decrement - // stack: 8 * (32 - k) - 1, num_bytes, num_bytes, retdest - push 1 - swap1 - shl - // stack: 1 << (8 * (32 - k) - 1), num_bytes, num_bytes, retdest - swap1 - // stack: num_bytes, 1 << (8 * (32 - k) - 1), num_bytes, retdest - push 32 - swap1 - div - // stack: num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, 
retdest - dup1 - // stack: num_bytes // 32, num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, retdest - mload - // stack: x[num_bytes // 32], num_bytes // 32, 1 << (8 * (32 - k) - 1), num_bytes, retdest - swap1 - // stack: num_bytes // 32, x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes, retdest - swap2 - // stack: x[num_bytes // 32], 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes, retdest add - // stack: x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes // 32, num_bytes, retdest - swap1 - // stack: num_bytes // 32, x[num_bytes // 32] + 1 << (8 * (32 - k) - 1), num_bytes, retdest - mstore - // stack: num_bytes, retdest - // STEP 2: insert length - // (add length := num_bytes*8+1 to x[(num_bytes//64)*2-1]) - dup1 - dup1 - // stack: num_bytes, num_bytes, num_bytes, retdest - push 8 - mul - %increment - // stack: length := num_bytes*8+1, num_bytes, num_bytes, retdest - swap1 - // stack: num_bytes, length := num_bytes*8+1, num_bytes, retdest - push 64 - swap1 - div - // stack: num_bytes // 64, length := num_bytes*8+1, num_bytes, retdest - push 2 - mul - %decrement - // stack: (num_bytes // 64) * 2 - 1, length := num_bytes*8+1, num_bytes, retdest - dup1 - // stack: (num_bytes // 64) * 2 - 1, (num_bytes // 64) * 2 - 1, length, num_bytes, retdest - mload - // stack: x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, length, num_bytes, retdest - swap1 - // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1], length, num_bytes, retdest - swap2 - // stack: length, x[(num_bytes // 64) * 2 - 1], (num_bytes // 64) * 2 - 1, num_bytes, retdest - add - // stack: x[(num_bytes // 64) * 2 - 1] + length, (num_bytes // 64) * 2 - 1, num_bytes - swap1 - // stack: (num_bytes // 64) * 2 - 1, x[(num_bytes // 64) * 2 - 1] + length, num_bytes, retdest - mstore - // stack: num_bytes, retdest - // STEP 3: insert num_blocks at start push 64 swap1 div %increment - // stack: num_blocks := num_bytes // 64 + 1, retdest + // stack: num_blocks = (num_bytes+8)//64 
+ 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8+1 + swap1 + // stack: num_bytes, num_blocks, retdest + push 8 + mul + %increment + // stack: length = num_bytes*8+1, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-8..num_blocks*64-1] + dup2 + // stack: num_blocks, length, num_blocks, retdest + push 64 + mul + %decrement + // stack: last_addr = num_blocks*64-1, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + // STEP 5: write num_blocks to x[0] push 0 - mstore + %mstore_kernel_general // stack: retdest JUMP diff --git a/evm/src/cpu/kernel/asm/sha2_write_length.asm b/evm/src/cpu/kernel/asm/sha2_write_length.asm new file mode 100644 index 00000000..f1717878 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2_write_length.asm @@ -0,0 +1,143 @@ +%macro sha2_write_length + // stack: length, last_addr + push 1 + push 8 + shl + + // stack: 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, length, last_addr + mod + // stack: length % (1 << 8), length, last_addr + dup3 + // stack: last_addr, length % (1 << 8), length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 8 + shr + // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 8) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 8) % (1 << 8), 1 << 8, length, last_addr + push 1 + swap1 + sub + // stack: last_addr - 1, (length >> 8) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 16 + shr + // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 16) % (1 << 
8), 1 << 8, length, last_addr + push 2 + swap1 + sub + // stack: last_addr - 2, (length >> 16) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 24 + shr + // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr + push 3 + swap1 + sub + // stack: last_addr - 1, (length >> 24) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 32 + shr + // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr + push 4 + swap1 + sub + // stack: last_addr - 1, (length >> 32) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 40 + shr + // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 40) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr + push 5 + swap1 + sub + // stack: last_addr - 1, (length >> 40) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 48 + shr + // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr + 
dup3 + // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr + push 6 + swap1 + sub + // stack: last_addr - 1, (length >> 48) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + + // stack: 1 << 8, length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr + dup2 + // stack: length, 1 << 8, 1 << 8, length, last_addr + push 56 + shr + // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr + mod + // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr + push 7 + swap1 + sub + // stack: last_addr - 1, (length >> 56) % (1 << 8), 1 << 8, length, last_addr + %mstore_kernel_general + %pop3 + // stack: (empty) +%endmacro From 05eb70f908dbd1df80b167a90e71a81bc7474a8e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 2022 15:05:42 -0700 Subject: [PATCH 032/104] updates --- evm/src/cpu/kernel/asm/sha2.asm | 117 +++++++++++++------------ evm/src/cpu/kernel/asm/sha2_memory.asm | 4 +- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2.asm index 6a122063..40bcc260 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2.asm @@ -95,11 +95,9 @@ global sha2_pad: // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 dup1 // stack: num_bytes, num_bytes, retdest - push 8 - add - push 64 - swap1 - div + %add_const(8) + %div_const(64) + %increment // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest // STEP 3: calculate length := num_bytes*8+1 @@ -125,21 +123,22 @@ global sha2_pad: JUMP // Precodition: stack contains address of one message block, followed by output address -// Postcondition: 64 addresses starting at given output address contain 32-bit chunks of message schedule +// Postcondition: 256 addresses starting at given output address, contain 32-bit chunks +// of message schedule (in four-byte increments) global 
sha2_gen_message_schedule_from_block: JUMPDEST // stack: block_addr, output_addr, retdest dup1 // stack: block_addr, block_addr, output_addr, retdest - %increment - // stack: block_addr + 1, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest swap1 - // stack: block_addr, block_addr + 1, output_addr, retdest - mload - // stack: block[0], block_addr + 1, output_addr, retdest + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest swap1 - // stack: block_addr + 1, block[0], output_addr, retdest - mload + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 // stack: block[1], block[0], output_addr, retdest swap2 // stack: output_addr, block[0], block[1], retdest @@ -170,16 +169,16 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1], retdest dup3 // stack: output_addr, block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1], retdest - mstore + %mstore_kernel_general_u32 // stack: block[0] // (1 << 32), output_addr, counter, block[1], retdest swap1 // stack: output_addr, block[0] // (1 << 32), counter, block[1], retdest - %increment - // stack: output_addr + 1, block[0] // (1 << 32), counter, block[1], retdest + %add_const(4) + // stack: output_addr + 4, block[0] // (1 << 32), counter, block[1], retdest swap1 - // stack: block[0] // (1 << 32), output_addr + 1, counter, block[1], retdest + // stack: block[0] // (1 << 32), output_addr + 4, counter, block[1], retdest swap2 - // stack: counter, output_addr + 1, block[0] // (1 << 32), block[1], retdest + // stack: counter, output_addr + 4, block[0] // (1 << 32), block[1], retdest %decrement dup1 iszero @@ -221,16 +220,16 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: block[1] % (1 << 32), block[1] // (1 << 32), output_addr, 
counter, block[0], retdest dup3 // stack: output_addr, block[1] % (1 << 32), block[1] // (1 << 32), output_addr, counter, block[0], retdest - mstore + %mstore_kernel_general_u32 // stack: block[1] // (1 << 32), output_addr, counter, block[0], retdest swap1 // stack: output_addr, block[1] // (1 << 32), counter, block[0], retdest - %increment - // stack: output_addr + 1, block[1] // (1 << 32), counter, block[0], retdest + %add_const(4) + // stack: output_addr + 4, block[1] // (1 << 32), counter, block[0], retdest swap1 - // stack: block[1] // (1 << 32), output_addr + 1, counter, block[0], retdest + // stack: block[1] // (1 << 32), output_addr + 4, counter, block[0], retdest swap2 - // stack: counter, output_addr + 1, block[1] // (1 << 32), block[0], retdest + // stack: counter, output_addr + 4, block[1] // (1 << 32), block[0], retdest %decrement dup1 iszero @@ -251,69 +250,77 @@ sha2_gen_message_schedule_remaining_loop: dup1 // stack: output_addr, output_addr, counter, block[0], block[1], retdest push 2 + push 4 + mul swap1 sub - // stack: output_addr - 2, output_addr, counter, block[0], block[1], retdest - mload - // stack: x[output_addr - 2], output_addr, counter, block[0], block[1], retdest + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest %sha2_sigma_1 - // stack: sigma_1(x[output_addr - 2]), output_addr, counter, block[0], block[1], retdest + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest swap1 - // stack: output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest dup1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], 
retdest push 7 + push 4 + mul swap1 sub - // stack: output_addr - 7, output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest - mload - // stack: x[output_addr - 7], output_addr, sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest swap1 - // stack: output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest dup1 - // stack: output_addr, output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest push 15 + push 4 + mul swap1 sub - // stack: output_addr - 15, output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest - mload - // stack: x[output_addr - 15], output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %sha2_sigma_0 - // stack: sigma_0(x[output_addr - 15]), output_addr, x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest swap1 - // stack: output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], 
sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest dup1 - // stack: output_addr, output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest push 16 + push 4 + mul swap1 sub - // stack: output_addr - 16, output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest - mload - // stack: x[output_addr - 16], output_addr, sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest swap1 - // stack: output_addr, x[output_addr - 16], sigma_0(x[output_addr - 15]), x[output_addr - 7], sigma_1(x[output_addr - 2]), counter, block[0], block[1], retdest + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest swap4 - // stack: sigma_1(x[output_addr - 2]), x[output_addr - 16], sigma_0(x[output_addr - 15]), x[output_addr - 7], output_addr, counter, block[0], block[1], retdest + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest add add add - // stack: sigma_1(x[output_addr - 2]) + 
x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, counter, block[0], block[1], retdest + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest swap1 - // stack: output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], counter, block[0], block[1], retdest + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest dup1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], counter, block[0], block[1], retdest + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest swap2 - // stack: sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, output_addr, counter, block[0], block[1], retdest + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest swap1 - // stack: output_addr, sigma_1(x[output_addr - 2]) + x[output_addr - 16] + sigma_0(x[output_addr - 15]) + x[output_addr - 7], output_addr, counter, block[0], block[1], retdest - mstore + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 // stack: output_addr, counter, block[0], block[1], retdest - %increment - // stack: output_addr + 1, counter, block[0], block[1], retdest + %add_const(4) + // stack: output_addr + 4, counter, block[0], block[1], 
retdest swap1 - // stack: counter, output_addr + 1, block[0], block[1], retdest + // stack: counter, output_addr + 4, block[0], block[1], retdest %decrement - // stack: counter - 1, output_addr + 1, block[0], block[1], retdest + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest iszero %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) diff --git a/evm/src/cpu/kernel/asm/sha2_memory.asm b/evm/src/cpu/kernel/asm/sha2_memory.asm index 42fb5849..a6604251 100644 --- a/evm/src/cpu/kernel/asm/sha2_memory.asm +++ b/evm/src/cpu/kernel/asm/sha2_memory.asm @@ -19,7 +19,7 @@ %shl_const(8) // stack: c_3 << 8, offset DUP2 - %add_const(1) + %increment %mload_kernel_general OR // stack: (c_3 << 8) | c_2, offset @@ -48,7 +48,7 @@ %shl_const(32) // stack: c7 << 32, offset DUP2 - %add_const(1) + %increment %mload_kernel_general_u32 OR // stack: (c_7 << 32) | c_6, offset From e4521c481f88751c86c5fd134bd714bf4a734058 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 2022 16:29:57 -0700 Subject: [PATCH 033/104] a great many fixes --- evm/src/cpu/kernel/aggregator.rs | 9 ++- .../constants.asm} | 0 .../asm/{sha2_memory.asm => sha2/memory.asm} | 3 + .../kernel/asm/{sha2_ops.asm => sha2/ops.asm} | 0 evm/src/cpu/kernel/asm/{ => sha2}/sha2.asm | 74 ++++++++++--------- .../write_length.asm} | 14 ++-- evm/src/cpu/kernel/tests/sha2.rs | 1 - 7 files changed, 58 insertions(+), 43 deletions(-) rename evm/src/cpu/kernel/asm/{sha2_constants.asm => sha2/constants.asm} (100%) rename evm/src/cpu/kernel/asm/{sha2_memory.asm => sha2/memory.asm} (97%) rename evm/src/cpu/kernel/asm/{sha2_ops.asm => sha2/ops.asm} (100%) rename evm/src/cpu/kernel/asm/{ => sha2}/sha2.asm (92%) rename evm/src/cpu/kernel/asm/{sha2_write_length.asm => sha2/write_length.asm} (93%) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index d8466894..3cdab342 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ 
b/evm/src/cpu/kernel/aggregator.rs @@ -48,10 +48,11 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/secp256k1/moddiv.asm"), include_str!("asm/secp256k1/lift_x.asm"), include_str!("asm/secp256k1/inverse_scalar.asm"), - include_str!("asm/sha2.asm"), - include_str!("asm/sha2_constants.asm"), - include_str!("asm/sha2_memory.asm"), - include_str!("asm/sha2_ops.asm"), + include_str!("asm/sha2/constants.asm"), + include_str!("asm/sha2/memory.asm"), + include_str!("asm/sha2/ops.asm"), + include_str!("asm/sha2/sha2.asm"), + include_str!("asm/sha2/write_length.asm"), include_str!("asm/ecrecover.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2_constants.asm b/evm/src/cpu/kernel/asm/sha2/constants.asm similarity index 100% rename from evm/src/cpu/kernel/asm/sha2_constants.asm rename to evm/src/cpu/kernel/asm/sha2/constants.asm diff --git a/evm/src/cpu/kernel/asm/sha2_memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm similarity index 97% rename from evm/src/cpu/kernel/asm/sha2_memory.asm rename to evm/src/cpu/kernel/asm/sha2/memory.asm index a6604251..3b86d9b0 100644 --- a/evm/src/cpu/kernel/asm/sha2_memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -94,6 +94,9 @@ %mload_kernel_general_u32 OR // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset + swap1 + pop + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 %endmacro // Store a single byte to kernel general memory. 
diff --git a/evm/src/cpu/kernel/asm/sha2_ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm similarity index 100% rename from evm/src/cpu/kernel/asm/sha2_ops.asm rename to evm/src/cpu/kernel/asm/sha2/ops.asm diff --git a/evm/src/cpu/kernel/asm/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm similarity index 92% rename from evm/src/cpu/kernel/asm/sha2.asm rename to evm/src/cpu/kernel/asm/sha2/sha2.asm index 40bcc260..2d3b0c53 100644 --- a/evm/src/cpu/kernel/asm/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -40,36 +40,36 @@ sha2_store_end: //JUMP %jump(sha2_pad) -global test_sha2_read: - JUMPDEST - // stack: retdest - push 0 - // stack: 0, retdest - %mload_kernel_general - // stack: counter=num_bytes, retdest -test_sha2_read_loop: - JUMPDEST - // stack: counter, retdest, [stack] - dup1 - // stack: addr=counter, counter, retdest, [stack] - %mload_kernel_general - // stack: value, counter, retdest, [stack] - swap2 - // stack: retdest, counter, value, [stack] - swap1 - // stack: counter, retdest, value, [stack] - %decrement - // stack: counter-1, retdest, value, [stack] - dup1 - iszero - %jumpi(test_sha2_read_end) - %jump(test_sha2_read_loop) -test_sha2_read_end: - // stack: counter=0, retdest, [stack] - JUMPDEST - pop - // stack: retdest, [stack] - JUMP +//global test_sha2_read: +// JUMPDEST +// // stack: retdest +// push 0 +// // stack: 0, retdest +// %mload_kernel_general +// // stack: counter=num_bytes, retdest +//test_sha2_read_loop: +// JUMPDEST +// // stack: counter, retdest, [stack] +// dup1 +// // stack: addr=counter, counter, retdest, [stack] +// %mload_kernel_general +// // stack: value, counter, retdest, [stack] +// swap2 +// // stack: retdest, counter, value, [stack] +// swap1 +// // stack: counter, retdest, value, [stack] +// %decrement +// // stack: counter-1, retdest, value, [stack] +// dup1 +// iszero +// %jumpi(test_sha2_read_end) +// %jump(test_sha2_read_loop) +//test_sha2_read_end: +// // stack: counter=0, retdest, [stack] +// JUMPDEST +// pop 
+// // stack: retdest, [stack] +// JUMP // Precodition: input is in memory, starting at 0 of kernel general segment, of the form // num_bytes, x[0], x[1], ..., x[num_bytes - 1] @@ -120,7 +120,10 @@ global sha2_pad: push 0 %mstore_kernel_general // stack: retdest - JUMP + //JUMP + push 100 + push 1 + %jump(sha2_gen_message_schedule_from_block) // Precodition: stack contains address of one message block, followed by output address // Postcondition: 256 addresses starting at given output address, contain 32-bit chunks @@ -321,11 +324,16 @@ sha2_gen_message_schedule_remaining_loop: // stack: counter, output_addr + 4, block[0], block[1], retdest %decrement // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + dup1 iszero %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) sha2_gen_message_schedule_remaining_end: JUMPDEST + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + STOP + JUMP -global sha2_gen_all_message_schedules: - JUMPDEST +//global sha2_gen_all_message_schedules: +// JUMPDEST diff --git a/evm/src/cpu/kernel/asm/sha2_write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm similarity index 93% rename from evm/src/cpu/kernel/asm/sha2_write_length.asm rename to evm/src/cpu/kernel/asm/sha2/write_length.asm index f1717878..c79ceb06 100644 --- a/evm/src/cpu/kernel/asm/sha2_write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -1,16 +1,20 @@ %macro sha2_write_length + // stack: last_addr, length + swap1 // stack: length, last_addr push 1 push 8 shl // stack: 1 << 8, length, last_addr - dup2 - // stack: length, 1 << 8, length, last_addr - mod - // stack: length % (1 << 8), length, last_addr + dup1 + // stack: 1 << 8, 1 << 8, length, last_addr dup3 - // stack: last_addr, length % (1 << 8), length, last_addr + // stack: length, 1 << 8, 1 << 8, length, last_addr + mod + // stack: length % (1 << 8), 1 << 8, length, last_addr + dup3 + // stack: last_addr, length % (1 
<< 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index efb0d3aa..de74edc3 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -40,7 +40,6 @@ fn test_sha2_store() -> Result<()> { let memory_after_storing = after_storing.memory; dbg!(memory_after_storing); - // let load_initial_stack = vec![U256::from_str("0xdeadbeef").unwrap()]; // let stack_after_loading = run(&kernel.code, test_sha2_read, load_initial_stack)?.stack; // dbg!(stack_after_loading); From d6f6fc7599cdb70778dcce41fe4dc7eb801dd4e7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 08:41:41 -0700 Subject: [PATCH 034/104] fixes --- evm/src/cpu/kernel/aggregator.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 3cdab342..542dd38f 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -42,18 +42,11 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/txn_fields.asm"), include_str!("asm/exp.asm"), include_str!("asm/memory.asm"), - include_str!("asm/moddiv.asm"), - include_str!("asm/secp256k1/curve_mul.asm"), - include_str!("asm/secp256k1/curve_add.asm"), - include_str!("asm/secp256k1/moddiv.asm"), - include_str!("asm/secp256k1/lift_x.asm"), - include_str!("asm/secp256k1/inverse_scalar.asm"), include_str!("asm/sha2/constants.asm"), include_str!("asm/sha2/memory.asm"), include_str!("asm/sha2/ops.asm"), include_str!("asm/sha2/sha2.asm"), include_str!("asm/sha2/write_length.asm"), - include_str!("asm/ecrecover.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), include_str!("asm/rlp/read_to_memory.asm"), From 3960940942f89c7f3fb2f655c59109d3feea87d1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 1 Aug 2022 17:00:51 -0700 Subject: [PATCH 035/104] fix --- 
evm/src/cpu/kernel/tests/sha2.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index de74edc3..dd3e0552 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,4 +1,5 @@ use core::num; +use std::collections::HashMap; use std::str::FromStr; use anyhow::Result; @@ -34,8 +35,8 @@ fn test_sha2_store() -> Result<()> { store_initial_stack.reverse(); dbg!(store_initial_stack.clone()); - let after_storing = run(&kernel.code, sha2_store, store_initial_stack)?; - let stack_after_storing = after_storing.stack; + let after_storing = run(&kernel.code, sha2_store, store_initial_stack, &kernel.prover_inputs)?; + let stack_after_storing = after_storing.stack(); dbg!(stack_after_storing.clone()); let memory_after_storing = after_storing.memory; dbg!(memory_after_storing); From faa1023bd40d6e149787b3f73da33ed602c30534 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 4 Aug 2022 14:45:23 -0400 Subject: [PATCH 036/104] fix --- evm/src/cpu/kernel/aggregator.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 542dd38f..ef9e7dfa 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -41,7 +41,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), include_str!("asm/exp.asm"), - include_str!("asm/memory.asm"), include_str!("asm/sha2/constants.asm"), include_str!("asm/sha2/memory.asm"), include_str!("asm/sha2/ops.asm"), From bff3da1da1906048fb76e7a53e900a0e5ecf74ae Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 11:37:22 -0700 Subject: [PATCH 037/104] removed duplicate macros --- evm/src/cpu/kernel/asm/util/basic_macros.asm | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm 
b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 64a937e3..3ea34bce 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -159,16 +159,6 @@ CONSUME_GAS %endmacro -%macro pop5 - %pop3 - %pop2 -%endmacro - -%macro pop6 - %pop4 - %pop2 -%endmacro - // If pred is zero, yields z; otherwise, yields nz %macro select // stack: pred, nz, z From 4624ce515ba1ce40a03f26b5b52df961d65d292a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 11:37:35 -0700 Subject: [PATCH 038/104] fmt --- evm/src/cpu/kernel/aggregator.rs | 2 +- evm/src/cpu/kernel/tests/mod.rs | 2 +- evm/src/cpu/kernel/tests/sha2.rs | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index ef9e7dfa..d5292350 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -72,10 +72,10 @@ pub(crate) fn combined_kernel() -> Kernel { #[cfg(test)] mod tests { - use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; use std::str::FromStr; use anyhow::Result; + use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; use ethereum_types::U256; use log::debug; diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs index d1fa616b..36471dc4 100644 --- a/evm/src/cpu/kernel/tests/mod.rs +++ b/evm/src/cpu/kernel/tests/mod.rs @@ -5,8 +5,8 @@ mod exp; mod mpt; mod packing; mod rlp; -mod transaction_parsing; mod sha2; +mod transaction_parsing; use std::str::FromStr; diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index dd3e0552..f9a358ab 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -34,8 +34,13 @@ fn test_sha2_store() -> Result<()> { store_initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); store_initial_stack.reverse(); dbg!(store_initial_stack.clone()); - - let after_storing = run(&kernel.code, sha2_store, 
store_initial_stack, &kernel.prover_inputs)?; + + let after_storing = run( + &kernel.code, + sha2_store, + store_initial_stack, + &kernel.prover_inputs, + )?; let stack_after_storing = after_storing.stack(); dbg!(stack_after_storing.clone()); let memory_after_storing = after_storing.memory; From b1b95e7b65172be8ad6d8f0e7835c7b0aecda7c5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 11:38:56 -0700 Subject: [PATCH 039/104] clippy --- evm/src/cpu/kernel/aggregator.rs | 6 +++--- evm/src/cpu/kernel/tests/sha2.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index d5292350..91cd3e4c 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -72,11 +72,11 @@ pub(crate) fn combined_kernel() -> Kernel { #[cfg(test)] mod tests { - use std::str::FromStr; + - use anyhow::Result; + use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; - use ethereum_types::U256; + use log::debug; use crate::cpu::kernel::aggregator::combined_kernel; diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index f9a358ab..3d26d207 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,5 +1,5 @@ -use core::num; -use std::collections::HashMap; + + use std::str::FromStr; use anyhow::Result; From 79e4d80d5b56c5ce1a529792d1d50d1a7e8dc19a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 11:49:22 -0700 Subject: [PATCH 040/104] fmt --- evm/src/cpu/kernel/aggregator.rs | 3 --- evm/src/cpu/kernel/tests/sha2.rs | 2 -- 2 files changed, 5 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 91cd3e4c..069bece0 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -72,11 +72,8 @@ pub(crate) fn combined_kernel() -> Kernel { #[cfg(test)] mod tests { - - use env_logger::{try_init_from_env, Env, 
DEFAULT_FILTER_ENV}; - use log::debug; use crate::cpu::kernel::aggregator::combined_kernel; diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 3d26d207..2399b2ee 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,5 +1,3 @@ - - use std::str::FromStr; use anyhow::Result; From 54e96a9db224700017c414cb4129d80906ed94c0 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 15:30:54 -0700 Subject: [PATCH 041/104] many fixes --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 42 ++++---------------- evm/src/cpu/kernel/asm/sha2/write_length.asm | 25 +++++++----- evm/src/cpu/kernel/interpreter.rs | 2 +- evm/src/cpu/kernel/tests/sha2.rs | 13 +++--- evm/src/generation/memory.rs | 4 +- 5 files changed, 32 insertions(+), 54 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 2d3b0c53..e1e74038 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -40,37 +40,6 @@ sha2_store_end: //JUMP %jump(sha2_pad) -//global test_sha2_read: -// JUMPDEST -// // stack: retdest -// push 0 -// // stack: 0, retdest -// %mload_kernel_general -// // stack: counter=num_bytes, retdest -//test_sha2_read_loop: -// JUMPDEST -// // stack: counter, retdest, [stack] -// dup1 -// // stack: addr=counter, counter, retdest, [stack] -// %mload_kernel_general -// // stack: value, counter, retdest, [stack] -// swap2 -// // stack: retdest, counter, value, [stack] -// swap1 -// // stack: counter, retdest, value, [stack] -// %decrement -// // stack: counter-1, retdest, value, [stack] -// dup1 -// iszero -// %jumpi(test_sha2_read_end) -// %jump(test_sha2_read_loop) -//test_sha2_read_end: -// // stack: counter=0, retdest, [stack] -// JUMPDEST -// pop -// // stack: retdest, [stack] -// JUMP - // Precodition: input is in memory, starting at 0 of kernel general segment, of the form // num_bytes, x[0], x[1], ..., x[num_bytes - 1] // Postcodition: 
output is in memory, starting at 0, of the form @@ -84,8 +53,9 @@ global sha2_pad: // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes] // stack: num_bytes, retdest - push 1 + // TODO: these should be in the other order once SHL implementation is fixed push 7 + push 1 shl // stack: 128, num_bytes, retdest dup2 @@ -126,7 +96,7 @@ global sha2_pad: %jump(sha2_gen_message_schedule_from_block) // Precodition: stack contains address of one message block, followed by output address -// Postcondition: 256 addresses starting at given output address, contain 32-bit chunks +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks // of message schedule (in four-byte increments) global sha2_gen_message_schedule_from_block: JUMPDEST @@ -153,8 +123,9 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: counter, output_addr, block[0], block[1], retdest swap2 // stack: block[0], output_addr, counter, block[1], retdest - push 1 + // TODO: these should be in the other order once SHL implementation is fixed push 32 + push 1 shl // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest dup2 @@ -204,8 +175,9 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: counter, output_addr, block[1], block[0], retdest swap2 // stack: block[1], output_addr, counter, block[0], retdest - push 1 + // TODO: these should be in the other order once SHL implementation is fixed push 32 + push 1 shl // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest dup2 diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index c79ceb06..ecc20fe6 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -2,8 +2,9 @@ // stack: last_addr, length swap1 // stack: length, last_addr - push 1 + // TODO: these should be in the other order once SHL implementation is fixed push 8 + push 1 shl // stack: 1 << 8, length, last_addr @@ -13,7 +14,7 @@ 
// stack: length, 1 << 8, 1 << 8, length, last_addr mod // stack: length % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, length % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general @@ -23,11 +24,12 @@ dup2 // stack: length, 1 << 8, 1 << 8, length, last_addr push 8 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 8) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 8) % (1 << 8), 1 << 8, length, last_addr push 1 swap1 @@ -41,11 +43,12 @@ dup2 // stack: length, 1 << 8, 1 << 8, length, last_addr push 16 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 16) % (1 << 8), 1 << 8, length, last_addr push 2 swap1 @@ -59,11 +62,12 @@ dup2 // stack: length, 1 << 8, 1 << 8, length, last_addr push 24 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr push 3 swap1 @@ -77,11 +81,12 @@ dup2 // stack: length, 1 << 8, 1 << 8, length, last_addr push 32 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr push 4 swap1 @@ -99,7 +104,7 @@ // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 40) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr push 5 swap1 @@ -113,11 +118,12 @@ dup2 // stack: length, 1 << 8, 1 
<< 8, length, last_addr push 48 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr push 6 swap1 @@ -131,11 +137,12 @@ dup2 // stack: length, 1 << 8, 1 << 8, length, last_addr push 56 + swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr mod // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr - dup3 + dup4 // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr push 7 swap1 diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 090230f5..72a76950 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -20,7 +20,7 @@ type F = GoldilocksField; /// Halt interpreter execution whenever a jump to this offset is done. 
const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef; -#[derive(Debug)] +#[derive(Clone, Debug)] pub(crate) struct InterpreterMemory { pub(crate) context_memory: Vec, } diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 2399b2ee..c610e94f 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -6,6 +6,7 @@ use rand::{thread_rng, Rng}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; +use crate::memory::segments::Segment; #[test] fn test_sha2_store() -> Result<()> { @@ -39,17 +40,15 @@ fn test_sha2_store() -> Result<()> { store_initial_stack, &kernel.prover_inputs, )?; + let stack_after_storing = after_storing.stack(); dbg!(stack_after_storing.clone()); + let memory_after_storing = after_storing.memory; - dbg!(memory_after_storing); + let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize].content.clone(); + dbg!(&mem[0..66]); - // let load_initial_stack = vec![U256::from_str("0xdeadbeef").unwrap()]; - // let stack_after_loading = run(&kernel.code, test_sha2_read, load_initial_stack)?.stack; - // dbg!(stack_after_loading); - - // let expected_stack = todo!(); - // assert_eq!(stack_with_kernel, expected_stack); + // dbg!(&mem[100..353]); Ok(()) } diff --git a/evm/src/generation/memory.rs b/evm/src/generation/memory.rs index 5e2919a4..944b42a6 100644 --- a/evm/src/generation/memory.rs +++ b/evm/src/generation/memory.rs @@ -22,13 +22,13 @@ impl Default for MemoryState { } } -#[derive(Default, Debug)] +#[derive(Clone, Default, Debug)] pub(crate) struct MemoryContextState { /// The content of each memory segment. 
pub segments: [MemorySegmentState; Segment::COUNT], } -#[derive(Default, Debug)] +#[derive(Clone, Default, Debug)] pub(crate) struct MemorySegmentState { pub content: Vec, } From c40074a00d7926435eb23eb48ecb277bdf0d3592 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 15:31:14 -0700 Subject: [PATCH 042/104] fmt --- evm/src/cpu/kernel/tests/sha2.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index c610e94f..3dbecdd8 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -40,12 +40,14 @@ fn test_sha2_store() -> Result<()> { store_initial_stack, &kernel.prover_inputs, )?; - + let stack_after_storing = after_storing.stack(); dbg!(stack_after_storing.clone()); let memory_after_storing = after_storing.memory; - let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize].content.clone(); + let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] + .content + .clone(); dbg!(&mem[0..66]); // dbg!(&mem[100..353]); From 4905f499a47e5719200764d00fa61049e24e2859 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 8 Aug 2022 16:03:26 -0700 Subject: [PATCH 043/104] fixes --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 8 ++++---- evm/src/cpu/kernel/tests/sha2.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index e1e74038..2e97139a 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -53,9 +53,9 @@ global sha2_pad: // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes] // stack: num_bytes, retdest - // TODO: these should be in the other order once SHL implementation is fixed - push 7 push 1 + push 7 + swap1 // TODO: remove once SHR implementation is fixed shl // stack: 128, num_bytes, retdest dup2 @@ -123,9 +123,9 @@ 
sha2_gen_message_schedule_from_block_0_loop: // stack: counter, output_addr, block[0], block[1], retdest swap2 // stack: block[0], output_addr, counter, block[1], retdest - // TODO: these should be in the other order once SHL implementation is fixed push 32 push 1 + swap1 // TODO: remove once SHR implementation is fixed shl // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest dup2 @@ -175,9 +175,9 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: counter, output_addr, block[1], block[0], retdest swap2 // stack: block[1], output_addr, counter, block[0], retdest - // TODO: these should be in the other order once SHL implementation is fixed push 32 push 1 + swap1 // TODO: remove once SHR implementation is fixed shl // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest dup2 diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 3dbecdd8..7b931a91 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -50,7 +50,7 @@ fn test_sha2_store() -> Result<()> { .clone(); dbg!(&mem[0..66]); - // dbg!(&mem[100..353]); + dbg!(&mem[100..353]); Ok(()) } From 2601109dbc98012b74e049abc62941217fadb6f1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 9 Aug 2022 14:29:10 -0700 Subject: [PATCH 044/104] debugging, and progress --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 59 +++++++++++++++++--- evm/src/cpu/kernel/asm/sha2/write_length.asm | 4 +- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 2e97139a..368ea587 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -123,10 +123,10 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: counter, output_addr, block[0], block[1], retdest swap2 // stack: block[0], output_addr, counter, block[1], retdest - push 32 + push 15 push 1 - swap1 // TODO: remove once SHR implementation is fixed - shl + //shl 
+ STOP // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest dup2 dup2 @@ -175,9 +175,9 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: counter, output_addr, block[1], block[0], retdest swap2 // stack: block[1], output_addr, counter, block[0], retdest - push 32 push 1 - swap1 // TODO: remove once SHR implementation is fixed + push 32 + swap1 // TODO: remove once SHL implementation is fixed shl // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest dup2 @@ -304,8 +304,51 @@ sha2_gen_message_schedule_remaining_end: JUMPDEST // stack: counter=0, output_addr, block[0], block[1], retdest %pop4 - STOP JUMP -//global sha2_gen_all_message_schedules: -// JUMPDEST +// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] + stack contains output_addr +// Postcondition: +global sha2_gen_all_message_schedules: + JUMPDEST + push 0 + // stack: 0, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, retdest + push 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, retdest +sha2_gen_all_message_schedules_loop: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, retdest + push sha2_gen_all_message_schedules_loop_end + // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, retdest + dup4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, retdest + dup3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, retdest + %jump(sha2_gen_message_schedule_from_block) +sha2_gen_all_message_schedules_loop_end: + // stack: cur_addr, counter, cur_output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, retdest + swap1 + %decrement + swap1 + // stack: cur_addr + 64, counter - 1, cur_output_addr, retdest + swap2 + %add_const(256) + swap2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + dup2 
+ // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + iszero + %jumpi(sha2_gen_all_message_schedules_end) + %jump(sha2_gen_all_message_schedules_loop) + JUMPDEST +sha2_gen_all_message_schedules_end: + JUMPDEST + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + %pop3 + // stack: retdest + JUMP + diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index ecc20fe6..368572e4 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -2,9 +2,9 @@ // stack: last_addr, length swap1 // stack: length, last_addr - // TODO: these should be in the other order once SHL implementation is fixed - push 8 push 1 + push 8 + swap1 // TODO: remove once SHL implementation is fixed shl // stack: 1 << 8, length, last_addr From 84273889b5e86c603c56e70469ed44176814904a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 11 Aug 2022 17:50:23 -0700 Subject: [PATCH 045/104] many fixes --- evm/src/cpu/kernel/asm/sha2/memory.asm | 16 +++--- evm/src/cpu/kernel/asm/sha2/sha2.asm | 56 ++++++++++---------- evm/src/cpu/kernel/asm/sha2/write_length.asm | 31 +++++------ evm/src/cpu/kernel/tests/sha2.rs | 5 +- 4 files changed, 51 insertions(+), 57 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 3b86d9b0..018abef8 100644 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -43,54 +43,54 @@ %macro mload_kernel_general_u256 // stack: offset DUP1 - %mload_kernel_code_u32 + %mload_kernel_general_u32 // stack: c_7, offset %shl_const(32) // stack: c7 << 32, offset DUP2 - %increment + %add_const(4) %mload_kernel_general_u32 OR // stack: (c_7 << 32) | c_6, offset %shl_const(32) // stack: ((c_7 << 32) | c_6) << 32, offset DUP2 - %add_const(2) + %add_const(8) %mload_kernel_general_u32 OR // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset %shl_const(32) 
// stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset DUP2 - %add_const(3) + %add_const(12) %mload_kernel_general_u32 OR // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset %shl_const(32) // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset DUP2 - %add_const(4) + %add_const(16) %mload_kernel_general_u32 OR // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset %shl_const(32) // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset DUP2 - %add_const(5) + %add_const(20) %mload_kernel_general_u32 OR // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset %shl_const(32) // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset DUP2 - %add_const(6) + %add_const(24) %mload_kernel_general_u32 OR // stack: (c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset %shl_const(32) // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset DUP2 - %add_const(7) + %add_const(28) %mload_kernel_general_u32 OR // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 368ea587..1bee6699 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -51,15 +51,16 @@ global sha2_pad: %mload_kernel_general // stack: num_bytes, retdest // STEP 1: append 1 - // insert 128 (= 1 << 7) at x[num_bytes] + // insert 128 (= 1 << 7) at x[num_bytes+1] // stack: num_bytes, retdest push 1 push 7 - swap1 // TODO: remove once SHR implementation is fixed shl // stack: 128, num_bytes, retdest dup2 // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest %mstore_kernel_general // stack: num_bytes, 
retdest // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 @@ -70,20 +71,18 @@ global sha2_pad: %increment // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest - // STEP 3: calculate length := num_bytes*8+1 + // STEP 3: calculate length := num_bytes*8 swap1 // stack: num_bytes, num_blocks, retdest push 8 mul - %increment - // stack: length = num_bytes*8+1, num_blocks, retdest - // STEP 4: write length to x[num_blocks*64-8..num_blocks*64-1] + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] dup2 // stack: num_blocks, length, num_blocks, retdest push 64 mul - %decrement - // stack: last_addr = num_blocks*64-1, length, num_blocks, retdest + // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest // STEP 5: write num_blocks to x[0] @@ -114,6 +113,7 @@ global sha2_gen_message_schedule_from_block: %mload_kernel_general_u256 // stack: block[1], block[0], output_addr, retdest swap2 + STOP // stack: output_addr, block[0], block[1], retdest push 8 // stack: counter=8, output_addr, block[0], block[1], retdest @@ -123,10 +123,9 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: counter, output_addr, block[0], block[1], retdest swap2 // stack: block[0], output_addr, counter, block[1], retdest - push 15 push 1 - //shl - STOP + push 32 + shl // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest dup2 dup2 @@ -138,21 +137,21 @@ sha2_gen_message_schedule_from_block_0_loop: swap2 // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest div - // stack: block[0] // (1 << 32), block[0] % (1 << 32), output_addr, counter, block[1], retdest + // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest swap1 - // stack: block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1], retdest + // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, 
counter, block[1], retdest dup3 - // stack: output_addr, block[0] % (1 << 32), block[0] // (1 << 32), output_addr, counter, block[1], retdest + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest %mstore_kernel_general_u32 - // stack: block[0] // (1 << 32), output_addr, counter, block[1], retdest + // stack: block[0] >> 32, output_addr, counter, block[1], retdest swap1 - // stack: output_addr, block[0] // (1 << 32), counter, block[1], retdest + // stack: output_addr, block[0] >> 32, counter, block[1], retdest %add_const(4) - // stack: output_addr + 4, block[0] // (1 << 32), counter, block[1], retdest + // stack: output_addr + 4, block[0] >> 32, counter, block[1], retdest swap1 - // stack: block[0] // (1 << 32), output_addr + 4, counter, block[1], retdest + // stack: block[0] >> 32, output_addr + 4, counter, block[1], retdest swap2 - // stack: counter, output_addr + 4, block[0] // (1 << 32), block[1], retdest + // stack: counter, output_addr + 4, block[0] >> 32, block[1], retdest %decrement dup1 iszero @@ -177,7 +176,6 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: block[1], output_addr, counter, block[0], retdest push 1 push 32 - swap1 // TODO: remove once SHL implementation is fixed shl // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest dup2 @@ -190,21 +188,21 @@ sha2_gen_message_schedule_from_block_1_loop: swap2 // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest div - // stack: block[1] // (1 << 32), block[1] % (1 << 32), output_addr, counter, block[0], retdest + // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest swap1 - // stack: block[1] % (1 << 32), block[1] // (1 << 32), output_addr, counter, block[0], retdest + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest dup3 - // stack: output_addr, block[1] % (1 << 32), block[1] // (1 << 32), output_addr, counter, block[0], 
retdest + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest %mstore_kernel_general_u32 - // stack: block[1] // (1 << 32), output_addr, counter, block[0], retdest + // stack: block[1] >> 32, output_addr, counter, block[0], retdest swap1 - // stack: output_addr, block[1] // (1 << 32), counter, block[0], retdest + // stack: output_addr, block[1] >> 32, counter, block[0], retdest %add_const(4) - // stack: output_addr + 4, block[1] // (1 << 32), counter, block[0], retdest + // stack: output_addr + 4, block[1] >> 32, counter, block[0], retdest swap1 - // stack: block[1] // (1 << 32), output_addr + 4, counter, block[0], retdest + // stack: block[1] >> 32, output_addr + 4, counter, block[0], retdest swap2 - // stack: counter, output_addr + 4, block[1] // (1 << 32), block[0], retdest + // stack: counter, output_addr + 4, block[1] >> 32, block[0], retdest %decrement dup1 iszero @@ -307,7 +305,7 @@ sha2_gen_message_schedule_remaining_end: JUMP // Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] - stack contains output_addr +// stack contains output_addr // Postcondition: global sha2_gen_all_message_schedules: JUMPDEST diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index 368572e4..7474cd0e 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -4,7 +4,6 @@ // stack: length, last_addr push 1 push 8 - swap1 // TODO: remove once SHL implementation is fixed shl // stack: 1 << 8, length, last_addr @@ -21,10 +20,9 @@ // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 8 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr mod @@ -40,10 +38,9 @@ // stack: 1 << 8, length, last_addr dup1 // 
stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 16 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr mod @@ -59,10 +56,9 @@ // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 24 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr mod @@ -72,16 +68,15 @@ push 3 swap1 sub - // stack: last_addr - 1, (length >> 24) % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr - 3, (length >> 24) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 32 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr mod @@ -91,13 +86,13 @@ push 4 swap1 sub - // stack: last_addr - 1, (length >> 32) % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr - 4, (length >> 32) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 40 shr @@ -109,16 +104,15 @@ push 5 swap1 sub - // stack: last_addr - 1, (length >> 40) % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr - 5, (length >> 40) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 48 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr mod @@ -128,16 +122,15 @@ push 6 swap1 sub - // stack: last_addr - 1, 
(length >> 48) % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr - 6, (length >> 48) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr dup1 // stack: 1 << 8, 1 << 8, length, last_addr - dup2 + dup3 // stack: length, 1 << 8, 1 << 8, length, last_addr push 56 - swap1 // TODO: remove once SHR implementation is fixed shr // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr mod @@ -147,7 +140,7 @@ push 7 swap1 sub - // stack: last_addr - 1, (length >> 56) % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr - 7, (length >> 56) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general %pop3 // stack: (empty) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 7b931a91..a123c9da 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -28,6 +28,9 @@ fn test_sha2_store() -> Result<()> { dbg!(num_bytes); dbg!(bytes.clone()); + let num_bytes = 3; + let bytes: Vec = vec![97.into(), 98.into(), 99.into()]; + let mut store_initial_stack = vec![U256::from(num_bytes)]; store_initial_stack.extend(bytes); store_initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); @@ -48,7 +51,7 @@ fn test_sha2_store() -> Result<()> { let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] .content .clone(); - dbg!(&mem[0..66]); + dbg!(&mem[0..65]); dbg!(&mem[100..353]); From e13841dd97fe572e6be2fe1882ce6026a141b2f5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 09:58:53 -0700 Subject: [PATCH 046/104] MESSAGE SCHEDULE WORKS --- evm/src/cpu/kernel/asm/sha2/memory.asm | 3 ++ evm/src/cpu/kernel/asm/sha2/sha2.asm | 41 ++++++++++++++++---------- evm/src/cpu/kernel/interpreter.rs | 6 ++++ evm/src/cpu/kernel/tests/sha2.rs | 2 +- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 018abef8..0b722287 100644 --- 
a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -162,13 +162,16 @@ // stack: c_2, c_1, c_0, offset dup4 // stack: offset, c_2, c_1, c_0, offset + %add_const(1) %mstore_kernel_general // stack: c_1, c_0, offset dup3 // stack: offset, c_1, c_0, offset + %add_const(2) %mstore_kernel_general // stack: c_0, offset swap1 // stack: offset, c_0 + %add_const(3) %mstore_kernel_general %endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 1bee6699..4ed3d466 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -37,7 +37,6 @@ sha2_store_end: // stack: counter=0, addr, retdest %pop2 // stack: retdest - //JUMP %jump(sha2_pad) // Precodition: input is in memory, starting at 0 of kernel general segment, of the form @@ -89,10 +88,8 @@ global sha2_pad: push 0 %mstore_kernel_general // stack: retdest - //JUMP push 100 - push 1 - %jump(sha2_gen_message_schedule_from_block) + %jump(sha2_gen_all_message_schedules) // Precodition: stack contains address of one message block, followed by output address // Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks @@ -113,10 +110,10 @@ global sha2_gen_message_schedule_from_block: %mload_kernel_general_u256 // stack: block[1], block[0], output_addr, retdest swap2 - STOP // stack: output_addr, block[0], block[1], retdest + %add_const(28) push 8 - // stack: counter=8, output_addr, block[0], block[1], retdest + // stack: counter=8, output_addr + 28, block[0], block[1], retdest %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_loop: JUMPDEST @@ -146,12 +143,12 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: block[0] >> 32, output_addr, counter, block[1], retdest swap1 // stack: output_addr, block[0] >> 32, counter, block[1], retdest - %add_const(4) - // stack: output_addr + 4, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + 
// stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest swap1 - // stack: block[0] >> 32, output_addr + 4, counter, block[1], retdest + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest swap2 - // stack: counter, output_addr + 4, block[0] >> 32, block[1], retdest + // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest %decrement dup1 iszero @@ -169,6 +166,12 @@ sha2_gen_message_schedule_from_block_0_end: // stack: block[1], output_addr, counter, block[0], retdest swap2 // stack: counter, output_addr, block[1], block[0], retdest + swap1 + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + swap1 + // stack: counter, output_addr + 64, block[1], block[0], retdest sha2_gen_message_schedule_from_block_1_loop: JUMPDEST // stack: counter, output_addr, block[1], block[0], retdest @@ -197,12 +200,12 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: block[1] >> 32, output_addr, counter, block[0], retdest swap1 // stack: output_addr, block[1] >> 32, counter, block[0], retdest - %add_const(4) - // stack: output_addr + 4, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest swap1 - // stack: block[1] >> 32, output_addr + 4, counter, block[0], retdest + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest swap2 - // stack: counter, output_addr + 4, block[1] >> 32, block[0], retdest + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest %decrement dup1 iszero @@ -215,6 +218,12 @@ sha2_gen_message_schedule_from_block_1_end: // stack: output_addr, block[0], block[1], retdest push 48 // stack: counter=48, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + swap1 + // stack: 
counter, output_addr + 36, block[0], block[1], retdest sha2_gen_message_schedule_remaining_loop: JUMPDEST // stack: counter, output_addr, block[0], block[1], retdest @@ -306,7 +315,8 @@ sha2_gen_message_schedule_remaining_end: // Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] // stack contains output_addr -// Postcondition: +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) global sha2_gen_all_message_schedules: JUMPDEST push 0 @@ -326,6 +336,7 @@ sha2_gen_all_message_schedules_loop: // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, retdest %jump(sha2_gen_message_schedule_from_block) sha2_gen_all_message_schedules_loop_end: + JUMPDEST // stack: cur_addr, counter, cur_output_addr, retdest %add_const(64) // stack: cur_addr + 64, counter, cur_output_addr, retdest diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 72a76950..e7e02d39 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -599,7 +599,10 @@ impl<'a> Interpreter<'a> { let context = self.pop().as_usize(); let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); + dbg!(offset); let value = self.memory.mload_general(context, segment, offset); + dbg!(value); + assert!(value.bits() <= segment.bit_range()); self.push(value); } @@ -608,6 +611,9 @@ impl<'a> Interpreter<'a> { let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.pop(); + dbg!("STORE",offset); + dbg!("STORE",value); + assert!(value.bits() <= segment.bit_range()); self.memory.mstore_general(context, segment, offset, value); } } diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index a123c9da..a9a82a1a 100644 --- 
a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -53,7 +53,7 @@ fn test_sha2_store() -> Result<()> { .clone(); dbg!(&mem[0..65]); - dbg!(&mem[100..353]); + dbg!(&mem[100..356]); Ok(()) } From deddcf6b18185faac773c78ad6a2b916abc622b2 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 12 Aug 2022 11:12:07 -0700 Subject: [PATCH 047/104] cleanup --- evm/src/cpu/kernel/interpreter.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index e7e02d39..c4eea076 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -599,9 +599,7 @@ impl<'a> Interpreter<'a> { let context = self.pop().as_usize(); let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); - dbg!(offset); let value = self.memory.mload_general(context, segment, offset); - dbg!(value); assert!(value.bits() <= segment.bit_range()); self.push(value); } @@ -611,8 +609,6 @@ impl<'a> Interpreter<'a> { let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.pop(); - dbg!("STORE",offset); - dbg!("STORE",value); assert!(value.bits() <= segment.bit_range()); self.memory.mstore_general(context, segment, offset, value); } From 42019264ad3c492311d35411cafd8d407363c499 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 2022 15:43:22 -0700 Subject: [PATCH 048/104] compression --- evm/src/cpu/kernel/aggregator.rs | 1 + evm/src/cpu/kernel/asm/sha2/ops.asm | 19 ++- evm/src/cpu/kernel/asm/sha2/sha2.asm | 151 ++++++++++++++++++++- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 34 +++++ 4 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/sha2/temp_words.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 069bece0..ce59a974 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -45,6 +45,7 @@ 
pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/sha2/memory.asm"), include_str!("asm/sha2/ops.asm"), include_str!("asm/sha2/sha2.asm"), + include_str!("asm/sha2/temp_words.asm"), include_str!("asm/sha2/write_length.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index c600afe9..5b042b40 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -1,3 +1,21 @@ +// u32 addition (discarding 2^32 bit) +%macro add_u32 + // stack: x, y + add + // stack: x + y + dup1 + // stack: x + y, x + y + %shr_const(32) + // stack: (x + y) >> 32, x + y + %shl_const(32) + // stack: ((x + y) >> 32) << 32, x + y + swap1 + // stack: x + y, ((x + y) >> 32) << 32 + sub + // stack: x + y - ((x + y) >> 32) << 32 +%endmacro + + // 32-bit right rotation %macro rotr // stack: rot, value @@ -55,7 +73,6 @@ %endmacro %macro sha2_sigma_0 - JUMPDEST // stack: x dup1 // stack: x, x diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 4ed3d466..4094fd95 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -1,3 +1,7 @@ +global sha2: + JUMPDEST + %jump(sha2_store) + global sha2_store: JUMPDEST // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest @@ -283,9 +287,9 @@ sha2_gen_message_schedule_remaining_loop: // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest swap4 // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - add - add - add + %add_u32 + %add_u32 + %add_u32 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], 
block[1], retdest swap1 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest @@ -361,3 +365,144 @@ sha2_gen_all_message_schedules_end: // stack: retdest JUMP +// TODO: message schedules for multiple blocks +global sha2_compression: + JUMPDEST + // stack: message_schedule_addr, i=0 + push sha2_constants_h + %add_const(7) + %mload_kernel_code_u32 + // stack: h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(6) + %mload_kernel_code_u32 + // stack: g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(5) + %mload_kernel_code_u32 + // stack: f[0], g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(3) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(2) + %mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %add_const(1) + %mload_kernel_code_u32 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + push sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 +sha2_compression_loop: + JUMPDEST + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup9 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup11 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + add + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], 
g[i], h[i], message_schedule_addr, i + %mload_kernel_general_u32, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup12 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup10 + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup10 + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup10 + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup4 + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup4 + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + dup6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], 
message_schedule_addr, i + dup3 + // stack: T[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %add_u32 + // stack: e[i+1]=T[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + swap2 + // stack: T[1], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %add_u32 + // stack: a[i+1]=T[1]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], message_schedule_addr, i + swap1 + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + swap5 + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + swap8 + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i + pop + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i + swap7 + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], message_schedule_addr, i + swap1 + swap7 + swap1 + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], message_schedule_addr, i + swap2 + swap7 + swap2 + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], message_schedule_addr, i + swap3 + swap7 + swap3 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], message_schedule_addr, i + swap4 + swap7 + swap4 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], message_schedule_addr, i + swap5 + swap7 + swap5 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], message_schedule_addr, i + swap6 + swap7 + swap6 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + dup10 + // 
stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + dup1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + %eq_const(64) + %jumpi(sha2_compression_end) + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + swap10 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1 + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1 + %jump(sha2_compression_loop) +sha2_compression_end: + JUMPDEST + // stack: i+1=64, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + pop + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm new file mode 100644 index 00000000..18046c94 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -0,0 +1,34 @@ +%macro sha2_temp_word1 + // stack: e, f, g, h, K[i], W[i] + dup1 + // stack: e, e, f, g, h, K[i], W[i] + %sha2_bigsigma_1 + // stack: Sigma_1(e), e, f, g, h, K[i], W[i] + swap3 + // stack: g, e, f, Sigma_1(e), h, K[i], W[i] + swap2 + // stack: f, e, g, Sigma_1(e), h, K[i], W[i] + swap1 + // stack: e, f, g, Sigma_1(e), h, K[i], W[i] + %sha2_choice + // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] + add + add + add + add + // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] +%endmacro + +%macro sha2_temp_word2 + // stack: a, b, c + dup1 + // stack: a, a, b, c + %sha2_bigsigma_0 + // stack: Sigma_0(a), a, b, c + swap3 + // stack: c, a, b, Sigma_0(a) + %sha2_majority + // stack: Maj(c, a, b), Sigma_0(a) + add + // stack: Maj(c, a, b) + Sigma_0(a) +%endmacro From 
7b35433aa3aff5b8128d46ba44084103311fbc92 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 2022 16:03:37 -0700 Subject: [PATCH 049/104] more compression --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 92 ++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 4094fd95..6873c96a 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -506,3 +506,95 @@ sha2_compression_end: // stack: i+1=64, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i pop // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + swap1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(1) + %mload_kernel_code_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + swap2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(2) + %mload_kernel_code_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + swap3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(3) + %mload_kernel_code_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], 
c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + swap4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + swap5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(5) + %mload_kernel_code_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + swap6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + push sha2_constants_h + %add_const(6) + %mload_kernel_code_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + swap7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i + push sha2_constants_h + %add_const(6) + %mload_kernel_code_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 
message_schedule_addr, i + swap8 + // stack: message_schedule_addr, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i + pop + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i + swap1 + %shl(32) + or + swap1 + %shl(64) + or + swap1 + %shl(96) + or + swap1 + %shl(128) + or + swap1 + %shl(160) + or + swap1 + %shl(192) + or + swap1 + %shl(224) + or + // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), i + swap1 + // stack: i, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]) + pop + // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]) + From 2a2152db9636e8cbf133fc0e2b87be29b9e3e7d8 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 2022 16:09:12 -0700 Subject: [PATCH 050/104] updates --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 202 ++++++++++++++------------- 1 file changed, 105 insertions(+), 97 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 6873c96a..eb387084 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -325,252 +325,260 @@ global sha2_gen_all_message_schedules: JUMPDEST push 0 // stack: 0, output_addr, retdest + dup2 + // stack: output_addr, 0, output_addr, retdest + swap1 + // stack: 0, output_addr, output_addr, retdest %mload_kernel_general - // stack: num_blocks, output_addr, retdest + // stack: num_blocks, output_addr, output_addr, retdest push 1 - // stack: cur_addr = 1, counter = num_blocks, output_addr, retdest + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest sha2_gen_all_message_schedules_loop: JUMPDEST - // stack: cur_addr, counter, cur_output_addr, retdest + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest 
push sha2_gen_all_message_schedules_loop_end - // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, retdest + // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest dup4 - // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, retdest + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest dup3 - // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, retdest + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest %jump(sha2_gen_message_schedule_from_block) sha2_gen_all_message_schedules_loop_end: JUMPDEST - // stack: cur_addr, counter, cur_output_addr, retdest + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest %add_const(64) - // stack: cur_addr + 64, counter, cur_output_addr, retdest + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest swap1 %decrement swap1 - // stack: cur_addr + 64, counter - 1, cur_output_addr, retdest + // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest swap2 %add_const(256) swap2 - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest dup2 - // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest iszero %jumpi(sha2_gen_all_message_schedules_end) %jump(sha2_gen_all_message_schedules_loop) JUMPDEST sha2_gen_all_message_schedules_end: JUMPDEST - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, retdest + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 - // stack: retdest - JUMP + // stack: output_addr, retdest + push 0 + // stack: 0, output_addr, retdest + 
swap1 + // stack: output_addr, 0, retdest + %jump(sha2_compression) // TODO: message schedules for multiple blocks global sha2_compression: JUMPDEST - // stack: message_schedule_addr, i=0 + // stack: message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(7) %mload_kernel_code_u32 - // stack: h[0], message_schedule_addr, i=0 + // stack: h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(6) %mload_kernel_code_u32 - // stack: g[0], h[0], message_schedule_addr, i=0 + // stack: g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(5) %mload_kernel_code_u32 - // stack: f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: f[0], g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(4) %mload_kernel_code_u32 - // stack: e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(3) %mload_kernel_code_u32 - // stack: d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(2) %mload_kernel_code_u32 - // stack: c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(1) %mload_kernel_code_u32 - // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest push sha2_constants_h %mload_kernel_code_u32 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest sha2_compression_loop: JUMPDEST - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], 
message_schedule_addr, i, retdest dup9 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup11 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest add - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %mload_kernel_general_u32, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest push sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup12 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], 
message_schedule_addr, i, retdest add - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup10 - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: 
b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup4 - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest dup3 - // stack: T[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: T[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %add_u32 - // stack: e[i+1]=T[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: e[i+1]=T[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest swap2 - // stack: T[1], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + // stack: T[1], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest %add_u32 - // stack: a[i+1]=T[1]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], message_schedule_addr, i + // stack: a[i+1]=T[1]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], message_schedule_addr, i, retdest swap1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], 
d[i], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest swap5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest swap8 - // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i, retdest pop - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i, retdest swap7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], message_schedule_addr, i + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], message_schedule_addr, i, retdest swap1 swap7 swap1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], message_schedule_addr, i + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], message_schedule_addr, i, retdest swap2 swap7 swap2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], message_schedule_addr, i, retdest swap3 swap7 swap3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], message_schedule_addr, i, retdest swap4 swap7 swap4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], 
message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], message_schedule_addr, i, retdest swap5 swap7 swap5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], message_schedule_addr, i, retdest swap6 swap7 swap6 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest dup10 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest dup1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest %eq_const(64) %jumpi(sha2_compression_end) - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest swap10 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1, retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1, retdest 
%jump(sha2_compression_loop) sha2_compression_end: JUMPDEST - // stack: i+1=64, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: i+1=64, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest pop - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %mload_kernel_code_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest swap1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(1) %mload_kernel_code_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest swap2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(2) %mload_kernel_code_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], 
g[64], h[64], message_schedule_addr, i + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest swap3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(3) %mload_kernel_code_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest swap4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(4) %mload_kernel_code_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest swap5 - // stack: f[64], a[0]+a[64], 
b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(5) %mload_kernel_code_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i, retdest swap6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(6) %mload_kernel_code_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest swap7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest push sha2_constants_h %add_const(6) %mload_kernel_code_u32 - // stack: 
h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest swap8 - // stack: message_schedule_addr, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i + // stack: message_schedule_addr, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i, retdest pop - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i, retdest swap1 %shl(32) or @@ -592,9 +600,9 @@ sha2_compression_end: swap1 %shl(224) or - // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), i + // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), i, retdest swap1 - // stack: i, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]) + // stack: i, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest pop - // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]) - + // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest + STOP From dea9d0641623c165898f90b74cbf34acfee58540 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 
2022 16:09:56 -0700 Subject: [PATCH 051/104] fix --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index eb387084..b91ce4e1 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -419,7 +419,7 @@ sha2_compression_loop: // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest add // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - %mload_kernel_general_u32, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i + %mload_kernel_general_u32 // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest push sha2_constants_k // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest From b42d3780804b9588526148785c903d5f48327fc1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 2022 16:12:33 -0700 Subject: [PATCH 052/104] fix --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index b91ce4e1..100894ca 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -580,25 +580,25 @@ sha2_compression_end: pop // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i, retdest swap1 - %shl(32) + %shl_const(32) or swap1 - %shl(64) + %shl_const(64) or swap1 - %shl(96) + %shl_const(96) or swap1 - %shl(128) + %shl_const(128) or swap1 - %shl(160) + %shl_const(160) or swap1 - %shl(192) + %shl_const(192) or swap1 - %shl(224) + %shl_const(224) or // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], 
b[0]+b[64], a[0]+a[64]), i, retdest swap1 From 0ea1388206b6959e183aab1facc73ea60183cc24 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 15 Aug 2022 16:13:18 -0700 Subject: [PATCH 053/104] fix --- evm/src/cpu/kernel/asm/sha2/constants.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/constants.asm b/evm/src/cpu/kernel/asm/sha2/constants.asm index 22712a98..d39661f8 100644 --- a/evm/src/cpu/kernel/asm/sha2/constants.asm +++ b/evm/src/cpu/kernel/asm/sha2/constants.asm @@ -1,4 +1,4 @@ -sha2_constants_k: +global sha2_constants_k: BYTES 66, 138, 47, 152 BYTES 113, 55, 68, 145 BYTES 181, 192, 251, 207 @@ -64,7 +64,7 @@ sha2_constants_k: BYTES 190, 249, 163, 247 BYTES 198, 113, 120, 242 -sha2_constants_h: +global sha2_constants_h: BYTES 106, 9, 230, 103 BYTES 187, 103, 174, 133 BYTES 60, 110, 243, 114 From 94d967f5e0ad07ecd640eb30dcab8ee807adbd07 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 16 Aug 2022 18:37:36 -0700 Subject: [PATCH 054/104] many fixes --- evm/src/cpu/kernel/asm/sha2/ops.asm | 10 +- evm/src/cpu/kernel/asm/sha2/sha2.asm | 300 ++++++++++++--------- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 10 +- evm/src/cpu/kernel/tests/sha2.rs | 14 +- 4 files changed, 181 insertions(+), 153 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 5b042b40..893dc9de 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -192,17 +192,13 @@ and // stack: x and y, z, x, y, z swap2 - // stack: x, x and y, z, y, z - swap1 - // stack: x and y, x, z, y, z - swap2 - // stack: z, x, x and y, y, z + // stack: x, z, x and y, y, z and // stack: x and z, x and y, y, z swap2 - // stack: y, x and z, x and y, z + // stack: y, x and y, x and z, z swap1 - // stack: x and z, y, x and y, z + // stack: x and y, y, x and z, z swap3 // stack: z, y, x and z, x and y and diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm 
b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 100894ca..2b5de553 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -1,7 +1,3 @@ -global sha2: - JUMPDEST - %jump(sha2_store) - global sha2_store: JUMPDEST // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest @@ -323,11 +319,10 @@ sha2_gen_message_schedule_remaining_end: // each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) global sha2_gen_all_message_schedules: JUMPDEST + // stack: output_addr, retdest + dup1 + // stack: output_addr, output_addr, retdest push 0 - // stack: 0, output_addr, retdest - dup2 - // stack: output_addr, 0, output_addr, retdest - swap1 // stack: 0, output_addr, output_addr, retdest %mload_kernel_general // stack: num_blocks, output_addr, output_addr, retdest @@ -367,218 +362,251 @@ sha2_gen_all_message_schedules_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 // stack: output_addr, retdest - push 0 - // stack: 0, output_addr, retdest - swap1 - // stack: output_addr, 0, retdest %jump(sha2_compression) -// TODO: message schedules for multiple blocks global sha2_compression: JUMPDEST + // stack: message_schedule_addr, retdest + push 0 + // stack: i=0, message_schedule_addr, retdest + swap1 // stack: message_schedule_addr, i=0, retdest + push 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest push sha2_constants_h - %add_const(7) + %add_const(28) %mload_kernel_code_u32 - // stack: h[0], message_schedule_addr, i=0, retdest + // stack: h[0], num_blocks, message_schedule_addr, i=0, retdest push sha2_constants_h - %add_const(6) + %add_const(24) %mload_kernel_code_u32 - // stack: g[0], h[0], message_schedule_addr, i=0, retdest + // stack: g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest push sha2_constants_h - %add_const(5) + %add_const(20) 
%mload_kernel_code_u32 - // stack: f[0], g[0], h[0], message_schedule_addr, i=0, retdest + // stack: f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(8) + %mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(4) %mload_kernel_code_u32 - // stack: e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(3) - %mload_kernel_code_u32 - // stack: d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(2) - %mload_kernel_code_u32 - // stack: c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(1) - %mload_kernel_code_u32 - // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest push sha2_constants_h %mload_kernel_code_u32 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], message_schedule_addr, i=0, retdest + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest sha2_compression_loop: JUMPDEST - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - dup9 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - dup11 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, message_schedule_addr, 
a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - add - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest - push sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + dup10 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup12 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest add - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + dup13 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, 
sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup10 - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, 
i, retdest dup4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup4 - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest dup3 - // stack: T[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: e[i+1]=T[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest swap2 - // stack: T[1], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], message_schedule_addr, i, retdest + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: a[i+1]=T[1]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], message_schedule_addr, i, retdest + // stack: a[i+1]=T1[i]+T2[i], e[i+1], 
b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, message_schedule_addr, i, retdest swap1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest swap5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest swap8 - // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i, retdest + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, message_schedule_addr, i, retdest pop - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], message_schedule_addr, i, retdest + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, message_schedule_addr, i, retdest swap7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, message_schedule_addr, i, retdest swap1 swap7 swap1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, message_schedule_addr, i, retdest swap2 swap7 swap2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], message_schedule_addr, 
i, retdest + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, message_schedule_addr, i, retdest swap3 swap7 swap3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, message_schedule_addr, i, retdest swap4 swap7 swap4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, message_schedule_addr, i, retdest swap5 swap7 swap5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, message_schedule_addr, i, retdest swap6 swap7 swap6 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest - dup10 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + dup11 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest dup1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], 
h[i+1], num_blocks, message_schedule_addr, i, retdest + dup1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + dup12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + sub + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + swap12 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + swap1 + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + push 256 + mul + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + add + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + swap11 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + dup1 + // stack: num_blocks new, num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + iszero %jumpi(sha2_compression_end) - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i, retdest + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest swap10 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1, retdest + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], 
f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], message_schedule_addr, i+1, retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + push 64 + swap1 + mod + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + swap11 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, (i+1)%64, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, (i+1)%64, retdest %jump(sha2_compression_loop) sha2_compression_end: JUMPDEST - // stack: i+1=64, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - pop - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: num_blocks=0, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + %pop2 + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h %mload_kernel_code_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest swap1 - // stack: b[64], a[0]+a[64], 
c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - push sha2_constants_h - %add_const(1) - %mload_kernel_code_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - swap2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - push sha2_constants_h - %add_const(2) - %mload_kernel_code_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - swap3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - push sha2_constants_h - %add_const(3) - %mload_kernel_code_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], message_schedule_addr, i, retdest - swap4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h %add_const(4) %mload_kernel_code_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: b[0]+b[64], a[0]+a[64], c[64], 
d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + swap2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + push sha2_constants_h + %add_const(8) + %mload_kernel_code_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + swap3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + push sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + swap4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + push sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest swap5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h - %add_const(5) + %add_const(20) %mload_kernel_code_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], 
message_schedule_addr, i, retdest + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], message_schedule_addr, i, retdest + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest swap6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h - %add_const(6) + %add_const(24) %mload_kernel_code_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], message_schedule_addr, i, retdest + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest swap7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h - %add_const(6) + %add_const(28) %mload_kernel_code_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], 
e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], message_schedule_addr, i, retdest + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest swap8 - // stack: message_schedule_addr, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i, retdest + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest pop - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], i, retdest + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest swap1 %shl_const(32) or @@ -600,9 +628,13 @@ sha2_compression_end: swap1 %shl_const(224) or - // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), i, retdest - swap1 - // stack: i, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest - pop - // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest + // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), message_schedule_addr, i, retdest + swap2 + // stack: i, num_blocks, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest + %pop2 + // stack: sha2_result = concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest STOP + +global sha2: + JUMPDEST + %jump(sha2_store) diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm 
b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index 18046c94..cd2bd303 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -12,10 +12,10 @@ // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] - add - add - add - add + %add_u32 + %add_u32 + %add_u32 + %add_u32 // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] %endmacro @@ -29,6 +29,6 @@ // stack: c, a, b, Sigma_0(a) %sha2_majority // stack: Maj(c, a, b), Sigma_0(a) - add + %add_u32 // stack: Maj(c, a, b) + Sigma_0(a) %endmacro diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index a9a82a1a..ed8f8674 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -11,7 +11,7 @@ use crate::memory::segments::Segment; #[test] fn test_sha2_store() -> Result<()> { let kernel = combined_kernel(); - let sha2_store = kernel.global_labels["sha2_store"]; + let sha2 = kernel.global_labels["sha2"]; // let test_sha2_read = kernel.global_labels["test_sha2_read"]; let mut rng = thread_rng(); @@ -37,23 +37,23 @@ fn test_sha2_store() -> Result<()> { store_initial_stack.reverse(); dbg!(store_initial_stack.clone()); - let after_storing = run( + let after_sha2 = run( &kernel.code, - sha2_store, + sha2, store_initial_stack, &kernel.prover_inputs, )?; - let stack_after_storing = after_storing.stack(); + let stack_after_storing = after_sha2.stack(); dbg!(stack_after_storing.clone()); - let memory_after_storing = after_storing.memory; + let memory_after_storing = after_sha2.memory; let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] .content .clone(); - dbg!(&mem[0..65]); + // dbg!(&mem[0..65]); - dbg!(&mem[100..356]); + // dbg!(&mem[100..356]); Ok(()) } From 7d2b17f43ceefa5f10f2a3e4a7786e27668d20ad Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 16 Aug 2022 18:57:57 -0700 Subject: [PATCH 055/104] clippy --- 
evm/src/cpu/kernel/tests/sha2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index ed8f8674..4a6472c2 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -48,7 +48,7 @@ fn test_sha2_store() -> Result<()> { dbg!(stack_after_storing.clone()); let memory_after_storing = after_sha2.memory; - let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] + let _mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] .content .clone(); // dbg!(&mem[0..65]); From 0d14ef4e82c925f87910c458327cb0f444e60183 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 16 Aug 2022 19:30:55 -0700 Subject: [PATCH 056/104] clippy --- evm/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/evm/src/lib.rs b/evm/src/lib.rs index 923df55a..6f332b59 100644 --- a/evm/src/lib.rs +++ b/evm/src/lib.rs @@ -4,7 +4,6 @@ #![allow(clippy::type_complexity)] #![feature(let_chains)] #![feature(generic_const_exprs)] -#![feature(let_chains)] pub mod all_stark; pub mod arithmetic; From f4207e75a4d24999f22a927d7ef7c09c911cc6f5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 19 Sep 2022 10:32:52 -0700 Subject: [PATCH 057/104] sha2 testing infrastructure --- evm/Cargo.toml | 5 ++- evm/src/cpu/kernel/asm/sha2/sha2.asm | 25 +++++--------- evm/src/cpu/kernel/assembler.rs | 2 +- evm/src/cpu/kernel/tests/sha2.rs | 51 ++++++++++++---------------- 4 files changed, 35 insertions(+), 48 deletions(-) diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 6db81902..afddaeb6 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -10,12 +10,15 @@ plonky2_util = { path = "../util" } eth-trie-utils = { git = "https://github.com/mir-protocol/eth-trie-utils.git", rev = "dd3595b4ba7923f8d465450d210f17a2b4e20f96" } maybe_rayon = { path = "../maybe_rayon" } anyhow = "1.0.40" +ascii = "1.0.0" env_logger = "0.9.0" ethereum-types = "0.14.0" 
hex = { version = "0.4.3", optional = true } hex-literal = "0.3.4" itertools = "0.10.3" +keccak-hash = "0.9.0" log = "0.4.14" +maybe_rayon = { path = "../maybe_rayon" } once_cell = "1.13.0" pest = "2.1.3" pest_derive = "2.1.0" @@ -23,7 +26,7 @@ rand = "0.8.5" rand_chacha = "0.3.1" rlp = "0.5.1" serde = { version = "1.0.144", features = ["derive"] } -keccak-hash = "0.9.0" +sha2 = "0.10.2" tiny-keccak = "2.0.2" [dev-dependencies] diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 2b5de553..dc230488 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -607,32 +607,25 @@ sha2_compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest pop // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest - swap1 %shl_const(32) or - swap1 - %shl_const(64) + %shl_const(32) or - swap1 - %shl_const(96) + %shl_const(32) or - swap1 - %shl_const(128) + %shl_const(32) or - swap1 - %shl_const(160) + %shl_const(32) or - swap1 - %shl_const(192) + %shl_const(32) or - swap1 - %shl_const(224) + %shl_const(32) or - // stack: concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), message_schedule_addr, i, retdest + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), message_schedule_addr, i, retdest swap2 - // stack: i, num_blocks, concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), retdest + // stack: i, num_blocks, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest %pop2 - // stack: sha2_result = concat(h[0]+h[64], g[0]+g[64], f[0]+f[64], e[0]+e[64], d[0]+d[64], c[0]+c[64], b[0]+b[64], a[0]+a[64]), 
retdest + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest STOP global sha2: diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs index ede60a29..5980e460 100644 --- a/evm/src/cpu/kernel/assembler.rs +++ b/evm/src/cpu/kernel/assembler.rs @@ -579,7 +579,7 @@ mod tests { ); let kernel = parse_and_assemble(&["%stack (a) -> (a)"]); - assert_eq!(kernel.code, vec![]); + assert_eq!(kernel.code, vec![] as Vec); let kernel = parse_and_assemble(&["%stack (a, b, c) -> (c, b, a)"]); assert_eq!(kernel.code, vec![swap2]); diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 4a6472c2..0c65b68c 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,18 +1,19 @@ use std::str::FromStr; use anyhow::Result; +use ascii::AsciiStr; use ethereum_types::U256; use rand::{thread_rng, Rng}; +use sha2::{Sha256, Digest}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; use crate::memory::segments::Segment; #[test] -fn test_sha2_store() -> Result<()> { +fn test_sha2() -> Result<()> { let kernel = combined_kernel(); let sha2 = kernel.global_labels["sha2"]; - // let test_sha2_read = kernel.global_labels["test_sha2_read"]; let mut rng = thread_rng(); let num_bytes = rng.gen_range(1..17); @@ -28,14 +29,21 @@ fn test_sha2_store() -> Result<()> { dbg!(num_bytes); dbg!(bytes.clone()); - let num_bytes = 3; - let bytes: Vec = vec![97.into(), 98.into(), 99.into()]; + let message = "blargh blargh blargh"; + let num_bytes = message.len(); + + let mut hasher = Sha256::new(); + hasher.update(message); + let expected = format!("{:02X}", hasher.finalize()); + + dbg!(expected); + + let bytes: Vec = AsciiStr::from_ascii(message).unwrap().as_bytes().iter().map(|&x| U256::from(x as u32)).collect(); let mut store_initial_stack = vec![U256::from(num_bytes)]; store_initial_stack.extend(bytes); 
store_initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); store_initial_stack.reverse(); - dbg!(store_initial_stack.clone()); let after_sha2 = run( &kernel.code, @@ -45,35 +53,18 @@ fn test_sha2_store() -> Result<()> { )?; let stack_after_storing = after_sha2.stack(); - dbg!(stack_after_storing.clone()); + let result = stack_after_storing.clone()[1]; + let actual = format!("{:02X}", result); + + dbg!(actual); - let memory_after_storing = after_sha2.memory; - let _mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] - .content - .clone(); + // let memory_after_storing = after_sha2.memory; + // let _mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] + // .content + // .clone(); // dbg!(&mem[0..65]); // dbg!(&mem[100..356]); Ok(()) } - -/*#[test] -fn test_sha2() -> Result<()> { - let kernel = combined_kernel(); - let sha2_store = kernel.global_labels["sha2_store"]; - let sha2_pad = kernel.global_labels["sha2_pad"]; - let mut rng = thread_rng(); - let a = U256([0; 4].map(|_| rng.gen())); - let b = U256([0; 4].map(|_| rng.gen())); - - let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack; - let initial_stack = vec![b, a]; - let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - - let expected_stack = todo!(); - assert_eq!(stack_with_kernel, expected_stack); - - Ok(()) -}*/ From 9f5d75fed4d138bc9558f61ac7a552b86b0bce29 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 18 Aug 2022 16:19:47 -0700 Subject: [PATCH 058/104] fix --- evm/src/cpu/kernel/tests/sha2.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 0c65b68c..9ecb0f2a 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -29,8 +29,9 @@ fn test_sha2() -> Result<()> { dbg!(num_bytes); 
dbg!(bytes.clone()); - let message = "blargh blargh blargh"; + let message = "blargh blargh blargh blargh blargh blargh blargh blargh blargh"; let num_bytes = message.len(); + dbg!(num_bytes); let mut hasher = Sha256::new(); hasher.update(message); From 0150c2f0b0b3f1992c6f2f19a237ba28b20ad5e4 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 18 Aug 2022 16:21:52 -0700 Subject: [PATCH 059/104] cleanup --- evm/src/cpu/kernel/tests/sha2.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 9ecb0f2a..51787585 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -4,7 +4,7 @@ use anyhow::Result; use ascii::AsciiStr; use ethereum_types::U256; use rand::{thread_rng, Rng}; -use sha2::{Sha256, Digest}; +use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; @@ -29,7 +29,7 @@ fn test_sha2() -> Result<()> { dbg!(num_bytes); dbg!(bytes.clone()); - let message = "blargh blargh blargh blargh blargh blargh blargh blargh blargh"; + let message = "blargh blargh blargh blarh blargh blargh blargh blargho"; let num_bytes = message.len(); dbg!(num_bytes); @@ -37,9 +37,12 @@ fn test_sha2() -> Result<()> { hasher.update(message); let expected = format!("{:02X}", hasher.finalize()); - dbg!(expected); - - let bytes: Vec = AsciiStr::from_ascii(message).unwrap().as_bytes().iter().map(|&x| U256::from(x as u32)).collect(); + let bytes: Vec = AsciiStr::from_ascii(message) + .unwrap() + .as_bytes() + .iter() + .map(|&x| U256::from(x as u32)) + .collect(); let mut store_initial_stack = vec![U256::from(num_bytes)]; store_initial_stack.extend(bytes); @@ -56,8 +59,8 @@ fn test_sha2() -> Result<()> { let stack_after_storing = after_sha2.stack(); let result = stack_after_storing.clone()[1]; let actual = format!("{:02X}", result); - - dbg!(actual); + + assert_eq!(expected, actual); // let 
memory_after_storing = after_sha2.memory; // let _mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] From 8e067dcffce078a90698599bc954db017a1673d5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 18 Aug 2022 16:22:43 -0700 Subject: [PATCH 060/104] cleanup --- evm/src/cpu/kernel/tests/sha2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 51787585..0ff6d0f8 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -8,7 +8,7 @@ use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; -use crate::memory::segments::Segment; + #[test] fn test_sha2() -> Result<()> { From b7c9f2cb5b671435d16fd57f913f17151ea6e723 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 18 Aug 2022 16:22:49 -0700 Subject: [PATCH 061/104] fmt --- evm/src/cpu/kernel/tests/sha2.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 0ff6d0f8..ab6e1c6f 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -9,7 +9,6 @@ use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; - #[test] fn test_sha2() -> Result<()> { let kernel = combined_kernel(); From 67e19fd7c9f39d0493f04058f553bc8ed982a806 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 19 Aug 2022 13:50:59 -0700 Subject: [PATCH 062/104] fix of message schedule --- evm/src/cpu/kernel/aggregator.rs | 1 + evm/src/cpu/kernel/asm/sha2/sha2.asm | 7 +++++-- evm/src/cpu/kernel/tests/sha2.rs | 25 +++++++++++++++++-------- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index ce59a974..20c659ed 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ 
b/evm/src/cpu/kernel/aggregator.rs @@ -46,6 +46,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/sha2/ops.asm"), include_str!("asm/sha2/sha2.asm"), include_str!("asm/sha2/temp_words.asm"), + include_str!("asm/sha2/util.asm"), include_str!("asm/sha2/write_length.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index dc230488..15adc0f8 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -84,11 +84,13 @@ global sha2_pad: // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest + dup1 + // stack: num_blocks, num_blocks, retdest // STEP 5: write num_blocks to x[0] push 0 %mstore_kernel_general - // stack: retdest - push 100 + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks %jump(sha2_gen_all_message_schedules) // Precodition: stack contains address of one message block, followed by output address @@ -307,6 +309,7 @@ sha2_gen_message_schedule_remaining_loop: iszero %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) + STOP sha2_gen_message_schedule_remaining_end: JUMPDEST // stack: counter=0, output_addr, block[0], block[1], retdest diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index ab6e1c6f..7e400a61 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -8,6 +8,7 @@ use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; +use crate::memory::segments::Segment; #[test] fn test_sha2() -> Result<()> { @@ -28,7 +29,7 @@ fn test_sha2() -> Result<()> { dbg!(num_bytes); dbg!(bytes.clone()); - let message = "blargh blargh blargh blarh blargh blargh blargh blargho"; + let message = "blargh blargh blargh blarh blargh blargh blargh blarghooo"; let 
num_bytes = message.len(); dbg!(num_bytes); @@ -56,18 +57,26 @@ fn test_sha2() -> Result<()> { )?; let stack_after_storing = after_sha2.stack(); + + dbg!(stack_after_storing.clone()); + let result = stack_after_storing.clone()[1]; let actual = format!("{:02X}", result); + dbg!(expected); + dbg!(actual); - assert_eq!(expected, actual); + // assert_eq!(expected, actual); - // let memory_after_storing = after_sha2.memory; - // let _mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] - // .content - // .clone(); - // dbg!(&mem[0..65]); + let memory_after_storing = after_sha2.memory; + let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] + .content + .clone(); + dbg!(&mem[0..65]); - // dbg!(&mem[100..356]); + let num_blocks = (num_bytes+8)/64 + 1; + let message_schedule_start = 64 * num_blocks + 2; + dbg!(&mem[message_schedule_start..message_schedule_start+256]); + dbg!(&mem[message_schedule_start+256..message_schedule_start+512]); Ok(()) } From 8ebf4c8f7c5bc236caf1dbf6a94c9ce2136f5745 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 19 Aug 2022 15:25:25 -0700 Subject: [PATCH 063/104] more fixes --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 53 +++++++++++++++++++++------- evm/src/cpu/kernel/tests/sha2.rs | 6 ++-- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 15adc0f8..09078488 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -309,7 +309,6 @@ sha2_gen_message_schedule_remaining_loop: iszero %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) - STOP sha2_gen_message_schedule_remaining_end: JUMPDEST // stack: counter=0, output_addr, block[0], block[1], retdest @@ -521,29 +520,25 @@ sha2_compression_loop: // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], 
num_blocks, num_blocks new, i, retdest swap11 // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest - dup1 - // stack: num_blocks new, num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest - iszero - %jumpi(sha2_compression_end) - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest swap10 // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest pop // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest push 64 swap1 mod - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, retdest swap11 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, (i+1)%64, retdest + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, (i+1)%64, retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, (i+1)%64, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, (i+1)%64, retdest + dup11 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, (i+1)%64, retdest + iszero + 
%jumpi(sha2_compression_end_block) %jump(sha2_compression_loop) -sha2_compression_end: +sha2_compression_end_block: JUMPDEST - // stack: num_blocks=0, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - %pop2 // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest push sha2_constants_h %mload_kernel_code_u32 @@ -608,6 +603,38 @@ sha2_compression_end: // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest swap8 // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + dup1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + iszero + %jumpi(sha2_compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // TODO: "insertion" macro for the below + swap1 + swap2 + swap1 + swap2 + swap3 + swap2 + swap3 + swap4 + swap3 + swap4 + swap5 + swap4 + swap5 + swap6 + swap5 + swap6 + swap7 + swap6 + swap7 + swap8 + swap7 + swap8 + %jump(sha2_compression_loop) +sha2_compression_end: + JUMPDEST + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest pop // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest %shl_const(32) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 7e400a61..ca28325d 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -71,12 +71,12 @@ fn test_sha2() -> Result<()> { let mem = 
memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] .content .clone(); - dbg!(&mem[0..65]); + // dbg!(&mem[0..65]); let num_blocks = (num_bytes+8)/64 + 1; let message_schedule_start = 64 * num_blocks + 2; - dbg!(&mem[message_schedule_start..message_schedule_start+256]); - dbg!(&mem[message_schedule_start+256..message_schedule_start+512]); + // dbg!(&mem[message_schedule_start..message_schedule_start+256]); + // dbg!(&mem[message_schedule_start+256..message_schedule_start+512]); Ok(()) } From 5b7575684ccd41ecb18b0798ae54ee7ad6a29fb8 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 22 Aug 2022 16:23:30 -0700 Subject: [PATCH 064/104] fixes --- evm/src/cpu/kernel/asm/sha2/sha2.asm | 451 +++++++++++++++++---------- 1 file changed, 281 insertions(+), 170 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm index 09078488..8df532cb 100644 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ b/evm/src/cpu/kernel/asm/sha2/sha2.asm @@ -377,237 +377,316 @@ global sha2_compression: // stack: 0, message_schedule_addr, i=0, retdest %mload_kernel_general // stack: num_blocks, message_schedule_addr, i=0, retdest + dup1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + swap1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(28) %mload_kernel_code_u32 - // stack: h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(24) %mload_kernel_code_u32 - // stack: g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(20) %mload_kernel_code_u32 - // stack: f[0], g[0], 
h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(16) %mload_kernel_code_u32 - // stack: e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(12) %mload_kernel_code_u32 - // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(8) %mload_kernel_code_u32 - // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %add_const(4) %mload_kernel_code_u32 - // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest push sha2_constants_h %mload_kernel_code_u32 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, message_schedule_addr, i=0, retdest + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + dup10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup2 + dup2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, 
message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup3 + dup2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup4 + dup2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup5 + dup2 + // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup6 + dup2 + // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr,
message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup7 + dup2 + // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup8 + dup2 + // stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup9 + dup2 + // stack: scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + pop + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_loop:
JUMPDEST - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup10 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup12 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - add - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup13 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - add - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup10 - // stack: e[i], f[i], 
g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup4 - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - dup3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - swap2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, message_schedule_addr, i, retdest - %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, message_schedule_addr, i, retdest - swap1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest - swap5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest - pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, message_schedule_addr, i, retdest - swap8 - // stack: h[i], b[i+1], c[i+1], d[i+1], 
e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, message_schedule_addr, i, retdest - pop - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, message_schedule_addr, i, retdest - swap7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, message_schedule_addr, i, retdest - swap1 - swap7 - swap1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, message_schedule_addr, i, retdest - swap2 - swap7 - swap2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, message_schedule_addr, i, retdest - swap3 - swap7 - swap3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, message_schedule_addr, i, retdest - swap4 - swap7 - swap4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, message_schedule_addr, i, retdest - swap5 - swap7 - swap5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, message_schedule_addr, i, retdest - swap6 - swap7 - swap6 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest dup11 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - dup1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - %eq_const(64) - // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - dup1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], 
g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - dup12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - sub - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr, i, retdest - swap12 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup3 + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], 
d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + swap7 + swap1 + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + swap7 + swap2 + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + swap7 + swap3 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest + swap4 + swap7 + swap4 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + swap7 + swap5 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + swap7 + swap6 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + sub + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap1 + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], 
d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest push 256 mul - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest add - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, num_blocks new, i, retdest - swap11 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, message_schedule_addr new, i, retdest + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest swap10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest pop - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, new_retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest push 64 swap1 mod - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, i, retdest - swap11 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, 
message_schedule_addr new, (i+1)%64, retdest + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + swap12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, (i+1)%64, retdest - dup11 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, message_schedule_addr new, (i+1)%64, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + dup12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + //dup10 + //iszero + //dup2 + //iszero + //and + //%jumpi(sha2_stop_lol) iszero %jumpi(sha2_compression_end_block) %jump(sha2_compression_loop) sha2_compression_end_block: JUMPDEST - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h - %mload_kernel_code_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], 
g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(4) - %mload_kernel_code_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(8) - %mload_kernel_code_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest swap3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(12) - %mload_kernel_code_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(16) - %mload_kernel_code_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest swap5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(20) - %mload_kernel_code_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(24) - %mload_kernel_code_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, message_schedule_addr, i, retdest + // stack: g[0]+g[64], a[0]+a[64], 
b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest - push sha2_constants_h + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 %add_const(28) - %mload_kernel_code_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, message_schedule_addr, i, retdest + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest swap8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest dup1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest iszero %jumpi(sha2_compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], 
c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest // TODO: "insertion" macro for the below swap1 swap2 @@ -631,12 +710,44 @@ sha2_compression_end_block: swap8 swap7 swap8 + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + dup11 + %mstore_kernel_general_u32 + dup2 + dup11 + %add_const(4) + %mstore_kernel_general_u32 + dup3 + dup11 + %add_const(8) + %mstore_kernel_general_u32 + dup4 + dup11 + %add_const(12) + %mstore_kernel_general_u32 + dup5 + dup11 + %add_const(16) + %mstore_kernel_general_u32 + dup6 + dup11 + %add_const(20) + %mstore_kernel_general_u32 + dup7 + dup11 + %add_const(24) + %mstore_kernel_general_u32 + dup8 + dup11 + %add_const(28) + %mstore_kernel_general_u32 %jump(sha2_compression_loop) sha2_compression_end: JUMPDEST - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest pop - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], message_schedule_addr, i, retdest + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest %shl_const(32) or %shl_const(32) @@ -651,10 +762,10 @@ sha2_compression_end: or %shl_const(32) or - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), message_schedule_addr, i, retdest - swap2 - 
// stack: i, num_blocks, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop2 + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest STOP From db718900450c4411028bee49a1db5032c0d2647e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 14:59:29 -0700 Subject: [PATCH 065/104] randomized Sha2 test --- evm/src/cpu/kernel/tests/sha2.rs | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index ca28325d..f9f820e9 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use anyhow::Result; use ascii::AsciiStr; use ethereum_types::U256; +use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use sha2::{Digest, Sha256}; @@ -16,28 +17,16 @@ fn test_sha2() -> Result<()> { let sha2 = kernel.global_labels["sha2"]; let mut rng = thread_rng(); - let num_bytes = rng.gen_range(1..17); - let mut bytes: Vec = Vec::with_capacity(num_bytes); - for _ in 0..num_bytes { - let byte: u8 = rng.gen(); - let mut v = vec![0; 31]; - v.push(byte); - let v2: [u8; 32] = v.try_into().unwrap(); - bytes.push(U256::from(v2)); - } - dbg!(num_bytes); - dbg!(bytes.clone()); - - let message = "blargh blargh blargh blarh blargh blargh blargh blarghooo"; - let num_bytes = message.len(); + let num_bytes = rng.gen_range(1..10000); + let message: String = 
rng.sample_iter(&Alphanumeric).take(num_bytes).map(char::from).collect(); dbg!(num_bytes); let mut hasher = Sha256::new(); - hasher.update(message); + hasher.update(message.clone()); let expected = format!("{:02X}", hasher.finalize()); - let bytes: Vec = AsciiStr::from_ascii(message) + let bytes: Vec = AsciiStr::from_ascii(&message) .unwrap() .as_bytes() .iter() From 0394fa3fba9ae7a7cd7bfd527351de822c057095 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 15:22:00 -0700 Subject: [PATCH 066/104] cleaned up test --- evm/src/cpu/kernel/aggregator.rs | 4 +- evm/src/cpu/kernel/asm/sha2/sha2.asm | 774 --------------------------- evm/src/cpu/kernel/tests/sha2.rs | 13 - 3 files changed, 3 insertions(+), 788 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/sha2/sha2.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 20c659ed..12b27183 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -44,7 +44,9 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/sha2/constants.asm"), include_str!("asm/sha2/memory.asm"), include_str!("asm/sha2/ops.asm"), - include_str!("asm/sha2/sha2.asm"), + include_str!("asm/sha2/sha2_compression.asm"), + include_str!("asm/sha2/sha2_message_schedule.asm"), + include_str!("asm/sha2/sha2_store_pad.asm"), include_str!("asm/sha2/temp_words.asm"), include_str!("asm/sha2/util.asm"), include_str!("asm/sha2/write_length.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2/sha2.asm b/evm/src/cpu/kernel/asm/sha2/sha2.asm deleted file mode 100644 index 8df532cb..00000000 --- a/evm/src/cpu/kernel/asm/sha2/sha2.asm +++ /dev/null @@ -1,774 +0,0 @@ -global sha2_store: - JUMPDEST - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - dup1 - // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 0 - // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - %mstore_kernel_general - // stack: 
num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest -sha2_store_loop: - JUMPDEST - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - dup1 - // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - swap3 - // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest - swap1 - // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest - %mstore_kernel_general - // stack: counter, addr, ... , x[num_bytes-1], retdest - %decrement - // stack: counter-1, addr, ... , x[num_bytes-1], retdest - dup1 - // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest - iszero - %jumpi(sha2_store_end) - // stack: counter-1, addr, ... , x[num_bytes-1], retdest - swap1 - // stack: addr, counter-1, ... , x[num_bytes-1], retdest - %increment - // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest - %jump(sha2_store_loop) -sha2_store_end: - JUMPDEST - // stack: counter=0, addr, retdest - %pop2 - // stack: retdest - %jump(sha2_pad) - -// Precodition: input is in memory, starting at 0 of kernel general segment, of the form -// num_bytes, x[0], x[1], ..., x[num_bytes - 1] -// Postcodition: output is in memory, starting at 0, of the form -// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -global sha2_pad: - JUMPDEST - // stack: retdest - push 0 - %mload_kernel_general - // stack: num_bytes, retdest - // STEP 1: append 1 - // insert 128 (= 1 << 7) at x[num_bytes+1] - // stack: num_bytes, retdest - push 1 - push 7 - shl - // stack: 128, num_bytes, retdest - dup2 - // stack: num_bytes, 128, num_bytes, retdest - %increment - // stack: num_bytes+1, 128, num_bytes, retdest - %mstore_kernel_general - // stack: num_bytes, retdest - // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - dup1 - // stack: num_bytes, num_bytes, retdest - %add_const(8) - 
%div_const(64) - - %increment - // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest - // STEP 3: calculate length := num_bytes*8 - swap1 - // stack: num_bytes, num_blocks, retdest - push 8 - mul - // stack: length = num_bytes*8, num_blocks, retdest - // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - dup2 - // stack: num_blocks, length, num_blocks, retdest - push 64 - mul - // stack: last_addr = num_blocks*64, length, num_blocks, retdest - %sha2_write_length - // stack: num_blocks, retdest - dup1 - // stack: num_blocks, num_blocks, retdest - // STEP 5: write num_blocks to x[0] - push 0 - %mstore_kernel_general - // stack: num_blocks, retdest - %message_schedule_addr_from_num_blocks - %jump(sha2_gen_all_message_schedules) - -// Precodition: stack contains address of one message block, followed by output address -// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks -// of message schedule (in four-byte increments) -global sha2_gen_message_schedule_from_block: - JUMPDEST - // stack: block_addr, output_addr, retdest - dup1 - // stack: block_addr, block_addr, output_addr, retdest - %add_const(32) - // stack: block_addr + 32, block_addr, output_addr, retdest - swap1 - // stack: block_addr, block_addr + 32, output_addr, retdest - %mload_kernel_general_u256 - // stack: block[0], block_addr + 32, output_addr, retdest - swap1 - // stack: block_addr + 32, block[0], output_addr, retdest - %mload_kernel_general_u256 - // stack: block[1], block[0], output_addr, retdest - swap2 - // stack: output_addr, block[0], block[1], retdest - %add_const(28) - push 8 - // stack: counter=8, output_addr + 28, block[0], block[1], retdest - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_loop: - JUMPDEST - // stack: counter, output_addr, block[0], block[1], retdest - swap2 - // stack: block[0], output_addr, counter, block[1], retdest - push 1 - push 32 - shl - // stack: 1 << 32, block[0], 
output_addr, counter, block[1], retdest - dup2 - dup2 - // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest - swap1 - // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest - mod - // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest - swap2 - // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - div - // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - swap1 - // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - dup3 - // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - %mstore_kernel_general_u32 - // stack: block[0] >> 32, output_addr, counter, block[1], retdest - swap1 - // stack: output_addr, block[0] >> 32, counter, block[1], retdest - %sub_const(4) - // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - swap1 - // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest - swap2 - // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest - %decrement - dup1 - iszero - %jumpi(sha2_gen_message_schedule_from_block_0_end) - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_end: - JUMPDEST - // stack: old counter=0, output_addr, block[0], block[1], retdest - pop - push 8 - // stack: counter=8, output_addr, block[0], block[1], retdest - swap2 - // stack: block[0], output_addr, counter, block[1], retdest - swap3 - // stack: block[1], output_addr, counter, block[0], retdest - swap2 - // stack: counter, output_addr, block[1], block[0], retdest - swap1 - // stack: output_addr, counter, block[1], block[0], retdest - %add_const(64) - // stack: output_addr + 64, counter, block[1], block[0], retdest - swap1 - // stack: counter, output_addr + 64, block[1], block[0], retdest -sha2_gen_message_schedule_from_block_1_loop: - 
JUMPDEST - // stack: counter, output_addr, block[1], block[0], retdest - swap2 - // stack: block[1], output_addr, counter, block[0], retdest - push 1 - push 32 - shl - // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest - dup2 - dup2 - // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest - swap1 - // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest - mod - // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest - swap2 - // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - div - // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - swap1 - // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - dup3 - // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - %mstore_kernel_general_u32 - // stack: block[1] >> 32, output_addr, counter, block[0], retdest - swap1 - // stack: output_addr, block[1] >> 32, counter, block[0], retdest - %sub_const(4) - // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - swap1 - // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest - swap2 - // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest - %decrement - dup1 - iszero - %jumpi(sha2_gen_message_schedule_from_block_1_end) - %jump(sha2_gen_message_schedule_from_block_1_loop) -sha2_gen_message_schedule_from_block_1_end: - JUMPDEST - // stack: old counter=0, output_addr, block[1], block[0], retdest - pop - // stack: output_addr, block[0], block[1], retdest - push 48 - // stack: counter=48, output_addr, block[0], block[1], retdest - swap1 - // stack: output_addr, counter, block[0], block[1], retdest - %add_const(36) - // stack: output_addr + 36, counter, block[0], block[1], retdest - swap1 - // stack: counter, output_addr + 36, block[0], block[1], retdest 
-sha2_gen_message_schedule_remaining_loop: - JUMPDEST - // stack: counter, output_addr, block[0], block[1], retdest - swap1 - // stack: output_addr, counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, counter, block[0], block[1], retdest - push 2 - push 4 - mul - swap1 - sub - // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest - %sha2_sigma_1 - // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 7 - push 4 - mul - swap1 - sub - // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 15 - push 4 - mul - swap1 - sub - // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %sha2_sigma_0 - // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), 
counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 16 - push 4 - mul - swap1 - sub - // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap4 - // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %add_u32 - %add_u32 - %add_u32 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - swap2 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %mstore_kernel_general_u32 - // stack: output_addr, counter, 
block[0], block[1], retdest - %add_const(4) - // stack: output_addr + 4, counter, block[0], block[1], retdest - swap1 - // stack: counter, output_addr + 4, block[0], block[1], retdest - %decrement - // stack: counter - 1, output_addr + 4, block[0], block[1], retdest - dup1 - iszero - %jumpi(sha2_gen_message_schedule_remaining_end) - %jump(sha2_gen_message_schedule_remaining_loop) -sha2_gen_message_schedule_remaining_end: - JUMPDEST - // stack: counter=0, output_addr, block[0], block[1], retdest - %pop4 - JUMP - -// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -// stack contains output_addr -// Postcondition: starting at output_addr, set of 256 bytes per block -// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) -global sha2_gen_all_message_schedules: - JUMPDEST - // stack: output_addr, retdest - dup1 - // stack: output_addr, output_addr, retdest - push 0 - // stack: 0, output_addr, output_addr, retdest - %mload_kernel_general - // stack: num_blocks, output_addr, output_addr, retdest - push 1 - // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest -sha2_gen_all_message_schedules_loop: - JUMPDEST - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - push sha2_gen_all_message_schedules_loop_end - // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest - dup4 - // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - dup3 - // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - %jump(sha2_gen_message_schedule_from_block) -sha2_gen_all_message_schedules_loop_end: - JUMPDEST - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - %add_const(64) - // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest - swap1 - 
%decrement - swap1 - // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest - swap2 - %add_const(256) - swap2 - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - dup2 - // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - iszero - %jumpi(sha2_gen_all_message_schedules_end) - %jump(sha2_gen_all_message_schedules_loop) - JUMPDEST -sha2_gen_all_message_schedules_end: - JUMPDEST - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - %pop3 - // stack: output_addr, retdest - %jump(sha2_compression) - -global sha2_compression: - JUMPDEST - // stack: message_schedule_addr, retdest - push 0 - // stack: i=0, message_schedule_addr, retdest - swap1 - // stack: message_schedule_addr, i=0, retdest - push 0 - // stack: 0, message_schedule_addr, i=0, retdest - %mload_kernel_general - // stack: num_blocks, message_schedule_addr, i=0, retdest - dup1 - // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest - %scratch_space_addr_from_num_blocks - // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - swap1 - // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(28) - %mload_kernel_code_u32 - // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(24) - %mload_kernel_code_u32 - // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(20) - %mload_kernel_code_u32 - // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(16) - %mload_kernel_code_u32 - // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(12) - %mload_kernel_code_u32 - // stack: d[0], e[0], f[0], g[0], 
h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(8) - %mload_kernel_code_u32 - // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(4) - %mload_kernel_code_u32 - // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %mload_kernel_code_u32 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup10 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup2 - dup2 - // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup3 - dup2 - // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup4 - dup2 - // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, 
scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup5 - dup2 - // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup6 - dup2 - // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup7 - dup2 - // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, 
message_schedule_addr, i=0, retdest - - dup8 - dup2 - // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup9 - dup2 - // stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - pop - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -sha2_compression_loop: - JUMPDEST - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup11 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup13 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], 
num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - push sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup14 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 - 
// stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 - // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], 
g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - swap7 - swap1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - swap7 - swap2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 - swap7 - swap3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - swap7 - swap4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - swap7 - swap5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap6 - swap7 - swap6 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %eq_const(64) - // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], 
f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - sub - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap13 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - push 256 - mul - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - add - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap12 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest - swap10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - pop - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - push 64 - swap1 - mod - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - swap12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, 
message_schedule_addr new, (i+1)%64, retdest - dup12 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - //dup10 - //iszero - //dup2 - //iszero - //and - //%jumpi(sha2_stop_lol) - iszero - %jumpi(sha2_compression_end_block) - %jump(sha2_compression_loop) -sha2_compression_end_block: - JUMPDEST - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(4) - %mload_kernel_general_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(8) - %mload_kernel_general_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest - swap3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(12) - %mload_kernel_general_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(16) - %mload_kernel_general_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(20) - %mload_kernel_general_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(24) - %mload_kernel_general_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], 
f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(28) - %mload_kernel_general_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - iszero - %jumpi(sha2_compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - // TODO: "insertion" macro for the below - swap1 - swap2 - swap1 - swap2 - swap3 - swap2 - swap3 - swap4 - swap3 - swap4 - swap5 - swap4 - swap5 - swap6 - swap5 - swap6 - swap7 - swap6 - swap7 - swap8 - swap7 - swap8 - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 - dup11 - %mstore_kernel_general_u32 - dup2 - dup11 - %add_const(4) - %mstore_kernel_general_u32 - dup3 - dup11 - %add_const(8) - 
%mstore_kernel_general_u32 - dup4 - dup11 - %add_const(12) - %mstore_kernel_general_u32 - dup5 - dup11 - %add_const(16) - %mstore_kernel_general_u32 - dup6 - dup11 - %add_const(20) - %mstore_kernel_general_u32 - dup7 - dup11 - %add_const(24) - %mstore_kernel_general_u32 - dup8 - dup11 - %add_const(28) - %mstore_kernel_general_u32 - %jump(sha2_compression_loop) -sha2_compression_end: - JUMPDEST - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - swap3 - // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop3 - // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - STOP - -global sha2: - JUMPDEST - %jump(sha2_store) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index f9f820e9..b6ff6e8b 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -54,18 +54,5 @@ fn test_sha2() -> Result<()> { dbg!(expected); dbg!(actual); - // assert_eq!(expected, actual); - - let memory_after_storing = after_sha2.memory; - let mem = memory_after_storing.context_memory[0].segments[Segment::KernelGeneral as usize] - .content - .clone(); - // dbg!(&mem[0..65]); - - let num_blocks = (num_bytes+8)/64 + 1; - let 
message_schedule_start = 64 * num_blocks + 2; - // dbg!(&mem[message_schedule_start..message_schedule_start+256]); - // dbg!(&mem[message_schedule_start+256..message_schedule_start+512]); - Ok(()) } From 4b2f1a484e51bd53d406753fb6d73f74091062b3 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 15:22:32 -0700 Subject: [PATCH 067/104] split up sha2.asm file --- .../cpu/kernel/asm/sha2/sha2_compression.asm | 402 ++++++++++++++++++ .../kernel/asm/sha2/sha2_message_schedule.asm | 272 ++++++++++++ .../cpu/kernel/asm/sha2/sha2_store_pad.asm | 98 +++++ evm/src/cpu/kernel/asm/sha2/util.asm | 11 + 4 files changed, 783 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_compression.asm create mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm create mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm create mode 100644 evm/src/cpu/kernel/asm/sha2/util.asm diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm b/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm new file mode 100644 index 00000000..6fe1a990 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm @@ -0,0 +1,402 @@ +global sha2_compression: + JUMPDEST + // stack: message_schedule_addr, retdest + push 0 + // stack: i=0, message_schedule_addr, retdest + swap1 + // stack: message_schedule_addr, i=0, retdest + push 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest + dup1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + swap1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(28) + %mload_kernel_code_u32 + // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(24) + %mload_kernel_code_u32 + 
// stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(20) + %mload_kernel_code_u32 + // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(16) + %mload_kernel_code_u32 + // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(12) + %mload_kernel_code_u32 + // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(8) + %mload_kernel_code_u32 + // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %add_const(4) + %mload_kernel_code_u32 + // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + push sha2_constants_h + %mload_kernel_code_u32 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + dup10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup2 + dup2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup3 + dup2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, 
message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup4 + dup2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup5 + dup2 + // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup6 + dup2 + // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, 
retdest + + dup7 + dup2 + // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup8 + dup2 + // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + dup9 + dup2 + // stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + pop + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +sha2_compression_loop: + JUMPDEST + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup11 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], 
f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + push sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + add + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup3 + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + 
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + swap7 + swap1 + // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap2 + swap7 + swap2 + // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + swap7 + swap3 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + swap7 + swap4 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + swap7 + swap5 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + swap7 + swap6 + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], 
h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + sub + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap1 + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + push 256 + mul + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + add + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + swap12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + swap10 + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + pop + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, 
message_schedule_addr new, i, new_retdest + push 64 + swap1 + mod + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + swap12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + pop + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + dup12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + //dup10 + //iszero + //dup2 + //iszero + //and + //%jumpi(sha2_stop_lol) + iszero + %jumpi(sha2_compression_end_block) + %jump(sha2_compression_loop) +sha2_compression_end_block: + JUMPDEST + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(4) + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest + swap2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(8) + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(12) + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(16) + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(20) + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(24) + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup10 + %add_const(28) + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + swap8 + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + dup1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + iszero + %jumpi(sha2_compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // TODO: "insertion" macro for the 
below + swap1 + swap2 + swap1 + swap2 + swap3 + swap2 + swap3 + swap4 + swap3 + swap4 + swap5 + swap4 + swap5 + swap6 + swap5 + swap6 + swap7 + swap6 + swap7 + swap8 + swap7 + swap8 + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + dup1 + dup11 + %mstore_kernel_general_u32 + dup2 + dup11 + %add_const(4) + %mstore_kernel_general_u32 + dup3 + dup11 + %add_const(8) + %mstore_kernel_general_u32 + dup4 + dup11 + %add_const(12) + %mstore_kernel_general_u32 + dup5 + dup11 + %add_const(16) + %mstore_kernel_general_u32 + dup6 + dup11 + %add_const(20) + %mstore_kernel_general_u32 + dup7 + dup11 + %add_const(24) + %mstore_kernel_general_u32 + dup8 + dup11 + %add_const(28) + %mstore_kernel_general_u32 + %jump(sha2_compression_loop) +sha2_compression_end: + JUMPDEST + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + pop + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + %shl_const(32) + or + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + swap3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + STOP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm 
b/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm new file mode 100644 index 00000000..96a31b38 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm @@ -0,0 +1,272 @@ +// Precodition: stack contains address of one message block, followed by output address +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks +// of message schedule (in four-byte increments) +global sha2_gen_message_schedule_from_block: + JUMPDEST + // stack: block_addr, output_addr, retdest + dup1 + // stack: block_addr, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest + swap1 + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest + swap1 + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 + // stack: block[1], block[0], output_addr, retdest + swap2 + // stack: output_addr, block[0], block[1], retdest + %add_const(28) + push 8 + // stack: counter=8, output_addr + 28, block[0], block[1], retdest + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_loop: + JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest + dup2 + dup2 + // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest + swap1 + // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest + mod + // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest + swap2 + // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + div + // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + swap1 + // stack: block[0] 
% (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + dup3 + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + %mstore_kernel_general_u32 + // stack: block[0] >> 32, output_addr, counter, block[1], retdest + swap1 + // stack: output_addr, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest + swap1 + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + swap2 + // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_0_end) + %jump(sha2_gen_message_schedule_from_block_0_loop) +sha2_gen_message_schedule_from_block_0_end: + JUMPDEST + // stack: old counter=0, output_addr, block[0], block[1], retdest + pop + push 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + swap2 + // stack: block[0], output_addr, counter, block[1], retdest + swap3 + // stack: block[1], output_addr, counter, block[0], retdest + swap2 + // stack: counter, output_addr, block[1], block[0], retdest + swap1 + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + swap1 + // stack: counter, output_addr + 64, block[1], block[0], retdest +sha2_gen_message_schedule_from_block_1_loop: + JUMPDEST + // stack: counter, output_addr, block[1], block[0], retdest + swap2 + // stack: block[1], output_addr, counter, block[0], retdest + push 1 + push 32 + shl + // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest + dup2 + dup2 + // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest + swap1 + // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest + mod + // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest + swap2 + // stack: block[1], 1 << 
32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + div + // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + swap1 + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + dup3 + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + %mstore_kernel_general_u32 + // stack: block[1] >> 32, output_addr, counter, block[0], retdest + swap1 + // stack: output_addr, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest + swap1 + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + swap2 + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest + %decrement + dup1 + iszero + %jumpi(sha2_gen_message_schedule_from_block_1_end) + %jump(sha2_gen_message_schedule_from_block_1_loop) +sha2_gen_message_schedule_from_block_1_end: + JUMPDEST + // stack: old counter=0, output_addr, block[1], block[0], retdest + pop + // stack: output_addr, block[0], block[1], retdest + push 48 + // stack: counter=48, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 36, block[0], block[1], retdest +sha2_gen_message_schedule_remaining_loop: + JUMPDEST + // stack: counter, output_addr, block[0], block[1], retdest + swap1 + // stack: output_addr, counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, counter, block[0], block[1], retdest + push 2 + push 4 + mul + swap1 + sub + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest + %sha2_sigma_1 + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, 
block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 7 + push 4 + mul + swap1 + sub + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 15 + push 4 + mul + swap1 + sub + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %sha2_sigma_0 + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + push 16 + push 4 + mul + swap1 + sub + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, 
block[0], block[1], retdest + swap1 + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + swap4 + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %add_u32 + %add_u32 + %add_u32 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + dup1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest + swap2 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest + swap1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(4) + // stack: output_addr + 4, counter, block[0], block[1], retdest + swap1 + // stack: counter, output_addr + 4, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + dup1 + iszero + %jumpi(sha2_gen_message_schedule_remaining_end) + %jump(sha2_gen_message_schedule_remaining_loop) +sha2_gen_message_schedule_remaining_end: + JUMPDEST + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + JUMP + +// Precodition: memory, starting at 0, contains num_blocks, 
block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// stack contains output_addr +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) +global sha2_gen_all_message_schedules: + JUMPDEST + // stack: output_addr, retdest + dup1 + // stack: output_addr, output_addr, retdest + push 0 + // stack: 0, output_addr, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, output_addr, retdest + push 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest +sha2_gen_all_message_schedules_loop: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + push sha2_gen_all_message_schedules_loop_end + // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + dup4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + dup3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + %jump(sha2_gen_message_schedule_from_block) +sha2_gen_all_message_schedules_loop_end: + JUMPDEST + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest + swap1 + %decrement + swap1 + // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest + swap2 + %add_const(256) + swap2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + dup2 + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + iszero + %jumpi(sha2_gen_all_message_schedules_end) + %jump(sha2_gen_all_message_schedules_loop) + JUMPDEST +sha2_gen_all_message_schedules_end: + JUMPDEST + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + %pop3 + // stack: 
output_addr, retdest + %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm b/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm new file mode 100644 index 00000000..d27ebaf8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm @@ -0,0 +1,98 @@ +global sha2_store: + JUMPDEST + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + dup1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + push 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +sha2_store_loop: + JUMPDEST + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + dup1 + // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + swap3 + // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest + swap1 + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... , x[num_bytes-1], retdest + dup1 + // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest + iszero + %jumpi(sha2_store_end) + // stack: counter-1, addr, ... , x[num_bytes-1], retdest + swap1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest + %jump(sha2_store_loop) +sha2_store_end: + JUMPDEST + // stack: counter=0, addr, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) + +// Precodition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcodition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +global sha2_pad: + JUMPDEST + // stack: retdest + push 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + push 1 + push 7 + shl + // stack: 128, num_bytes, retdest + dup2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + dup1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + swap1 + // stack: num_bytes, num_blocks, retdest + push 8 + mul + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + dup2 + // stack: num_blocks, length, num_blocks, retdest + push 64 + mul + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + dup1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + push 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) + +global sha2: + JUMPDEST + %jump(sha2_store) diff --git a/evm/src/cpu/kernel/asm/sha2/util.asm b/evm/src/cpu/kernel/asm/sha2/util.asm new file mode 100644 index 00000000..f3f5b54c --- /dev/null +++ 
b/evm/src/cpu/kernel/asm/sha2/util.asm @@ -0,0 +1,11 @@ +%macro message_schedule_addr_from_num_blocks + // stack: num_blocks + %mul_const(64) + %add_const(2) +%endmacro + +%macro scratch_space_addr_from_num_blocks + // stack: num_blocks + %mul_const(320) + %add_const(2) +%endmacro \ No newline at end of file From fee0963eca80104f12bd12e3486ae72b87d714ca Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 15:22:40 -0700 Subject: [PATCH 068/104] cleanup --- evm/src/cpu/kernel/tests/sha2.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index b6ff6e8b..8d011a8c 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -9,7 +9,6 @@ use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run; -use crate::memory::segments::Segment; #[test] fn test_sha2() -> Result<()> { From f3e48dcbfa8419771014f309d53b9043fa9a7782 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 15:22:48 -0700 Subject: [PATCH 069/104] fmt --- evm/src/cpu/kernel/tests/sha2.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 8d011a8c..6fed346e 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -18,7 +18,11 @@ fn test_sha2() -> Result<()> { let mut rng = thread_rng(); let num_bytes = rng.gen_range(1..10000); - let message: String = rng.sample_iter(&Alphanumeric).take(num_bytes).map(char::from).collect(); + let message: String = rng + .sample_iter(&Alphanumeric) + .take(num_bytes) + .map(char::from) + .collect(); dbg!(num_bytes); let mut hasher = Sha256::new(); From 9f923f7b6d564cb170ce3461cc948932ba9367c3 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 23 Aug 2022 15:27:20 -0700 Subject: [PATCH 070/104] cleanup --- evm/src/cpu/kernel/tests/sha2.rs | 16 +++++++--------- 1 
file changed, 7 insertions(+), 9 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 6fed346e..50bfd2c9 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -36,23 +36,21 @@ fn test_sha2() -> Result<()> { .map(|&x| U256::from(x as u32)) .collect(); - let mut store_initial_stack = vec![U256::from(num_bytes)]; - store_initial_stack.extend(bytes); - store_initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); - store_initial_stack.reverse(); + let mut initial_stack = vec![U256::from(num_bytes)]; + initial_stack.extend(bytes); + initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); + initial_stack.reverse(); let after_sha2 = run( &kernel.code, sha2, - store_initial_stack, + initial_stack, &kernel.prover_inputs, )?; - let stack_after_storing = after_sha2.stack(); + let stack_after_sha2 = after_sha2.stack(); - dbg!(stack_after_storing.clone()); - - let result = stack_after_storing.clone()[1]; + let result = stack_after_sha2.clone()[1]; let actual = format!("{:02X}", result); dbg!(expected); dbg!(actual); From e652ef922b09c540c3a4e92f78483224fceee6f7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 08:40:38 -0700 Subject: [PATCH 071/104] simplification and documentation --- evm/src/cpu/kernel/aggregator.rs | 6 +- .../cpu/kernel/asm/sha2/sha2_compression.asm | 402 ------------------ .../kernel/asm/sha2/sha2_message_schedule.asm | 272 ------------ .../cpu/kernel/asm/sha2/sha2_store_pad.asm | 98 ----- 4 files changed, 3 insertions(+), 775 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_compression.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 12b27183..840ff6be 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -41,12 +41,12 @@ 
pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), include_str!("asm/exp.asm"), + include_str!("asm/sha2/compression.asm"), include_str!("asm/sha2/constants.asm"), include_str!("asm/sha2/memory.asm"), + include_str!("asm/sha2/message_schedule.asm"), include_str!("asm/sha2/ops.asm"), - include_str!("asm/sha2/sha2_compression.asm"), - include_str!("asm/sha2/sha2_message_schedule.asm"), - include_str!("asm/sha2/sha2_store_pad.asm"), + include_str!("asm/sha2/store_pad.asm"), include_str!("asm/sha2/temp_words.asm"), include_str!("asm/sha2/util.asm"), include_str!("asm/sha2/write_length.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm b/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm deleted file mode 100644 index 6fe1a990..00000000 --- a/evm/src/cpu/kernel/asm/sha2/sha2_compression.asm +++ /dev/null @@ -1,402 +0,0 @@ -global sha2_compression: - JUMPDEST - // stack: message_schedule_addr, retdest - push 0 - // stack: i=0, message_schedule_addr, retdest - swap1 - // stack: message_schedule_addr, i=0, retdest - push 0 - // stack: 0, message_schedule_addr, i=0, retdest - %mload_kernel_general - // stack: num_blocks, message_schedule_addr, i=0, retdest - dup1 - // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest - %scratch_space_addr_from_num_blocks - // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - swap1 - // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(28) - %mload_kernel_code_u32 - // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(24) - %mload_kernel_code_u32 - // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(20) - %mload_kernel_code_u32 - // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, 
message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(16) - %mload_kernel_code_u32 - // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(12) - %mload_kernel_code_u32 - // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(8) - %mload_kernel_code_u32 - // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %add_const(4) - %mload_kernel_code_u32 - // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h - %mload_kernel_code_u32 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup10 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup2 - dup2 - // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup3 - dup2 - // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - 
%add_const(4) - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup4 - dup2 - // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup5 - dup2 - // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup6 - dup2 - // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup7 - dup2 - // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - 
%mstore_kernel_general_u32 - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup8 - dup2 - // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - dup9 - dup2 - // stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - pop - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -sha2_compression_loop: - JUMPDEST - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup11 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup13 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest - add - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - push sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup14 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest - dup4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 - // stack: h[i], b[i+1], c[i+1], d[i+1], 
e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - swap7 - swap1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - swap7 - swap2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 - swap7 - swap3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - swap7 - swap4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - swap7 - swap5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap6 - swap7 - swap6 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %eq_const(64) - // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - sub - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap13 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - push 256 - mul - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - add - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap12 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest - swap10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - pop - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - push 64 - swap1 - mod - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, 
message_schedule_addr new, i, retdest - swap12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - pop - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - dup12 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - //dup10 - //iszero - //dup2 - //iszero - //and - //%jumpi(sha2_stop_lol) - iszero - %jumpi(sha2_compression_end_block) - %jump(sha2_compression_loop) -sha2_compression_end_block: - JUMPDEST - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(4) - %mload_kernel_general_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest - dup10 - %add_const(8) - %mload_kernel_general_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(12) - %mload_kernel_general_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(16) - %mload_kernel_general_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(20) - %mload_kernel_general_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest - swap6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(24) - %mload_kernel_general_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 - %add_const(28) - %mload_kernel_general_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - dup1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - iszero - %jumpi(sha2_compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - // TODO: "insertion" macro for the below - swap1 - swap2 - swap1 - swap2 - swap3 - swap2 - swap3 - swap4 - swap3 - swap4 - swap5 - swap4 - swap5 - swap6 - swap5 - swap6 - swap7 - swap6 - swap7 - swap8 - swap7 - swap8 - // stack: 
a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 - dup11 - %mstore_kernel_general_u32 - dup2 - dup11 - %add_const(4) - %mstore_kernel_general_u32 - dup3 - dup11 - %add_const(8) - %mstore_kernel_general_u32 - dup4 - dup11 - %add_const(12) - %mstore_kernel_general_u32 - dup5 - dup11 - %add_const(16) - %mstore_kernel_general_u32 - dup6 - dup11 - %add_const(20) - %mstore_kernel_general_u32 - dup7 - dup11 - %add_const(24) - %mstore_kernel_general_u32 - dup8 - dup11 - %add_const(28) - %mstore_kernel_general_u32 - %jump(sha2_compression_loop) -sha2_compression_end: - JUMPDEST - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - pop - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - %shl_const(32) - or - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - swap3 - // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop3 - // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - STOP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm deleted file mode 100644 index 96a31b38..00000000 --- a/evm/src/cpu/kernel/asm/sha2/sha2_message_schedule.asm +++ /dev/null @@ -1,272 +0,0 @@ -// 
Precodition: stack contains address of one message block, followed by output address -// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks -// of message schedule (in four-byte increments) -global sha2_gen_message_schedule_from_block: - JUMPDEST - // stack: block_addr, output_addr, retdest - dup1 - // stack: block_addr, block_addr, output_addr, retdest - %add_const(32) - // stack: block_addr + 32, block_addr, output_addr, retdest - swap1 - // stack: block_addr, block_addr + 32, output_addr, retdest - %mload_kernel_general_u256 - // stack: block[0], block_addr + 32, output_addr, retdest - swap1 - // stack: block_addr + 32, block[0], output_addr, retdest - %mload_kernel_general_u256 - // stack: block[1], block[0], output_addr, retdest - swap2 - // stack: output_addr, block[0], block[1], retdest - %add_const(28) - push 8 - // stack: counter=8, output_addr + 28, block[0], block[1], retdest - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_loop: - JUMPDEST - // stack: counter, output_addr, block[0], block[1], retdest - swap2 - // stack: block[0], output_addr, counter, block[1], retdest - push 1 - push 32 - shl - // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest - dup2 - dup2 - // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest - swap1 - // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest - mod - // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest - swap2 - // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - div - // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - swap1 - // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - dup3 - // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - 
%mstore_kernel_general_u32 - // stack: block[0] >> 32, output_addr, counter, block[1], retdest - swap1 - // stack: output_addr, block[0] >> 32, counter, block[1], retdest - %sub_const(4) - // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - swap1 - // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest - swap2 - // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest - %decrement - dup1 - iszero - %jumpi(sha2_gen_message_schedule_from_block_0_end) - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_end: - JUMPDEST - // stack: old counter=0, output_addr, block[0], block[1], retdest - pop - push 8 - // stack: counter=8, output_addr, block[0], block[1], retdest - swap2 - // stack: block[0], output_addr, counter, block[1], retdest - swap3 - // stack: block[1], output_addr, counter, block[0], retdest - swap2 - // stack: counter, output_addr, block[1], block[0], retdest - swap1 - // stack: output_addr, counter, block[1], block[0], retdest - %add_const(64) - // stack: output_addr + 64, counter, block[1], block[0], retdest - swap1 - // stack: counter, output_addr + 64, block[1], block[0], retdest -sha2_gen_message_schedule_from_block_1_loop: - JUMPDEST - // stack: counter, output_addr, block[1], block[0], retdest - swap2 - // stack: block[1], output_addr, counter, block[0], retdest - push 1 - push 32 - shl - // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest - dup2 - dup2 - // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest - swap1 - // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest - mod - // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest - swap2 - // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - div - // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - swap1 - // stack: 
block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - dup3 - // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - %mstore_kernel_general_u32 - // stack: block[1] >> 32, output_addr, counter, block[0], retdest - swap1 - // stack: output_addr, block[1] >> 32, counter, block[0], retdest - %sub_const(4) - // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - swap1 - // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest - swap2 - // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest - %decrement - dup1 - iszero - %jumpi(sha2_gen_message_schedule_from_block_1_end) - %jump(sha2_gen_message_schedule_from_block_1_loop) -sha2_gen_message_schedule_from_block_1_end: - JUMPDEST - // stack: old counter=0, output_addr, block[1], block[0], retdest - pop - // stack: output_addr, block[0], block[1], retdest - push 48 - // stack: counter=48, output_addr, block[0], block[1], retdest - swap1 - // stack: output_addr, counter, block[0], block[1], retdest - %add_const(36) - // stack: output_addr + 36, counter, block[0], block[1], retdest - swap1 - // stack: counter, output_addr + 36, block[0], block[1], retdest -sha2_gen_message_schedule_remaining_loop: - JUMPDEST - // stack: counter, output_addr, block[0], block[1], retdest - swap1 - // stack: output_addr, counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, counter, block[0], block[1], retdest - push 2 - push 4 - mul - swap1 - sub - // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest - %sha2_sigma_1 - // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, 
sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 7 - push 4 - mul - swap1 - sub - // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 15 - push 4 - mul - swap1 - sub - // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %sha2_sigma_0 - // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 16 - push 4 - mul - swap1 - sub - // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 - // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, 
block[0], block[1], retdest - swap4 - // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %add_u32 - %add_u32 - %add_u32 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - dup1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - swap2 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest - swap1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %mstore_kernel_general_u32 - // stack: output_addr, counter, block[0], block[1], retdest - %add_const(4) - // stack: output_addr + 4, counter, block[0], block[1], retdest - swap1 - // stack: counter, output_addr + 4, block[0], block[1], retdest - %decrement - // stack: counter - 1, output_addr + 4, block[0], block[1], retdest - dup1 - iszero - %jumpi(sha2_gen_message_schedule_remaining_end) - %jump(sha2_gen_message_schedule_remaining_loop) -sha2_gen_message_schedule_remaining_end: - JUMPDEST - // stack: counter=0, output_addr, block[0], block[1], retdest - %pop4 - JUMP - -// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -// stack contains output_addr -// Postcondition: starting at output_addr, set of 256 bytes per block -// each contains the 64 
32-bit chunks of the message schedule for that block (in four-byte increments) -global sha2_gen_all_message_schedules: - JUMPDEST - // stack: output_addr, retdest - dup1 - // stack: output_addr, output_addr, retdest - push 0 - // stack: 0, output_addr, output_addr, retdest - %mload_kernel_general - // stack: num_blocks, output_addr, output_addr, retdest - push 1 - // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest -sha2_gen_all_message_schedules_loop: - JUMPDEST - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - push sha2_gen_all_message_schedules_loop_end - // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest - dup4 - // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - dup3 - // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - %jump(sha2_gen_message_schedule_from_block) -sha2_gen_all_message_schedules_loop_end: - JUMPDEST - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - %add_const(64) - // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest - swap1 - %decrement - swap1 - // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest - swap2 - %add_const(256) - swap2 - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - dup2 - // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - iszero - %jumpi(sha2_gen_all_message_schedules_end) - %jump(sha2_gen_all_message_schedules_loop) - JUMPDEST -sha2_gen_all_message_schedules_end: - JUMPDEST - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - %pop3 - // stack: output_addr, retdest - %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm b/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm deleted file mode 100644 index 
d27ebaf8..00000000 --- a/evm/src/cpu/kernel/asm/sha2/sha2_store_pad.asm +++ /dev/null @@ -1,98 +0,0 @@ -global sha2_store: - JUMPDEST - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - dup1 - // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 0 - // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - %mstore_kernel_general - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest -sha2_store_loop: - JUMPDEST - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - dup1 - // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - swap3 - // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest - swap1 - // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest - %mstore_kernel_general - // stack: counter, addr, ... , x[num_bytes-1], retdest - %decrement - // stack: counter-1, addr, ... , x[num_bytes-1], retdest - dup1 - // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest - iszero - %jumpi(sha2_store_end) - // stack: counter-1, addr, ... , x[num_bytes-1], retdest - swap1 - // stack: addr, counter-1, ... , x[num_bytes-1], retdest - %increment - // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest - %jump(sha2_store_loop) -sha2_store_end: - JUMPDEST - // stack: counter=0, addr, retdest - %pop2 - // stack: retdest - %jump(sha2_pad) - -// Precodition: input is in memory, starting at 0 of kernel general segment, of the form -// num_bytes, x[0], x[1], ..., x[num_bytes - 1] -// Postcodition: output is in memory, starting at 0, of the form -// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -global sha2_pad: - JUMPDEST - // stack: retdest - push 0 - %mload_kernel_general - // stack: num_bytes, retdest - // STEP 1: append 1 - // insert 128 (= 1 << 7) at x[num_bytes+1] - // stack: num_bytes, retdest - push 1 - push 7 - shl - // stack: 128, num_bytes, retdest - dup2 - // stack: num_bytes, 128, num_bytes, retdest - %increment - // stack: num_bytes+1, 128, num_bytes, retdest - %mstore_kernel_general - // stack: num_bytes, retdest - // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - dup1 - // stack: num_bytes, num_bytes, retdest - %add_const(8) - %div_const(64) - - %increment - // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest - // STEP 3: calculate length := num_bytes*8 - swap1 - // stack: num_bytes, num_blocks, retdest - push 8 - mul - // stack: length = num_bytes*8, num_blocks, retdest - // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - dup2 - // stack: num_blocks, length, num_blocks, retdest - push 64 - mul - // stack: last_addr = num_blocks*64, length, num_blocks, retdest - %sha2_write_length - // stack: num_blocks, retdest - dup1 - // stack: num_blocks, num_blocks, retdest - // STEP 5: write num_blocks to x[0] - push 0 - %mstore_kernel_general - // stack: num_blocks, retdest - %message_schedule_addr_from_num_blocks - %jump(sha2_gen_all_message_schedules) - -global sha2: - JUMPDEST - %jump(sha2_store) From 0eab1a4b0fbcf972d5f7f59bec8531ce74e03800 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 08:42:23 -0700 Subject: [PATCH 072/104] fmt --- 
evm/src/cpu/kernel/tests/sha2.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 50bfd2c9..eeadb14b 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -41,12 +41,7 @@ fn test_sha2() -> Result<()> { initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); initial_stack.reverse(); - let after_sha2 = run( - &kernel.code, - sha2, - initial_stack, - &kernel.prover_inputs, - )?; + let after_sha2 = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; let stack_after_sha2 = after_sha2.stack(); From cad56263f44044344ac013126976078e809d9d0b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 08:48:36 -0700 Subject: [PATCH 073/104] fix --- evm/src/cpu/kernel/aggregator.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 840ff6be..797e784d 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -11,8 +11,6 @@ pub static KERNEL: Lazy = Lazy::new(combined_kernel); pub(crate) fn combined_kernel() -> Kernel { let files = vec![ - include_str!("asm/assertions.asm"), - include_str!("asm/basic_macros.asm"), include_str!("asm/core/bootloader.asm"), include_str!("asm/core/create.asm"), include_str!("asm/core/create_addresses.asm"), From 790b32c30bf1fb270dc6962c7e426f2aac958a44 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 24 Aug 2022 09:10:41 -0700 Subject: [PATCH 074/104] fix --- evm/src/cpu/kernel/tests/sha2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index eeadb14b..03f55d32 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -45,7 +45,7 @@ fn test_sha2() -> Result<()> { let stack_after_sha2 = after_sha2.stack(); - let result = stack_after_sha2.clone()[1]; + let result = 
stack_after_sha2[1]; let actual = format!("{:02X}", result); dbg!(expected); dbg!(actual); From fa3436b1a0f94c914d019324cced53c5e46ac5ab Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 6 Sep 2022 17:59:42 -0700 Subject: [PATCH 075/104] removed JUMPDESTs --- evm/src/cpu/kernel/asm/sha2/compression.asm | 5 ----- evm/src/cpu/kernel/asm/sha2/message_schedule.asm | 12 ------------ evm/src/cpu/kernel/asm/sha2/store_pad.asm | 5 ----- 3 files changed, 22 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index ef287cc0..31b30b21 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -1,5 +1,4 @@ global sha2_compression: - JUMPDEST // stack: message_schedule_addr, retdest push 0 // stack: i=0, message_schedule_addr, retdest @@ -48,7 +47,6 @@ global sha2_compression: // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_start_block: // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. - JUMPDEST dup10 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest @@ -117,7 +115,6 @@ sha2_compression_start_block: // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
- JUMPDEST // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest dup11 // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest @@ -253,7 +250,6 @@ sha2_compression_loop: %jump(sha2_compression_loop) sha2_compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. - JUMPDEST // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest dup10 // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest @@ -351,7 +347,6 @@ sha2_compression_end_block: swap8 %jump(sha2_compression_start_block) sha2_compression_end: - JUMPDEST // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest pop // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index 9d49e06f..8f0cd58d 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -2,7 +2,6 @@ // Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks // of message schedule (in four-byte increments) global sha2_gen_message_schedule_from_block: - JUMPDEST // stack: block_addr, output_addr, retdest dup1 // stack: block_addr, block_addr, output_addr, retdest @@ -24,7 +23,6 @@ global sha2_gen_message_schedule_from_block: %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_loop: // 
Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule. - JUMPDEST // stack: counter, output_addr, block[0], block[1], retdest swap2 // stack: block[0], output_addr, counter, block[1], retdest @@ -63,7 +61,6 @@ sha2_gen_message_schedule_from_block_0_loop: %jumpi(sha2_gen_message_schedule_from_block_0_end) %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_end: - JUMPDEST // stack: old counter=0, output_addr, block[0], block[1], retdest pop push 8 @@ -82,7 +79,6 @@ sha2_gen_message_schedule_from_block_0_end: // stack: counter, output_addr + 64, block[1], block[0], retdest sha2_gen_message_schedule_from_block_1_loop: // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. - JUMPDEST // stack: counter, output_addr, block[1], block[0], retdest swap2 // stack: block[1], output_addr, counter, block[0], retdest @@ -121,7 +117,6 @@ sha2_gen_message_schedule_from_block_1_loop: %jumpi(sha2_gen_message_schedule_from_block_1_end) %jump(sha2_gen_message_schedule_from_block_1_loop) sha2_gen_message_schedule_from_block_1_end: - JUMPDEST // stack: old counter=0, output_addr, block[1], block[0], retdest pop // stack: output_addr, block[0], block[1], retdest @@ -135,7 +130,6 @@ sha2_gen_message_schedule_from_block_1_end: // stack: counter, output_addr + 36, block[0], block[1], retdest sha2_gen_message_schedule_remaining_loop: // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
- JUMPDEST // stack: counter, output_addr, block[0], block[1], retdest swap1 // stack: output_addr, counter, block[0], block[1], retdest @@ -218,7 +212,6 @@ sha2_gen_message_schedule_remaining_loop: %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) sha2_gen_message_schedule_remaining_end: - JUMPDEST // stack: counter=0, output_addr, block[0], block[1], retdest %pop4 JUMP @@ -228,7 +221,6 @@ sha2_gen_message_schedule_remaining_end: // Postcondition: starting at output_addr, set of 256 bytes per block // each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) global sha2_gen_all_message_schedules: - JUMPDEST // stack: output_addr, retdest dup1 // stack: output_addr, output_addr, retdest @@ -239,7 +231,6 @@ global sha2_gen_all_message_schedules: push 1 // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest sha2_gen_all_message_schedules_loop: - JUMPDEST // stack: cur_addr, counter, cur_output_addr, output_addr, retdest push sha2_gen_all_message_schedules_loop_end // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest @@ -249,7 +240,6 @@ sha2_gen_all_message_schedules_loop: // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest %jump(sha2_gen_message_schedule_from_block) sha2_gen_all_message_schedules_loop_end: - JUMPDEST // stack: cur_addr, counter, cur_output_addr, output_addr, retdest %add_const(64) // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest @@ -266,9 +256,7 @@ sha2_gen_all_message_schedules_loop_end: iszero %jumpi(sha2_gen_all_message_schedules_end) %jump(sha2_gen_all_message_schedules_loop) - JUMPDEST sha2_gen_all_message_schedules_end: - JUMPDEST // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 // stack: output_addr, retdest diff --git 
a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index d27ebaf8..5cd02ac5 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -1,5 +1,4 @@ global sha2_store: - JUMPDEST // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest dup1 // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest @@ -10,7 +9,6 @@ global sha2_store: push 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: - JUMPDEST // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest dup1 // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest @@ -33,7 +31,6 @@ sha2_store_loop: // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest %jump(sha2_store_loop) sha2_store_end: - JUMPDEST // stack: counter=0, addr, retdest %pop2 // stack: retdest @@ -44,7 +41,6 @@ sha2_store_end: // Postcodition: output is in memory, starting at 0, of the form // num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: - JUMPDEST // stack: retdest push 0 %mload_kernel_general @@ -94,5 +90,4 @@ global sha2_pad: %jump(sha2_gen_all_message_schedules) global sha2: - JUMPDEST %jump(sha2_store) From 2e3366d1109e23e1d8bc9f6407cdfc4ce7a6b753 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 12:05:21 -0700 Subject: [PATCH 076/104] started on using %stack in sha2 asm --- evm/src/cpu/kernel/asm/sha2/ops.asm | 42 ++++++++++------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 893dc9de..6e114f1a 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -24,9 +24,7 @@ // stack: rot, value, rot, value shr // stack: value >> rot, rot, value - swap2 - // stack: value, rot, value >> rot - swap1 + %stack (shifted, rot, value) -> (rot, value, 
shifted) // stack: rot, value, value >> rot push 32 sub @@ -55,9 +53,7 @@ // stack: 32 - rot, value, rot, value shr // stack: value >> (32 - rot), rot, value - swap2 - // stack: value, rot, value >> (32 - rot) - swap1 + %stack (shifted, rot, value) -> (rot, value, shifted) // stack: rot, value, value >> (32 - rot) shl // stack: value << rot, value >> (32 - rot) @@ -79,9 +75,7 @@ push 7 %rotr // stack: rotr(x, 7), x - swap1 - // stack: x, rotr(x, 7) - dup1 + %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 7) push 18 %rotr @@ -102,9 +96,7 @@ push 17 %rotr // stack: rotr(x, 17), x - swap1 - // stack: x, rotr(x, 17) - dup1 + %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 17) push 19 %rotr @@ -125,9 +117,7 @@ push 2 %rotr // stack: rotr(x, 2), x - swap1 - // stack: x, rotr(x, 2) - dup1 + %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 2) push 13 %rotr @@ -148,9 +138,7 @@ push 6 %rotr // stack: rotr(x, 6), x - swap1 - // stack: x, rotr(x, 6) - dup1 + %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 6) push 11 %rotr @@ -168,18 +156,16 @@ // stack: x, y, z dup1 // stack: x, x, y, z - swap2 - // stack: y, x, x, z - and - // stack: x and y, x, z - swap2 - // stack: z, x, x and y - swap1 - // stack: x, z, x and y not - // stack: not x, z, x and y + // stack: not x, x, y, z + %stack (notx, x, y, z) -> (notx, z, x, y) + // stack: not x, z, x, y and - // stack: (not x) and z, x and y + // stack: (not x) and z, x, y + %stack (nxz, x, y) -> (x, y, nxz) + // stack: x, y, (not x) and z + and + // stack: x and y, (not x) and z or %endmacro From 83c959d91e44f0355f8150558a82a83ad4fefc37 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 19 Sep 2022 10:33:04 -0700 Subject: [PATCH 077/104] opcodes to uppercase, and cleanup --- evm/src/cpu/kernel/asm/sha2/compression.asm | 286 +++++++++--------- evm/src/cpu/kernel/asm/sha2/memory.asm | 66 ++-- .../cpu/kernel/asm/sha2/message_schedule.asm | 198 ++++++------ 
evm/src/cpu/kernel/asm/sha2/ops.asm | 154 +++++----- evm/src/cpu/kernel/asm/sha2/store_pad.asm | 46 +-- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 12 +- evm/src/cpu/kernel/asm/sha2/write_length.asm | 142 ++++----- 7 files changed, 449 insertions(+), 455 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 31b30b21..efb940f9 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -1,371 +1,365 @@ global sha2_compression: // stack: message_schedule_addr, retdest - push 0 + PUSH 0 // stack: i=0, message_schedule_addr, retdest - swap1 + SWAP1 // stack: message_schedule_addr, i=0, retdest - push 0 + PUSH 0 // stack: 0, message_schedule_addr, i=0, retdest %mload_kernel_general // stack: num_blocks, message_schedule_addr, i=0, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest %scratch_space_addr_from_num_blocks // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - swap1 + SWAP1 // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(28) %mload_kernel_code_u32 // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(24) %mload_kernel_code_u32 // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(20) %mload_kernel_code_u32 // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(16) %mload_kernel_code_u32 // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(12) %mload_kernel_code_u32 // stack: d[0], e[0], f[0], g[0], h[0], 
num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(8) %mload_kernel_code_u32 // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(4) %mload_kernel_code_u32 // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %mload_kernel_code_u32 // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_start_block: // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. - dup10 + DUP10 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup2 - dup2 + DUP2 + DUP2 // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup3 - dup2 + DUP3 + DUP2 // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], 
num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup4 - dup2 + DUP4 + DUP2 // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup5 - dup2 + DUP5 + DUP2 // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup6 - dup2 + DUP6 + DUP2 // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup7 - dup2 + DUP7 + DUP2 // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], 
g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup8 - dup2 + DUP8 + DUP2 // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup9 - dup2 + DUP9 + DUP2 // stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - pop + POP // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup11 + DUP11 // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup13 + DUP13 // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mul_const(4) // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add + ADD // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_general_u32 // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - push sha2_constants_k + PUSH sha2_constants_k // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup14 + DUP14 // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mul_const(4) // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add + ADD // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: g[i], h[i], K[i], 
W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word2 // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup6 + DUP6 // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup3 + DUP3 // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 + SWAP2 // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], 
g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 + SWAP1 // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 + SWAP5 // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 + SWAP8 // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 + SWAP7 // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - swap7 - swap1 + SWAP1 + SWAP7 + SWAP1 // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - swap7 - swap2 + SWAP2 + SWAP7 + SWAP2 // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 - swap7 - swap3 + SWAP3 + SWAP7 + SWAP3 // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - swap7 - swap4 + SWAP4 + SWAP7 + SWAP4 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - swap7 - swap5 + SWAP5 + SWAP7 + SWAP5 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest - swap6 - swap7 - swap6 + SWAP6 + SWAP7 + SWAP6 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 + DUP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %increment // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %eq_const(64) // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 + DUP12 // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - sub + SUB // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap13 + SWAP13 // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap1 + SWAP1 // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - push 256 - mul + PUSH 256 + MUL // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - add + ADD // stack: 
message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap12 + SWAP12 // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest - swap10 + SWAP10 // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - pop + POP // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - push 64 - swap1 - mod + PUSH 64 + SWAP1 + MOD // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - swap12 + SWAP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - pop + POP // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - dup12 + DUP12 // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - //dup10 - //iszero - //dup2 - //iszero - //and - //%jumpi(sha2_stop_lol) - iszero + ISZERO %jumpi(sha2_compression_end_block) %jump(sha2_compression_loop) sha2_compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. 
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_general_u32 // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 + SWAP1 // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(4) %mload_kernel_general_u32 // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 + SWAP2 // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(8) %mload_kernel_general_u32 // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 + SWAP3 // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(12) %mload_kernel_general_u32 // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest %add_u32 // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 + SWAP4 // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(16) %mload_kernel_general_u32 // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 + SWAP5 // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(20) %mload_kernel_general_u32 // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap6 + SWAP6 // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(24) %mload_kernel_general_u32 // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 + SWAP7 // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(28) %mload_kernel_general_u32 // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 + SWAP8 // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - iszero + ISZERO // In this case, we've finished all the blocks. %jumpi(sha2_compression_end) // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest // TODO: "insertion" macro for the below // Move num_blocks to the ninth spot on the stack, past the working variables. 
- swap1 - swap2 - swap1 - swap2 - swap3 - swap2 - swap3 - swap4 - swap3 - swap4 - swap5 - swap4 - swap5 - swap6 - swap5 - swap6 - swap7 - swap6 - swap7 - swap8 - swap7 - swap8 + SWAP1 + SWAP2 + SWAP1 + SWAP2 + SWAP3 + SWAP2 + SWAP3 + SWAP4 + SWAP3 + SWAP4 + SWAP5 + SWAP4 + SWAP5 + SWAP6 + SWAP5 + SWAP6 + SWAP7 + SWAP6 + SWAP7 + SWAP8 + SWAP7 + SWAP8 %jump(sha2_compression_start_block) sha2_compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - swap3 + SWAP3 // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest %pop3 // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 0b722287..9c68f208 100644 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -94,8 +94,8 @@ %mload_kernel_general_u32 OR // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset - swap1 - pop + SWAP1 + POP // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 %endmacro @@ -113,64 +113,64 @@ // to kernel 
general memory. %macro mstore_kernel_general_u32 // stack: offset, value - swap1 + SWAP1 // stack: value, offset - push 1 - push 8 - shl + PUSH 1 + PUSH 8 + SHL // stack: 1 << 8, value, offset - swap1 + SWAP1 // stack: value, 1 << 8, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value, 1 << 8, value, 1 << 8, offset - mod + MOD // stack: c_0 = value % (1 << 8), value, 1 << 8, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value, 1 << 8, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 8, 1 << 8, c_0, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value >> 8, 1 << 8, value >> 8, 1 << 8, c_0, offset - mod + MOD // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, 1 << 8, c_0, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value >> 8, 1 << 8, c_1, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 16, 1 << 8, c_1, c_0, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value >> 16, 1 << 8, value >> 16, 1 << 8, c_1, c_0, offset - mod + MOD // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, 1 << 8, c_1, c_0, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value >> 16, 1 << 8, c_2, c_1, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 24, 1 << 8, c_2, c_1, c_0, offset - mod + MOD // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset - dup5 + DUP5 // stack: offset, c_3, c_2, c_1, c_0, offset %mstore_kernel_general // stack: c_2, c_1, c_0, offset - dup4 + DUP4 // stack: offset, c_2, c_1, c_0, offset %add_const(1) %mstore_kernel_general // stack: c_1, c_0, offset - dup3 + DUP3 // stack: offset, c_1, c_0, offset %add_const(2) %mstore_kernel_general // stack: c_0, offset - swap1 + SWAP1 // stack: offset, c_0 %add_const(3) %mstore_kernel_general diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index 8f0cd58d..e6daa0b8 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -3,212 +3,212 @@ // of message 
schedule (in four-byte increments) global sha2_gen_message_schedule_from_block: // stack: block_addr, output_addr, retdest - dup1 + DUP1 // stack: block_addr, block_addr, output_addr, retdest %add_const(32) // stack: block_addr + 32, block_addr, output_addr, retdest - swap1 + SWAP1 // stack: block_addr, block_addr + 32, output_addr, retdest %mload_kernel_general_u256 // stack: block[0], block_addr + 32, output_addr, retdest - swap1 + SWAP1 // stack: block_addr + 32, block[0], output_addr, retdest %mload_kernel_general_u256 // stack: block[1], block[0], output_addr, retdest - swap2 + SWAP2 // stack: output_addr, block[0], block[1], retdest %add_const(28) - push 8 + PUSH 8 // stack: counter=8, output_addr + 28, block[0], block[1], retdest %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_loop: // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule. // stack: counter, output_addr, block[0], block[1], retdest - swap2 + SWAP2 // stack: block[0], output_addr, counter, block[1], retdest - push 1 - push 32 - shl + PUSH 1 + PUSH 32 + SHL // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest - dup2 - dup2 + DUP2 + DUP2 // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest - mod + MOD // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest - swap2 + SWAP2 // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - div + DIV // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - dup3 + DUP3 // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest %mstore_kernel_general_u32 // stack: 
block[0] >> 32, output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: output_addr, block[0] >> 32, counter, block[1], retdest %sub_const(4) // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest - swap2 + SWAP2 // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest %decrement - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_from_block_0_end) %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest - pop - push 8 + POP + PUSH 8 // stack: counter=8, output_addr, block[0], block[1], retdest - swap2 + SWAP2 // stack: block[0], output_addr, counter, block[1], retdest - swap3 + SWAP3 // stack: block[1], output_addr, counter, block[0], retdest - swap2 + SWAP2 // stack: counter, output_addr, block[1], block[0], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[1], block[0], retdest %add_const(64) // stack: output_addr + 64, counter, block[1], block[0], retdest - swap1 + SWAP1 // stack: counter, output_addr + 64, block[1], block[0], retdest sha2_gen_message_schedule_from_block_1_loop: // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. 
// stack: counter, output_addr, block[1], block[0], retdest - swap2 + SWAP2 // stack: block[1], output_addr, counter, block[0], retdest - push 1 - push 32 - shl + PUSH 1 + PUSH 32 + SHL // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest - dup2 - dup2 + DUP2 + DUP2 // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest - mod + MOD // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest - swap2 + SWAP2 // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - div + DIV // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - dup3 + DUP3 // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest %mstore_kernel_general_u32 // stack: block[1] >> 32, output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: output_addr, block[1] >> 32, counter, block[0], retdest %sub_const(4) // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest - swap2 + SWAP2 // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest %decrement - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_from_block_1_end) %jump(sha2_gen_message_schedule_from_block_1_loop) sha2_gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest - pop + POP // stack: output_addr, block[0], block[1], retdest - push 48 + PUSH 48 // stack: counter=48, output_addr, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[0], block[1], retdest %add_const(36) // stack: output_addr + 36, counter, block[0], block[1], retdest - swap1 
+ SWAP1 // stack: counter, output_addr + 36, block[0], block[1], retdest sha2_gen_message_schedule_remaining_loop: // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. // stack: counter, output_addr, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, counter, block[0], block[1], retdest - push 2 - push 4 - mul - swap1 - sub + PUSH 2 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest %sha2_sigma_1 // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 7 - push 4 - mul - swap1 - sub + PUSH 7 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 15 - push 4 - mul - swap1 - sub + PUSH 15 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest 
%sha2_sigma_0 // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 16 - push 4 - mul - swap1 - sub + PUSH 16 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap4 + SWAP4 // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest %add_u32 %add_u32 %add_u32 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - swap2 + SWAP2 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + 
x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest %mstore_kernel_general_u32 // stack: output_addr, counter, block[0], block[1], retdest %add_const(4) // stack: output_addr + 4, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: counter, output_addr + 4, block[0], block[1], retdest %decrement // stack: counter - 1, output_addr + 4, block[0], block[1], retdest - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) sha2_gen_message_schedule_remaining_end: @@ -222,38 +222,38 @@ sha2_gen_message_schedule_remaining_end: // each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) global sha2_gen_all_message_schedules: // stack: output_addr, retdest - dup1 + DUP1 // stack: output_addr, output_addr, retdest - push 0 + PUSH 0 // stack: 0, output_addr, output_addr, retdest %mload_kernel_general // stack: num_blocks, output_addr, output_addr, retdest - push 1 + PUSH 1 // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest sha2_gen_all_message_schedules_loop: // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - push sha2_gen_all_message_schedules_loop_end + PUSH sha2_gen_all_message_schedules_loop_end // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest - dup4 + DUP4 // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - dup3 + DUP3 // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest %jump(sha2_gen_message_schedule_from_block) sha2_gen_all_message_schedules_loop_end: // stack: cur_addr, counter, cur_output_addr, 
output_addr, retdest %add_const(64) // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest - swap1 + SWAP1 %decrement - swap1 + SWAP1 // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest - swap2 + SWAP2 %add_const(256) - swap2 + SWAP2 // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - dup2 + DUP2 // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - iszero + ISZERO %jumpi(sha2_gen_all_message_schedules_end) %jump(sha2_gen_all_message_schedules_loop) sha2_gen_all_message_schedules_end: diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 6e114f1a..e84bc34c 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -1,17 +1,17 @@ // u32 addition (discarding 2^32 bit) %macro add_u32 // stack: x, y - add + ADD // stack: x + y - dup1 + DUP1 // stack: x + y, x + y %shr_const(32) // stack: (x + y) >> 32, x + y %shl_const(32) // stack: ((x + y) >> 32) << 32, x + y - swap1 + SWAP1 // stack: x + y, ((x + y) >> 32) << 32 - sub + SUB // stack: x + y - ((x + y) >> 32) << 32 %endmacro @@ -19,177 +19,177 @@ // 32-bit right rotation %macro rotr // stack: rot, value - dup2 - dup2 + DUP2 + DUP2 // stack: rot, value, rot, value - shr + SHR // stack: value >> rot, rot, value %stack (shifted, rot, value) -> (rot, value, shifted) // stack: rot, value, value >> rot - push 32 - sub + PUSH 32 + SUB // stack: 32 - rot, value, value >> rot - shl + SHL // stack: value << (32 - rot), value >> rot - push 32 - push 1 - swap1 - shl + PUSH 32 + PUSH 1 + SWAP1 + SHL // stack: 1 << 32, value << (32 - rot), value >> rot - swap1 - mod + SWAP1 + MOD // stack: (value << (32 - rot)) % (1 << 32), value >> rot - add + ADD %endmacro // 32-bit left rotation %macro rotl // stack: rot, value - dup2 - dup2 + DUP2 + DUP2 // stack: rot, value, rot, value - push 32 - sub + PUSH 32 + SUB // stack: 32 - rot, value, rot, value - 
shr + SHR // stack: value >> (32 - rot), rot, value %stack (shifted, rot, value) -> (rot, value, shifted) // stack: rot, value, value >> (32 - rot) - shl + SHL // stack: value << rot, value >> (32 - rot) - push 32 - push 1 - swap1 - shl + PUSH 32 + PUSH 1 + SWAP1 + SHL // stack: 1 << 32, value << rot, value >> (32 - rot) - swap1 - mod + SWAP1 + MOD // stack: (value << rot) % (1 << 32), value >> (32 - rot) - add + ADD %endmacro %macro sha2_sigma_0 // stack: x - dup1 + DUP1 // stack: x, x - push 7 + PUSH 7 %rotr // stack: rotr(x, 7), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 7) - push 18 + PUSH 18 %rotr // stack: rotr(x, 18), x, rotr(x, 7) - swap1 + SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) - push 3 - shr + PUSH 3 + SHR // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) - xor - xor + XOR + XOR %endmacro %macro sha2_sigma_1 // stack: x - dup1 + DUP1 // stack: x, x - push 17 + PUSH 17 %rotr // stack: rotr(x, 17), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 17) - push 19 + PUSH 19 %rotr // stack: rotr(x, 19), x, rotr(x, 17) - swap1 + SWAP1 // stack: x, rotr(x, 19), rotr(x, 17) - push 10 - shr + PUSH 10 + SHR // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) - xor - xor + XOR + XOR %endmacro %macro sha2_bigsigma_0 // stack: x - dup1 + DUP1 // stack: x, x - push 2 + PUSH 2 %rotr // stack: rotr(x, 2), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 2) - push 13 + PUSH 13 %rotr // stack: rotr(x, 13), x, rotr(x, 2) - swap1 + SWAP1 // stack: x, rotr(x, 13), rotr(x, 2) - push 22 + PUSH 22 %rotr // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) - xor - xor + XOR + XOR %endmacro %macro sha2_bigsigma_1 // stack: x - dup1 + DUP1 // stack: x, x - push 6 + PUSH 6 %rotr // stack: rotr(x, 6), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 6) - push 11 + PUSH 11 %rotr // stack: rotr(x, 11), x, rotr(x, 6) - swap1 + SWAP1 // stack: x, rotr(x, 11), rotr(x, 6) - push 25 + PUSH 25 %rotr // stack: rotr(x, 25), rotr(x, 11), 
rotr(x, 6) - xor - xor + XOR + XOR %endmacro %macro sha2_choice // stack: x, y, z - dup1 + DUP1 // stack: x, x, y, z - not + NOT // stack: not x, x, y, z %stack (notx, x, y, z) -> (notx, z, x, y) // stack: not x, z, x, y - and + AND // stack: (not x) and z, x, y %stack (nxz, x, y) -> (x, y, nxz) // stack: x, y, (not x) and z - and + AND // stack: x and y, (not x) and z - or + OR %endmacro %macro sha2_majority // stack: x, y, z - dup3 - dup3 - dup3 + DUP3 + DUP3 + DUP3 // stack: x, y, z, x, y, z - and + AND // stack: x and y, z, x, y, z - swap2 + SWAP2 // stack: x, z, x and y, y, z - and + AND // stack: x and z, x and y, y, z - swap2 + SWAP2 // stack: y, x and y, x and z, z - swap1 + SWAP1 // stack: x and y, y, x and z, z - swap3 + SWAP3 // stack: z, y, x and z, x and y - and + AND // stack: y and z, x and z, x and y - or - or + OR + OR %endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index 5cd02ac5..c178202b 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -1,31 +1,31 @@ global sha2_store: // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - dup1 + DUP1 // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 0 + PUSH 0 // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 1 + PUSH 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - dup1 + DUP1 // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - swap3 + SWAP3 // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest - swap1 + SWAP1 // stack: addr, x[num_bytes-counter], counter, addr, ... 
, x[num_bytes-1], retdest %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest - dup1 + DUP1 // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest - iszero + ISZERO %jumpi(sha2_store_end) // stack: counter-1, addr, ... , x[num_bytes-1], retdest - swap1 + SWAP1 // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest @@ -42,24 +42,24 @@ sha2_store_end: // num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: // stack: retdest - push 0 + PUSH 0 %mload_kernel_general // stack: num_bytes, retdest // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes+1] // stack: num_bytes, retdest - push 1 - push 7 - shl + PUSH 1 + PUSH 7 + SHL // stack: 128, num_bytes, retdest - dup2 + DUP2 // stack: num_bytes, 128, num_bytes, retdest %increment // stack: num_bytes+1, 128, num_bytes, retdest %mstore_kernel_general // stack: num_bytes, retdest // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - dup1 + DUP1 // stack: num_bytes, num_bytes, retdest %add_const(8) %div_const(64) @@ -67,23 +67,23 @@ global sha2_pad: %increment // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest // STEP 3: calculate length := num_bytes*8 - swap1 + SWAP1 // stack: num_bytes, num_blocks, retdest - push 8 - mul + PUSH 8 + MUL // stack: length = num_bytes*8, num_blocks, retdest // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - dup2 + DUP2 // stack: num_blocks, length, num_blocks, retdest - push 64 - mul + PUSH 64 + MUL // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, retdest // STEP 5: write num_blocks to x[0] - push 0 + PUSH 0 %mstore_kernel_general // stack: num_blocks, retdest %message_schedule_addr_from_num_blocks diff --git 
a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index cd2bd303..07aba907 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -1,14 +1,14 @@ %macro sha2_temp_word1 // stack: e, f, g, h, K[i], W[i] - dup1 + DUP1 // stack: e, e, f, g, h, K[i], W[i] %sha2_bigsigma_1 // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - swap3 + SWAP3 // stack: g, e, f, Sigma_1(e), h, K[i], W[i] - swap2 + SWAP2 // stack: f, e, g, Sigma_1(e), h, K[i], W[i] - swap1 + SWAP1 // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] @@ -21,11 +21,11 @@ %macro sha2_temp_word2 // stack: a, b, c - dup1 + DUP1 // stack: a, a, b, c %sha2_bigsigma_0 // stack: Sigma_0(a), a, b, c - swap3 + SWAP3 // stack: c, a, b, Sigma_0(a) %sha2_majority // stack: Maj(c, a, b), Sigma_0(a) diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index 7474cd0e..40395707 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -1,145 +1,145 @@ %macro sha2_write_length // stack: last_addr, length - swap1 + SWAP1 // stack: length, last_addr - push 1 - push 8 - shl + PUSH 1 + PUSH 8 + SHL // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: length % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, length % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 8 - shr + PUSH 8 + SHR // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 8) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 8) % (1 
<< 8), 1 << 8, length, last_addr - push 1 - swap1 - sub + PUSH 1 + SWAP1 + SUB // stack: last_addr - 1, (length >> 8) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 16 - shr + PUSH 16 + SHR // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 16) % (1 << 8), 1 << 8, length, last_addr - push 2 - swap1 - sub + PUSH 2 + SWAP1 + SUB // stack: last_addr - 2, (length >> 16) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 24 - shr + PUSH 24 + SHR // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr - push 3 - swap1 - sub + PUSH 3 + SWAP1 + SUB // stack: last_addr - 3, (length >> 24) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 32 - shr + PUSH 32 + SHR // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr - push 4 - swap1 - sub + PUSH 4 + SWAP1 + SUB // stack: last_addr - 4, (length >> 32) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 
8, length, last_addr - push 40 - shr + PUSH 40 + SHR // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 40) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr - push 5 - swap1 - sub + PUSH 5 + SWAP1 + SUB // stack: last_addr - 5, (length >> 40) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 48 - shr + PUSH 48 + SHR // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr - push 6 - swap1 - sub + PUSH 6 + SWAP1 + SUB // stack: last_addr - 6, (length >> 48) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 56 - shr + PUSH 56 + SHR // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr - push 7 - swap1 - sub + PUSH 7 + SWAP1 + SUB // stack: last_addr - 7, (length >> 56) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general %pop3 From e482bc7f0c90a91f5c41e3df86182bd8ffcdf1f7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 12:31:29 -0700 Subject: [PATCH 078/104] addressed comments --- evm/Cargo.toml | 1 - evm/src/cpu/kernel/asm/sha2/compression.asm | 8 +------ evm/src/cpu/kernel/asm/sha2/store_pad.asm | 12 +++++------ evm/src/cpu/kernel/tests/sha2.rs | 24 +++++---------------- 4 files changed, 12 insertions(+), 33 deletions(-) diff --git a/evm/Cargo.toml 
b/evm/Cargo.toml index afddaeb6..7df7edd5 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -10,7 +10,6 @@ plonky2_util = { path = "../util" } eth-trie-utils = { git = "https://github.com/mir-protocol/eth-trie-utils.git", rev = "dd3595b4ba7923f8d465450d210f17a2b4e20f96" } maybe_rayon = { path = "../maybe_rayon" } anyhow = "1.0.40" -ascii = "1.0.0" env_logger = "0.9.0" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index efb940f9..0c823c5e 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -136,13 +136,7 @@ sha2_compression_loop: // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 + %stack ((start: 6), e, f, g, h, (end: 5)) -> (e, f, g, h, start, e, f, g, h, end) // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm 
b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index c178202b..b43441a1 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -9,6 +9,11 @@ global sha2_store: PUSH 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + DUP2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + ISZERO + %jumpi(sha2_store_end) // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest DUP1 // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest @@ -20,18 +25,13 @@ sha2_store_loop: // stack: counter, addr, ... , x[num_bytes-1], retdest %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest - DUP1 - // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest - ISZERO - %jumpi(sha2_store_end) - // stack: counter-1, addr, ... , x[num_bytes-1], retdest SWAP1 // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest %jump(sha2_store_loop) sha2_store_end: - // stack: counter=0, addr, retdest + // stack: addr, counter, retdest %pop2 // stack: retdest %jump(sha2_pad) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 03f55d32..97edb992 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -1,9 +1,7 @@ use std::str::FromStr; use anyhow::Result; -use ascii::AsciiStr; use ethereum_types::U256; -use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use sha2::{Digest, Sha256}; @@ -17,24 +15,14 @@ fn test_sha2() -> Result<()> { let mut rng = thread_rng(); - let num_bytes = rng.gen_range(1..10000); - let message: String = rng - .sample_iter(&Alphanumeric) - .take(num_bytes) - .map(char::from) - .collect(); - dbg!(num_bytes); + let num_bytes = rng.gen_range(0..10000); + let message: Vec = (0..num_bytes).map(|_| rng.gen()).collect(); let mut hasher = Sha256::new(); hasher.update(message.clone()); let expected = format!("{:02X}", hasher.finalize()); - let bytes: Vec = AsciiStr::from_ascii(&message) - .unwrap() - .as_bytes() - .iter() - .map(|&x| U256::from(x as u32)) - .collect(); + let bytes: Vec = message.iter().map(|&x| U256::from(x as u32)).collect(); let mut initial_stack = vec![U256::from(num_bytes)]; initial_stack.extend(bytes); @@ -42,13 +30,11 @@ fn test_sha2() -> Result<()> { initial_stack.reverse(); let after_sha2 = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; - let stack_after_sha2 = after_sha2.stack(); - let result = stack_after_sha2[1]; let actual = format!("{:02X}", result); - dbg!(expected); - dbg!(actual); + + assert_eq!(expected, actual); Ok(()) } From d8f2e04c2bdc8a98f585356454efe55c7d7fcce8 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 12:41:08 -0700 Subject: [PATCH 079/104] more use of %stack macro to make sha2 cleaner --- evm/src/cpu/kernel/asm/sha2/compression.asm | 69 ++------------------- 1 file changed, 4 
insertions(+), 65 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 0c823c5e..3288c405 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -136,15 +136,11 @@ sha2_compression_loop: // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack ((start: 6), e, f, g, h, (end: 5)) -> (e, f, g, h, start, e, f, g, h, end) + %stack ((start: 6), e, f, g, h) -> (e, f, g, h, start, e, f, g, h) // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP4 - // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP4 - // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP4 + %stack (t, a, b, c) -> (a, b, c, t, a, b, c) // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word2 // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest @@ -158,41 +154,7 @@ sha2_compression_loop: // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], 
c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP1 - // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP5 - // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - POP - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP8 - // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - POP - // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP7 - // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP1 - SWAP7 - SWAP1 - // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP2 - SWAP7 - SWAP2 - // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - SWAP7 - SWAP3 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP4 - SWAP7 - SWAP4 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP5 - SWAP7 - SWAP5 - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP6 - SWAP7 - SWAP6 + %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, 
g, h) // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest DUP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest @@ -309,30 +271,7 @@ sha2_compression_end_block: // In this case, we've finished all the blocks. %jumpi(sha2_compression_end) // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - // TODO: "insertion" macro for the below - // Move num_blocks to the ninth spot on the stack, past the working variables. - SWAP1 - SWAP2 - SWAP1 - SWAP2 - SWAP3 - SWAP2 - SWAP3 - SWAP4 - SWAP3 - SWAP4 - SWAP5 - SWAP4 - SWAP5 - SWAP6 - SWAP5 - SWAP6 - SWAP7 - SWAP6 - SWAP7 - SWAP8 - SWAP7 - SWAP8 + %stack (num_blocks, (working: 8)) -> (working, num_blocks) %jump(sha2_compression_start_block) sha2_compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest From 5ca5a3b3c82d5b6bf5581cf4786a14ee0007b79b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 12:45:17 -0700 Subject: [PATCH 080/104] more %stack --- evm/src/cpu/kernel/asm/sha2/ops.asm | 12 +++--------- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 6 +----- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index e84bc34c..aced6530 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -171,9 +171,7 @@ %macro sha2_majority // stack: x, y, z - DUP3 - DUP3 - DUP3 + %stack ((a: 3)) -> (a, a) // stack: x, y, z, x, y, z AND // stack: x and y, z, x, y, z @@ -181,12 +179,8 @@ // stack: x, z, x and y, y, z AND // stack: x and z, x and y, y, z - SWAP2 - // stack: y, x and 
y, x and z, z - SWAP1 - // stack: x and y, y, x and z, z - SWAP3 - // stack: z, y, x and z, x and y + %stack ((a: 2), (b: 2)) -> (b, a) + // stack: y, z, x and z, x and y AND // stack: y and z, x and z, x and y OR diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index 07aba907..cd84f240 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -4,11 +4,7 @@ // stack: e, e, f, g, h, K[i], W[i] %sha2_bigsigma_1 // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - SWAP3 - // stack: g, e, f, Sigma_1(e), h, K[i], W[i] - SWAP2 - // stack: f, e, g, Sigma_1(e), h, K[i], W[i] - SWAP1 + %stack (sig, (efg: 3)) -> (efg, sig) // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] From 140242c556daa2b945cd1aa336bb4f2a92848f11 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 12:50:43 -0700 Subject: [PATCH 081/104] more %stack sha2 cleanup --- evm/src/cpu/kernel/asm/sha2/message_schedule.asm | 16 ++-------------- evm/src/cpu/kernel/asm/sha2/ops.asm | 2 +- evm/src/cpu/kernel/asm/sha2/store_pad.asm | 6 +----- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index e6daa0b8..6b2fa472 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -65,13 +65,7 @@ sha2_gen_message_schedule_from_block_0_end: POP PUSH 8 // stack: counter=8, output_addr, block[0], block[1], retdest - SWAP2 - // stack: block[0], output_addr, counter, block[1], retdest - SWAP3 - // stack: block[1], output_addr, counter, block[0], retdest - SWAP2 - // stack: counter, output_addr, block[1], block[0], retdest - SWAP1 + %stack (counter, out, b0, b1) -> (out, counter, b1, b0) // stack: output_addr, counter, block[1], block[0], retdest %add_const(64) // stack: output_addr + 64, 
counter, block[1], block[0], retdest @@ -191,13 +185,7 @@ sha2_gen_message_schedule_remaining_loop: %add_u32 %add_u32 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - SWAP1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - DUP1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - SWAP2 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest - SWAP1 + DUP2 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest %mstore_kernel_general_u32 // stack: output_addr, counter, block[0], block[1], retdest diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index aced6530..7bd96a10 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -171,7 +171,7 @@ %macro sha2_majority // stack: x, y, z - %stack ((a: 3)) -> (a, a) + %stack ((xyz: 3)) -> (xyz, xyz) // stack: x, y, z, x, y, z AND // stack: x and y, z, x, y, z diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index b43441a1..82ed58c0 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -15,11 +15,7 @@ sha2_store_loop: ISZERO %jumpi(sha2_store_end) // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - DUP1 - // stack: addr, addr, counter, x[num_bytes-counter], ... 
, x[num_bytes-1], retdest - SWAP3 - // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest - SWAP1 + %stack (addr, counter, val) -> (addr, val, counter, addr) // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest From 5cf8bf2ba38ac03d716bcfd2310b2a0985e38ec6 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Sep 2022 13:47:37 -0700 Subject: [PATCH 082/104] more %stack sha2 cleanup --- evm/src/cpu/kernel/asm/sha2/memory.asm | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 9c68f208..843461e7 100644 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -119,10 +119,7 @@ PUSH 8 SHL // stack: 1 << 8, value, offset - SWAP1 - // stack: value, 1 << 8, offset - DUP2 - DUP2 + %stack (shift, val, offset) -> (val, shift, val, shift, offset) // stack: value, 1 << 8, value, 1 << 8, offset MOD // stack: c_0 = value % (1 << 8), value, 1 << 8, offset From d1d404b9892b878211762c5966c401252b35e9c1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Sun, 11 Sep 2022 22:58:18 -0700 Subject: [PATCH 083/104] addressed comments --- .../cpu/kernel/stack/stack_manipulation.rs | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/evm/src/cpu/kernel/stack/stack_manipulation.rs b/evm/src/cpu/kernel/stack/stack_manipulation.rs index ebc54af1..7264dde9 100644 --- a/evm/src/cpu/kernel/stack/stack_manipulation.rs +++ b/evm/src/cpu/kernel/stack/stack_manipulation.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::collections::hash_map::Entry::{Occupied, Vacant}; -use std::collections::{BinaryHeap, HashMap, HashSet}; +use std::collections::{BinaryHeap, HashMap}; use std::hash::Hash; use itertools::Itertools; @@ -27,21 +27,20 @@ pub(crate) fn expand_stack_manipulation(body: Vec) -> 
Vec { fn expand(names: Vec, replacements: Vec) -> Vec { let mut stack_blocks = HashMap::new(); - let mut stack_names = HashSet::new(); let mut src = names .iter() .cloned() .flat_map(|item| match item { StackPlaceholder::Identifier(name) => { - stack_names.insert(name.clone()); + stack_blocks.insert(name.clone(), 1); vec![StackItem::NamedItem(name)] } StackPlaceholder::Block(name, n) => { stack_blocks.insert(name.clone(), n); (0..n) .map(|i| { - let literal_name = format!("block_{}_{}", name, i); + let literal_name = format!("@{}.{}", name, i); StackItem::NamedItem(literal_name) }) .collect_vec() @@ -57,14 +56,17 @@ fn expand(names: Vec, replacements: Vec) -> // May be either a named item or a label. Named items have precedence. if stack_blocks.contains_key(&name) { let n = *stack_blocks.get(&name).unwrap(); - (0..n) - .map(|i| { - let literal_name = format!("block_{}_{}", name, i); - StackItem::NamedItem(literal_name) - }) - .collect_vec() - } else if stack_names.contains(&name) { - vec![StackItem::NamedItem(name)] + if n == 1 { + // A name, not an actual block. 
+ vec![StackItem::NamedItem(name)] + } else { + (0..n) + .map(|i| { + let literal_name = format!("@{}.{}", name, i); + StackItem::NamedItem(literal_name) + }) + .collect_vec() + } } else { vec![StackItem::PushTarget(PushTarget::Label(name))] } From 7eadfee5804eadf33a18a1d159474c3dd0df9748 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 13 Sep 2022 22:00:51 -0700 Subject: [PATCH 084/104] removed parentheses --- evm/src/cpu/kernel/asm/sha2/compression.asm | 4 ++-- evm/src/cpu/kernel/asm/sha2/ops.asm | 4 ++-- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 3288c405..bee829f3 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -136,7 +136,7 @@ sha2_compression_loop: // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack ((start: 6), e, f, g, h) -> (e, f, g, h, start, e, f, g, h) + %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest @@ -271,7 +271,7 @@ sha2_compression_end_block: // In this case, we've finished all the blocks. 
%jumpi(sha2_compression_end) // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %stack (num_blocks, (working: 8)) -> (working, num_blocks) + %stack (num_blocks, working: 8) -> (working, num_blocks) %jump(sha2_compression_start_block) sha2_compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 7bd96a10..3f20b2d0 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -171,7 +171,7 @@ %macro sha2_majority // stack: x, y, z - %stack ((xyz: 3)) -> (xyz, xyz) + %stack (xyz: 3) -> (xyz, xyz) // stack: x, y, z, x, y, z AND // stack: x and y, z, x, y, z @@ -179,7 +179,7 @@ // stack: x, z, x and y, y, z AND // stack: x and z, x and y, y, z - %stack ((a: 2), (b: 2)) -> (b, a) + %stack (a: 2, b: 2) -> (b, a) // stack: y, z, x and z, x and y AND // stack: y and z, x and z, x and y diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index cd84f240..daf07ab5 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -4,7 +4,7 @@ // stack: e, e, f, g, h, K[i], W[i] %sha2_bigsigma_1 // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - %stack (sig, (efg: 3)) -> (efg, sig) + %stack (sig, efg: 3) -> (efg, sig) // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] From 78a4b92e8324dafb92936399d3731c35255028dd Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 14 Sep 2022 11:20:24 -0700 Subject: [PATCH 085/104] in %stack, treat identifiers as 1-length blocks --- evm/src/cpu/kernel/ast.rs | 5 +-- evm/src/cpu/kernel/parser.rs | 4 +- 
.../cpu/kernel/stack/stack_manipulation.rs | 39 +++++++------------ evm/src/cpu/kernel/tests/sha2.rs | 4 +- 4 files changed, 19 insertions(+), 33 deletions(-) diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs index 3728aa35..6180b1c8 100644 --- a/evm/src/cpu/kernel/ast.rs +++ b/evm/src/cpu/kernel/ast.rs @@ -38,10 +38,7 @@ pub(crate) enum Item { /// The left hand side of a %stack stack-manipulation macro. #[derive(Eq, PartialEq, Clone, Debug)] -pub(crate) enum StackPlaceholder { - Identifier(String), - Block(String, usize), -} +pub(crate) struct StackPlaceholder(pub String, pub usize); /// The right hand side of a %stack stack-manipulation macro. #[derive(Eq, PartialEq, Clone, Debug)] diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs index fd762eae..b7a8124b 100644 --- a/evm/src/cpu/kernel/parser.rs +++ b/evm/src/cpu/kernel/parser.rs @@ -119,12 +119,12 @@ fn parse_stack_placeholder(target: Pair) -> StackPlaceholder { assert_eq!(target.as_rule(), Rule::stack_placeholder); let inner = target.into_inner().next().unwrap(); match inner.as_rule() { - Rule::identifier => StackPlaceholder::Identifier(inner.as_str().into()), + Rule::identifier => StackPlaceholder(inner.as_str().into(), 1), Rule::stack_block => { let mut block = inner.into_inner(); let identifier = block.next().unwrap().as_str(); let length = block.next().unwrap().as_str().parse().unwrap(); - StackPlaceholder::Block(identifier.to_string(), length) + StackPlaceholder(identifier.to_string(), length) } _ => panic!("Unexpected {:?}", inner.as_rule()), } diff --git a/evm/src/cpu/kernel/stack/stack_manipulation.rs b/evm/src/cpu/kernel/stack/stack_manipulation.rs index 7264dde9..36e4b83a 100644 --- a/evm/src/cpu/kernel/stack/stack_manipulation.rs +++ b/evm/src/cpu/kernel/stack/stack_manipulation.rs @@ -31,20 +31,14 @@ fn expand(names: Vec, replacements: Vec) -> let mut src = names .iter() .cloned() - .flat_map(|item| match item { - StackPlaceholder::Identifier(name) => { - 
stack_blocks.insert(name.clone(), 1); - vec![StackItem::NamedItem(name)] - } - StackPlaceholder::Block(name, n) => { - stack_blocks.insert(name.clone(), n); - (0..n) - .map(|i| { - let literal_name = format!("@{}.{}", name, i); - StackItem::NamedItem(literal_name) - }) - .collect_vec() - } + .flat_map(|StackPlaceholder(name, n)| { + stack_blocks.insert(name.clone(), n); + (0..n) + .map(|i| { + let literal_name = format!("@{}.{}", name, i); + StackItem::NamedItem(literal_name) + }) + .collect_vec() }) .collect_vec(); @@ -56,17 +50,12 @@ fn expand(names: Vec, replacements: Vec) -> // May be either a named item or a label. Named items have precedence. if stack_blocks.contains_key(&name) { let n = *stack_blocks.get(&name).unwrap(); - if n == 1 { - // A name, not an actual block. - vec![StackItem::NamedItem(name)] - } else { - (0..n) - .map(|i| { - let literal_name = format!("@{}.{}", name, i); - StackItem::NamedItem(literal_name) - }) - .collect_vec() - } + (0..n) + .map(|i| { + let literal_name = format!("@{}.{}", name, i); + StackItem::NamedItem(literal_name) + }) + .collect_vec() } else { vec![StackItem::PushTarget(PushTarget::Label(name))] } diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 97edb992..94b3ac61 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -20,7 +20,7 @@ fn test_sha2() -> Result<()> { let mut hasher = Sha256::new(); hasher.update(message.clone()); - let expected = format!("{:02X}", hasher.finalize()); + let expected = format!("{:X}", hasher.finalize()); let bytes: Vec = message.iter().map(|&x| U256::from(x as u32)).collect(); @@ -32,7 +32,7 @@ fn test_sha2() -> Result<()> { let after_sha2 = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; let stack_after_sha2 = after_sha2.stack(); let result = stack_after_sha2[1]; - let actual = format!("{:02X}", result); + let actual = format!("{:X}", result); assert_eq!(expected, actual); From 
7eba422792c25fc13632533f34a93f9d68f11291 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 14 Sep 2022 11:29:13 -0700 Subject: [PATCH 086/104] addressed comments --- evm/src/cpu/kernel/asm/sha2/ops.asm | 72 ++++--------------- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 2 +- evm/src/cpu/kernel/asm/util/basic_macros.asm | 73 ++++++++------------ 3 files changed, 46 insertions(+), 101 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 3f20b2d0..0953a9b5 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -3,21 +3,15 @@ // stack: x, y ADD // stack: x + y - DUP1 - // stack: x + y, x + y - %shr_const(32) - // stack: (x + y) >> 32, x + y - %shl_const(32) - // stack: ((x + y) >> 32) << 32, x + y - SWAP1 - // stack: x + y, ((x + y) >> 32) << 32 - SUB - // stack: x + y - ((x + y) >> 32) << 32 + %and_const(0xFFFFFFFF) + // stack: (x + y) & u32::MAX %endmacro // 32-bit right rotation -%macro rotr +%macro rotr(rot) + // stack: value + PUSH $rot // stack: rot, value DUP2 DUP2 @@ -42,43 +36,15 @@ ADD %endmacro -// 32-bit left rotation -%macro rotl - // stack: rot, value - DUP2 - DUP2 - // stack: rot, value, rot, value - PUSH 32 - SUB - // stack: 32 - rot, value, rot, value - SHR - // stack: value >> (32 - rot), rot, value - %stack (shifted, rot, value) -> (rot, value, shifted) - // stack: rot, value, value >> (32 - rot) - SHL - // stack: value << rot, value >> (32 - rot) - PUSH 32 - PUSH 1 - SWAP1 - SHL - // stack: 1 << 32, value << rot, value >> (32 - rot) - SWAP1 - MOD - // stack: (value << rot) % (1 << 32), value >> (32 - rot) - ADD -%endmacro - %macro sha2_sigma_0 // stack: x DUP1 // stack: x, x - PUSH 7 - %rotr + %rotr(7) // stack: rotr(x, 7), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 7) - PUSH 18 - %rotr + %rotr(18) // stack: rotr(x, 18), x, rotr(x, 7) SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) @@ -93,13 +59,11 @@ // stack: x DUP1 // stack: x, x - 
PUSH 17 - %rotr + %rotr(17) // stack: rotr(x, 17), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 17) - PUSH 19 - %rotr + %rotr(19) // stack: rotr(x, 19), x, rotr(x, 17) SWAP1 // stack: x, rotr(x, 19), rotr(x, 17) @@ -114,18 +78,15 @@ // stack: x DUP1 // stack: x, x - PUSH 2 - %rotr + %rotr(2) // stack: rotr(x, 2), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 2) - PUSH 13 - %rotr + %rotr(13) // stack: rotr(x, 13), x, rotr(x, 2) SWAP1 // stack: x, rotr(x, 13), rotr(x, 2) - PUSH 22 - %rotr + %rotr(22) // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) XOR XOR @@ -135,18 +96,15 @@ // stack: x DUP1 // stack: x, x - PUSH 6 - %rotr + %rotr(6) // stack: rotr(x, 6), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 6) - PUSH 11 - %rotr + %rotr(11) // stack: rotr(x, 11), x, rotr(x, 6) SWAP1 // stack: x, rotr(x, 11), rotr(x, 6) - PUSH 25 - %rotr + %rotr(25) // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) XOR XOR diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index daf07ab5..67ab73aa 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -4,7 +4,7 @@ // stack: e, e, f, g, h, K[i], W[i] %sha2_bigsigma_1 // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - %stack (sig, efg: 3) -> (efg, sig) + %stack (sig, e, f, g) -> (e, f, g, sig) // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 3ea34bce..fb9e6d01 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -1,46 +1,46 @@ %macro jump(dst) - push $dst + PUSH $dst jump %endmacro %macro jumpi(dst) - push $dst + PUSH $dst jumpi %endmacro %macro pop2 %rep 2 - pop + POP %endrep %endmacro %macro pop3 %rep 3 - pop + POP %endrep %endmacro %macro pop4 %rep 4 - pop + POP 
%endrep %endmacro %macro pop5 %rep 5 - pop + POP %endrep %endmacro %macro pop6 %rep 6 - pop + POP %endrep %endmacro %macro pop7 %rep 7 - pop + POP %endrep %endmacro @@ -162,21 +162,21 @@ // If pred is zero, yields z; otherwise, yields nz %macro select // stack: pred, nz, z - iszero + ISZERO // stack: pred == 0, nz, z - dup1 + DUP1 // stack: pred == 0, pred == 0, nz, z - iszero + ISZERO // stack: pred != 0, pred == 0, nz, z - swap3 + SWAP3 // stack: z, pred == 0, nz, pred != 0 - mul + MUL // stack: (pred == 0) * z, nz, pred != 0 - swap2 + SWAP2 // stack: pred != 0, nz, (pred == 0) * z - mul + MUL // stack: (pred != 0) * nz, (pred == 0) * z - add + ADD // stack: (pred != 0) * nz + (pred == 0) * z %endmacro @@ -184,27 +184,27 @@ // Assumes pred is boolean (either 0 or 1). %macro select_bool // stack: pred, nz, z - dup1 + DUP1 // stack: pred, pred, nz, z - iszero + ISZERO // stack: notpred, pred, nz, z - swap3 + SWAP3 // stack: z, pred, nz, notpred - mul + MUL // stack: pred * z, nz, notpred - swap2 + SWAP2 // stack: notpred, nz, pred * z - mul + MUL // stack: notpred * nz, pred * z - add + ADD // stack: notpred * nz + pred * z %endmacro %macro square // stack: x - dup1 + DUP1 // stack: x, x - mul + MUL // stack: x^2 %endmacro @@ -231,31 +231,18 @@ %endmacro %macro increment - push 1 - add + %add_const(1) %endmacro %macro decrement - push 1 - swap1 - sub + %sub_const(1) %endmacro %macro div2 - push 2 - swap1 - div -%endmacro - -%macro lt(x) - push $x - swap1 - lt + %div_const(2) %endmacro %macro iseven - push 2 - swap1 - mod - iszero + %mod_const(2) + ISZERO %endmacro From 5d7edb33d4e3086ae959ad4d18e33a07f0369d24 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 14 Sep 2022 11:31:58 -0700 Subject: [PATCH 087/104] comment --- evm/src/cpu/kernel/asm/sha2/temp_words.asm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index 67ab73aa..ed610947 100644 --- 
a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -1,3 +1,4 @@ +// "T_1" in the SHA-256 spec %macro sha2_temp_word1 // stack: e, f, g, h, K[i], W[i] DUP1 @@ -15,6 +16,7 @@ // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] %endmacro +// "T_2" in the SHA-256 spec %macro sha2_temp_word2 // stack: a, b, c DUP1 From c11c52646281f0af0995199ddac723f1e49f15a9 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 14 Sep 2022 11:35:28 -0700 Subject: [PATCH 088/104] cleanup and comments --- evm/src/cpu/kernel/asm/sha2/ops.asm | 10 ++-------- evm/src/cpu/kernel/asm/sha2/util.asm | 7 +++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 0953a9b5..4c57d491 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -3,7 +3,7 @@ // stack: x, y ADD // stack: x + y - %and_const(0xFFFFFFFF) + %truncate_to_u32 // stack: (x + y) & u32::MAX %endmacro @@ -25,13 +25,7 @@ // stack: 32 - rot, value, value >> rot SHL // stack: value << (32 - rot), value >> rot - PUSH 32 - PUSH 1 - SWAP1 - SHL - // stack: 1 << 32, value << (32 - rot), value >> rot - SWAP1 - MOD + %truncate_to_u32 // stack: (value << (32 - rot)) % (1 << 32), value >> rot ADD %endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/util.asm b/evm/src/cpu/kernel/asm/sha2/util.asm index f3f5b54c..28e71c12 100644 --- a/evm/src/cpu/kernel/asm/sha2/util.asm +++ b/evm/src/cpu/kernel/asm/sha2/util.asm @@ -1,11 +1,18 @@ +// We put the message schedule in memory starting at 64 * num_blocks + 2. %macro message_schedule_addr_from_num_blocks // stack: num_blocks %mul_const(64) %add_const(2) %endmacro +// We use memory starting at 320 * num_blocks + 2 (after the message schedule +// space) as scratch space to store stack values. 
%macro scratch_space_addr_from_num_blocks // stack: num_blocks %mul_const(320) %add_const(2) +%endmacro + +%macro truncate_to_u32 + %and_const(0xFFFFFFFF) %endmacro \ No newline at end of file From 65b8993b6f78b5e95678ade9638327378024039c Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 19 Sep 2022 10:31:55 -0700 Subject: [PATCH 089/104] addressed comments --- evm/src/cpu/kernel/asm/sha2/compression.asm | 7 +- evm/src/cpu/kernel/asm/sha2/memory.asm | 56 ++--- .../cpu/kernel/asm/sha2/message_schedule.asm | 42 +--- evm/src/cpu/kernel/asm/sha2/write_length.asm | 210 ++++++++---------- evm/src/cpu/kernel/tests/sha2.rs | 15 +- 5 files changed, 137 insertions(+), 193 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index bee829f3..2bf48e65 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -185,9 +185,7 @@ sha2_compression_loop: // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest POP // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - PUSH 64 - SWAP1 - MOD + %and_const(63) // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest SWAP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest @@ -296,4 +294,5 @@ sha2_compression_end: // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest %pop3 // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], 
h[0]+h[64]), retdest - STOP \ No newline at end of file + SWAP1 + JUMP diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 843461e7..cd7cef92 100644 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -115,43 +115,31 @@ // stack: offset, value SWAP1 // stack: value, offset - PUSH 1 - PUSH 8 - SHL - // stack: 1 << 8, value, offset - %stack (shift, val, offset) -> (val, shift, val, shift, offset) - // stack: value, 1 << 8, value, 1 << 8, offset - MOD - // stack: c_0 = value % (1 << 8), value, 1 << 8, offset - SWAP2 + DUP1 + // stack: value, value, offset + %and_const(0xff) + // stack: c_0 = value % (1 << 8), value, offset SWAP1 - // stack: value, 1 << 8, c_0, offset - PUSH 8 - SHR - // stack: value >> 8, 1 << 8, c_0, offset - DUP2 - DUP2 - // stack: value >> 8, 1 << 8, value >> 8, 1 << 8, c_0, offset - MOD - // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, 1 << 8, c_0, offset - SWAP2 + // stack: value, c_0, offset + %shr_const(8) + // stack: value >> 8, c_0, offset + DUP1 + // stack: value >> 8, value >> 8, c_0, offset + %and_const(0xff) + // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, c_0, offset SWAP1 - // stack: value >> 8, 1 << 8, c_1, c_0, offset - PUSH 8 - SHR - // stack: value >> 16, 1 << 8, c_1, c_0, offset - DUP2 - DUP2 - // stack: value >> 16, 1 << 8, value >> 16, 1 << 8, c_1, c_0, offset - MOD - // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, 1 << 8, c_1, c_0, offset - SWAP2 + // stack: value >> 8, c_1, c_0, offset + %shr_const(8) + // stack: value >> 16, c_1, c_0, offset + DUP1 + // stack: value >> 16, value >> 16, c_1, c_0, offset + %and_const(0xff) + // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, c_1, c_0, offset SWAP1 - // stack: value >> 16, 1 << 8, c_2, c_1, c_0, offset - PUSH 8 - SHR - // stack: value >> 24, 1 << 8, c_2, c_1, c_0, offset - MOD + // stack: value >> 16, c_2, c_1, c_0, offset + %shr_const(8) + // stack: value >> 24, c_2, 
c_1, c_0, offset + %and_const(0xff) // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset DUP5 // stack: offset, c_3, c_2, c_1, c_0, offset diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index 6b2fa472..f786d31d 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -26,22 +26,13 @@ sha2_gen_message_schedule_from_block_0_loop: // stack: counter, output_addr, block[0], block[1], retdest SWAP2 // stack: block[0], output_addr, counter, block[1], retdest - PUSH 1 - PUSH 32 - SHL - // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest - DUP2 - DUP2 - // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest - SWAP1 - // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest - MOD - // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest - SWAP2 - // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - DIV - // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest + DUP1 + // stack: block[0], block[0], output_addr, counter, block[1], retdest + %shr_const(32) + // stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest SWAP1 + // stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest + %truncate_to_u32 // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest DUP3 // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest @@ -76,22 +67,13 @@ sha2_gen_message_schedule_from_block_1_loop: // stack: counter, output_addr, block[1], block[0], retdest SWAP2 // stack: block[1], output_addr, counter, block[0], retdest - PUSH 1 - PUSH 32 - SHL - // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest - DUP2 - DUP2 - // stack: 1 << 32, block[1], 1 << 
32, block[1], output_addr, counter, block[0], retdest - SWAP1 - // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest - MOD - // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest - SWAP2 - // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - DIV - // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest + DUP1 + // stack: block[1], block[1], output_addr, counter, block[0], retdest + %shr_const(32) + // stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest SWAP1 + // stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest + %truncate_to_u32 // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest DUP3 // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index 40395707..5727498c 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -2,146 +2,118 @@ // stack: last_addr, length SWAP1 // stack: length, last_addr - PUSH 1 - PUSH 8 - SHL - - // stack: 1 << 8, length, last_addr DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr + // stack: length, length, last_addr + %and_const(0xff) + // stack: length % (1 << 8), length, last_addr DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: length % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, length % (1 << 8), 1 << 8, length, last_addr + // stack: last_addr, length % (1 << 8), length, last_addr %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 8 - SHR - // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 8) 
% (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 8) % (1 << 8), 1 << 8, length, last_addr - PUSH 1 + // stack: length, last_addr SWAP1 - SUB - // stack: last_addr - 1, (length >> 8) % (1 << 8), 1 << 8, length, last_addr + %decrement + SWAP1 + // stack: length, last_addr - 1 + %shr_const(8) + // stack: length >> 8, last_addr - 1 + DUP1 + // stack: length >> 8, length >> 8, last_addr - 1 + %and_const(0xff) + // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + DUP3 + // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + %mstore_kernel_general + + // stack: length >> 8, last_addr - 1 + SWAP1 + %decrement + SWAP1 + // stack: length >> 8, last_addr - 2 + %shr_const(8) + // stack: length >> 16, last_addr - 2 + DUP1 + // stack: length >> 16, length >> 16, last_addr - 2 + %and_const(0xff) + // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 + DUP3 + // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 16 - SHR - // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 16) % (1 << 8), 1 << 8, length, last_addr - PUSH 2 + // stack: length >> 16, last_addr - 2 SWAP1 - SUB - // stack: last_addr - 2, (length >> 16) % (1 << 8), 1 << 8, length, last_addr + %decrement + SWAP1 + // stack: length >> 16, last_addr - 3 + %shr_const(8) + // stack: length >> 24, last_addr - 3 + DUP1 + // stack: length >> 24, length >> 24, last_addr - 3 + %and_const(0xff) + // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 + DUP3 + // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 
8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 24 - SHR - // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr - PUSH 3 + // stack: length >> 24, last_addr - 3 SWAP1 - SUB - // stack: last_addr - 3, (length >> 24) % (1 << 8), 1 << 8, length, last_addr + %decrement + SWAP1 + // stack: length >> 24, last_addr - 4 + %shr_const(8) + // stack: length >> 32, last_addr - 4 + DUP1 + // stack: length >> 32, length >> 32, last_addr - 4 + %and_const(0xff) + // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 + DUP3 + // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 32 - SHR - // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr - PUSH 4 + // stack: length >> 32, last_addr - 4 SWAP1 - SUB - // stack: last_addr - 4, (length >> 32) % (1 << 8), 1 << 8, length, last_addr + %decrement + SWAP1 + // stack: length >> 32, last_addr - 5 + %shr_const(8) + // stack: length >> 40, last_addr - 5 + DUP1 + // stack: length >> 40, length >> 40, last_addr - 5 + %and_const(0xff) + // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 + DUP3 + // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 40 - SHR - // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 40) % 
(1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr - PUSH 5 + // stack: length >> 40, last_addr - 5 SWAP1 - SUB - // stack: last_addr - 5, (length >> 40) % (1 << 8), 1 << 8, length, last_addr + %decrement + SWAP1 + // stack: length >> 40, last_addr - 6 + %shr_const(8) + // stack: length >> 48, last_addr - 6 + DUP1 + // stack: length >> 48, length >> 48, last_addr - 6 + %and_const(0xff) + // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 + DUP3 + // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 %mstore_kernel_general - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 48 - SHR - // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr - PUSH 6 + // stack: length >> 48, last_addr - 6 SWAP1 - SUB - // stack: last_addr - 6, (length >> 48) % (1 << 8), 1 << 8, length, last_addr - %mstore_kernel_general - - // stack: 1 << 8, length, last_addr - DUP1 - // stack: 1 << 8, 1 << 8, length, last_addr - DUP3 - // stack: length, 1 << 8, 1 << 8, length, last_addr - PUSH 56 - SHR - // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr - MOD - // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr - DUP4 - // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr - PUSH 7 + %decrement SWAP1 - SUB - // stack: last_addr - 7, (length >> 56) % (1 << 8), 1 << 8, length, last_addr + // stack: length >> 48, last_addr - 7 + %shr_const(8) + // stack: length >> 56, last_addr - 7 + DUP1 + // stack: length >> 56, length >> 56, last_addr - 7 + %and_const(0xff) + // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 + DUP3 + // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, 
last_addr - 7 %mstore_kernel_general - %pop3 + %pop2 // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index 94b3ac61..a493c1fb 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -15,25 +15,28 @@ fn test_sha2() -> Result<()> { let mut rng = thread_rng(); + // Generate a random message, between 0 and 9999 bytes. let num_bytes = rng.gen_range(0..10000); let message: Vec = (0..num_bytes).map(|_| rng.gen()).collect(); + // Hash the message using a standard Sha256 implementation. let mut hasher = Sha256::new(); hasher.update(message.clone()); let expected = format!("{:X}", hasher.finalize()); - let bytes: Vec = message.iter().map(|&x| U256::from(x as u32)).collect(); - + // Load the message onto the stack. let mut initial_stack = vec![U256::from(num_bytes)]; + let bytes: Vec = message.iter().map(|&x| U256::from(x as u32)).collect(); initial_stack.extend(bytes); initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); initial_stack.reverse(); - let after_sha2 = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; - let stack_after_sha2 = after_sha2.stack(); - let result = stack_after_sha2[1]; - let actual = format!("{:X}", result); + // Run the sha2 kernel code. + let result = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; + let result_hash = result.stack()[0]; + let actual = format!("{:X}", result_hash); + // Check that the result is correct. 
assert_eq!(expected, actual); Ok(()) From 42320d815e74bfb77933f6ee758f9ea09dfb11b7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 30 Sep 2022 09:13:56 -0700 Subject: [PATCH 090/104] fix --- evm/Cargo.toml | 1 - evm/src/cpu/kernel/asm/modexp.asm | 76 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 evm/src/cpu/kernel/asm/modexp.asm diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 7df7edd5..5ee3b1ff 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -8,7 +8,6 @@ edition = "2021" plonky2 = { path = "../plonky2", default-features = false, features = ["rand", "timing"] } plonky2_util = { path = "../util" } eth-trie-utils = { git = "https://github.com/mir-protocol/eth-trie-utils.git", rev = "dd3595b4ba7923f8d465450d210f17a2b4e20f96" } -maybe_rayon = { path = "../maybe_rayon" } anyhow = "1.0.40" env_logger = "0.9.0" ethereum-types = "0.14.0" diff --git a/evm/src/cpu/kernel/asm/modexp.asm b/evm/src/cpu/kernel/asm/modexp.asm new file mode 100644 index 00000000..f1b448cb --- /dev/null +++ b/evm/src/cpu/kernel/asm/modexp.asm @@ -0,0 +1,76 @@ +/// Recursive implementation of exp. +/// Equivalent to: +/// def exp(x, e): +/// if e == 0: +/// # The path where JUMPI does not jump to `step_case` +/// return 1 +/// else: +/// # This is under the `step_case` label +/// return (x if e % 2 else 1) * exp(x * x, e // 2) +/// Note that this correctly handles exp(0, 0) == 1. 
+ +global modexp: + // stack: x, e, retdest + dup2 + // stack: e, x, e, retdest + %jumpi(step_case) + // stack: x, e, retdest + pop + // stack: e, retdest + pop + // stack: retdest + push 1 + // stack: 1, retdest + swap1 + // stack: retdest, 1 + jump + +step_case: + // stack: x, e, retdest + push recursion_return + // stack: recursion_return, x, e, retdest + push 2 + // stack: 2, recursion_return, x, e, retdest + dup4 + // stack: e, 2, recursion_return, x, e, retdest + div + // stack: e / 2, recursion_return, x, e, retdest + dup3 + // stack: x, e / 2, recursion_return, x, e, retdest + %square + // stack: x * x, e / 2, recursion_return, x, e, retdest + %jump(exp) +recursion_return: + // stack: exp(x * x, e / 2), x, e, retdest + push 2 + // stack: 2, exp(x * x, e / 2), x, e, retdest + dup4 + // stack: e, 2, exp(x * x, e / 2), x, e, retdest + mod + // stack: e % 2, exp(x * x, e / 2), x, e, retdest + push 1 + // stack: 1, e % 2, exp(x * x, e / 2), x, e, retdest + dup4 + // stack: x, 1, e % 2, exp(x * x, e / 2), x, e, retdest + sub + // stack: x - 1, e % 2, exp(x * x, e / 2), x, e, retdest + mul + // stack: (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest + push 1 + // stack: 1, (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest + add + // stack: 1 + (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest + mul + // stack: (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2), x, e, retdest + swap3 + // stack: retdest, x, e, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) + swap2 + // stack: e, x, retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) + pop + // stack: x, retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) + pop + // stack: retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) + jump + +global sys_exp: + PANIC From cfbc029e7b0b77822aff29703f012f4fbe603563 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 19 Sep 2022 10:51:13 -0700 Subject: [PATCH 091/104] cleaned up test: compare as U256, not string --- evm/src/cpu/kernel/tests/sha2.rs | 5 ++--- 1 file changed, 2 
insertions(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index a493c1fb..e9e5e9d1 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -22,7 +22,7 @@ fn test_sha2() -> Result<()> { // Hash the message using a standard Sha256 implementation. let mut hasher = Sha256::new(); hasher.update(message.clone()); - let expected = format!("{:X}", hasher.finalize()); + let expected = U256::from(&hasher.finalize()[..]); // Load the message onto the stack. let mut initial_stack = vec![U256::from(num_bytes)]; @@ -33,8 +33,7 @@ fn test_sha2() -> Result<()> { // Run the sha2 kernel code. let result = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; - let result_hash = result.stack()[0]; - let actual = format!("{:X}", result_hash); + let actual = result.stack()[0]; // Check that the result is correct. assert_eq!(expected, actual); From 19b14c272392e1f14613a8674b77bec9af837e0b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 23 Sep 2022 11:56:07 -0700 Subject: [PATCH 092/104] Update evm/src/cpu/kernel/tests/sha2.rs Co-authored-by: Dima V <50062893+typ3c4t@users.noreply.github.com> --- evm/src/cpu/kernel/tests/sha2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index e9e5e9d1..d0c21090 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -6,7 +6,7 @@ use rand::{thread_rng, Rng}; use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::interpreter::run; +use crate::cpu::kernel::interpreter::run_with_kernel; #[test] fn test_sha2() -> Result<()> { From fa01f83e652215a6a5c1312a64fd5e5f6845ba06 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 23 Sep 2022 11:56:18 -0700 Subject: [PATCH 093/104] Update evm/src/cpu/kernel/tests/sha2.rs Co-authored-by: Dima V <50062893+typ3c4t@users.noreply.github.com> --- 
evm/src/cpu/kernel/tests/sha2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/sha2.rs index d0c21090..cb6d580a 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/sha2.rs @@ -32,7 +32,7 @@ fn test_sha2() -> Result<()> { initial_stack.reverse(); // Run the sha2 kernel code. - let result = run(&kernel.code, sha2, initial_stack, &kernel.prover_inputs)?; + let result = run_with_kernel(&kernel, sha2, initial_stack)?; let actual = result.stack()[0]; // Check that the result is correct. From 1f3ee6dae2727ff49e9ae6d2b2dff6d0a1c7cf3d Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 23 Sep 2022 12:00:05 -0700 Subject: [PATCH 094/104] remove prover_inputs from Interpreter --- evm/src/cpu/kernel/interpreter.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index c4eea076..10561896 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -74,9 +74,6 @@ pub struct Interpreter<'a> { pub(crate) memory: InterpreterMemory, pub(crate) generation_state: GenerationState, prover_inputs_map: &'a HashMap, - /// Non-deterministic prover inputs, stored backwards so that popping the last item gives the - /// next prover input. 
- prover_inputs: Vec, pub(crate) halt_offsets: Vec, running: bool, } From 1e03c438e011d0c2ee30722870fa12dea33e99a7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 10:00:39 -0700 Subject: [PATCH 095/104] moved memory functions to memory ASM file (not sha2) --- evm/src/cpu/kernel/aggregator.rs | 1 - evm/src/cpu/kernel/asm/memory/core.asm | 218 ++++++++++++++++++++++--- evm/src/cpu/kernel/asm/sha2/memory.asm | 162 ------------------ 3 files changed, 194 insertions(+), 187 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/sha2/memory.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 797e784d..1615ee1e 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -41,7 +41,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/exp.asm"), include_str!("asm/sha2/compression.asm"), include_str!("asm/sha2/constants.asm"), - include_str!("asm/sha2/memory.asm"), include_str!("asm/sha2/message_schedule.asm"), include_str!("asm/sha2/ops.asm"), include_str!("asm/sha2/store_pad.asm"), diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 6722b0ca..df50929c 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -55,6 +55,148 @@ // stack: (empty) %endmacro +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from the kernel. 
+%macro mload_kernel_u32(segment) + // stack: offset + DUP1 + %mload_kernel($segment) + // stack: c_3, offset + %shl_const(8) + // stack: c_3 << 8, offset + DUP2 + %add_const(1) + %mload_kernel($segment) + OR + // stack: (c_3 << 8) | c_2, offset + %shl_const(8) + // stack: ((c_3 << 8) | c_2) << 8, offset + DUP2 + %add_const(2) + %mload_kernel($segment) + OR + // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset + %shl_const(8) + // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset + SWAP1 + %add_const(3) + %mload_kernel($segment) + OR + // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 +%endmacro + +// Load a u256 (big-endian) from the kernel. +%macro mload_kernel_u256(segment) + // stack: offset + DUP1 + %mload_kernel_u32($segment) + // stack: c_7, offset + %shl_const(32) + // stack: c7 << 32, offset + DUP2 + %add_const(4) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 32) | c_6, offset + %shl_const(32) + // stack: ((c_7 << 32) | c_6) << 32, offset + DUP2 + %add_const(8) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset + %shl_const(32) + // stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset + DUP2 + %add_const(12) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset + %shl_const(32) + // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset + DUP2 + %add_const(16) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset + %shl_const(32) + // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset + DUP2 + %add_const(20) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset + %shl_const(32) + // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset + DUP2 + %add_const(24) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 192) | (c_6 << 
160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset + %shl_const(32) + // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset + DUP2 + %add_const(28) + %mload_kernel_u32($segment) + OR + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset + SWAP1 + POP + // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to the kernel. +%macro mstore_kernel_u32(segment) + // stack: offset, value + SWAP1 + // stack: value, offset + DUP1 + // stack: value, value, offset + %and_const(0xff) + // stack: c_0 = value % (1 << 8), value, offset + SWAP1 + // stack: value, c_0, offset + %shr_const(8) + // stack: value >> 8, c_0, offset + DUP1 + // stack: value >> 8, value >> 8, c_0, offset + %and_const(0xff) + // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, c_0, offset + SWAP1 + // stack: value >> 8, c_1, c_0, offset + %shr_const(8) + // stack: value >> 16, c_1, c_0, offset + DUP1 + // stack: value >> 16, value >> 16, c_1, c_0, offset + %and_const(0xff) + // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, c_1, c_0, offset + SWAP1 + // stack: value >> 16, c_2, c_1, c_0, offset + %shr_const(8) + // stack: value >> 24, c_2, c_1, c_0, offset + %and_const(0xff) + // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset + DUP5 + // stack: offset, c_3, c_2, c_1, c_0, offset + %mstore_kernel($segment) + // stack: c_2, c_1, c_0, offset + DUP4 + // stack: offset, c_2, c_1, c_0, offset + %add_const(1) + %mstore_kernel($segment) + // stack: c_1, c_0, offset + DUP3 + // stack: offset, c_1, c_0, offset + %add_const(2) + %mstore_kernel($segment) + // stack: c_0, offset + SWAP1 + // stack: offset, c_0 + %add_const(3) + %mstore_kernel($segment) +%endmacro + // Load a single byte from 
kernel code. %macro mload_kernel_code // stack: offset @@ -62,34 +204,41 @@ // stack: value %endmacro +// Load a single byte from kernel general memory. +%macro mload_kernel_general + // stack: offset + %mload_kernel(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + // Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), // from kernel code. %macro mload_kernel_code_u32 // stack: offset - DUP1 - %mload_kernel_code - // stack: c_3, offset - %shl_const(8) - // stack: c_3 << 8, offset - DUP2 - %add_const(1) - %mload_kernel_code - OR - // stack: (c_3 << 8) | c_2, offset - %shl_const(8) - // stack: ((c_3 << 8) | c_2) << 8, offset - DUP2 - %add_const(2) - %mload_kernel_code - OR - // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset - %shl_const(8) - // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset - SWAP1 - %add_const(3) - %mload_kernel_code - OR - // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 + %mload_kernel_u32(@SEGMENT_CODE) + // stack: value +%endmacro + +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from kernel general memory. +%macro mload_kernel_general_u32 + // stack: offset + %mload_kernel_u32(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + +// Load a u256 (big-endian) from kernel code. +%macro mload_kernel_code_u256 + // stack: offset + %mload_kernel_u256(@SEGMENT_CODE) + // stack: value +%endmacro + +// Load a u256 (big-endian) from kernel general memory. +%macro mload_kernel_general_u256 + // stack: offset + %mload_kernel_u256(@SEGMENT_KERNEL_GENERAL) + // stack: value %endmacro // Store a single byte to kernel code. @@ -99,6 +248,27 @@ // stack: (empty) %endmacro +// Store a single byte to kernel general memory. +%macro mstore_kernel_general + // stack: offset, value + %mstore_kernel(@SEGMENT_RLP_RAW) + // stack: (empty) +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to kernel code. 
+%macro mstore_kernel_code_u32 + // stack: offset, value + %mstore_kernel_u32(@SEGMENT_CODE) +%endmacro + +// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// to kernel general memory. +%macro mstore_kernel_general_u32 + // stack: offset, value + %mstore_kernel_u32(@SEGMENT_KERNEL_GENERAL) +%endmacro + // Store a single byte to @SEGMENT_RLP_RAW. %macro mstore_rlp // stack: offset, value diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm deleted file mode 100644 index cd7cef92..00000000 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ /dev/null @@ -1,162 +0,0 @@ -// Load a single byte from kernel general memory. -%macro mload_kernel_general - // stack: offset - PUSH @SEGMENT_KERNEL_GENERAL - // stack: segment, offset - PUSH 0 // kernel has context 0 - // stack: context, segment, offset - MLOAD_GENERAL - // stack: value -%endmacro - -// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), -// from kernel general memory. -%macro mload_kernel_general_u32 - // stack: offset - DUP1 - %mload_kernel_general - // stack: c_3, offset - %shl_const(8) - // stack: c_3 << 8, offset - DUP2 - %increment - %mload_kernel_general - OR - // stack: (c_3 << 8) | c_2, offset - %shl_const(8) - // stack: ((c_3 << 8) | c_2) << 8, offset - DUP2 - %add_const(2) - %mload_kernel_general - OR - // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset - %shl_const(8) - // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset - SWAP1 - %add_const(3) - %mload_kernel_general - OR - // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 -%endmacro - -// Load 256 bits (half of a 512-bit SHA-2 block) from general kernel memory. 
-%macro mload_kernel_general_u256 - // stack: offset - DUP1 - %mload_kernel_general_u32 - // stack: c_7, offset - %shl_const(32) - // stack: c7 << 32, offset - DUP2 - %add_const(4) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 32) | c_6, offset - %shl_const(32) - // stack: ((c_7 << 32) | c_6) << 32, offset - DUP2 - %add_const(8) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset - %shl_const(32) - // stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset - DUP2 - %add_const(12) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset - %shl_const(32) - // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset - DUP2 - %add_const(16) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset - %shl_const(32) - // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset - DUP2 - %add_const(20) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset - %shl_const(32) - // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset - DUP2 - %add_const(24) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset - %shl_const(32) - // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset - DUP2 - %add_const(28) - %mload_kernel_general_u32 - OR - // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset - SWAP1 - POP - // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 -%endmacro - -// Store a single byte to kernel general memory. 
-%macro mstore_kernel_general - // stack: offset, value - PUSH @SEGMENT_KERNEL_GENERAL - // stack: segment, offset - PUSH 0 // kernel has context 0 - // stack: context, segment, offset, value - MSTORE_GENERAL -%endmacro - -// Store a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), -// to kernel general memory. -%macro mstore_kernel_general_u32 - // stack: offset, value - SWAP1 - // stack: value, offset - DUP1 - // stack: value, value, offset - %and_const(0xff) - // stack: c_0 = value % (1 << 8), value, offset - SWAP1 - // stack: value, c_0, offset - %shr_const(8) - // stack: value >> 8, c_0, offset - DUP1 - // stack: value >> 8, value >> 8, c_0, offset - %and_const(0xff) - // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, c_0, offset - SWAP1 - // stack: value >> 8, c_1, c_0, offset - %shr_const(8) - // stack: value >> 16, c_1, c_0, offset - DUP1 - // stack: value >> 16, value >> 16, c_1, c_0, offset - %and_const(0xff) - // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, c_1, c_0, offset - SWAP1 - // stack: value >> 16, c_2, c_1, c_0, offset - %shr_const(8) - // stack: value >> 24, c_2, c_1, c_0, offset - %and_const(0xff) - // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset - DUP5 - // stack: offset, c_3, c_2, c_1, c_0, offset - %mstore_kernel_general - // stack: c_2, c_1, c_0, offset - DUP4 - // stack: offset, c_2, c_1, c_0, offset - %add_const(1) - %mstore_kernel_general - // stack: c_1, c_0, offset - DUP3 - // stack: offset, c_1, c_0, offset - %add_const(2) - %mstore_kernel_general - // stack: c_0, offset - SWAP1 - // stack: offset, c_0 - %add_const(3) - %mstore_kernel_general -%endmacro From 9f49521e22e6f4dedacaa0e0958a776b5b3ec30e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 27 Sep 2022 17:16:04 -0700 Subject: [PATCH 096/104] label name simplification --- evm/src/cpu/kernel/asm/sha2/compression.asm | 16 +++---- .../cpu/kernel/asm/sha2/message_schedule.asm | 44 +++++++++---------- 
evm/src/cpu/kernel/asm/sha2/store_pad.asm | 8 ++-- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 2bf48e65..5325e7d0 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -45,7 +45,7 @@ global sha2_compression: PUSH sha2_constants_h %mload_kernel_code_u32 // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -sha2_compression_start_block: +compression_start_block: // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. DUP10 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest @@ -113,7 +113,7 @@ sha2_compression_start_block: // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest POP // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -sha2_compression_loop: +compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest DUP11 @@ -194,9 +194,9 @@ sha2_compression_loop: DUP12 // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest ISZERO - %jumpi(sha2_compression_end_block) - %jump(sha2_compression_loop) -sha2_compression_end_block: + %jumpi(compression_end_block) + %jump(compression_loop) +compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest DUP10 @@ -267,11 +267,11 @@ sha2_compression_end_block: // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest ISZERO // In this case, we've finished all the blocks. 
- %jumpi(sha2_compression_end) + %jumpi(compression_end) // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest %stack (num_blocks, working: 8) -> (working, num_blocks) - %jump(sha2_compression_start_block) -sha2_compression_end: + %jump(compression_start_block) +compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest POP // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index f786d31d..d3b5c6c1 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -1,7 +1,7 @@ // Precodition: stack contains address of one message block, followed by output address // Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks // of message schedule (in four-byte increments) -global sha2_gen_message_schedule_from_block: +gen_message_schedule_from_block: // stack: block_addr, output_addr, retdest DUP1 // stack: block_addr, block_addr, output_addr, retdest @@ -20,8 +20,8 @@ global sha2_gen_message_schedule_from_block: %add_const(28) PUSH 8 // stack: counter=8, output_addr + 28, block[0], block[1], retdest - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_loop: + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_loop: // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule. 
// stack: counter, output_addr, block[0], block[1], retdest SWAP2 @@ -49,9 +49,9 @@ sha2_gen_message_schedule_from_block_0_loop: %decrement DUP1 ISZERO - %jumpi(sha2_gen_message_schedule_from_block_0_end) - %jump(sha2_gen_message_schedule_from_block_0_loop) -sha2_gen_message_schedule_from_block_0_end: + %jumpi(gen_message_schedule_from_block_0_end) + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest POP PUSH 8 @@ -62,7 +62,7 @@ sha2_gen_message_schedule_from_block_0_end: // stack: output_addr + 64, counter, block[1], block[0], retdest SWAP1 // stack: counter, output_addr + 64, block[1], block[0], retdest -sha2_gen_message_schedule_from_block_1_loop: +gen_message_schedule_from_block_1_loop: // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. // stack: counter, output_addr, block[1], block[0], retdest SWAP2 @@ -90,9 +90,9 @@ sha2_gen_message_schedule_from_block_1_loop: %decrement DUP1 ISZERO - %jumpi(sha2_gen_message_schedule_from_block_1_end) - %jump(sha2_gen_message_schedule_from_block_1_loop) -sha2_gen_message_schedule_from_block_1_end: + %jumpi(gen_message_schedule_from_block_1_end) + %jump(gen_message_schedule_from_block_1_loop) +gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest POP // stack: output_addr, block[0], block[1], retdest @@ -104,7 +104,7 @@ sha2_gen_message_schedule_from_block_1_end: // stack: output_addr + 36, counter, block[0], block[1], retdest SWAP1 // stack: counter, output_addr + 36, block[0], block[1], retdest -sha2_gen_message_schedule_remaining_loop: +gen_message_schedule_remaining_loop: // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
// stack: counter, output_addr, block[0], block[1], retdest SWAP1 @@ -179,9 +179,9 @@ sha2_gen_message_schedule_remaining_loop: // stack: counter - 1, output_addr + 4, block[0], block[1], retdest DUP1 ISZERO - %jumpi(sha2_gen_message_schedule_remaining_end) - %jump(sha2_gen_message_schedule_remaining_loop) -sha2_gen_message_schedule_remaining_end: + %jumpi(gen_message_schedule_remaining_end) + %jump(gen_message_schedule_remaining_loop) +gen_message_schedule_remaining_end: // stack: counter=0, output_addr, block[0], block[1], retdest %pop4 JUMP @@ -200,16 +200,16 @@ global sha2_gen_all_message_schedules: // stack: num_blocks, output_addr, output_addr, retdest PUSH 1 // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest -sha2_gen_all_message_schedules_loop: +gen_all_message_schedules_loop: // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - PUSH sha2_gen_all_message_schedules_loop_end - // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + PUSH gen_all_message_schedules_loop_end + // stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest DUP4 // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest DUP3 // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - %jump(sha2_gen_message_schedule_from_block) -sha2_gen_all_message_schedules_loop_end: + %jump(gen_message_schedule_from_block) +gen_all_message_schedules_loop_end: // stack: cur_addr, counter, cur_output_addr, output_addr, retdest %add_const(64) // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest @@ -224,9 +224,9 @@ sha2_gen_all_message_schedules_loop_end: DUP2 // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest ISZERO - %jumpi(sha2_gen_all_message_schedules_end) - 
%jump(sha2_gen_all_message_schedules_loop) -sha2_gen_all_message_schedules_end: + %jumpi(gen_all_message_schedules_end) + %jump(gen_all_message_schedules_loop) +gen_all_message_schedules_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 // stack: output_addr, retdest diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index 82ed58c0..ce935ec3 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -8,12 +8,12 @@ global sha2_store: // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest PUSH 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest -sha2_store_loop: +store_loop: // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest DUP2 // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest ISZERO - %jumpi(sha2_store_end) + %jumpi(store_end) // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest %stack (addr, counter, val) -> (addr, val, counter, addr) // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest @@ -25,8 +25,8 @@ sha2_store_loop: // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest - %jump(sha2_store_loop) -sha2_store_end: + %jump(store_loop) +store_end: // stack: addr, counter, retdest %pop2 // stack: retdest From 69e33eff7296c4769d0c7ccc18fba2b3688d0c2e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 28 Sep 2022 09:29:54 -0700 Subject: [PATCH 097/104] fix --- evm/src/cpu/kernel/interpreter.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 10561896..b0589db9 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -444,13 +444,7 @@ impl<'a> Interpreter<'a> { let value = self.pop(); self.push(value >> shift); } - - fn run_shr(&mut self) { - let shift = self.pop(); - let x = self.pop(); - self.push(x >> shift); - } - + fn run_keccak256(&mut self) { let offset = self.pop().as_usize(); let size = self.pop().as_usize(); From 416a7a868badf262d67f7ea84bd90851aa9ada76 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 28 Sep 2022 09:30:00 -0700 Subject: [PATCH 098/104] fmt --- evm/src/cpu/kernel/interpreter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index b0589db9..589ba6b3 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -444,7 +444,7 @@ impl<'a> Interpreter<'a> { let value = self.pop(); self.push(value >> shift); } - + fn run_keccak256(&mut self) { let offset = self.pop().as_usize(); let size = self.pop().as_usize(); From 249fc6c1e480a0ab635666575344f74bb58eb451 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 30 Sep 2022 12:07:25 -0700 Subject: [PATCH 099/104] fix --- evm/src/cpu/kernel/asm/modexp.asm | 76 ------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/modexp.asm diff --git a/evm/src/cpu/kernel/asm/modexp.asm b/evm/src/cpu/kernel/asm/modexp.asm deleted file mode 100644 index 
f1b448cb..00000000 --- a/evm/src/cpu/kernel/asm/modexp.asm +++ /dev/null @@ -1,76 +0,0 @@ -/// Recursive implementation of exp. -/// Equivalent to: -/// def exp(x, e): -/// if e == 0: -/// # The path where JUMPI does not jump to `step_case` -/// return 1 -/// else: -/// # This is under the `step_case` label -/// return (x if e % 2 else 1) * exp(x * x, e // 2) -/// Note that this correctly handles exp(0, 0) == 1. - -global modexp: - // stack: x, e, retdest - dup2 - // stack: e, x, e, retdest - %jumpi(step_case) - // stack: x, e, retdest - pop - // stack: e, retdest - pop - // stack: retdest - push 1 - // stack: 1, retdest - swap1 - // stack: retdest, 1 - jump - -step_case: - // stack: x, e, retdest - push recursion_return - // stack: recursion_return, x, e, retdest - push 2 - // stack: 2, recursion_return, x, e, retdest - dup4 - // stack: e, 2, recursion_return, x, e, retdest - div - // stack: e / 2, recursion_return, x, e, retdest - dup3 - // stack: x, e / 2, recursion_return, x, e, retdest - %square - // stack: x * x, e / 2, recursion_return, x, e, retdest - %jump(exp) -recursion_return: - // stack: exp(x * x, e / 2), x, e, retdest - push 2 - // stack: 2, exp(x * x, e / 2), x, e, retdest - dup4 - // stack: e, 2, exp(x * x, e / 2), x, e, retdest - mod - // stack: e % 2, exp(x * x, e / 2), x, e, retdest - push 1 - // stack: 1, e % 2, exp(x * x, e / 2), x, e, retdest - dup4 - // stack: x, 1, e % 2, exp(x * x, e / 2), x, e, retdest - sub - // stack: x - 1, e % 2, exp(x * x, e / 2), x, e, retdest - mul - // stack: (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest - push 1 - // stack: 1, (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest - add - // stack: 1 + (x - 1) * (e % 2), exp(x * x, e / 2), x, e, retdest - mul - // stack: (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2), x, e, retdest - swap3 - // stack: retdest, x, e, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) - swap2 - // stack: e, x, retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) - pop - // stack: x, 
retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) - pop - // stack: retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) - jump - -global sys_exp: - PANIC From 6ff0b84e59d124179320a747931677140dc12b03 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 10:11:59 -0700 Subject: [PATCH 100/104] fix --- evm/src/cpu/kernel/asm/memory/core.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index df50929c..26196df5 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -251,7 +251,7 @@ // Store a single byte to kernel general memory. %macro mstore_kernel_general // stack: offset, value - %mstore_kernel(@SEGMENT_RLP_RAW) + %mstore_kernel(@SEGMENT_KERNEL_GENERAL) // stack: (empty) %endmacro From 9ee861fb15bbc2a7f1cf4833f33920aa69e36d4d Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 13:43:03 -0700 Subject: [PATCH 101/104] minor fixes --- evm/src/cpu/kernel/aggregator.rs | 1 - evm/src/cpu/kernel/asm/sha2/ops.asm | 1 - evm/src/cpu/kernel/asm/sha2/util.asm | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 1615ee1e..67a0051b 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -73,7 +73,6 @@ pub(crate) fn combined_kernel() -> Kernel { #[cfg(test)] mod tests { - use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; use log::debug; diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 4c57d491..f24455ef 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -138,4 +138,3 @@ OR OR %endmacro - \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/util.asm b/evm/src/cpu/kernel/asm/sha2/util.asm index 28e71c12..e090df0a 100644 --- a/evm/src/cpu/kernel/asm/sha2/util.asm +++ 
b/evm/src/cpu/kernel/asm/sha2/util.asm @@ -15,4 +15,4 @@ %macro truncate_to_u32 %and_const(0xFFFFFFFF) -%endmacro \ No newline at end of file +%endmacro From 99fb730aea0fea50a87a743805617ea342225800 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 14:07:21 -0700 Subject: [PATCH 102/104] rearranging and cleanup --- evm/src/cpu/kernel/aggregator.rs | 1 - evm/src/cpu/kernel/asm/sha2/compression.asm | 8 ++++++ .../cpu/kernel/asm/sha2/message_schedule.asm | 11 ++++++-- evm/src/cpu/kernel/asm/sha2/ops.asm | 12 +------- evm/src/cpu/kernel/asm/sha2/store_pad.asm | 6 ++-- evm/src/cpu/kernel/asm/sha2/util.asm | 18 ------------ evm/src/cpu/kernel/asm/util/basic_macros.asm | 13 +++++++++ evm/src/cpu/kernel/tests/{sha2.rs => hash.rs} | 28 ++++++++++++------- evm/src/cpu/kernel/tests/mod.rs | 2 +- 9 files changed, 53 insertions(+), 46 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/sha2/util.asm rename evm/src/cpu/kernel/tests/{sha2.rs => hash.rs} (63%) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 67a0051b..5fb6ad08 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -45,7 +45,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/sha2/ops.asm"), include_str!("asm/sha2/store_pad.asm"), include_str!("asm/sha2/temp_words.asm"), - include_str!("asm/sha2/util.asm"), include_str!("asm/sha2/write_length.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 5325e7d0..eb9b73b8 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -1,3 +1,11 @@ +// We use memory starting at 320 * num_blocks + 2 (after the message schedule +// space) as scratch space to store stack values. 
+%macro scratch_space_addr_from_num_blocks + // stack: num_blocks + %mul_const(320) + %add_const(2) +%endmacro + global sha2_compression: // stack: message_schedule_addr, retdest PUSH 0 diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index d3b5c6c1..78d98634 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -1,3 +1,10 @@ +// We put the message schedule in memory starting at 64 * num_blocks + 2. +%macro message_schedule_addr_from_num_blocks + // stack: num_blocks + %mul_const(64) + %add_const(2) +%endmacro + // Precodition: stack contains address of one message block, followed by output address // Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks // of message schedule (in four-byte increments) @@ -32,7 +39,7 @@ gen_message_schedule_from_block_0_loop: // stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest SWAP1 // stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest - %truncate_to_u32 + %as_u32 // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest DUP3 // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest @@ -73,7 +80,7 @@ gen_message_schedule_from_block_1_loop: // stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest SWAP1 // stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest - %truncate_to_u32 + %as_u32 // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest DUP3 // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index f24455ef..7d8054ca 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -1,13 +1,3 @@ -// u32 addition 
(discarding 2^32 bit) -%macro add_u32 - // stack: x, y - ADD - // stack: x + y - %truncate_to_u32 - // stack: (x + y) & u32::MAX -%endmacro - - // 32-bit right rotation %macro rotr(rot) // stack: value @@ -25,7 +15,7 @@ // stack: 32 - rot, value, value >> rot SHL // stack: value << (32 - rot), value >> rot - %truncate_to_u32 + %as_u32 // stack: (value << (32 - rot)) % (1 << 32), value >> rot ADD %endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index ce935ec3..7594eb81 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -1,3 +1,6 @@ +global sha2: + %jump(sha2_store) + global sha2_store: // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest DUP1 @@ -84,6 +87,3 @@ global sha2_pad: // stack: num_blocks, retdest %message_schedule_addr_from_num_blocks %jump(sha2_gen_all_message_schedules) - -global sha2: - %jump(sha2_store) diff --git a/evm/src/cpu/kernel/asm/sha2/util.asm b/evm/src/cpu/kernel/asm/sha2/util.asm deleted file mode 100644 index e090df0a..00000000 --- a/evm/src/cpu/kernel/asm/sha2/util.asm +++ /dev/null @@ -1,18 +0,0 @@ -// We put the message schedule in memory starting at 64 * num_blocks + 2. -%macro message_schedule_addr_from_num_blocks - // stack: num_blocks - %mul_const(64) - %add_const(2) -%endmacro - -// We use memory starting at 320 * num_blocks + 2 (after the message schedule -// space) as scratch space to store stack values. 
-%macro scratch_space_addr_from_num_blocks - // stack: num_blocks - %mul_const(320) - %add_const(2) -%endmacro - -%macro truncate_to_u32 - %and_const(0xFFFFFFFF) -%endmacro diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index fb9e6d01..8ac92258 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -246,3 +246,16 @@ %mod_const(2) ISZERO %endmacro + +%macro as_u32 + %and_const(0xFFFFFFFF) +%endmacro + +// u32 addition (discarding 2^32 bit) +%macro add_u32 + // stack: x, y + ADD + // stack: x + y + %as_u32 + // stack: (x + y) & u32::MAX +%endmacro diff --git a/evm/src/cpu/kernel/tests/sha2.rs b/evm/src/cpu/kernel/tests/hash.rs similarity index 63% rename from evm/src/cpu/kernel/tests/sha2.rs rename to evm/src/cpu/kernel/tests/hash.rs index cb6d580a..de707e71 100644 --- a/evm/src/cpu/kernel/tests/sha2.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -8,21 +8,23 @@ use sha2::{Digest, Sha256}; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::interpreter::run_with_kernel; -#[test] -fn test_sha2() -> Result<()> { - let kernel = combined_kernel(); - let sha2 = kernel.global_labels["sha2"]; +/// Standard Sha2 implementation. +fn sha2(input: Vec) -> U256 { + let mut hasher = Sha256::new(); + hasher.update(input.clone()); + U256::from(&hasher.finalize()[..]) +} +fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { + let kernel = combined_kernel(); let mut rng = thread_rng(); // Generate a random message, between 0 and 9999 bytes. let num_bytes = rng.gen_range(0..10000); let message: Vec = (0..num_bytes).map(|_| rng.gen()).collect(); - // Hash the message using a standard Sha256 implementation. - let mut hasher = Sha256::new(); - hasher.update(message.clone()); - let expected = U256::from(&hasher.finalize()[..]); + // Hash the message using a standard implementation. 
+ let expected = standard_implementation(message.clone()); // Load the message onto the stack. let mut initial_stack = vec![U256::from(num_bytes)]; @@ -31,8 +33,9 @@ fn test_sha2() -> Result<()> { initial_stack.push(U256::from_str("0xdeadbeef").unwrap()); initial_stack.reverse(); - // Run the sha2 kernel code. - let result = run_with_kernel(&kernel, sha2, initial_stack)?; + // Run the kernel code. + let kernel_function = kernel.global_labels[hash_fn_label]; + let result = run_with_kernel(&kernel, kernel_function, initial_stack)?; let actual = result.stack()[0]; // Check that the result is correct. @@ -40,3 +43,8 @@ fn test_sha2() -> Result<()> { Ok(()) } + +#[test] +fn test_sha2() -> Result<()> { + test_hash("sha2", &sha2) +} diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs index 36471dc4..45feb238 100644 --- a/evm/src/cpu/kernel/tests/mod.rs +++ b/evm/src/cpu/kernel/tests/mod.rs @@ -2,10 +2,10 @@ mod core; mod curve_ops; mod ecrecover; mod exp; +mod hash; mod mpt; mod packing; mod rlp; -mod sha2; mod transaction_parsing; use std::str::FromStr; From 9919562a64bb7f8d756bc47e967f966e3d4dcaf5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 14:08:09 -0700 Subject: [PATCH 103/104] clippy --- evm/src/cpu/kernel/tests/hash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index de707e71..3acdce2b 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -11,7 +11,7 @@ use crate::cpu::kernel::interpreter::run_with_kernel; /// Standard Sha2 implementation. 
fn sha2(input: Vec) -> U256 { let mut hasher = Sha256::new(); - hasher.update(input.clone()); + hasher.update(input); U256::from(&hasher.finalize()[..]) } From 43df58ea18254e65345dc6959e901f4c29d68672 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 3 Oct 2022 14:10:10 -0700 Subject: [PATCH 104/104] alphabetical --- evm/src/cpu/kernel/aggregator.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 5fb6ad08..0c3015f2 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -31,6 +31,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/inverse_scalar.asm"), include_str!("asm/curve/secp256k1/lift_x.asm"), include_str!("asm/curve/secp256k1/moddiv.asm"), + include_str!("asm/exp.asm"), include_str!("asm/halt.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), @@ -38,17 +39,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/metadata.asm"), include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), - include_str!("asm/exp.asm"), - include_str!("asm/sha2/compression.asm"), - include_str!("asm/sha2/constants.asm"), - include_str!("asm/sha2/message_schedule.asm"), - include_str!("asm/sha2/ops.asm"), - include_str!("asm/sha2/store_pad.asm"), - include_str!("asm/sha2/temp_words.asm"), - include_str!("asm/sha2/write_length.asm"), - include_str!("asm/rlp/encode.asm"), - include_str!("asm/rlp/decode.asm"), - include_str!("asm/rlp/read_to_memory.asm"), include_str!("asm/mpt/hash.asm"), include_str!("asm/mpt/hash_trie_specific.asm"), include_str!("asm/mpt/hex_prefix.asm"), @@ -58,6 +48,16 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/mpt/storage_write.asm"), include_str!("asm/mpt/util.asm"), include_str!("asm/mpt/write.asm"), + include_str!("asm/rlp/encode.asm"), + include_str!("asm/rlp/decode.asm"), + 
include_str!("asm/rlp/read_to_memory.asm"), + include_str!("asm/sha2/compression.asm"), + include_str!("asm/sha2/constants.asm"), + include_str!("asm/sha2/message_schedule.asm"), + include_str!("asm/sha2/ops.asm"), + include_str!("asm/sha2/store_pad.asm"), + include_str!("asm/sha2/temp_words.asm"), + include_str!("asm/sha2/write_length.asm"), include_str!("asm/transactions/router.asm"), include_str!("asm/transactions/type_0.asm"), include_str!("asm/transactions/type_1.asm"),