diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm
index 31b30b21..efb940f9 100644
--- a/evm/src/cpu/kernel/asm/sha2/compression.asm
+++ b/evm/src/cpu/kernel/asm/sha2/compression.asm
@@ -1,371 +1,365 @@
 global sha2_compression:
     // stack: message_schedule_addr, retdest
-    push 0
+    PUSH 0
     // stack: i=0, message_schedule_addr, retdest
-    swap1
+    SWAP1
     // stack: message_schedule_addr, i=0, retdest
-    push 0
+    PUSH 0
     // stack: 0, message_schedule_addr, i=0, retdest
     %mload_kernel_general
     // stack: num_blocks, message_schedule_addr, i=0, retdest
-    dup1
+    DUP1
     // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest
     %scratch_space_addr_from_num_blocks
     // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest
-    swap1
+    SWAP1
     // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(28)
     %mload_kernel_code_u32
     // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(24)
     %mload_kernel_code_u32
     // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(20)
     %mload_kernel_code_u32
     // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(16)
     %mload_kernel_code_u32
     // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(12)
     %mload_kernel_code_u32
     // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(8)
     %mload_kernel_code_u32
     // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %add_const(4)
     %mload_kernel_code_u32
     // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    push sha2_constants_h
+    PUSH sha2_constants_h
     %mload_kernel_code_u32
     // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
 sha2_compression_start_block:
     // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block.
-    dup10
+    DUP10
     // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
 
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
 
-    dup3
-    dup2
+    DUP3
+    DUP2
     // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     
-    dup4
-    dup2
+    DUP4
+    DUP2
     // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     
-    dup5
-    dup2
+    DUP5
+    DUP2
     // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     
-    dup6
-    dup2
+    DUP6
+    DUP2
     // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     
-    dup7
-    dup2
+    DUP7
+    DUP2
     // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     
-    dup8
-    dup2
+    DUP8
+    DUP2
     // stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %add_const(4)
     // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
 
-    dup9
-    dup2
+    DUP9
+    DUP2
     // stack: scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
     %mstore_kernel_general_u32
     // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
-    pop
+    POP
     // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
 sha2_compression_loop:
     // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i].
     // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup11
+    DUP11
     // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup13
+    DUP13
     // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %mul_const(4)
     // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    add
+    ADD
     // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %mload_kernel_general_u32
     // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    push sha2_constants_k
+    PUSH sha2_constants_k
     // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup14
+    DUP14
     // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %mul_const(4)
     // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    add
+    ADD
     // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %mload_kernel_code_u32
     // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     // stack: g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %sha2_temp_word1
     // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup4
+    DUP4
     // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup4
+    DUP4
     // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup4
+    DUP4
     // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %sha2_temp_word2
     // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup6
+    DUP6
     // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup3
+    DUP3
     // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap2
+    SWAP2
     // stack: T1[i], T2[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap1
+    SWAP1
     // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap5
+    SWAP5
     // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    pop
+    POP
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap8
+    SWAP8
     // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    pop
+    POP
     // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap7
+    SWAP7
     // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap1
-    swap7
-    swap1
+    SWAP1
+    SWAP7
+    SWAP1
     // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap2
-    swap7
-    swap2
+    SWAP2
+    SWAP7
+    SWAP2
     // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap3
-    swap7
-    swap3
+    SWAP3
+    SWAP7
+    SWAP3
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap4
-    swap7
-    swap4
+    SWAP4
+    SWAP7
+    SWAP4
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap5
-    swap7
-    swap5
+    SWAP5
+    SWAP7
+    SWAP5
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap6
-    swap7
-    swap6
+    SWAP6
+    SWAP7
+    SWAP6
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup12
+    DUP12
     // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %increment
     // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup1
+    DUP1
     // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %eq_const(64)
     // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup1
+    DUP1
     // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup12
+    DUP12
     // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    sub
+    SUB
     // stack: num_blocks new = num_blocks - (i+1==64), i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap13
+    SWAP13
     // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
-    swap1
+    SWAP1
     // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
-    push 256
-    mul
+    PUSH 256
+    MUL
     // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
-    add
+    ADD
     // stack: message_schedule_addr new = message_schedule_addr + (i+1==64)*256, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
-    swap12
+    SWAP12
     // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest
-    swap10
+    SWAP10
     // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
-    pop
+    POP
     // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
-    push 64
-    swap1
-    mod
+    PUSH 64
+    SWAP1
+    MOD
     // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
-    swap12
+    SWAP12
     // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
-    pop
+    POP
     // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
-    dup12
+    DUP12
     // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
-    //dup10
-    //iszero
-    //dup2
-    //iszero
-    //and
-    //%jumpi(sha2_stop_lol)
-    iszero
+    ISZERO
     %jumpi(sha2_compression_end_block)
     %jump(sha2_compression_loop)
 sha2_compression_end_block:
     // Add the initial values of the eight working variables (from the start of this block's compression) back into them.
     // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %mload_kernel_general_u32
     // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap1
+    SWAP1
     // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(4)
     %mload_kernel_general_u32
     // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap2
+    SWAP2
     // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(8)
     %mload_kernel_general_u32
     // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap3
+    SWAP3
     // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(12)
     %mload_kernel_general_u32
     // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap4
+    SWAP4
     // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(16)
     %mload_kernel_general_u32
     // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap5
+    SWAP5
     // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(20)
     %mload_kernel_general_u32
     // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap6
+    SWAP6
     // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(24)
     %mload_kernel_general_u32
     // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap7
+    SWAP7
     // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    dup10
+    DUP10
     %add_const(28)
     %mload_kernel_general_u32
     // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
     %add_u32
     // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
-    swap8
+    SWAP8
     // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
-    dup1
+    DUP1
     // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
-    iszero
+    ISZERO
     // If num_blocks == 0, every block has been compressed, so jump to the final packing step.
     %jumpi(sha2_compression_end)
     // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
     // TODO: "insertion" macro for the below
     // Move num_blocks to the ninth spot on the stack, past the working variables: SWAP1 exchanges slots 1 and 2, then each SWAPk, SWAP(k-1), SWAPk triple exchanges slots k and k+1.
-    swap1
-    swap2
-    swap1
-    swap2
-    swap3
-    swap2
-    swap3
-    swap4
-    swap3
-    swap4
-    swap5
-    swap4
-    swap5
-    swap6
-    swap5
-    swap6
-    swap7
-    swap6
-    swap7
-    swap8
-    swap7
-    swap8
+    SWAP1
+    SWAP2
+    SWAP1
+    SWAP2
+    SWAP3
+    SWAP2
+    SWAP3
+    SWAP4
+    SWAP3
+    SWAP4
+    SWAP5
+    SWAP4
+    SWAP5
+    SWAP6
+    SWAP5
+    SWAP6
+    SWAP7
+    SWAP6
+    SWAP7
+    SWAP8
+    SWAP7
+    SWAP8
     %jump(sha2_compression_start_block)
 sha2_compression_end:
     // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
-    pop
+    POP
     // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     %shl_const(32)
-    or
+    OR
     // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest
-    swap3
+    SWAP3
     // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest
     %pop3
     // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest
diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm
index 0b722287..9c68f208 100644
--- a/evm/src/cpu/kernel/asm/sha2/memory.asm
+++ b/evm/src/cpu/kernel/asm/sha2/memory.asm
@@ -94,8 +94,8 @@
     %mload_kernel_general_u32
     OR
     // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset
-    swap1
-    pop
+    SWAP1
+    POP
     // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0
 %endmacro
 
@@ -113,64 +113,64 @@
 // to kernel general memory.
 %macro mstore_kernel_general_u32
     // stack: offset, value
-    swap1
+    SWAP1
     // stack: value, offset
-    push 1
-    push 8
-    shl
+    PUSH 1
+    PUSH 8
+    SHL
     // stack: 1 << 8, value, offset
-    swap1
+    SWAP1
     // stack: value, 1 << 8, offset
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: value, 1 << 8, value, 1 << 8, offset
-    mod
+    MOD
     // stack: c_0 = value % (1 << 8), value, 1 << 8, offset
-    swap2
-    swap1
+    SWAP2
+    SWAP1
     // stack: value, 1 << 8, c_0, offset
-    push 8
-    shr
+    PUSH 8
+    SHR
     // stack: value >> 8, 1 << 8, c_0, offset
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: value >> 8, 1 << 8, value >> 8, 1 << 8, c_0, offset
-    mod
+    MOD
     // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, 1 << 8, c_0, offset
-    swap2
-    swap1
+    SWAP2
+    SWAP1
     // stack: value >> 8, 1 << 8, c_1, c_0, offset
-    push 8
-    shr
+    PUSH 8
+    SHR
     // stack: value >> 16, 1 << 8, c_1, c_0, offset
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: value >> 16, 1 << 8, value >> 16, 1 << 8, c_1, c_0, offset
-    mod
+    MOD
     // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, 1 << 8, c_1, c_0, offset
-    swap2
-    swap1
+    SWAP2
+    SWAP1
     // stack: value >> 16, 1 << 8, c_2, c_1, c_0, offset
-    push 8
-    shr
+    PUSH 8
+    SHR
     // stack: value >> 24, 1 << 8, c_2, c_1, c_0, offset
-    mod
+    MOD
     // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset
-    dup5
+    DUP5
     // stack: offset, c_3, c_2, c_1, c_0, offset
     %mstore_kernel_general
     // stack: c_2, c_1, c_0, offset
-    dup4
+    DUP4
     // stack: offset, c_2, c_1, c_0, offset
     %add_const(1)
     %mstore_kernel_general
     // stack: c_1, c_0, offset
-    dup3
+    DUP3
     // stack: offset, c_1, c_0, offset
     %add_const(2)
     %mstore_kernel_general
     // stack: c_0, offset
-    swap1
+    SWAP1
     // stack: offset, c_0
     %add_const(3)
     %mstore_kernel_general
diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm
index 8f0cd58d..e6daa0b8 100644
--- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm
+++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm
@@ -3,212 +3,212 @@
 //                of message schedule (in four-byte increments)
 global sha2_gen_message_schedule_from_block:
     // stack: block_addr, output_addr, retdest
-    dup1
+    DUP1
     // stack: block_addr, block_addr, output_addr, retdest
     %add_const(32)
     // stack: block_addr + 32, block_addr, output_addr, retdest
-    swap1
+    SWAP1
     // stack: block_addr, block_addr + 32, output_addr, retdest
     %mload_kernel_general_u256
     // stack: block[0], block_addr + 32, output_addr, retdest
-    swap1
+    SWAP1
     // stack: block_addr + 32, block[0], output_addr, retdest
     %mload_kernel_general_u256
     // stack: block[1], block[0], output_addr, retdest
-    swap2
+    SWAP2
     // stack: output_addr, block[0], block[1], retdest
     %add_const(28)
-    push 8
+    PUSH 8
     // stack: counter=8, output_addr + 28, block[0], block[1], retdest
     %jump(sha2_gen_message_schedule_from_block_0_loop)
 sha2_gen_message_schedule_from_block_0_loop:
     // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule.
     // stack: counter, output_addr, block[0], block[1], retdest
-    swap2
+    SWAP2
     // stack: block[0], output_addr, counter, block[1], retdest
-    push 1
-    push 32
-    shl
+    PUSH 1
+    PUSH 32
+    SHL
     // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest
-    swap1
+    SWAP1
     // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest
-    mod
+    MOD
     // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest
-    swap2
+    SWAP2
     // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest
-    div
+    DIV
     // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest
-    swap1
+    SWAP1
     // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
-    dup3
+    DUP3
     // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest
     %mstore_kernel_general_u32
     // stack: block[0] >> 32, output_addr, counter, block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, block[0] >> 32, counter, block[1], retdest
     %sub_const(4)
     // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest
-    swap1
+    SWAP1
     // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest
-    swap2
+    SWAP2
     // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest
     %decrement
-    dup1
-    iszero
+    DUP1
+    ISZERO
     %jumpi(sha2_gen_message_schedule_from_block_0_end)
     %jump(sha2_gen_message_schedule_from_block_0_loop)
 sha2_gen_message_schedule_from_block_0_end:
     // stack: old counter=0, output_addr, block[0], block[1], retdest
-    pop
-    push 8
+    POP
+    PUSH 8
     // stack: counter=8, output_addr, block[0], block[1], retdest
-    swap2
+    SWAP2
     // stack: block[0], output_addr, counter, block[1], retdest
-    swap3
+    SWAP3
     // stack: block[1], output_addr, counter, block[0], retdest
-    swap2
+    SWAP2
     // stack: counter, output_addr, block[1], block[0], retdest
-    swap1
+    SWAP1
     // stack: output_addr, counter, block[1], block[0], retdest
     %add_const(64)
     // stack: output_addr + 64, counter, block[1], block[0], retdest
-    swap1
+    SWAP1
     // stack: counter, output_addr + 64, block[1], block[0], retdest
 sha2_gen_message_schedule_from_block_1_loop:
     // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule.
     // stack: counter, output_addr, block[1], block[0], retdest
-    swap2
+    SWAP2
     // stack: block[1], output_addr, counter, block[0], retdest
-    push 1
-    push 32
-    shl
+    PUSH 1
+    PUSH 32
+    SHL
     // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest
-    swap1
+    SWAP1
     // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest
-    mod
+    MOD
     // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest
-    swap2
+    SWAP2
     // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest
-    div
+    DIV
     // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest
-    swap1
+    SWAP1
     // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
-    dup3
+    DUP3
     // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest
     %mstore_kernel_general_u32
     // stack: block[1] >> 32, output_addr, counter, block[0], retdest
-    swap1
+    SWAP1
     // stack: output_addr, block[1] >> 32, counter, block[0], retdest
     %sub_const(4)
     // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest
-    swap1
+    SWAP1
     // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest
-    swap2
+    SWAP2
     // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest
     %decrement
-    dup1
-    iszero
+    DUP1
+    ISZERO
     %jumpi(sha2_gen_message_schedule_from_block_1_end)
     %jump(sha2_gen_message_schedule_from_block_1_loop)
 sha2_gen_message_schedule_from_block_1_end:
     // stack: old counter=0, output_addr, block[1], block[0], retdest
-    pop
+    POP
     // stack: output_addr, block[0], block[1], retdest
-    push 48
+    PUSH 48
     // stack: counter=48, output_addr, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, counter, block[0], block[1], retdest
     %add_const(36)
     // stack: output_addr + 36, counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: counter, output_addr + 36, block[0], block[1], retdest
 sha2_gen_message_schedule_remaining_loop:
     // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks.
     // stack: counter, output_addr, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, counter, block[0], block[1], retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, counter, block[0], block[1], retdest
-    push 2
-    push 4
-    mul
-    swap1
-    sub
+    PUSH 2
+    PUSH 4
+    MUL
+    SWAP1
+    SUB
     // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest
     %mload_kernel_general_u32
     // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest
     %sha2_sigma_1
     // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    push 7
-    push 4
-    mul
-    swap1
-    sub
+    PUSH 7
+    PUSH 4
+    MUL
+    SWAP1
+    SUB
     // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
     %mload_kernel_general_u32
     // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    push 15
-    push 4
-    mul
-    swap1
-    sub
+    PUSH 15
+    PUSH 4
+    MUL
+    SWAP1
+    SUB
     // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
     %mload_kernel_general_u32
     // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
     %sha2_sigma_0
     // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    push 16
-    push 4
-    mul
-    swap1
-    sub
+    PUSH 16
+    PUSH 4
+    MUL
+    SWAP1
+    SUB
     // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
     %mload_kernel_general_u32
     // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
-    swap4
+    SWAP4
     // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
     %add_u32
     %add_u32
     %add_u32
     // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest
-    swap2
+    SWAP2
     // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest
     %mstore_kernel_general_u32
     // stack: output_addr, counter, block[0], block[1], retdest
     %add_const(4)
     // stack: output_addr + 4, counter, block[0], block[1], retdest
-    swap1
+    SWAP1
     // stack: counter, output_addr + 4, block[0], block[1], retdest
     %decrement
     // stack: counter - 1, output_addr + 4, block[0], block[1], retdest
-    dup1
-    iszero
+    DUP1
+    ISZERO
     %jumpi(sha2_gen_message_schedule_remaining_end)
     %jump(sha2_gen_message_schedule_remaining_loop)
 sha2_gen_message_schedule_remaining_end:
@@ -222,38 +222,38 @@ sha2_gen_message_schedule_remaining_end:
 //                each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments)
 global sha2_gen_all_message_schedules: 
     // stack: output_addr, retdest
-    dup1
+    DUP1
     // stack: output_addr, output_addr, retdest
-    push 0
+    PUSH 0
     // stack: 0, output_addr, output_addr, retdest
     %mload_kernel_general
     // stack: num_blocks, output_addr, output_addr, retdest
-    push 1
+    PUSH 1
     // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest
 sha2_gen_all_message_schedules_loop:
     // stack: cur_addr, counter, cur_output_addr, output_addr, retdest
-    push sha2_gen_all_message_schedules_loop_end
+    PUSH sha2_gen_all_message_schedules_loop_end
     // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest
-    dup4
+    DUP4
     // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
-    dup3
+    DUP3
     // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest
     %jump(sha2_gen_message_schedule_from_block)
 sha2_gen_all_message_schedules_loop_end:
     // stack: cur_addr, counter, cur_output_addr, output_addr, retdest
     %add_const(64)
     // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest
-    swap1
+    SWAP1
     %decrement
-    swap1
+    SWAP1
     // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest
-    swap2
+    SWAP2
     %add_const(256)
-    swap2
+    SWAP2
     // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
-    dup2
+    DUP2
     // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
-    iszero
+    ISZERO
     %jumpi(sha2_gen_all_message_schedules_end)
     %jump(sha2_gen_all_message_schedules_loop)
 sha2_gen_all_message_schedules_end:
diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm
index 6e114f1a..e84bc34c 100644
--- a/evm/src/cpu/kernel/asm/sha2/ops.asm
+++ b/evm/src/cpu/kernel/asm/sha2/ops.asm
@@ -1,17 +1,17 @@
 // u32 addition: (x + y) mod 2^32 (bits at or above 2^32 are discarded)
 %macro add_u32
     // stack: x, y
-    add
+    ADD
     // stack: x + y
-    dup1
+    DUP1
     // stack: x + y, x + y
     %shr_const(32)
     // stack: (x + y) >> 32, x + y
     %shl_const(32)
     // stack: ((x + y) >> 32) << 32, x + y
-    swap1
+    SWAP1
     // stack: x + y, ((x + y) >> 32) << 32
-    sub
+    SUB
     // stack: x + y - (((x + y) >> 32) << 32)
 %endmacro
 
@@ -19,177 +19,177 @@
 // 32-bit right rotation
 %macro rotr
     // stack: rot, value
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: rot, value, rot, value
-    shr
+    SHR
     // stack: value >> rot, rot, value
     %stack (shifted, rot, value) -> (rot, value, shifted)
     // stack: rot, value, value >> rot
-    push 32
-    sub
+    PUSH 32
+    SUB
     // stack: 32 - rot, value, value >> rot
-    shl
+    SHL
     // stack: value << (32 - rot), value >> rot
-    push 32
-    push 1
-    swap1
-    shl
+    PUSH 32
+    PUSH 1
+    SWAP1
+    SHL
     // stack: 1 << 32, value << (32 - rot), value >> rot
-    swap1
-    mod
+    SWAP1
+    MOD
     // stack: (value << (32 - rot)) % (1 << 32), value >> rot
-    add
+    ADD
 %endmacro
 
 // 32-bit left rotation
 %macro rotl
     // stack: rot, value
-    dup2
-    dup2
+    DUP2
+    DUP2
     // stack: rot, value, rot, value
-    push 32
-    sub
+    PUSH 32
+    SUB
     // stack: 32 - rot, value, rot, value
-    shr
+    SHR
     // stack: value >> (32 - rot), rot, value
     %stack (shifted, rot, value) -> (rot, value, shifted)
     // stack: rot, value, value >> (32 - rot)
-    shl
+    SHL
     // stack: value << rot, value >> (32 - rot)
-    push 32
-    push 1
-    swap1
-    shl
+    PUSH 32
+    PUSH 1
+    SWAP1
+    SHL
     // stack: 1 << 32, value << rot, value >> (32 - rot)
-    swap1
-    mod
+    SWAP1
+    MOD
     // stack: (value << rot) % (1 << 32), value >> (32 - rot)
-    add
+    ADD
 %endmacro
 
 %macro sha2_sigma_0
     // stack: x
-    dup1
+    DUP1
     // stack: x, x
-    push 7
+    PUSH 7
     %rotr
     // stack: rotr(x, 7), x
     %stack (rotated, x) -> (x, x, rotated)
     // stack: x, x, rotr(x, 7)
-    push 18
+    PUSH 18
     %rotr
     // stack: rotr(x, 18), x, rotr(x, 7)
-    swap1
+    SWAP1
     // stack: x, rotr(x, 18), rotr(x, 7)
-    push 3
-    shr
+    PUSH 3
+    SHR
     // stack: shr(x, 3), rotr(x, 18), rotr(x, 7)
-    xor
-    xor
+    XOR
+    XOR
 %endmacro
 
 %macro sha2_sigma_1
     // stack: x
-    dup1
+    DUP1
     // stack: x, x
-    push 17
+    PUSH 17
     %rotr
     // stack: rotr(x, 17), x
     %stack (rotated, x) -> (x, x, rotated)
     // stack: x, x, rotr(x, 17)
-    push 19
+    PUSH 19
     %rotr
     // stack: rotr(x, 19), x, rotr(x, 17)
-    swap1
+    SWAP1
     // stack: x, rotr(x, 19), rotr(x, 17)
-    push 10
-    shr
+    PUSH 10
+    SHR
     // stack: shr(x, 10), rotr(x, 19), rotr(x, 17)
-    xor
-    xor
+    XOR
+    XOR
 %endmacro
 
 %macro sha2_bigsigma_0
     // stack: x
-    dup1
+    DUP1
     // stack: x, x
-    push 2
+    PUSH 2
     %rotr
     // stack: rotr(x, 2), x
     %stack (rotated, x) -> (x, x, rotated)
     // stack: x, x, rotr(x, 2)
-    push 13
+    PUSH 13
     %rotr
     // stack: rotr(x, 13), x, rotr(x, 2)
-    swap1
+    SWAP1
     // stack: x, rotr(x, 13), rotr(x, 2)
-    push 22
+    PUSH 22
     %rotr
     // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2)
-    xor
-    xor
+    XOR
+    XOR
 %endmacro
 
 %macro sha2_bigsigma_1
     // stack: x
-    dup1
+    DUP1
     // stack: x, x
-    push 6
+    PUSH 6
     %rotr
     // stack: rotr(x, 6), x
     %stack (rotated, x) -> (x, x, rotated)
     // stack: x, x, rotr(x, 6)
-    push 11
+    PUSH 11
     %rotr
     // stack: rotr(x, 11), x, rotr(x, 6)
-    swap1
+    SWAP1
     // stack: x, rotr(x, 11), rotr(x, 6)
-    push 25
+    PUSH 25
     %rotr
     // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6)
-    xor
-    xor
+    XOR
+    XOR
 %endmacro
 
 %macro sha2_choice
     // stack: x, y, z
-    dup1
+    DUP1
     // stack: x, x, y, z
-    not
+    NOT
     // stack: not x, x, y, z
     %stack (notx, x, y, z) -> (notx, z, x, y)
     // stack: not x, z, x, y
-    and
+    AND
     // stack: (not x) and z, x, y
     %stack (nxz, x, y) -> (x, y, nxz)
     // stack: x, y, (not x) and z
-    and
+    AND
     // stack: x and y, (not x) and z
-    or
+    OR
 %endmacro
 
 %macro sha2_majority
     // stack: x, y, z
-    dup3
-    dup3
-    dup3
+    DUP3
+    DUP3
+    DUP3
     // stack: x, y, z, x, y, z
-    and
+    AND
     // stack: x and y, z, x, y, z
-    swap2
+    SWAP2
     // stack: x, z, x and y, y, z
-    and
+    AND
     // stack: x and z, x and y, y, z
-    swap2
+    SWAP2
     // stack: y, x and y, x and z, z
-    swap1
+    SWAP1
     // stack: x and y, y, x and z, z
-    swap3
+    SWAP3
     // stack: z, y, x and z, x and y
-    and
+    AND
     // stack: y and z, x and z, x and y
-    or
-    or
+    OR
+    OR
 %endmacro
     
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm
index 5cd02ac5..c178202b 100644
--- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm
+++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm
@@ -1,31 +1,31 @@
 global sha2_store:
     // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
-    dup1
+    DUP1
     // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
-    push 0
+    PUSH 0
     // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
     %mstore_kernel_general
     // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest
-    push 1
+    PUSH 1
     // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest
 sha2_store_loop:
     // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
-    dup1
+    DUP1
     // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest
-    swap3
+    SWAP3
     // stack: x[num_bytes-counter], addr, counter, addr,  ... , x[num_bytes-1], retdest
-    swap1
+    SWAP1
     // stack: addr, x[num_bytes-counter], counter, addr,  ... , x[num_bytes-1], retdest
     %mstore_kernel_general
     // stack: counter, addr,  ... , x[num_bytes-1], retdest
     %decrement
     // stack: counter-1, addr,  ... , x[num_bytes-1], retdest
-    dup1
+    DUP1
     // stack: counter-1, counter-1, addr,  ... , x[num_bytes-1], retdest
-    iszero
+    ISZERO
     %jumpi(sha2_store_end)
     // stack: counter-1, addr,  ... , x[num_bytes-1], retdest
-    swap1
+    SWAP1
     // stack: addr, counter-1,  ... , x[num_bytes-1], retdest
     %increment
     // stack: addr+1, counter-1,  ... , x[num_bytes-1], retdest
@@ -42,24 +42,24 @@ sha2_store_end:
 //               num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63]
 global sha2_pad:
     // stack: retdest
-    push 0
+    PUSH 0
     %mload_kernel_general
     // stack: num_bytes, retdest
     // STEP 1: append 1
     // insert 128 (= 1 << 7) at x[num_bytes+1]
     // stack: num_bytes, retdest
-    push 1
-    push 7
-    shl
+    PUSH 1
+    PUSH 7
+    SHL
     // stack: 128, num_bytes, retdest
-    dup2
+    DUP2
     // stack: num_bytes, 128, num_bytes, retdest
     %increment
     // stack: num_bytes+1, 128, num_bytes, retdest
     %mstore_kernel_general
     // stack: num_bytes, retdest
     // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1
-    dup1
+    DUP1
     // stack: num_bytes, num_bytes, retdest
     %add_const(8)
     %div_const(64)
@@ -67,23 +67,23 @@ global sha2_pad:
     %increment
     // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest
     // STEP 3: calculate length := num_bytes*8
-    swap1
+    SWAP1
     // stack: num_bytes, num_blocks, retdest
-    push 8
-    mul
+    PUSH 8
+    MUL
     // stack: length = num_bytes*8, num_blocks, retdest
     // STEP 4: write length to x[num_blocks*64-7..num_blocks*64]
-    dup2
+    DUP2
     // stack: num_blocks, length, num_blocks, retdest
-    push 64
-    mul
+    PUSH 64
+    MUL
     // stack: last_addr = num_blocks*64, length, num_blocks, retdest
     %sha2_write_length
     // stack: num_blocks, retdest
-    dup1
+    DUP1
     // stack: num_blocks, num_blocks, retdest
     // STEP 5: write num_blocks to x[0]
-    push 0
+    PUSH 0
     %mstore_kernel_general
     // stack: num_blocks, retdest
     %message_schedule_addr_from_num_blocks
diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm
index cd2bd303..07aba907 100644
--- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm
+++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm
@@ -1,14 +1,14 @@
 %macro sha2_temp_word1
     // stack: e, f, g, h, K[i], W[i]
-    dup1
+    DUP1
     // stack: e, e, f, g, h, K[i], W[i]
     %sha2_bigsigma_1
     // stack: Sigma_1(e), e, f, g, h, K[i], W[i]
-    swap3
+    SWAP3
     // stack: g, e, f, Sigma_1(e), h, K[i], W[i]
-    swap2
+    SWAP2
     // stack: f, e, g, Sigma_1(e), h, K[i], W[i]
-    swap1
+    SWAP1
     // stack: e, f, g, Sigma_1(e), h, K[i], W[i]
     %sha2_choice
     // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i]
@@ -21,11 +21,11 @@
 
 %macro sha2_temp_word2
     // stack: a, b, c
-    dup1
+    DUP1
     // stack: a, a, b, c
     %sha2_bigsigma_0
     // stack: Sigma_0(a), a, b, c
-    swap3
+    SWAP3
     // stack: c, a, b, Sigma_0(a)
     %sha2_majority
     // stack: Maj(c, a, b), Sigma_0(a)
diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm
index 7474cd0e..40395707 100644
--- a/evm/src/cpu/kernel/asm/sha2/write_length.asm
+++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm
@@ -1,145 +1,145 @@
 %macro sha2_write_length
     // stack: last_addr, length
-    swap1
+    SWAP1
     // stack: length, last_addr
-    push 1
-    push 8
-    shl
+    PUSH 1
+    PUSH 8
+    SHL
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: length % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, length % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
     
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 8
-    shr
+    PUSH 8
+    SHR
     // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 8) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 8) % (1 << 8), 1 << 8, length, last_addr
-    push 1
-    swap1
-    sub
+    PUSH 1
+    SWAP1
+    SUB
     // stack: last_addr - 1, (length >> 8) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 16
-    shr
+    PUSH 16
+    SHR
     // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 16) % (1 << 8), 1 << 8, length, last_addr
-    push 2
-    swap1
-    sub
+    PUSH 2
+    SWAP1
+    SUB
     // stack: last_addr - 2, (length >> 16) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 24
-    shr
+    PUSH 24
+    SHR
     // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr
-    push 3
-    swap1
-    sub
+    PUSH 3
+    SWAP1
+    SUB
     // stack: last_addr - 3, (length >> 24) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 32
-    shr
+    PUSH 32
+    SHR
     // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr
-    push 4
-    swap1
-    sub
+    PUSH 4
+    SWAP1
+    SUB
     // stack: last_addr - 4, (length >> 32) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 40
-    shr
+    PUSH 40
+    SHR
     // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 40) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr
-    push 5
-    swap1
-    sub
+    PUSH 5
+    SWAP1
+    SUB
     // stack: last_addr - 5, (length >> 40) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 48
-    shr
+    PUSH 48
+    SHR
     // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr
-    push 6
-    swap1
-    sub
+    PUSH 6
+    SWAP1
+    SUB
     // stack: last_addr - 6, (length >> 48) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
 
     // stack: 1 << 8, length, last_addr
-    dup1
+    DUP1
     // stack: 1 << 8, 1 << 8, length, last_addr
-    dup3
+    DUP3
     // stack: length, 1 << 8, 1 << 8, length, last_addr
-    push 56
-    shr
+    PUSH 56
+    SHR
     // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr
-    mod
+    MOD
     // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr
-    dup4
+    DUP4
     // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr
-    push 7
-    swap1
-    sub
+    PUSH 7
+    SWAP1
+    SUB
     // stack: last_addr - 7, (length >> 56) % (1 << 8), 1 << 8, length, last_addr
     %mstore_kernel_general
     %pop3