diff --git a/ecdsa/src/gadgets/curve_fixed_base.rs b/ecdsa/src/gadgets/curve_fixed_base.rs index 8d675d12..d99d5760 100644 --- a/ecdsa/src/gadgets/curve_fixed_base.rs +++ b/ecdsa/src/gadgets/curve_fixed_base.rs @@ -40,14 +40,18 @@ pub fn fixed_base_curve_mul_circuit, cons // `s * P = sum s_i * P_i` with `P_i = (16^i) * P` and `s = sum s_i * (16^i)`. for (limb, point) in limbs.into_iter().zip(scaled_base) { // `muls_point[t] = t * P_i` for `t=0..16`. - let muls_point = (0..16) + let mut muls_point = (0..16) .scan(AffinePoint::ZERO, |acc, _| { let tmp = *acc; *acc = (point + *acc).to_affine(); Some(tmp) }) + // First element if zero, so we skip it since `constant_affine_point` takes non-zero input. + .skip(1) .map(|p| builder.constant_affine_point(p)) .collect::>(); + // We add back a point in position 0. `limb == zero` is checked below, so this point can be arbitrary. + muls_point.insert(0, muls_point[0].clone()); let is_zero = builder.is_equal(limb, zero); let should_add = builder.not(is_zero); // `r = s_i * P_i` diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 1e22ef33..c10ab104 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -11,6 +11,7 @@ anyhow = "1.0.40" env_logger = "0.9.0" ethereum-types = "0.13.1" hex = { version = "0.4.3", optional = true } +hex-literal = "0.3.4" itertools = "0.10.3" log = "0.4.14" once_cell = "1.13.0" @@ -24,7 +25,6 @@ keccak-rust = { git = "https://github.com/npwardberkeley/keccak-rust" } keccak-hash = "0.9.0" [dev-dependencies] -hex-literal = "0.3.4" hex = "0.4.3" [features] diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index 58756703..038c1b93 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -143,6 +143,7 @@ mod tests { use crate::all_stark::AllStark; use crate::config::StarkConfig; use crate::cpu::cpu_stark::CpuStark; + use crate::cpu::kernel::aggregator::KERNEL; use crate::cross_table_lookup::testutils::check_ctls; use crate::keccak::keccak_stark::{KeccakStark, NUM_INPUTS, NUM_ROUNDS}; use crate::logic::{self, LogicStark, Operation}; @@ -319,8 +320,27 @@ mod tests { // Pad to a power of two. for _ in cpu_trace_rows.len()..cpu_trace_rows.len().next_power_of_two() { - cpu_trace_rows.push([F::ZERO; CpuStark::::COLUMNS]); + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); } + + // Ensure we finish in a halted state. + { + let num_rows = cpu_trace_rows.len(); + let halt_label = F::from_canonical_usize(KERNEL.global_labels["halt_pc0"]); + + let last_row: &mut cpu::columns::CpuColumnsView = + cpu_trace_rows[num_rows - 1].borrow_mut(); + last_row.program_counter = halt_label; + + let second_last_row: &mut cpu::columns::CpuColumnsView = + cpu_trace_rows[num_rows - 2].borrow_mut(); + second_last_row.next_program_counter = halt_label; + } + trace_rows_to_poly_values(cpu_trace_rows) } diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs index 63e91085..fbf20af2 100644 --- a/evm/src/cpu/columns/mod.rs +++ b/evm/src/cpu/columns/mod.rs @@ -21,9 +21,12 @@ pub struct CpuColumnsView { pub is_bootstrap_contract: T, /// Filter. 1 if the row corresponds to a cycle of execution and 0 otherwise. - /// Lets us re-use decode columns in non-cycle rows. + /// Lets us re-use columns in non-cycle rows. pub is_cpu_cycle: T, + /// If CPU cycle: The program counter for the current instruction. + pub program_counter: T, + /// If CPU cycle: The opcode being decoded, in {0, ..., 255}. 
pub opcode: T, @@ -56,7 +59,7 @@ pub struct CpuColumnsView { pub is_shl: T, pub is_shr: T, pub is_sar: T, - pub is_sha3: T, + pub is_keccak256: T, pub is_address: T, pub is_balance: T, pub is_origin: T, @@ -82,6 +85,7 @@ pub struct CpuColumnsView { pub is_chainid: T, pub is_selfbalance: T, pub is_basefee: T, + pub is_prover_input: T, pub is_pop: T, pub is_mload: T, pub is_mstore: T, @@ -94,6 +98,10 @@ pub struct CpuColumnsView { pub is_msize: T, pub is_gas: T, pub is_jumpdest: T, + pub is_get_state_root: T, + pub is_set_state_root: T, + pub is_get_receipt_root: T, + pub is_set_receipt_root: T, pub is_push: T, pub is_dup: T, pub is_swap: T, @@ -102,13 +110,20 @@ pub struct CpuColumnsView { pub is_log2: T, pub is_log3: T, pub is_log4: T, + pub is_panic: T, pub is_create: T, pub is_call: T, pub is_callcode: T, pub is_return: T, pub is_delegatecall: T, pub is_create2: T, + pub is_get_context: T, + pub is_set_context: T, + pub is_consume_gas: T, + pub is_exit_kernel: T, pub is_staticcall: T, + pub is_mload_general: T, + pub is_mstore_general: T, pub is_revert: T, pub is_selfdestruct: T, @@ -127,17 +142,13 @@ pub struct CpuColumnsView { pub is_invalid_11: T, pub is_invalid_12: T, pub is_invalid_13: T, - pub is_invalid_14: T, - pub is_invalid_15: T, - pub is_invalid_16: T, - pub is_invalid_17: T, - pub is_invalid_18: T, - pub is_invalid_19: T, - pub is_invalid_20: T, /// If CPU cycle: the opcode, broken up into bits in **big-endian** order. pub opcode_bits: [T; 8], + /// If CPU cycle: The program counter for the next instruction. + pub next_program_counter: T, + /// Filter. 1 iff a Keccak permutation is computed on this row. pub is_keccak: T, diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs new file mode 100644 index 00000000..cf24afca --- /dev/null +++ b/evm/src/cpu/control_flow.rs @@ -0,0 +1,112 @@ +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::field::types::Field; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; + +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::CpuColumnsView; +use crate::cpu::kernel::aggregator::KERNEL; + +fn get_halt_pcs() -> (F, F) { + let halt_pc0 = KERNEL.global_labels["halt_pc0"]; + let halt_pc1 = KERNEL.global_labels["halt_pc1"]; + + ( + F::from_canonical_usize(halt_pc0), + F::from_canonical_usize(halt_pc1), + ) +} + +pub fn eval_packed_generic( + lv: &CpuColumnsView

<P>, + nv: &CpuColumnsView<P>, + yield_constr: &mut ConstraintConsumer<P>
, +) { + // Once we start executing instructions, then we continue until the end of the table. + yield_constr.constraint_transition(lv.is_cpu_cycle * (nv.is_cpu_cycle - P::ONES)); + + // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the + // next row. + yield_constr + .constraint_transition(lv.is_cpu_cycle * (nv.program_counter - lv.next_program_counter)); + + // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU + // cycle row is 0. + yield_constr + .constraint_transition((lv.is_cpu_cycle - P::ONES) * nv.is_cpu_cycle * nv.program_counter); + + // The first row has nowhere to continue execution from, so if it's a cycle row, then its + // `program_counter` must be 0. + // NB: I know the first few rows will be used for initialization and will not be CPU cycle rows. + // Once that's done, then this constraint can be removed. Until then, it is needed to ensure + // that execution starts at 0 and not at any arbitrary offset. + yield_constr.constraint_first_row(lv.is_cpu_cycle * lv.program_counter); + + // The last row must be a CPU cycle row. + yield_constr.constraint_last_row(lv.is_cpu_cycle - P::ONES); + // Also, the last row's `program_counter` must be inside the `halt` infinite loop. Note that + // that loop consists of two instructions, so we must check for `halt` and `halt_inner` labels. + let (halt_pc0, halt_pc1) = get_halt_pcs::(); + yield_constr + .constraint_last_row((lv.program_counter - halt_pc0) * (lv.program_counter - halt_pc1)); +} + +pub fn eval_ext_circuit, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + // Once we start executing instructions, then we continue until the end of the table. + { + let constr = builder.mul_sub_extension(lv.is_cpu_cycle, nv.is_cpu_cycle, lv.is_cpu_cycle); + yield_constr.constraint_transition(builder, constr); + } + + // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the + // next row. + { + let constr = builder.sub_extension(nv.program_counter, lv.next_program_counter); + let constr = builder.mul_extension(lv.is_cpu_cycle, constr); + yield_constr.constraint_transition(builder, constr); + } + + // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU + // cycle row is 0. + { + let constr = builder.mul_extension(nv.is_cpu_cycle, nv.program_counter); + let constr = builder.mul_sub_extension(lv.is_cpu_cycle, constr, constr); + yield_constr.constraint_transition(builder, constr); + } + + // The first row has nowhere to continue execution from, so if it's a cycle row, then its + // `program_counter` must be 0. + // NB: I know the first few rows will be used for initialization and will not be CPU cycle rows. + // Once that's done, then this constraint can be removed. Until then, it is needed to ensure + // that execution starts at 0 and not at any arbitrary offset. + { + let constr = builder.mul_extension(lv.is_cpu_cycle, lv.program_counter); + yield_constr.constraint_first_row(builder, constr); + } + + // The last row must be a CPU cycle row. + { + let one = builder.one_extension(); + let constr = builder.sub_extension(lv.is_cpu_cycle, one); + yield_constr.constraint_last_row(builder, constr); + } + // Also, the last row's `program_counter` must be inside the `halt` infinite loop. 
Note that + // that loop consists of two instructions, so we must check for `halt` and `halt_inner` labels. + { + let (halt_pc0, halt_pc1) = get_halt_pcs(); + let halt_pc0_target = builder.constant_extension(halt_pc0); + let halt_pc1_target = builder.constant_extension(halt_pc1); + + let halt_pc0_offset = builder.sub_extension(lv.program_counter, halt_pc0_target); + let halt_pc1_offset = builder.sub_extension(lv.program_counter, halt_pc1_target); + let constr = builder.mul_extension(halt_pc0_offset, halt_pc1_offset); + + yield_constr.constraint_last_row(builder, constr); + } +} diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index 0e4d69f2..6b0bc0fd 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -9,7 +9,7 @@ use plonky2::hash::hash_types::RichField; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{CpuColumnsView, COL_MAP, NUM_CPU_COLUMNS}; -use crate::cpu::{bootstrap_kernel, decode, simple_logic}; +use crate::cpu::{bootstrap_kernel, control_flow, decode, simple_logic}; use crate::cross_table_lookup::Column; use crate::memory::NUM_CHANNELS; use crate::stark::Stark; @@ -90,7 +90,9 @@ impl, const D: usize> Stark for CpuStark, { let local_values = vars.local_values.borrow(); + let next_values = vars.next_values.borrow(); bootstrap_kernel::eval_bootstrap_kernel(vars, yield_constr); + control_flow::eval_packed_generic(local_values, next_values, yield_constr); decode::eval_packed_generic(local_values, yield_constr); simple_logic::eval_packed(local_values, yield_constr); } @@ -102,7 +104,9 @@ impl, const D: usize> Stark for CpuStark, ) { let local_values = vars.local_values.borrow(); + let next_values = vars.next_values.borrow(); bootstrap_kernel::eval_bootstrap_kernel_circuit(builder, vars, yield_constr); + control_flow::eval_ext_circuit(builder, local_values, next_values, yield_constr); decode::eval_ext_circuit(builder, local_values, yield_constr); simple_logic::eval_ext_circuit(builder, local_values, yield_constr); } diff --git a/evm/src/cpu/decode.rs b/evm/src/cpu/decode.rs index 0b091558..233c01c4 100644 --- a/evm/src/cpu/decode.rs +++ b/evm/src/cpu/decode.rs @@ -15,7 +15,7 @@ use crate::cpu::columns::{CpuColumnsView, COL_MAP}; // - its start index is a multiple of its length (it is aligned) // These properties permit us to check if an opcode belongs to a block of length 2^n by checking its // top 8-n bits. 
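As an aside, the block-membership test described in the comment above can be captured in a few lines of standalone Rust. This is an illustrative sketch only (the helper name `in_aligned_block` is not part of the crate): an opcode lies in an aligned block of length 2^n exactly when its top 8-n bits agree with the block's start.

/// Illustrative only: true iff `opcode` is in the aligned block
/// [block_start, block_start + 2^log2_len).
fn in_aligned_block(opcode: u8, block_start: u8, log2_len: u32) -> bool {
    debug_assert_eq!(u32::from(block_start) % (1 << log2_len), 0, "block must be aligned");
    (opcode >> log2_len) == (block_start >> log2_len)
}

#[cfg(test)]
mod block_tests {
    use super::in_aligned_block;

    #[test]
    fn push_block() {
        // PUSH1..PUSH32 occupy the aligned block [0x60, 0x80), so log2_len = 5.
        assert!(in_aligned_block(0x60, 0x60, 5));
        assert!(in_aligned_block(0x7f, 0x60, 5));
        assert!(!in_aligned_block(0x80, 0x60, 5));
    }
}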
-const OPCODES: [(u64, usize, usize); 102] = [ +const OPCODES: [(u64, usize, usize); 107] = [ // (start index of block, number of top bits to check (log2), flag column) (0x00, 0, COL_MAP.is_stop), (0x01, 0, COL_MAP.is_add), @@ -45,7 +45,7 @@ const OPCODES: [(u64, usize, usize); 102] = [ (0x1c, 0, COL_MAP.is_shr), (0x1d, 0, COL_MAP.is_sar), (0x1e, 1, COL_MAP.is_invalid_1), // 0x1e-0x1f - (0x20, 0, COL_MAP.is_sha3), + (0x20, 0, COL_MAP.is_keccak256), (0x21, 0, COL_MAP.is_invalid_2), (0x22, 1, COL_MAP.is_invalid_3), // 0x22-0x23 (0x24, 2, COL_MAP.is_invalid_4), // 0x24-0x27 @@ -75,9 +75,9 @@ const OPCODES: [(u64, usize, usize); 102] = [ (0x46, 0, COL_MAP.is_chainid), (0x47, 0, COL_MAP.is_selfbalance), (0x48, 0, COL_MAP.is_basefee), - (0x49, 0, COL_MAP.is_invalid_6), - (0x4a, 1, COL_MAP.is_invalid_7), // 0x4a-0x4b - (0x4c, 2, COL_MAP.is_invalid_8), // 0x4c-0x4f + (0x49, 0, COL_MAP.is_prover_input), + (0x4a, 1, COL_MAP.is_invalid_6), // 0x4a-0x4b + (0x4c, 2, COL_MAP.is_invalid_7), // 0x4c-0x4f (0x50, 0, COL_MAP.is_pop), (0x51, 0, COL_MAP.is_mload), (0x52, 0, COL_MAP.is_mstore), @@ -90,34 +90,39 @@ const OPCODES: [(u64, usize, usize); 102] = [ (0x59, 0, COL_MAP.is_msize), (0x5a, 0, COL_MAP.is_gas), (0x5b, 0, COL_MAP.is_jumpdest), - (0x5c, 2, COL_MAP.is_invalid_9), // 0x5c-0x5f - (0x60, 5, COL_MAP.is_push), // 0x60-0x7f - (0x80, 4, COL_MAP.is_dup), // 0x80-0x8f - (0x90, 4, COL_MAP.is_swap), // 0x90-0x9f + (0x5c, 0, COL_MAP.is_get_state_root), + (0x5d, 0, COL_MAP.is_set_state_root), + (0x5e, 0, COL_MAP.is_get_receipt_root), + (0x5f, 0, COL_MAP.is_set_receipt_root), + (0x60, 5, COL_MAP.is_push), // 0x60-0x7f + (0x80, 4, COL_MAP.is_dup), // 0x80-0x8f + (0x90, 4, COL_MAP.is_swap), // 0x90-0x9f (0xa0, 0, COL_MAP.is_log0), (0xa1, 0, COL_MAP.is_log1), (0xa2, 0, COL_MAP.is_log2), (0xa3, 0, COL_MAP.is_log3), (0xa4, 0, COL_MAP.is_log4), - (0xa5, 0, COL_MAP.is_invalid_10), - (0xa6, 1, COL_MAP.is_invalid_11), // 0xa6-0xa7 - (0xa8, 3, COL_MAP.is_invalid_12), // 0xa8-0xaf - (0xb0, 4, COL_MAP.is_invalid_13), // 0xb0-0xbf - (0xc0, 5, COL_MAP.is_invalid_14), // 0xc0-0xdf - (0xe0, 4, COL_MAP.is_invalid_15), // 0xe0-0xef + (0xa5, 0, COL_MAP.is_panic), + (0xa6, 1, COL_MAP.is_invalid_8), // 0xa6-0xa7 + (0xa8, 3, COL_MAP.is_invalid_9), // 0xa8-0xaf + (0xb0, 4, COL_MAP.is_invalid_10), // 0xb0-0xbf + (0xc0, 5, COL_MAP.is_invalid_11), // 0xc0-0xdf + (0xe0, 4, COL_MAP.is_invalid_12), // 0xe0-0xef (0xf0, 0, COL_MAP.is_create), (0xf1, 0, COL_MAP.is_call), (0xf2, 0, COL_MAP.is_callcode), (0xf3, 0, COL_MAP.is_return), (0xf4, 0, COL_MAP.is_delegatecall), (0xf5, 0, COL_MAP.is_create2), - (0xf6, 1, COL_MAP.is_invalid_16), // 0xf6-0xf7 - (0xf8, 1, COL_MAP.is_invalid_17), // 0xf8-0xf9 + (0xf6, 0, COL_MAP.is_get_context), + (0xf7, 0, COL_MAP.is_set_context), + (0xf8, 0, COL_MAP.is_consume_gas), + (0xf9, 0, COL_MAP.is_exit_kernel), (0xfa, 0, COL_MAP.is_staticcall), - (0xfb, 0, COL_MAP.is_invalid_18), - (0xfc, 0, COL_MAP.is_invalid_19), + (0xfb, 0, COL_MAP.is_mload_general), + (0xfc, 0, COL_MAP.is_mstore_general), (0xfd, 0, COL_MAP.is_revert), - (0xfe, 0, COL_MAP.is_invalid_20), + (0xfe, 0, COL_MAP.is_invalid_13), (0xff, 0, COL_MAP.is_selfdestruct), ]; diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 501edd3e..ec9e34e8 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -3,31 +3,44 @@ use std::collections::HashMap; use ethereum_types::U256; +use hex_literal::hex; use itertools::Itertools; use once_cell::sync::Lazy; use super::assembler::{assemble, Kernel}; use 
crate::cpu::kernel::parser::parse; +use crate::cpu::kernel::txn_fields::NormalizedTxnField; use crate::memory::segments::Segment; pub static KERNEL: Lazy = Lazy::new(combined_kernel); pub fn evm_constants() -> HashMap { let mut c = HashMap::new(); + c.insert( + "BN_BASE".into(), + U256::from_big_endian(&hex!( + "30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47" + )), + ); for segment in Segment::all() { c.insert(segment.var_name().into(), (segment as u32).into()); } + for txn_field in NormalizedTxnField::all() { + c.insert(txn_field.var_name().into(), (txn_field as u32).into()); + } c } #[allow(dead_code)] // TODO: Should be used once witness generation is done. pub(crate) fn combined_kernel() -> Kernel { let files = vec![ - // include_str!("asm/assertions.asm"), // TODO: Should work once PR 619 is merged. + include_str!("asm/assertions.asm"), include_str!("asm/basic_macros.asm"), include_str!("asm/exp.asm"), include_str!("asm/curve_mul.asm"), include_str!("asm/curve_add.asm"), + include_str!("asm/halt.asm"), + include_str!("asm/memory.asm"), include_str!("asm/moddiv.asm"), include_str!("asm/secp256k1/curve_mul.asm"), include_str!("asm/secp256k1/curve_add.asm"), @@ -35,8 +48,16 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/secp256k1/lift_x.asm"), include_str!("asm/secp256k1/inverse_scalar.asm"), include_str!("asm/ecrecover.asm"), - include_str!("asm/storage_read.asm"), - include_str!("asm/storage_write.asm"), + include_str!("asm/rlp/encode.asm"), + include_str!("asm/rlp/decode.asm"), + include_str!("asm/rlp/read_to_memory.asm"), + include_str!("asm/storage/read.asm"), + include_str!("asm/storage/write.asm"), + include_str!("asm/transactions/process_normalized.asm"), + include_str!("asm/transactions/router.asm"), + include_str!("asm/transactions/type_0.asm"), + include_str!("asm/transactions/type_1.asm"), + include_str!("asm/transactions/type_2.asm"), ]; let parsed_files = files.iter().map(|f| parse(f)).collect_vec(); diff --git a/evm/src/cpu/kernel/asm/assertions.asm b/evm/src/cpu/kernel/asm/assertions.asm index a8e65036..69193e5f 100644 --- a/evm/src/cpu/kernel/asm/assertions.asm +++ b/evm/src/cpu/kernel/asm/assertions.asm @@ -6,13 +6,13 @@ global panic: // Consumes the top element and asserts that it is zero. %macro assert_zero - %jumpi panic + %jumpi(panic) %endmacro // Consumes the top element and asserts that it is nonzero. %macro assert_nonzero ISZERO - %jumpi panic + %jumpi(panic) %endmacro %macro assert_eq @@ -49,34 +49,34 @@ global panic: %endmacro %macro assert_eq_const(c) - %eq_const(c) + %eq_const($c) %assert_nonzero %endmacro %macro assert_lt_const(c) // %assert_zero is cheaper than %assert_nonzero, so we will leverage the // fact that (x < c) == !(x >= c). - %ge_const(c) + %ge_const($c) %assert_zero %endmacro %macro assert_le_const(c) // %assert_zero is cheaper than %assert_nonzero, so we will leverage the // fact that (x <= c) == !(x > c). - %gt_const(c) + %gt_const($c) %assert_zero %endmacro %macro assert_gt_const(c) // %assert_zero is cheaper than %assert_nonzero, so we will leverage the // fact that (x > c) == !(x <= c). - %le_const(c) + %le_const($c) %assert_zero %endmacro %macro assert_ge_const(c) // %assert_zero is cheaper than %assert_nonzero, so we will leverage the // fact that (x >= c) == !(x < c). 
- %lt_const(c) + %lt_const($c) %assert_zero %endmacro diff --git a/evm/src/cpu/kernel/asm/basic_macros.asm b/evm/src/cpu/kernel/asm/basic_macros.asm index 7bf001b4..e266b2cb 100644 --- a/evm/src/cpu/kernel/asm/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/basic_macros.asm @@ -26,6 +26,24 @@ %endrep %endmacro +%macro pop5 + %rep 5 + pop + %endrep +%endmacro + +%macro pop6 + %rep 6 + pop + %endrep +%endmacro + +%macro pop7 + %rep 7 + pop + %endrep +%endmacro + %macro add_const(c) // stack: input, ... PUSH $c @@ -60,10 +78,29 @@ // stack: c, input, ... SWAP1 // stack: input, c, ... - SUB + DIV // stack: input / c, ... %endmacro +// Slightly inefficient as we need to swap the inputs. +// Consider avoiding this in performance-critical code. +%macro mod_const(c) + // stack: input, ... + PUSH $c + // stack: c, input, ... + SWAP1 + // stack: input, c, ... + MOD + // stack: input % c, ... +%endmacro + +%macro shl_const(c) + // stack: input, ... + PUSH $c + SHL + // stack: input << c, ... +%endmacro + %macro eq_const(c) // stack: input, ... PUSH $c diff --git a/evm/src/cpu/kernel/asm/curve_add.asm b/evm/src/cpu/kernel/asm/curve_add.asm index 4ac4e0e4..15f9df05 100644 --- a/evm/src/cpu/kernel/asm/curve_add.asm +++ b/evm/src/cpu/kernel/asm/curve_add.asm @@ -94,14 +94,8 @@ global ec_add_valid_points: ec_add_first_zero: JUMPDEST // stack: x0, y0, x1, y1, retdest - // Just return (x1,y1) - %pop2 - // stack: x1, y1, retdest - SWAP1 - // stack: y1, x1, retdest - SWAP2 - // stack: retdest, x1, y1 + %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1) JUMP // BN254 elliptic curve addition. @@ -110,19 +104,8 @@ ec_add_snd_zero: JUMPDEST // stack: x0, y0, x1, y1, retdest - // Just return (x1,y1) - SWAP2 - // stack: x1, y0, x0, y1, retdest - POP - // stack: y0, x0, y1, retdest - SWAP2 - // stack: y1, x0, y0, retdest - POP - // stack: x0, y0, retdest - SWAP1 - // stack: y0, x0, retdest - SWAP2 - // stack: retdest, x0, y0 + // Just return (x0,y0) + %stack (x0, y0, x1, y1, retdest) -> (retdest, x0, y0) JUMP // BN254 elliptic curve addition. @@ -170,16 +153,7 @@ ec_add_valid_points_with_lambda: // stack: y2, x2, lambda, x0, y0, x1, y1, retdest // Return x2,y2 - SWAP5 - // stack: x1, x2, lambda, x0, y0, y2, y1, retdest - POP - // stack: x2, lambda, x0, y0, y2, y1, retdest - SWAP5 - // stack: y1, lambda, x0, y0, y2, x2, retdest - %pop4 - // stack: y2, x2, retdest - SWAP2 - // stack: retdest, x2, y2 + %stack (y2, x2, lambda, x0, y0, x1, y1, retdest) -> (retdest, x2, y2) JUMP // BN254 elliptic curve addition. 
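One note on the new `%div_const` / `%mod_const` macros introduced above: the PUSH-then-SWAP1 pattern exists because EVM's DIV and MOD treat the top stack item as the numerator, so the pushed constant has to be moved underneath the input. The following is a minimal Rust model of that convention, an assumption-laden sketch of standard EVM stack semantics rather than code from this crate.

// Model the stack as a Vec whose last element is the top.
fn div_const(stack: &mut Vec<u128>, c: u128) {
    stack.push(c); // stack (top first): c, input, ...
    let n = stack.len();
    stack.swap(n - 1, n - 2); // stack (top first): input, c, ...
    let a = stack.pop().unwrap(); // a = input
    let b = stack.pop().unwrap(); // b = c
    stack.push(if b == 0 { 0 } else { a / b }); // EVM DIV yields 0 on division by zero
}

fn main() {
    let mut stack = vec![7u128]; // the input sits on top
    div_const(&mut stack, 2);
    assert_eq!(stack, vec![3]); // input / c, as the macro's stack comments promise
}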
@@ -291,21 +265,7 @@ global ec_double: // stack: y < N, x < N, x, y AND // stack: (y < N) & (x < N), x, y - SWAP2 - // stack: y, x, (y < N) & (x < N), x - SWAP1 - // stack: x, y, (y < N) & (x < N) - %bn_base - // stack: N, x, y, b - %bn_base - // stack: N, N, x, y, b - DUP3 - // stack: x, N, N, x, y, b - %bn_base - // stack: N, x, N, N, x, y, b - DUP2 - // stack: x, N, x, N, N, x, y, b - DUP1 + %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b) // stack: x, x, N, x, N, N, x, y, b MULMOD // stack: x^2 % N, x, N, N, x, y, b diff --git a/evm/src/cpu/kernel/asm/ecrecover.asm b/evm/src/cpu/kernel/asm/ecrecover.asm index d0994054..538a86dc 100644 --- a/evm/src/cpu/kernel/asm/ecrecover.asm +++ b/evm/src/cpu/kernel/asm/ecrecover.asm @@ -107,33 +107,53 @@ ecrecover_with_first_point: // stack: u2, Y, X, retdest // Compute u2 * GENERATOR and chain the call to `ec_mul` with a call to `ec_add` to compute PUBKEY = (X,Y) + u2 * GENERATOR, - // and a call to `final_hashing` to get the final result `SHA3(PUBKEY)[-20:]`. - PUSH final_hashing - // stack: final_hashing, u2, Y, X, retdest + // and a call to `pubkey_to_addr` to get the final result `KECCAK256(PUBKEY)[-20:]`. + PUSH pubkey_to_addr + // stack: pubkey_to_addr, u2, Y, X, retdest SWAP3 - // stack: X, u2, Y, final_hashing, retdest + // stack: X, u2, Y, pubkey_to_addr, retdest PUSH ec_add_valid_points_secp - // stack: ec_add_valid_points_secp, X, u2, Y, final_hashing, retdest + // stack: ec_add_valid_points_secp, X, u2, Y, pubkey_to_addr, retdest SWAP1 - // stack: X, ec_add_valid_points_secp, u2, Y, final_hashing, retdest + // stack: X, ec_add_valid_points_secp, u2, Y, pubkey_to_addr, retdest PUSH 0x79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798 // x-coordinate of generator - // stack: Gx, X, ec_add_valid_points_secp, u2, Y, final_hashing, retdest + // stack: Gx, X, ec_add_valid_points_secp, u2, Y, pubkey_to_addr, retdest SWAP1 - // stack: X, Gx, ec_add_valid_points_secp, u2, Y, final_hashing, retdest + // stack: X, Gx, ec_add_valid_points_secp, u2, Y, pubkey_to_addr, retdest PUSH 0x483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8 // y-coordinate of generator - // stack: Gy, X, Gx, ec_add_valid_points_secp, u2, Y, final_hashing, retdest + // stack: Gy, X, Gx, ec_add_valid_points_secp, u2, Y, pubkey_to_addr, retdest SWAP1 - // stack: X, Gy, Gx, ec_add_valid_points_secp, u2, Y, final_hashing, retdest + // stack: X, Gy, Gx, ec_add_valid_points_secp, u2, Y, pubkey_to_addr, retdest SWAP4 - // stack: u2, Gy, Gx, ec_add_valid_points_secp, X, Y, final_hashing, retdest + // stack: u2, Gy, Gx, ec_add_valid_points_secp, X, Y, pubkey_to_addr, retdest SWAP2 - // stack: Gx, Gy, u2, ec_add_valid_points_secp, X, Y, final_hashing, retdest + // stack: Gx, Gy, u2, ec_add_valid_points_secp, X, Y, pubkey_to_addr, retdest %jump(ec_mul_valid_point_secp) -// TODO -final_hashing: +// Take a public key (PKx, PKy) and return the associated address KECCAK256(PKx || PKy)[-20:]. +pubkey_to_addr: JUMPDEST - PUSH 0xdeadbeef + // stack: PKx, PKy, retdest + PUSH 0 + // stack: 0, PKx, PKy, retdest + MSTORE // TODO: switch to kernel memory (like `%mstore_current(@SEGMENT_KERNEL_GENERAL)`). 
+ // stack: PKy, retdest + PUSH 0x20 + // stack: 0x20, PKy, retdest + MSTORE + // stack: retdest + PUSH 0x40 + // stack: 0x40, retdest + PUSH 0 + // stack: 0, 0x40, retdest + KECCAK256 + // stack: hash, retdest + PUSH 0xffffffffffffffffffffffffffffffffffffffff + // stack: 2^160-1, hash, retdest + AND + // stack: address, retdest + SWAP1 + // stack: retdest, address JUMP // Check if v, r, and s are in correct form. diff --git a/evm/src/cpu/kernel/asm/halt.asm b/evm/src/cpu/kernel/asm/halt.asm new file mode 100644 index 00000000..906ce51a --- /dev/null +++ b/evm/src/cpu/kernel/asm/halt.asm @@ -0,0 +1,6 @@ +global halt: + PUSH halt_pc0 +global halt_pc0: + DUP1 +global halt_pc1: + JUMP diff --git a/evm/src/cpu/kernel/asm/memory.asm b/evm/src/cpu/kernel/asm/memory.asm new file mode 100644 index 00000000..81474d12 --- /dev/null +++ b/evm/src/cpu/kernel/asm/memory.asm @@ -0,0 +1,120 @@ +// Load a value from the given segment of the current context's memory space. +// Note that main memory values are one byte each, but in general memory values +// can be 256 bits. This macro deals with a single address (unlike MLOAD), so +// if it is used with main memory, it will load a single byte. +%macro mload_current(segment) + // stack: offset + PUSH $segment + // stack: segment, offset + GET_CONTEXT + // stack: context, segment, offset + MLOAD_GENERAL + // stack: value +%endmacro + +// Store a value to the given segment of the current context's memory space. +// Note that main memory values are one byte each, but in general memory values +// can be 256 bits. This macro deals with a single address (unlike MSTORE), so +// if it is used with main memory, it will store a single byte. +%macro mstore_current(segment) + // stack: offset, value + PUSH $segment + // stack: segment, offset, value + GET_CONTEXT + // stack: context, segment, offset, value + MSTORE_GENERAL + // stack: (empty) +%endmacro + +// Load a single byte from kernel code. +%macro mload_kernel_code + // stack: offset + PUSH @SEGMENT_CODE + // stack: segment, offset + PUSH 0 // kernel has context 0 + // stack: context, segment, offset + MLOAD_GENERAL + // stack: value +%endmacro + +// Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// from kernel code. +%macro mload_kernel_code_u32 + // stack: offset + DUP1 + %mload_kernel_code + // stack: c_3, offset + %shl_const(8) + // stack: c_3 << 8, offset + DUP2 + %add_const(1) + %mload_kernel_code + OR + // stack: (c_3 << 8) | c_2, offset + %shl_const(8) + // stack: ((c_3 << 8) | c_2) << 8, offset + DUP2 + %add_const(2) + %mload_kernel_code + OR + // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset + %shl_const(8) + // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset + SWAP1 + %add_const(3) + %mload_kernel_code + OR + // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 +%endmacro + +// Copies `count` values from +// SRC = (src_ctx, src_segment, src_addr) +// to +// DST = (dst_ctx, dst_segment, dst_addr). +// These tuple definitions are used for brevity in the stack comments below. +global memcpy: + JUMPDEST + // stack: DST, SRC, count, retdest + DUP7 + // stack: count, DST, SRC, count, retdest + ISZERO + // stack: count == 0, DST, SRC, count, retdest + %jumpi(memcpy_finish) + // stack: DST, SRC, count, retdest + + // Copy the next value. 
+ DUP6 + DUP6 + DUP6 + // stack: SRC, DST, SRC, count, retdest + MLOAD_GENERAL + // stack: value, DST, SRC, count, retdest + DUP4 + DUP4 + DUP4 + // stack: DST, value, DST, SRC, count, retdest + MSTORE_GENERAL + // stack: DST, SRC, count, retdest + + // Increment dst_addr. + SWAP2 + %add_const(1) + SWAP2 + // Increment src_addr. + SWAP5 + %add_const(1) + SWAP5 + // Decrement count. + SWAP6 + %sub_const(1) + SWAP6 + + // Continue the loop. + %jump(memcpy) + +memcpy_finish: + JUMPDEST + // stack: DST, SRC, count, retdest + %pop7 + // stack: retdest + JUMP diff --git a/evm/src/cpu/kernel/asm/rlp/decode.asm b/evm/src/cpu/kernel/asm/rlp/decode.asm new file mode 100644 index 00000000..76daec1a --- /dev/null +++ b/evm/src/cpu/kernel/asm/rlp/decode.asm @@ -0,0 +1,153 @@ +// Note: currently, these methods do not check that RLP input is in canonical +// form; for example a single byte could be encoded with the length-of-length +// form. Technically an EVM must perform these checks, but we aren't really +// concerned with it in our setting. An attacker who corrupted consensus could +// prove a non-canonical state, but this would just temporarily stall the bridge +// until a fix was deployed. We are more concerned with preventing any theft of +// assets. + +// Parse the length of a bytestring from RLP memory. The next len bytes after +// pos' will contain the string. +// +// Pre stack: pos, retdest +// Post stack: pos', len +global decode_rlp_string_len: + JUMPDEST + // stack: pos, retdest + DUP1 + %mload_current(@SEGMENT_RLP_RAW) + // stack: first_byte, pos, retdest + DUP1 + %gt_const(0xb7) + // stack: first_byte >= 0xb8, first_byte, pos, retdest + %jumpi(decode_rlp_string_len_large) + // stack: first_byte, pos, retdest + DUP1 + %gt_const(0x7f) + // stack: first_byte >= 0x80, first_byte, pos, retdest + %jumpi(decode_rlp_string_len_medium) + + // String is a single byte in the range [0x00, 0x7f]. + %stack (first_byte, pos, retdest) -> (retdest, pos, 1) + JUMP + +decode_rlp_string_len_medium: + // String is 0-55 bytes long. First byte contains the len. + // stack: first_byte, pos, retdest + %sub_const(0x80) + // stack: len, pos, retdest + SWAP1 + %add_const(1) + // stack: pos', len, retdest + %stack (pos, len, retdest) -> (retdest, pos, len) + JUMP + +decode_rlp_string_len_large: + // String is >55 bytes long. First byte contains the len of the len. + // stack: first_byte, pos, retdest + %sub_const(0xb7) + // stack: len_of_len, pos, retdest + SWAP1 + %add_const(1) + // stack: pos', len_of_len, retdest + %jump(decode_int_given_len) + +// Parse a scalar from RLP memory. +// Pre stack: pos, retdest +// Post stack: pos', scalar +// +// Scalars are variable-length, but this method assumes a max length of 32 +// bytes, so that the result can be returned as a single word on the stack. +// As per the spec, scalars must not have leading zeros. +global decode_rlp_scalar: + JUMPDEST + // stack: pos, retdest + PUSH decode_int_given_len + // stack: decode_int_given_len, pos, retdest + SWAP1 + // stack: pos, decode_int_given_len, retdest + // decode_rlp_string_len will return to decode_int_given_len, at which point + // the stack will contain (pos', len, retdest), which are the proper args + // to decode_int_given_len. + %jump(decode_rlp_string_len) + +// Parse the length of an RLP list from memory. 
+// Pre stack: pos, retdest +// Post stack: pos', len +global decode_rlp_list_len: + JUMPDEST + // stack: pos, retdest + DUP1 + %mload_current(@SEGMENT_RLP_RAW) + // stack: first_byte, pos, retdest + SWAP1 + %add_const(1) // increment pos + SWAP1 + // stack: first_byte, pos', retdest + // If first_byte is >= 0xf8, it's a > 55 byte list, and + // first_byte - 0xf7 is the length of the length. + DUP1 + %gt_const(0xf7) // GT is native while GE is not, so compare to 0xf6 instead + // stack: first_byte >= 0xf7, first_byte, pos', retdest + %jumpi(decode_rlp_list_len_big) + + // This is the "small list" case. + // The list length is first_byte - 0xc0. + // stack: first_byte, pos', retdest + %sub_const(0xc0) + // stack: len, pos', retdest + %stack (len, pos, retdest) -> (retdest, pos, len) + JUMP + +decode_rlp_list_len_big: + JUMPDEST + // The length of the length is first_byte - 0xf7. + // stack: first_byte, pos', retdest + %sub_const(0xf7) + // stack: len_of_len, pos', retdest + SWAP1 + // stack: pos', len_of_len, retdest + %jump(decode_int_given_len) + +// Parse an integer of the given length. It is assumed that the integer will +// fit in a single (256-bit) word on the stack. +// Pre stack: pos, len, retdest +// Post stack: pos', int +decode_int_given_len: + JUMPDEST + %stack (pos, len, retdest) -> (pos, len, pos, retdest) + ADD + // stack: end_pos, pos, retdest + SWAP1 + // stack: pos, end_pos, retdest + PUSH 0 // initial accumulator state + // stack: acc, pos, end_pos, retdest + +decode_int_given_len_loop: + JUMPDEST + // stack: acc, pos, end_pos, retdest + DUP3 + DUP3 + EQ + // stack: pos == end_pos, acc, pos, end_pos, retdest + %jumpi(decode_int_given_len_finish) + // stack: acc, pos, end_pos, retdest + %shl_const(8) + // stack: acc << 8, pos, end_pos, retdest + DUP2 + // stack: pos, acc << 8, pos, end_pos, retdest + %mload_current(@SEGMENT_RLP_RAW) + // stack: byte, acc << 8, pos, end_pos, retdest + ADD + // stack: acc', pos, end_pos, retdest + // Increment pos. + SWAP1 + %add_const(1) + SWAP1 + // stack: acc', pos', end_pos, retdest + %jump(decode_int_given_len_loop) + +decode_int_given_len_finish: + JUMPDEST + %stack (acc, pos, end_pos, retdest) -> (retdest, pos, acc) + JUMP diff --git a/evm/src/cpu/kernel/asm/rlp/encode.asm b/evm/src/cpu/kernel/asm/rlp/encode.asm new file mode 100644 index 00000000..b2446c37 --- /dev/null +++ b/evm/src/cpu/kernel/asm/rlp/encode.asm @@ -0,0 +1,17 @@ +// RLP-encode a scalar, i.e. a variable-length integer. +// Pre stack: pos, scalar +// Post stack: (empty) +global encode_rlp_scalar: + PANIC // TODO: implement + +// RLP-encode a fixed-length 160-bit string. Assumes string < 2^160. +// Pre stack: pos, string +// Post stack: (empty) +global encode_rlp_160: + PANIC // TODO: implement + +// RLP-encode a fixed-length 256-bit string. +// Pre stack: pos, string +// Post stack: (empty) +global encode_rlp_256: + PANIC // TODO: implement diff --git a/evm/src/cpu/kernel/asm/rlp/read_to_memory.asm b/evm/src/cpu/kernel/asm/rlp/read_to_memory.asm new file mode 100644 index 00000000..ae75e3d7 --- /dev/null +++ b/evm/src/cpu/kernel/asm/rlp/read_to_memory.asm @@ -0,0 +1,39 @@ +// Read RLP data from the prover's tape, and save it to the SEGMENT_RLP_RAW +// segment of memory. + +// Pre stack: retdest +// Post stack: (empty) + +global read_rlp_to_memory: + JUMPDEST + // stack: retdest + PROVER_INPUT // Read the RLP blob length from the prover tape. 
+ // stack: len, retdest + PUSH 0 // initial position + // stack: pos, len, retdest + +read_rlp_to_memory_loop: + JUMPDEST + // stack: pos, len, retdest + DUP2 + DUP2 + EQ + // stack: pos == len, pos, len, retdest + %jumpi(read_rlp_to_memory_finish) + // stack: pos, len, retdest + PROVER_INPUT + // stack: byte, pos, len, retdest + DUP2 + // stack: pos, byte, pos, len, retdest + %mstore_current(@SEGMENT_RLP_RAW) + // stack: pos, len, retdest + %add_const(1) + // stack: pos', len, retdest + %jump(read_rlp_to_memory_loop) + +read_rlp_to_memory_finish: + JUMPDEST + // stack: pos, len, retdest + %pop2 + // stack: retdest + JUMP diff --git a/evm/src/cpu/kernel/asm/storage/read.asm b/evm/src/cpu/kernel/asm/storage/read.asm new file mode 100644 index 00000000..04fea17a --- /dev/null +++ b/evm/src/cpu/kernel/asm/storage/read.asm @@ -0,0 +1,2 @@ +global storage_read: + // TODO diff --git a/evm/src/cpu/kernel/asm/storage/write.asm b/evm/src/cpu/kernel/asm/storage/write.asm new file mode 100644 index 00000000..940fb548 --- /dev/null +++ b/evm/src/cpu/kernel/asm/storage/write.asm @@ -0,0 +1,2 @@ +global storage_write: + // TODO diff --git a/evm/src/cpu/kernel/asm/storage_read.asm b/evm/src/cpu/kernel/asm/storage_read.asm deleted file mode 100644 index 6a704c61..00000000 --- a/evm/src/cpu/kernel/asm/storage_read.asm +++ /dev/null @@ -1,10 +0,0 @@ -// TODO: Dummy code for now. -global storage_read: - JUMPDEST - PUSH 1234 - POP - // An infinite loop: -mylabel: - JUMPDEST - PUSH mylabel - JUMP diff --git a/evm/src/cpu/kernel/asm/storage_write.asm b/evm/src/cpu/kernel/asm/storage_write.asm deleted file mode 100644 index 15c41b7c..00000000 --- a/evm/src/cpu/kernel/asm/storage_write.asm +++ /dev/null @@ -1,6 +0,0 @@ -// TODO: Dummy code for now. -global storage_write: - JUMPDEST - PUSH 123 // Whatever. - POP - BYTES 0x1, 0x02, 3 diff --git a/evm/src/cpu/kernel/asm/transactions/process_normalized.asm b/evm/src/cpu/kernel/asm/transactions/process_normalized.asm new file mode 100644 index 00000000..d99041b0 --- /dev/null +++ b/evm/src/cpu/kernel/asm/transactions/process_normalized.asm @@ -0,0 +1,5 @@ +// After the transaction data has been parsed into a normalized set of fields +// (see TxnField), this routine processes the transaction. + +global process_normalized_txn: + // TODO diff --git a/evm/src/cpu/kernel/asm/transactions/router.asm b/evm/src/cpu/kernel/asm/transactions/router.asm new file mode 100644 index 00000000..01a65fec --- /dev/null +++ b/evm/src/cpu/kernel/asm/transactions/router.asm @@ -0,0 +1,38 @@ +// This is the entry point of transaction processing. We load the transaction +// RLP data into memory, check the transaction type, then based on the type we +// jump to the appropriate transaction parsing method. + +global route_txn: + JUMPDEST + // stack: (empty) + // First load transaction data into memory, where it will be parsed. + PUSH read_txn_from_memory + %jump(read_rlp_to_memory) + +// At this point, the raw txn data is in memory. +read_txn_from_memory: + JUMPDEST + // stack: (empty) + + // We will peak at the first byte to determine what type of transaction this is. + // Note that type 1 and 2 transactions have a first byte of 1 and 2, respectively. + // Type 0 (legacy) transactions have no such prefix, but their RLP will have a + // first byte >= 0xc0, so there is no overlap. 
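The first-byte dispatch described in the comments above (and implemented just below) is easy to state in plain Rust. The sketch below is illustrative only and assumes the standard EIP-2718 envelope rules: typed transactions start with their type byte (1 for EIP-2930, 2 for EIP-1559), while a legacy transaction starts directly with its RLP list prefix, which is always at least 0xc0.

/// Illustrative only: classify a transaction by its first byte.
fn transaction_type(first_byte: u8) -> Result<u8, String> {
    match first_byte {
        0x01 => Ok(1),           // EIP-2930 access-list transaction
        0x02 => Ok(2),           // EIP-1559 fee-market transaction
        b if b >= 0xc0 => Ok(0), // legacy: RLP list prefix, so no overlap with type bytes
        b => Err(format!("unrecognized transaction prefix: {b:#x}")),
    }
}

fn main() {
    assert_eq!(transaction_type(0x02), Ok(2));
    assert_eq!(transaction_type(0xf8), Ok(0)); // a typical legacy-transaction RLP prefix
    assert!(transaction_type(0x03).is_err());
}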
+ + PUSH 0 + %mload_current(@SEGMENT_RLP_RAW) + %eq_const(1) + // stack: first_byte == 1 + %jumpi(process_type_1_txn) + // stack: (empty) + + PUSH 0 + %mload_current(@SEGMENT_RLP_RAW) + %eq_const(2) + // stack: first_byte == 2 + %jumpi(process_type_2_txn) + // stack: (empty) + + // At this point, since it's not a type 1 or 2 transaction, + // it must be a legacy (aka type 0) transaction. + %jump(process_type_2_txn) diff --git a/evm/src/cpu/kernel/asm/transactions/type_0.asm b/evm/src/cpu/kernel/asm/transactions/type_0.asm new file mode 100644 index 00000000..543095a7 --- /dev/null +++ b/evm/src/cpu/kernel/asm/transactions/type_0.asm @@ -0,0 +1,189 @@ +// Type 0 transactions, aka legacy transaction, have the format +// rlp([nonce, gas_price, gas_limit, to, value, data, v, r, s]) +// +// The field v was originally encoded as +// 27 + y_parity +// but as of EIP 155 it can also be encoded as +// 35 + 2 * chain_id + y_parity +// +// If a chain_id is present in v, the signed data is +// keccak256(rlp([nonce, gas_price, gas_limit, to, value, data, chain_id, 0, 0])) +// otherwise, it is +// keccak256(rlp([nonce, gas_price, gas_limit, to, value, data])) + +global process_type_0_txn: + JUMPDEST + // stack: (empty) + PUSH process_txn_with_len + PUSH 0 // initial pos + // stack: pos, process_txn_with_len + %jump(decode_rlp_list_len) + +process_txn_with_len: + // We don't actually need the length. + %stack (pos, len) -> (pos) + + PUSH store_nonce + SWAP1 + // stack: pos, store_nonce + %jump(decode_rlp_scalar) + +store_nonce: + %stack (pos, nonce) -> (@TXN_FIELD_NONCE, nonce, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: pos + PUSH store_gas_price + SWAP1 + // stack: pos, store_gas_price + %jump(decode_rlp_scalar) + +store_gas_price: + // For legacy transactions, we set both the + // TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS and TXN_FIELD_MAX_FEE_PER_GAS + // fields to gas_price. + %stack (pos, gas_price) -> (@TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS, gas_price, + @TXN_FIELD_MAX_FEE_PER_GAS, gas_price, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: pos + PUSH store_gas_limit + SWAP1 + // stack: pos, store_gas_limit + %jump(decode_rlp_scalar) + +store_gas_limit: + %stack (pos, gas_limit) -> (@TXN_FIELD_GAS_LIMIT, gas_limit, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // Peak at the RLP to see if the next byte is zero. + // If so, there is no value field, so skip the store_to step. + // stack: pos + DUP1 + %mload_current(@SEGMENT_RLP_RAW) + ISZERO + // stack: to_empty, pos + %jumpi(parse_value) + + // If we got here, there is a "to" field. + PUSH store_to + SWAP1 + // stack: pos, store_to + %jump(decode_rlp_scalar) + +store_to: + %stack (pos, to) -> (@TXN_FIELD_TO, to, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + // stack: pos + +parse_value: + // stack: pos + PUSH store_value + SWAP1 + // stack: pos, store_value + %jump(decode_rlp_scalar) + +store_value: + %stack (pos, value) -> (@TXN_FIELD_VALUE, value, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: pos + PUSH store_data_len + SWAP1 + // stack: pos, store_data_len + %jump(decode_rlp_string_len) + +store_data_len: + %stack (pos, data_len) -> (@TXN_FIELD_DATA_LEN, data_len, pos, data_len, pos, data_len) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + // stack: pos, data_len, pos, data_len + ADD + // stack: new_pos, pos, data_len + + // Memcpy the txn data from @SEGMENT_RLP_RAW to @SEGMENT_TXN_DATA. 
+ PUSH parse_v + %stack (parse_v, new_pos, old_pos, data_len) -> (old_pos, data_len, parse_v, new_pos) + PUSH @SEGMENT_RLP_RAW + GET_CONTEXT + PUSH 0 + PUSH @SEGMENT_TXN_DATA + GET_CONTEXT + // stack: DST, SRC, data_len, parse_v, new_pos + %jump(memcpy) + +parse_v: + // stack: pos + PUSH process_v + SWAP1 + // stack: pos, process_v + %jump(decode_rlp_scalar) + +process_v: + // stack: pos, v + SWAP1 + // stack: v, pos + DUP1 + %gt_const(28) + // stack: v > 28, v, pos + %jumpi(process_v_new_style) + + // We have an old style v, so y_parity = v - 27. + // No chain ID is present, so we can leave TXN_FIELD_CHAIN_ID_PRESENT and + // TXN_FIELD_CHAIN_ID with their default values of zero. + // stack: v, pos + %sub_const(27) + %stack (y_parity, pos) -> (@TXN_FIELD_Y_PARITY, y_parity, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: pos + %jump(parse_r) + +process_v_new_style: + // stack: v, pos + // We have a new style v, so chain_id_present = 1, + // chain_id = (v - 35) / 2, and y_parity = (v - 35) % 2. + %stack (v, pos) -> (@TXN_FIELD_CHAIN_ID_PRESENT, 1, v, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: v, pos + %sub_const(35) + DUP1 + // stack: v - 35, v - 35, pos + %div_const(2) + // stack: chain_id, v - 35, pos + PUSH @TXN_FIELD_CHAIN_ID + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: v - 35, pos + %mod_const(2) + // stack: y_parity, pos + PUSH @TXN_FIELD_Y_PARITY + %mstore_current(@SEGMENT_NORMALIZED_TXN) + +parse_r: + // stack: pos + PUSH store_r + SWAP1 + // stack: pos, store_r + %jump(decode_rlp_scalar) + +store_r: + %stack (pos, r) -> (@TXN_FIELD_R, r, pos) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + + // stack: pos + PUSH store_s + SWAP1 + // stack: pos, store_s + %jump(decode_rlp_scalar) + +store_s: + %stack (pos, s) -> (@TXN_FIELD_S, s) + %mstore_current(@SEGMENT_NORMALIZED_TXN) + // stack: (empty) + + // TODO: Write the signed txn data to memory, where it can be hashed and + // checked against the signature. 
+ + %jump(process_normalized_txn) diff --git a/evm/src/cpu/kernel/asm/transactions/type_1.asm b/evm/src/cpu/kernel/asm/transactions/type_1.asm new file mode 100644 index 00000000..5b9d2cdf --- /dev/null +++ b/evm/src/cpu/kernel/asm/transactions/type_1.asm @@ -0,0 +1,12 @@ +// Type 1 transactions, introduced by EIP 2930, have the format +// 0x01 || rlp([chain_id, nonce, gas_price, gas_limit, to, value, data, +// access_list, y_parity, r, s]) +// +// The signed data is +// keccak256(0x01 || rlp([chain_id, nonce, gas_price, gas_limit, to, value, +// data, access_list])) + +global process_type_1_txn: + JUMPDEST + // stack: (empty) + PANIC // TODO: Unfinished diff --git a/evm/src/cpu/kernel/asm/transactions/type_2.asm b/evm/src/cpu/kernel/asm/transactions/type_2.asm new file mode 100644 index 00000000..9807f88f --- /dev/null +++ b/evm/src/cpu/kernel/asm/transactions/type_2.asm @@ -0,0 +1,13 @@ +// Type 2 transactions, introduced by EIP 1559, have the format +// 0x02 || rlp([chain_id, nonce, max_priority_fee_per_gas, max_fee_per_gas, +// gas_limit, to, value, data, access_list, y_parity, r, s]) +// +// The signed data is +// keccak256(0x02 || rlp([chain_id, nonce, max_priority_fee_per_gas, +// max_fee_per_gas, gas_limit, to, value, data, +// access_list])) + +global process_type_2_txn: + JUMPDEST + // stack: (empty) + PANIC // TODO: Unfinished diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs index 8b7327dc..4dbc46ca 100644 --- a/evm/src/cpu/kernel/assembler.rs +++ b/evm/src/cpu/kernel/assembler.rs @@ -5,8 +5,9 @@ use itertools::izip; use log::debug; use super::ast::PushTarget; -use crate::cpu::kernel::ast::Literal; +use crate::cpu::kernel::ast::{Literal, StackReplacement}; use crate::cpu::kernel::keccak_util::hash_kernel; +use crate::cpu::kernel::stack_manipulation::expand_stack_manipulation; use crate::cpu::kernel::{ ast::{File, Item}, opcodes::{get_opcode, get_push_opcode}, @@ -63,6 +64,7 @@ pub(crate) fn assemble(files: Vec, constants: HashMap) -> Ke let expanded_file = expand_macros(file.body, ¯os); let expanded_file = expand_repeats(expanded_file); let expanded_file = inline_constants(expanded_file, &constants); + let expanded_file = expand_stack_manipulation(expanded_file); local_labels.push(find_labels(&expanded_file, &mut offset, &mut global_labels)); expanded_files.push(expanded_file); } @@ -130,13 +132,29 @@ fn expand_macro_call( args.len() ); + let get_arg = |var| { + let param_index = _macro.get_param_index(var); + args[param_index].clone() + }; + let expanded_item = _macro .items .iter() .map(|item| { if let Item::Push(PushTarget::MacroVar(var)) = item { - let param_index = _macro.get_param_index(var); - Item::Push(args[param_index].clone()) + Item::Push(get_arg(var)) + } else if let Item::MacroCall(name, args) = item { + let expanded_args = args + .iter() + .map(|arg| { + if let PushTarget::MacroVar(var) = arg { + get_arg(var) + } else { + arg.clone() + } + }) + .collect(); + Item::MacroCall(name.clone(), expanded_args) } else { item.clone() } @@ -163,14 +181,31 @@ fn expand_repeats(body: Vec) -> Vec { } fn inline_constants(body: Vec, constants: &HashMap) -> Vec { + let resolve_const = |c| { + Literal::Decimal( + constants + .get(&c) + .unwrap_or_else(|| panic!("No such constant: {}", c)) + .to_string(), + ) + }; + body.into_iter() .map(|item| { if let Item::Push(PushTarget::Constant(c)) = item { - let value = constants - .get(&c) - .unwrap_or_else(|| panic!("No such constant: {}", c)); - let literal = Literal::Decimal(value.to_string()); - 
Item::Push(PushTarget::Literal(literal)) + Item::Push(PushTarget::Literal(resolve_const(c))) + } else if let Item::StackManipulation(from, to) = item { + let to = to + .into_iter() + .map(|replacement| { + if let StackReplacement::Constant(c) = replacement { + StackReplacement::Literal(resolve_const(c)) + } else { + replacement + } + }) + .collect(); + Item::StackManipulation(from, to) } else { item } @@ -187,8 +222,11 @@ fn find_labels( let mut local_labels = HashMap::::new(); for item in body { match item { - Item::MacroDef(_, _, _) | Item::MacroCall(_, _) | Item::Repeat(_, _) => { - panic!("Macros and repeats should have been expanded already") + Item::MacroDef(_, _, _) + | Item::MacroCall(_, _) + | Item::Repeat(_, _) + | Item::StackManipulation(_, _) => { + panic!("Item should have been expanded already: {:?}", item); } Item::GlobalLabelDeclaration(label) => { let old = global_labels.insert(label.clone(), *offset); @@ -215,8 +253,11 @@ fn assemble_file( // Assemble the file. for item in body { match item { - Item::MacroDef(_, _, _) | Item::MacroCall(_, _) | Item::Repeat(_, _) => { - panic!("Macros and repeats should have been expanded already") + Item::MacroDef(_, _, _) + | Item::MacroCall(_, _) + | Item::Repeat(_, _) + | Item::StackManipulation(_, _) => { + panic!("Item should have been expanded already: {:?}", item); } Item::GlobalLabelDeclaration(_) | Item::LocalLabelDeclaration(_) => { // Nothing to do; we processed labels in the prior phase. @@ -394,6 +435,17 @@ mod tests { assert_eq!(kernel.code, vec![push1, 2, push1, 3, add]); } + #[test] + fn macro_in_macro_with_vars() { + let kernel = parse_and_assemble(&[ + "%macro foo(x) %bar($x) %bar($x) %endmacro", + "%macro bar(y) PUSH $y %endmacro", + "%foo(42)", + ]); + let push = get_push_opcode(1); + assert_eq!(kernel.code, vec![push, 42, push, 42]); + } + #[test] #[should_panic] fn macro_with_wrong_vars() { @@ -427,6 +479,24 @@ mod tests { assert_eq!(kernel.code, vec![add, add, add]); } + #[test] + fn stack_manipulation() { + let pop = get_opcode("POP"); + let swap1 = get_opcode("SWAP1"); + let swap2 = get_opcode("SWAP2"); + + let kernel = parse_and_assemble(&["%stack (a, b, c) -> (c, b, a)"]); + assert_eq!(kernel.code, vec![swap2]); + + let kernel = parse_and_assemble(&["%stack (a, b, c) -> (b)"]); + assert_eq!(kernel.code, vec![pop, swap1, pop]); + + let mut consts = HashMap::new(); + consts.insert("LIFE".into(), 42.into()); + parse_and_assemble_with_constants(&["%stack (a, b) -> (b, @LIFE)"], consts); + // We won't check the code since there are two equally efficient implementations. + } + fn parse_and_assemble(files: &[&str]) -> Kernel { parse_and_assemble_with_constants(files, HashMap::new()) } diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs index 9bb315ff..92728104 100644 --- a/evm/src/cpu/kernel/ast.rs +++ b/evm/src/cpu/kernel/ast.rs @@ -14,6 +14,11 @@ pub(crate) enum Item { MacroCall(String, Vec), /// Repetition, like `%rep` in NASM. Repeat(Literal, Vec), + /// A directive to manipulate the stack according to a specified pattern. + /// The first list gives names to items on the top of the stack. + /// The second list specifies replacement items. + /// Example: `(a, b, c) -> (c, 5, 0x20, @SOME_CONST, a)`. + StackManipulation(Vec, Vec), /// Declares a global label. GlobalLabelDeclaration(String), /// Declares a label that is local to the current file. 
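To make the new `%stack` directive (exercised by the `stack_manipulation` test above and documented in the AST below) concrete, here is a small standalone Rust model. It is a sketch under the assumption that the first list names the topmost stack items (leftmost = top) and the second list is pushed back so its first entry ends up on top; it only models named items, not the literal or constant replacements the directive also allows, and none of these helpers exist in the crate.

use std::collections::HashMap;

/// Illustrative model: `names[0]` is bound to the current top of the stack and
/// `out[0]` becomes the new top. The Vec stores the stack bottom-to-top.
fn apply_stack_directive(stack: &mut Vec<u64>, names: &[&str], out: &[&str]) {
    let mut bound = HashMap::new();
    for name in names {
        bound.insert(*name, stack.pop().expect("stack underflow"));
    }
    for item in out.iter().rev() {
        stack.push(bound[item]);
    }
}

fn main() {
    // Bottom-to-top storage; read top-first this is a = 10, b = 20, c = 30.
    let mut stack = vec![30, 20, 10];
    apply_stack_directive(&mut stack, &["a", "b", "c"], &["c", "b", "a"]);
    assert_eq!(stack, vec![10, 20, 30]); // top-first: c, b, a -- the effect of SWAP2

    let mut stack = vec![30, 20, 10];
    apply_stack_directive(&mut stack, &["a", "b", "c"], &["b"]);
    assert_eq!(stack, vec![20]); // matches the POP, SWAP1, POP expansion in the test
}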
@@ -26,6 +31,14 @@ pub(crate) enum Item { Bytes(Vec), } +#[derive(Clone, Debug)] +pub(crate) enum StackReplacement { + NamedItem(String), + Literal(Literal), + MacroVar(String), + Constant(String), +} + /// The target of a `PUSH` operation. #[derive(Clone, Debug)] pub(crate) enum PushTarget { @@ -35,7 +48,7 @@ pub(crate) enum PushTarget { Constant(String), } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub(crate) enum Literal { Decimal(String), Hex(String), diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest index d5a89d99..78938b64 100644 --- a/evm/src/cpu/kernel/evm_asm.pest +++ b/evm/src/cpu/kernel/evm_asm.pest @@ -15,12 +15,15 @@ literal = { literal_hex | literal_decimal } variable = ${ "$" ~ identifier } constant = ${ "@" ~ identifier } -item = { macro_def | macro_call | repeat | global_label | local_label | bytes_item | push_instruction | nullary_instruction } -macro_def = { ^"%macro" ~ identifier ~ macro_paramlist? ~ item* ~ ^"%endmacro" } -macro_call = ${ "%" ~ !(^"macro" | ^"endmacro" | ^"rep" | ^"endrep") ~ identifier ~ macro_arglist? } +item = { macro_def | macro_call | repeat | stack | global_label | local_label | bytes_item | push_instruction | nullary_instruction } +macro_def = { ^"%macro" ~ identifier ~ paramlist? ~ item* ~ ^"%endmacro" } +macro_call = ${ "%" ~ !(^"macro" | ^"endmacro" | ^"rep" | ^"endrep" | ^"stack") ~ identifier ~ macro_arglist? } repeat = { ^"%rep" ~ literal ~ item* ~ ^"%endrep" } -macro_paramlist = { "(" ~ identifier ~ ("," ~ identifier)* ~ ")" } +paramlist = { "(" ~ identifier ~ ("," ~ identifier)* ~ ")" } macro_arglist = !{ "(" ~ push_target ~ ("," ~ push_target)* ~ ")" } +stack = { ^"%stack" ~ paramlist ~ "->" ~ stack_replacements } +stack_replacements = { "(" ~ stack_replacement ~ ("," ~ stack_replacement)* ~ ")" } +stack_replacement = { literal | identifier | constant } global_label = { ^"GLOBAL " ~ identifier ~ ":" } local_label = { identifier ~ ":" } bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* } diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 81ce287b..016e3c44 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -1,71 +1,83 @@ -use ethereum_types::{U256, U512}; +use anyhow::{anyhow, bail}; +use ethereum_types::{BigEndianHash, U256, U512}; +use keccak_hash::keccak; + +use crate::generation::memory::MemoryContextState; +use crate::memory::segments::Segment; /// Halt interpreter execution whenever a jump to this offset is done. const HALT_OFFSET: usize = 0xdeadbeef; -#[derive(Debug, Default)] -pub(crate) struct EvmMemory { - memory: Vec, +#[derive(Debug)] +pub(crate) struct InterpreterMemory { + context_memory: Vec, } -impl EvmMemory { - fn len(&self) -> usize { - self.memory.len() - } - - /// Expand memory until `self.len() >= offset`. 
- fn expand(&mut self, offset: usize) { - while self.len() < offset { - self.memory.extend([0; 32]); +impl Default for InterpreterMemory { + fn default() -> Self { + Self { + context_memory: vec![MemoryContextState::default()], } } +} - fn mload(&mut self, offset: usize) -> U256 { - self.expand(offset + 32); - U256::from_big_endian(&self.memory[offset..offset + 32]) +impl InterpreterMemory { + fn mload_general(&self, context: usize, segment: Segment, offset: usize) -> U256 { + self.context_memory[context].segments[segment as usize].get(offset) } - fn mstore(&mut self, offset: usize, value: U256) { - self.expand(offset + 32); - let value_be = { - let mut tmp = [0; 32]; - value.to_big_endian(&mut tmp); - tmp - }; - self.memory[offset..offset + 32].copy_from_slice(&value_be); - } - - fn mstore8(&mut self, offset: usize, value: U256) { - self.expand(offset + 1); - let value_byte = value.0[0] as u8; - self.memory[offset] = value_byte; + fn mstore_general(&mut self, context: usize, segment: Segment, offset: usize, value: U256) { + self.context_memory[context].segments[segment as usize].set(offset, value) } } +// TODO: Remove `code` and `stack` fields as they are contained in `memory`. pub(crate) struct Interpreter<'a> { code: &'a [u8], jumpdests: Vec, offset: usize, pub(crate) stack: Vec, - pub(crate) memory: EvmMemory, + context: usize, + memory: InterpreterMemory, + /// Non-deterministic prover inputs, stored backwards so that popping the last item gives the + /// next prover input. + prover_inputs: Vec, running: bool, } -pub(crate) fn run(code: &[u8], initial_offset: usize, initial_stack: Vec) -> Interpreter { +pub(crate) fn run( + code: &[u8], + initial_offset: usize, + initial_stack: Vec, +) -> anyhow::Result { + run_with_input(code, initial_offset, initial_stack, vec![]) +} + +pub(crate) fn run_with_input( + code: &[u8], + initial_offset: usize, + initial_stack: Vec, + mut prover_inputs: Vec, +) -> anyhow::Result { + // Prover inputs are stored backwards, so that popping the last item gives the next input. 
+ prover_inputs.reverse(); + let mut interpreter = Interpreter { code, jumpdests: find_jumpdests(code), offset: initial_offset, stack: initial_stack, - memory: EvmMemory::default(), + context: 0, + memory: InterpreterMemory::default(), + prover_inputs, running: true, }; while interpreter.running { - interpreter.run_opcode(); + interpreter.run_opcode()?; } - interpreter + Ok(interpreter) } impl<'a> Interpreter<'a> { @@ -89,7 +101,7 @@ impl<'a> Interpreter<'a> { self.stack.pop().expect("Pop on empty stack.") } - fn run_opcode(&mut self) { + fn run_opcode(&mut self) -> anyhow::Result<()> { let opcode = self.code.get(self.offset).copied().unwrap_or_default(); self.incr(1); match opcode { @@ -119,7 +131,7 @@ impl<'a> Interpreter<'a> { 0x1b => todo!(), // "SHL", 0x1c => todo!(), // "SHR", 0x1d => todo!(), // "SAR", - 0x20 => todo!(), // "KECCAK256", + 0x20 => self.run_keccak256(), // "KECCAK256", 0x30 => todo!(), // "ADDRESS", 0x31 => todo!(), // "BALANCE", 0x32 => todo!(), // "ORIGIN", @@ -144,6 +156,7 @@ impl<'a> Interpreter<'a> { 0x45 => todo!(), // "GASLIMIT", 0x46 => todo!(), // "CHAINID", 0x48 => todo!(), // "BASEFEE", + 0x49 => self.run_prover_input()?, // "PROVER_INPUT", 0x50 => self.run_pop(), // "POP", 0x51 => self.run_mload(), // "MLOAD", 0x52 => self.run_mstore(), // "MSTORE", @@ -156,6 +169,10 @@ impl<'a> Interpreter<'a> { 0x59 => todo!(), // "MSIZE", 0x5a => todo!(), // "GAS", 0x5b => (), // "JUMPDEST", + 0x5c => todo!(), // "GET_STATE_ROOT", + 0x5d => todo!(), // "SET_STATE_ROOT", + 0x5e => todo!(), // "GET_RECEIPT_ROOT", + 0x5f => todo!(), // "SET_RECEIPT_ROOT", x if (0x60..0x80).contains(&x) => self.run_push(x - 0x5f), // "PUSH" x if (0x80..0x90).contains(&x) => self.run_dup(x - 0x7f), // "DUP" x if (0x90..0xa0).contains(&x) => self.run_swap(x - 0x8f), // "SWAP" @@ -164,18 +181,26 @@ impl<'a> Interpreter<'a> { 0xa2 => todo!(), // "LOG2", 0xa3 => todo!(), // "LOG3", 0xa4 => todo!(), // "LOG4", + 0xa5 => bail!("Executed PANIC"), // "PANIC", 0xf0 => todo!(), // "CREATE", 0xf1 => todo!(), // "CALL", 0xf2 => todo!(), // "CALLCODE", 0xf3 => todo!(), // "RETURN", 0xf4 => todo!(), // "DELEGATECALL", 0xf5 => todo!(), // "CREATE2", + 0xf6 => self.run_get_context(), // "GET_CONTEXT", + 0xf7 => self.run_set_context(), // "SET_CONTEXT", + 0xf8 => todo!(), // "CONSUME_GAS", + 0xf9 => todo!(), // "EXIT_KERNEL", 0xfa => todo!(), // "STATICCALL", + 0xfb => self.run_mload_general(), // "MLOAD_GENERAL", + 0xfc => self.run_mstore_general(), // "MSTORE_GENERAL", 0xfd => todo!(), // "REVERT", - 0xfe => todo!(), // "INVALID", + 0xfe => bail!("Executed INVALID"), // "INVALID", 0xff => todo!(), // "SELFDESTRUCT", - _ => panic!("Unrecognized opcode {}.", opcode), + _ => bail!("Unrecognized opcode {}.", opcode), }; + Ok(()) } fn run_stop(&mut self) { @@ -286,26 +311,67 @@ impl<'a> Interpreter<'a> { self.push(!x); } + fn run_keccak256(&mut self) { + let offset = self.pop().as_usize(); + let size = self.pop().as_usize(); + let bytes = (offset..offset + size) + .map(|i| { + self.memory + .mload_general(self.context, Segment::MainMemory, i) + .byte(0) + }) + .collect::>(); + let hash = keccak(bytes); + self.push(hash.into_uint()); + } + + fn run_prover_input(&mut self) -> anyhow::Result<()> { + let input = self + .prover_inputs + .pop() + .ok_or_else(|| anyhow!("Out of prover inputs"))?; + self.stack.push(input); + Ok(()) + } + fn run_pop(&mut self) { self.pop(); } fn run_mload(&mut self) { - let offset = self.pop(); - let value = self.memory.mload(offset.as_usize()); + let offset = self.pop().as_usize(); + 
let value = U256::from_big_endian( + &(0..32) + .map(|i| { + self.memory + .mload_general(self.context, Segment::MainMemory, offset + i) + .byte(0) + }) + .collect::>(), + ); self.push(value); } fn run_mstore(&mut self) { - let offset = self.pop(); + let offset = self.pop().as_usize(); let value = self.pop(); - self.memory.mstore(offset.as_usize(), value); + let mut bytes = [0; 32]; + value.to_big_endian(&mut bytes); + for (i, byte) in (0..32).zip(bytes) { + self.memory + .mstore_general(self.context, Segment::MainMemory, offset + i, byte.into()); + } } fn run_mstore8(&mut self) { - let offset = self.pop(); + let offset = self.pop().as_usize(); let value = self.pop(); - self.memory.mstore8(offset.as_usize(), value); + self.memory.mstore_general( + self.context, + Segment::MainMemory, + offset, + value.byte(0).into(), + ); } fn run_jump(&mut self) { @@ -345,6 +411,33 @@ impl<'a> Interpreter<'a> { let len = self.stack.len(); self.stack.swap(len - 1, len - n as usize - 1); } + + fn run_get_context(&mut self) { + self.push(self.context.into()); + } + + fn run_set_context(&mut self) { + let x = self.pop(); + self.context = x.as_usize(); + } + + fn run_mload_general(&mut self) { + let context = self.pop().as_usize(); + let segment = Segment::all()[self.pop().as_usize()]; + let offset = self.pop().as_usize(); + let value = self.memory.mload_general(context, segment, offset); + assert!(value.bits() <= segment.bit_range()); + self.push(value); + } + + fn run_mstore_general(&mut self) { + let context = self.pop().as_usize(); + let segment = Segment::all()[self.pop().as_usize()]; + let offset = self.pop().as_usize(); + let value = self.pop(); + assert!(value.bits() <= segment.bit_range()); + self.memory.mstore_general(context, segment, offset, value); + } } /// Return the (ordered) JUMPDEST offsets in the code. @@ -365,20 +458,20 @@ fn find_jumpdests(code: &[u8]) -> Vec { #[cfg(test)] mod tests { - use hex_literal::hex; - use crate::cpu::kernel::interpreter::{run, Interpreter}; + use crate::memory::segments::Segment; #[test] - fn test_run() { + fn test_run() -> anyhow::Result<()> { let code = vec![ 0x60, 0x1, 0x60, 0x2, 0x1, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56, ]; // PUSH1, 1, PUSH1, 2, ADD, PUSH4 deadbeef, JUMP - assert_eq!(run(&code, 0, vec![]).stack, vec![0x3.into()]); + assert_eq!(run(&code, 0, vec![])?.stack, vec![0x3.into()]); + Ok(()) } #[test] - fn test_run_with_memory() { + fn test_run_with_memory() -> anyhow::Result<()> { // PUSH1 0xff // PUSH1 0 // MSTORE @@ -396,9 +489,17 @@ mod tests { 0x60, 0xff, 0x60, 0x0, 0x52, 0x60, 0, 0x51, 0x60, 0x1, 0x51, 0x60, 0x42, 0x60, 0x27, 0x53, ]; - let run = run(&code, 0, vec![]); + let run = run(&code, 0, vec![])?; let Interpreter { stack, memory, .. 
} = run; assert_eq!(stack, vec![0xff.into(), 0xff00.into()]); - assert_eq!(&memory.memory, &hex!("00000000000000000000000000000000000000000000000000000000000000ff0000000000000042000000000000000000000000000000000000000000000000")); + assert_eq!( + memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x27), + 0x42.into() + ); + assert_eq!( + memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x1f), + 0xff.into() + ); + Ok(()) } } diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs index 2dd70aa3..1d545260 100644 --- a/evm/src/cpu/kernel/mod.rs +++ b/evm/src/cpu/kernel/mod.rs @@ -4,6 +4,8 @@ mod ast; pub(crate) mod keccak_util; mod opcodes; mod parser; +mod stack_manipulation; +mod txn_fields; #[cfg(test)] mod interpreter; diff --git a/evm/src/cpu/kernel/opcodes.rs b/evm/src/cpu/kernel/opcodes.rs index b8633178..69ee13fe 100644 --- a/evm/src/cpu/kernel/opcodes.rs +++ b/evm/src/cpu/kernel/opcodes.rs @@ -59,6 +59,7 @@ pub(crate) fn get_opcode(mnemonic: &str) -> u8 { "GASLIMIT" => 0x45, "CHAINID" => 0x46, "BASEFEE" => 0x48, + "PROVER_INPUT" => 0x49, "POP" => 0x50, "MLOAD" => 0x51, "MSTORE" => 0x52, @@ -71,6 +72,10 @@ pub(crate) fn get_opcode(mnemonic: &str) -> u8 { "MSIZE" => 0x59, "GAS" => 0x5a, "JUMPDEST" => 0x5b, + "GET_STATE_ROOT" => 0x5c, + "SET_STATE_ROOT" => 0x5d, + "GET_RECEIPT_ROOT" => 0x5e, + "SET_RECEIPT_ROOT" => 0x5f, "DUP1" => 0x80, "DUP2" => 0x81, "DUP3" => 0x82, @@ -108,13 +113,20 @@ pub(crate) fn get_opcode(mnemonic: &str) -> u8 { "LOG2" => 0xa2, "LOG3" => 0xa3, "LOG4" => 0xa4, + "PANIC" => 0xa5, "CREATE" => 0xf0, "CALL" => 0xf1, "CALLCODE" => 0xf2, "RETURN" => 0xf3, "DELEGATECALL" => 0xf4, "CREATE2" => 0xf5, + "GET_CONTEXT" => 0xf6, + "SET_CONTEXT" => 0xf7, + "CONSUME_GAS" => 0xf8, + "EXIT_KERNEL" => 0xf9, "STATICCALL" => 0xfa, + "MLOAD_GENERAL" => 0xfb, + "MSTORE_GENERAL" => 0xfc, "REVERT" => 0xfd, "INVALID" => 0xfe, "SELFDESTRUCT" => 0xff, diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs index b8ac3f40..aa84ee05 100644 --- a/evm/src/cpu/kernel/parser.rs +++ b/evm/src/cpu/kernel/parser.rs @@ -1,7 +1,7 @@ use pest::iterators::Pair; use pest::Parser; -use crate::cpu::kernel::ast::{File, Item, Literal, PushTarget}; +use crate::cpu::kernel::ast::{File, Item, Literal, PushTarget, StackReplacement}; /// Parses EVM assembly code. #[derive(pest_derive::Parser)] @@ -24,6 +24,7 @@ fn parse_item(item: Pair) -> Item { Rule::macro_def => parse_macro_def(item), Rule::macro_call => parse_macro_call(item), Rule::repeat => parse_repeat(item), + Rule::stack => parse_stack(item), Rule::global_label => { Item::GlobalLabelDeclaration(item.into_inner().next().unwrap().as_str().into()) } @@ -44,7 +45,7 @@ fn parse_macro_def(item: Pair) -> Item { let name = inner.next().unwrap().as_str().into(); // The parameter list is optional. 
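Annotation (not part of the patch): the "parameter list is optional" comment above refers to a peek-then-consume idiom: look at the next pair's rule and only advance the iterator when it matches the optional rule. The sketch below shows the same pattern with a stand-in `Rule` enum and a plain `Peekable` iterator rather than the pest-generated types, so it is illustrative only:

```rust
#[derive(Debug, PartialEq)]
enum Rule {
    Paramlist,
    Item,
}

// Consume an optional parameter list: peek at the next rule, advance only if it matches.
fn parse_optional_params(pairs: &mut std::iter::Peekable<std::vec::IntoIter<Rule>>) -> bool {
    if pairs.peek() == Some(&Rule::Paramlist) {
        pairs.next(); // consume the parameter list
        true
    } else {
        false // no parameters; leave the peeked pair for the next parsing step
    }
}

fn main() {
    let mut with_params = vec![Rule::Paramlist, Rule::Item].into_iter().peekable();
    assert!(parse_optional_params(&mut with_params));
    assert_eq!(with_params.next(), Some(Rule::Item));

    let mut without_params = vec![Rule::Item].into_iter().peekable();
    assert!(!parse_optional_params(&mut without_params));
    assert_eq!(without_params.next(), Some(Rule::Item));
}
```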
- let params = if let Some(Rule::macro_paramlist) = inner.peek().map(|pair| pair.as_rule()) { + let params = if let Some(Rule::paramlist) = inner.peek().map(|pair| pair.as_rule()) { let params = inner.next().unwrap().into_inner(); params.map(|param| param.as_str().to_string()).collect() } else { @@ -78,6 +79,42 @@ fn parse_repeat(item: Pair) -> Item { Item::Repeat(count, inner.map(parse_item).collect()) } +fn parse_stack(item: Pair) -> Item { + assert_eq!(item.as_rule(), Rule::stack); + let mut inner = item.into_inner().peekable(); + + let params = inner.next().unwrap(); + assert_eq!(params.as_rule(), Rule::paramlist); + let replacements = inner.next().unwrap(); + assert_eq!(replacements.as_rule(), Rule::stack_replacements); + + let params = params + .into_inner() + .map(|param| param.as_str().to_string()) + .collect(); + let replacements = replacements + .into_inner() + .map(parse_stack_replacement) + .collect(); + Item::StackManipulation(params, replacements) +} + +fn parse_stack_replacement(target: Pair) -> StackReplacement { + assert_eq!(target.as_rule(), Rule::stack_replacement); + let inner = target.into_inner().next().unwrap(); + match inner.as_rule() { + Rule::identifier => StackReplacement::NamedItem(inner.as_str().into()), + Rule::literal => StackReplacement::Literal(parse_literal(inner)), + Rule::variable => { + StackReplacement::MacroVar(inner.into_inner().next().unwrap().as_str().into()) + } + Rule::constant => { + StackReplacement::Constant(inner.into_inner().next().unwrap().as_str().into()) + } + _ => panic!("Unexpected {:?}", inner.as_rule()), + } +} + fn parse_push_target(target: Pair) -> PushTarget { assert_eq!(target.as_rule(), Rule::push_target); let inner = target.into_inner().next().unwrap(); diff --git a/evm/src/cpu/kernel/stack_manipulation.rs b/evm/src/cpu/kernel/stack_manipulation.rs new file mode 100644 index 00000000..63d0566c --- /dev/null +++ b/evm/src/cpu/kernel/stack_manipulation.rs @@ -0,0 +1,262 @@ +use std::cmp::Ordering; +use std::collections::hash_map::Entry::{Occupied, Vacant}; +use std::collections::{BinaryHeap, HashMap}; + +use itertools::Itertools; + +use crate::cpu::columns::NUM_CPU_COLUMNS; +use crate::cpu::kernel::ast::{Item, Literal, PushTarget, StackReplacement}; +use crate::cpu::kernel::stack_manipulation::StackOp::Pop; +use crate::memory; + +pub(crate) fn expand_stack_manipulation(body: Vec) -> Vec { + let mut expanded = vec![]; + for item in body { + if let Item::StackManipulation(names, replacements) = item { + expanded.extend(expand(names, replacements)); + } else { + expanded.push(item); + } + } + expanded +} + +fn expand(names: Vec, replacements: Vec) -> Vec { + let mut src = names.into_iter().map(StackItem::NamedItem).collect_vec(); + + let unique_literals = replacements + .iter() + .filter_map(|item| match item { + StackReplacement::Literal(n) => Some(n.clone()), + _ => None, + }) + .unique() + .collect_vec(); + + let mut dst = replacements + .into_iter() + .map(|item| match item { + StackReplacement::NamedItem(name) => StackItem::NamedItem(name), + StackReplacement::Literal(n) => StackItem::Literal(n), + StackReplacement::MacroVar(_) | StackReplacement::Constant(_) => { + panic!("Should have been expanded already: {:?}", item) + } + }) + .collect_vec(); + + // %stack uses our convention where the top item is written on the left side. + // `shortest_path` expects the opposite, so we reverse src and dst. 
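Annotation (not part of the patch): the comment above pins down the `%stack` convention, which is easiest to see on a concrete case. Assuming a directive roughly of the form `%stack (a, b, c) -> (c, 5, a)` (the names and the literal 5 are made up), the intended effect is: bind the top three items with `a` on top, then rebuild the stack so the left-most replacement ends up on top. The sketch models the stack as a `Vec` whose last element is the top, which is exactly why the expansion code reverses both sides before calling `shortest_path`:

```rust
fn main() {
    // Top of stack is `a` (= 10), then `b` (= 20), then `c` (= 30).
    let mut stack = vec![30u64, 20, 10];

    // Bind the named items by popping them, top first.
    let a = stack.pop().unwrap();
    let _b = stack.pop().unwrap();
    let c = stack.pop().unwrap();

    // Push the replacement right-to-left so the left-most item ends up on top.
    stack.push(a);
    stack.push(5);
    stack.push(c);

    // Afterwards `c` is on top, then the literal 5, then `a`.
    assert_eq!(stack, vec![10, 5, 30]);
}
```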
+ src.reverse(); + dst.reverse(); + + let path = shortest_path(src, dst, unique_literals); + path.into_iter().map(StackOp::into_item).collect() +} + +/// Finds the lowest-cost sequence of `StackOp`s that transforms `src` to `dst`. +/// Uses a variant of Dijkstra's algorithm. +fn shortest_path( + src: Vec, + dst: Vec, + unique_literals: Vec, +) -> Vec { + // Nodes to visit, starting with the lowest-cost node. + let mut queue = BinaryHeap::new(); + queue.push(Node { + stack: src.clone(), + cost: 0, + }); + + // For each node, stores `(best_cost, Option<(parent, op)>)`. + let mut node_info = HashMap::, (u32, Option<(Vec, StackOp)>)>::new(); + node_info.insert(src.clone(), (0, None)); + + while let Some(node) = queue.pop() { + if node.stack == dst { + // The destination is now the lowest-cost node, so we must have found the best path. + let mut path = vec![]; + let mut stack = &node.stack; + // Rewind back to src, recording a list of operations which will be backwards. + while let Some((parent, op)) = &node_info[stack].1 { + stack = parent; + path.push(op.clone()); + } + assert_eq!(stack, &src); + path.reverse(); + return path; + } + + let (best_cost, _) = node_info[&node.stack]; + if best_cost < node.cost { + // Since we can't efficiently remove nodes from the heap, it can contain duplicates. + // In this case, we've already visited this stack state with a lower cost. + continue; + } + + for op in next_ops(&node.stack, &dst, &unique_literals) { + let neighbor = match op.apply_to(node.stack.clone()) { + Some(n) => n, + None => continue, + }; + + let cost = node.cost + op.cost(); + let entry = node_info.entry(neighbor.clone()); + if let Occupied(e) = &entry && e.get().0 <= cost { + // We already found a better or equal path. + continue; + } + + let neighbor_info = (cost, Some((node.stack.clone(), op.clone()))); + match entry { + Occupied(mut e) => { + e.insert(neighbor_info); + } + Vacant(e) => { + e.insert(neighbor_info); + } + } + + queue.push(Node { + stack: neighbor, + cost, + }); + } + } + + panic!("No path found from {:?} to {:?}", src, dst) +} + +/// A node in the priority queue used by Dijkstra's algorithm. +#[derive(Eq, PartialEq)] +struct Node { + stack: Vec, + cost: u32, +} + +impl PartialOrd for Node { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Node { + fn cmp(&self, other: &Self) -> Ordering { + // We want a min-heap rather than the default max-heap, so this is the opposite of the + // natural ordering of costs. + other.cost.cmp(&self.cost) + } +} + +/// Like `StackReplacement`, but without constants or macro vars, since those were expanded already. +#[derive(Eq, PartialEq, Hash, Clone, Debug)] +enum StackItem { + NamedItem(String), + Literal(Literal), +} + +#[derive(Clone, Debug)] +enum StackOp { + Push(Literal), + Pop, + Dup(u8), + Swap(u8), +} + +/// A set of candidate operations to consider for the next step in the path from `src` to `dst`. +fn next_ops(src: &[StackItem], dst: &[StackItem], unique_literals: &[Literal]) -> Vec { + if let Some(top) = src.last() && !dst.contains(top) { + // If the top of src doesn't appear in dst, don't bother with anything other than a POP. + return vec![StackOp::Pop] + } + + let mut ops = vec![StackOp::Pop]; + + ops.extend( + unique_literals + .iter() + // Only consider pushing this literal if we need more occurrences of it, otherwise swaps + // will be a better way to rearrange the existing occurrences as needed. 
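Annotation (not part of the patch): the `Node`/`Ord` pair introduced above relies on a standard trick worth spelling out: `BinaryHeap` is a max-heap, so implementing `Ord` with the comparison reversed makes `pop()` return the *cheapest* pending node first, which is what Dijkstra's algorithm needs. A self-contained sketch of just that piece, with the stack field stripped out:

```rust
use std::cmp::Ordering;
use std::collections::BinaryHeap;

#[derive(Eq, PartialEq)]
struct Node {
    cost: u32,
}

impl PartialOrd for Node {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Node {
    fn cmp(&self, other: &Self) -> Ordering {
        // Reversed: a smaller cost compares as "greater", so it surfaces from the heap first.
        other.cost.cmp(&self.cost)
    }
}

fn main() {
    let mut queue = BinaryHeap::new();
    for cost in [7u32, 3, 9, 1] {
        queue.push(Node { cost });
    }
    // Popping yields nodes in increasing cost order, i.e. a min-heap.
    let order: Vec<u32> = std::iter::from_fn(|| queue.pop().map(|n| n.cost)).collect();
    assert_eq!(order, vec![1, 3, 7, 9]);
}
```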
+ .filter(|lit| { + let item = StackItem::Literal((*lit).clone()); + let src_count = src.iter().filter(|x| **x == item).count(); + let dst_count = dst.iter().filter(|x| **x == item).count(); + src_count < dst_count + }) + .cloned() + .map(StackOp::Push), + ); + + let src_len = src.len() as u8; + + ops.extend( + (1..=src_len) + // Only consider duplicating this item if we need more occurrences of it, otherwise swaps + // will be a better way to rearrange the existing occurrences as needed. + .filter(|i| { + let item = &src[src.len() - *i as usize]; + let src_count = src.iter().filter(|x| *x == item).count(); + let dst_count = dst.iter().filter(|x| *x == item).count(); + src_count < dst_count + }) + .map(StackOp::Dup), + ); + + ops.extend((1..src_len).map(StackOp::Swap)); + + ops +} + +impl StackOp { + fn cost(&self) -> u32 { + let (cpu_rows, memory_rows) = match self { + StackOp::Push(n) => { + let bytes = n.to_trimmed_be_bytes().len() as u32; + // This is just a rough estimate; we can update it after implementing PUSH. + (bytes, bytes) + } + // A POP takes one cycle, and doesn't involve memory, it just decrements a pointer. + Pop => (1, 0), + // A DUP takes one cycle, and a read and a write. + StackOp::Dup(_) => (1, 2), + // A SWAP takes one cycle with four memory ops, to read both values then write to them. + StackOp::Swap(_) => (1, 4), + }; + + let cpu_cost = cpu_rows * NUM_CPU_COLUMNS as u32; + let memory_cost = memory_rows * memory::columns::NUM_COLUMNS as u32; + cpu_cost + memory_cost + } + + /// Returns an updated stack after this operation is performed, or `None` if this operation + /// would not be valid on the given stack. + fn apply_to(&self, mut stack: Vec) -> Option> { + let len = stack.len(); + match self { + StackOp::Push(n) => { + stack.push(StackItem::Literal(n.clone())); + } + Pop => { + stack.pop()?; + } + StackOp::Dup(n) => { + let idx = len.checked_sub(*n as usize)?; + stack.push(stack[idx].clone()); + } + StackOp::Swap(n) => { + let from = len.checked_sub(1)?; + let to = len.checked_sub(*n as usize + 1)?; + stack.swap(from, to); + } + } + Some(stack) + } + + fn into_item(self) -> Item { + match self { + StackOp::Push(n) => Item::Push(PushTarget::Literal(n)), + Pop => Item::StandardOp("POP".into()), + StackOp::Dup(n) => Item::StandardOp(format!("DUP{}", n)), + StackOp::Swap(n) => Item::StandardOp(format!("SWAP{}", n)), + } + } +} diff --git a/evm/src/cpu/kernel/tests/curve_ops.rs b/evm/src/cpu/kernel/tests/curve_ops.rs index 7d7f042a..6d8c6696 100644 --- a/evm/src/cpu/kernel/tests/curve_ops.rs +++ b/evm/src/cpu/kernel/tests/curve_ops.rs @@ -43,76 +43,76 @@ mod bn { // Standard addition #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard addition #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard doubling #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #2 let initial_stack = 
u256ify(["0xdeadbeef", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_double, initial_stack).stack; + let stack = run(&kernel.code, ec_double, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #3 let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Addition with identity #1 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #3 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, identity.0])?); // Addition with invalid point(s) #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #2 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #3 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #4 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Scalar multiplication #1 let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point4.1, point4.0])?); // Scalar multiplication #2 let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #3 let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point0.1, point0.0])?); // Scalar multiplication #4 let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; - let stack = run(&kernel.code, 
ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #5 let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Multiple calls @@ -126,7 +126,7 @@ mod bn { point0.1, point0.0, ])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point4.1, point4.0])?); Ok(()) @@ -176,55 +176,55 @@ mod secp { // Standard addition #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard addition #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard doubling #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #2 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_double, initial_stack).stack; + let stack = run(&kernel.code, ec_double, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #3 let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point3.1, point3.0])?); // Addition with identity #1 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #3 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #1 let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point4.1, point4.0])?); // Scalar multiplication #2 let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, 
identity.0])?); // Scalar multiplication #3 let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([point0.1, point0.0])?); // Scalar multiplication #4 let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; - let stack = run(&kernel.code, ec_mul, initial_stack).stack; + let stack = run(&kernel.code, ec_mul, initial_stack)?.stack; assert_eq!(stack, u256ify([identity.1, identity.0])?); // Multiple calls @@ -238,7 +238,7 @@ mod secp { point0.1, point0.0, ])?; - let stack = run(&kernel.code, ec_add, initial_stack).stack; + let stack = run(&kernel.code, ec_add, initial_stack)?.stack; assert_eq!(stack, u256ify([point4.1, point4.0])?); Ok(()) diff --git a/evm/src/cpu/kernel/tests/ecrecover.rs b/evm/src/cpu/kernel/tests/ecrecover.rs index 5077d042..78bdea3e 100644 --- a/evm/src/cpu/kernel/tests/ecrecover.rs +++ b/evm/src/cpu/kernel/tests/ecrecover.rs @@ -1,20 +1,13 @@ +use std::str::FromStr; + use anyhow::Result; use ethereum_types::U256; -use keccak_hash::keccak; use crate::cpu::kernel::aggregator::combined_kernel; use crate::cpu::kernel::assembler::Kernel; use crate::cpu::kernel::interpreter::run; use crate::cpu::kernel::tests::u256ify; -fn pubkey_to_addr(x: U256, y: U256) -> Vec { - let mut buf = [0; 64]; - x.to_big_endian(&mut buf[0..32]); - y.to_big_endian(&mut buf[32..64]); - let hash = keccak(buf); - hash.0[12..].to_vec() -} - fn test_valid_ecrecover( hash: &str, v: &str, @@ -24,10 +17,9 @@ fn test_valid_ecrecover( kernel: &Kernel, ) -> Result<()> { let ecrecover = kernel.global_labels["ecrecover"]; - let initial_stack = u256ify([s, r, v, hash])?; - let stack = run(&kernel.code, ecrecover, initial_stack).stack; - let got = pubkey_to_addr(stack[1], stack[0]); - assert_eq!(got, hex::decode(&expected[2..]).unwrap()); + let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?; + let stack = run(&kernel.code, ecrecover, initial_stack)?.stack; + assert_eq!(stack[0], U256::from_str(expected).unwrap()); Ok(()) } @@ -35,7 +27,7 @@ fn test_valid_ecrecover( fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel) -> Result<()> { let ecrecover = kernel.global_labels["ecrecover"]; let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?; - let stack = run(&kernel.code, ecrecover, initial_stack).stack; + let stack = run(&kernel.code, ecrecover, initial_stack)?.stack; assert_eq!(stack, vec![U256::MAX]); Ok(()) diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs index b12b943e..25c88623 100644 --- a/evm/src/cpu/kernel/tests/exp.rs +++ b/evm/src/cpu/kernel/tests/exp.rs @@ -18,26 +18,26 @@ fn test_exp() -> Result<()> { // Random input let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack).stack; + let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack; let initial_stack = vec![b, a]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack).stack; + let stack_with_opcode = run(&code, 0, initial_stack)?.stack; assert_eq!(stack_with_kernel, stack_with_opcode); // 0 base let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack).stack; + let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack; let initial_stack = 
vec![b, U256::zero()]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack).stack; + let stack_with_opcode = run(&code, 0, initial_stack)?.stack; assert_eq!(stack_with_kernel, stack_with_opcode); // 0 exponent let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a]; - let stack_with_kernel = run(&kernel.code, exp, initial_stack).stack; + let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack; let initial_stack = vec![U256::zero(), a]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack).stack; + let stack_with_opcode = run(&code, 0, initial_stack)?.stack; assert_eq!(stack_with_kernel, stack_with_opcode); Ok(()) diff --git a/evm/src/cpu/kernel/txn_fields.rs b/evm/src/cpu/kernel/txn_fields.rs new file mode 100644 index 00000000..141eee39 --- /dev/null +++ b/evm/src/cpu/kernel/txn_fields.rs @@ -0,0 +1,59 @@ +/// These are normalized transaction fields, i.e. not specific to any transaction type. +#[allow(dead_code)] +#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)] +pub(crate) enum NormalizedTxnField { + /// Whether a chain ID was present in the txn data. Type 0 transaction with v=27 or v=28 have + /// no chain ID. This affects what fields get signed. + ChainIdPresent = 0, + ChainId = 1, + Nonce = 2, + MaxPriorityFeePerGas = 3, + MaxFeePerGas = 4, + GasLimit = 5, + To = 6, + Value = 7, + /// The length of the data field. The data itself is stored in another segment. + DataLen = 8, + YParity = 9, + R = 10, + S = 11, +} + +impl NormalizedTxnField { + pub(crate) const COUNT: usize = 12; + + pub(crate) fn all() -> [Self; Self::COUNT] { + [ + Self::ChainIdPresent, + Self::ChainId, + Self::Nonce, + Self::MaxPriorityFeePerGas, + Self::MaxFeePerGas, + Self::GasLimit, + Self::To, + Self::Value, + Self::DataLen, + Self::YParity, + Self::R, + Self::S, + ] + } + + /// The variable name that gets passed into kernel assembly code. 
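Annotation (not part of the patch): the doc comment above introduces `var_name()`, whose strings are presumably exposed to kernel assembly as named constants. Separately, the enum's explicit discriminants double as offsets into the normalized-transaction segment, so storing a field is a single indexed write. A self-contained sketch of that offset convention (the variants and values here are made up, and a plain `Vec<u64>` stands in for the memory segment):

```rust
enum TxnField {
    Nonce = 0,
    GasLimit = 1,
    Value = 2,
}

fn main() {
    // Stand-in for the normalized-txn segment: offset-indexed storage.
    let mut segment = vec![0u64; 3];

    let set = |segment: &mut Vec<u64>, field: TxnField, value: u64| {
        segment[field as usize] = value;
    };

    set(&mut segment, TxnField::Nonce, 7);
    set(&mut segment, TxnField::GasLimit, 21_000);
    set(&mut segment, TxnField::Value, 1_000_000);

    assert_eq!(segment[TxnField::GasLimit as usize], 21_000);
}
```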
+ pub(crate) fn var_name(&self) -> &'static str { + match self { + NormalizedTxnField::ChainIdPresent => "TXN_FIELD_CHAIN_ID_PRESENT", + NormalizedTxnField::ChainId => "TXN_FIELD_CHAIN_ID", + NormalizedTxnField::Nonce => "TXN_FIELD_NONCE", + NormalizedTxnField::MaxPriorityFeePerGas => "TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS", + NormalizedTxnField::MaxFeePerGas => "TXN_FIELD_MAX_FEE_PER_GAS", + NormalizedTxnField::GasLimit => "TXN_FIELD_GAS_LIMIT", + NormalizedTxnField::To => "TXN_FIELD_TO", + NormalizedTxnField::Value => "TXN_FIELD_VALUE", + NormalizedTxnField::DataLen => "TXN_FIELD_DATA_LEN", + NormalizedTxnField::YParity => "TXN_FIELD_Y_PARITY", + NormalizedTxnField::R => "TXN_FIELD_R", + NormalizedTxnField::S => "TXN_FIELD_S", + } + } +} diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs index 8da8a125..6c767998 100644 --- a/evm/src/cpu/mod.rs +++ b/evm/src/cpu/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod bootstrap_kernel; pub(crate) mod columns; +mod control_flow; pub mod cpu_stark; pub(crate) mod decode; pub mod kernel; diff --git a/evm/src/generation/memory.rs b/evm/src/generation/memory.rs index 60bfe794..5e2919a4 100644 --- a/evm/src/generation/memory.rs +++ b/evm/src/generation/memory.rs @@ -34,14 +34,14 @@ pub(crate) struct MemorySegmentState { } impl MemorySegmentState { - pub(super) fn get(&self, virtual_addr: usize) -> U256 { + pub(crate) fn get(&self, virtual_addr: usize) -> U256 { self.content .get(virtual_addr) .copied() .unwrap_or(U256::zero()) } - pub(super) fn set(&mut self, virtual_addr: usize, value: U256) { + pub(crate) fn set(&mut self, virtual_addr: usize, value: U256) { if virtual_addr >= self.content.len() { self.content.resize(virtual_addr + 1, U256::zero()); } diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index b73270db..02c91d16 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -1,3 +1,4 @@ +use ethereum_types::U256; use plonky2::field::extension::Extendable; use plonky2::field::polynomial::PolynomialValues; use plonky2::field::types::Field; @@ -9,7 +10,7 @@ use crate::cpu::columns::NUM_CPU_COLUMNS; use crate::generation::state::GenerationState; use crate::util::trace_rows_to_poly_values; -mod memory; +pub(crate) mod memory; pub(crate) mod state; /// A piece of data which has been encoded using Recursive Length Prefix (RLP) serialization. @@ -45,18 +46,28 @@ pub fn generate_traces, const D: usize>( current_cpu_row, memory, keccak_inputs, - logic_ops: logic_inputs, + logic_ops, + prover_inputs, .. } = state; assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS].into()); + assert_eq!(prover_inputs, vec![], "Not all prover inputs were consumed"); let cpu_trace = trace_rows_to_poly_values(cpu_rows); let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs); - let logic_trace = all_stark.logic_stark.generate_trace(logic_inputs); + let logic_trace = all_stark.logic_stark.generate_trace(logic_ops); let memory_trace = all_stark.memory_stark.generate_trace(memory.log); vec![cpu_trace, keccak_trace, logic_trace, memory_trace] } -fn generate_txn(_state: &mut GenerationState, _txn: &TransactionData) { - todo!() +fn generate_txn(state: &mut GenerationState, txn: &TransactionData) { + // TODO: Add transaction RLP to prover_input. + + // Supply Merkle trie proofs as prover inputs. 
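Annotation (not part of the patch): the comment above announces the flattening step that follows: each trie proof is a list of RLP-encoded nodes, and every byte of every node becomes one prover-input word, in order. A dependency-free sketch of the same shape, using `u64` where the real code uses `ethereum_types::U256` and made-up byte values in place of `txn.trie_proofs`:

```rust
fn main() {
    // One proof consisting of two RLP-encoded nodes (values are arbitrary).
    let trie_proofs: Vec<Vec<Vec<u8>>> = vec![vec![vec![0xc0, 0x01], vec![0x80]]];

    let mut prover_inputs: Vec<u64> = Vec::new();
    for proof in &trie_proofs {
        // Flatten every node's bytes into individual prover-input words.
        let flattened = proof
            .iter()
            .flat_map(|node_rlp| node_rlp.iter().map(|byte| u64::from(*byte)));
        prover_inputs.extend(flattened);
    }

    // One word per proof byte, preserving order.
    assert_eq!(prover_inputs, vec![0xc0, 0x01, 0x80]);
}
```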
+ for proof in &txn.trie_proofs { + let proof = proof + .iter() + .flat_map(|node_rlp| node_rlp.iter().map(|byte| U256::from(*byte))); + state.prover_inputs.extend(proof); + } } diff --git a/evm/src/generation/state.rs b/evm/src/generation/state.rs index 46ccc4e3..c7f1003e 100644 --- a/evm/src/generation/state.rs +++ b/evm/src/generation/state.rs @@ -19,6 +19,9 @@ pub(crate) struct GenerationState { pub(crate) keccak_inputs: Vec<[u64; keccak::keccak_stark::NUM_INPUTS]>, pub(crate) logic_ops: Vec, + + /// Non-deterministic inputs provided by the prover. + pub(crate) prover_inputs: Vec, } impl GenerationState { @@ -111,6 +114,7 @@ impl Default for GenerationState { memory: MemoryState::default(), keccak_inputs: vec![], logic_ops: vec![], + prover_inputs: vec![], } } } diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs index f1b92dfc..15545ea0 100644 --- a/evm/src/memory/segments.rs +++ b/evm/src/memory/segments.rs @@ -1,4 +1,4 @@ -#[allow(dead_code)] // TODO: Not all segments are used yet. +#[allow(dead_code)] #[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)] pub(crate) enum Segment { /// Contains EVM bytecode. @@ -17,12 +17,12 @@ pub(crate) enum Segment { /// General purpose kernel memory, used by various kernel functions. /// In general, calling a helper function can result in this memory being clobbered. KernelGeneral = 6, - /// Contains transaction data (after it's parsed and converted to a standard format). - TxnData = 7, + /// Contains normalized transaction fields; see `TxnField`. + TxnFields = 7, + /// Contains the data field of a transaction. + TxnData = 8, /// Raw RLP data. - RlpRaw = 8, - /// RLP data that has been parsed and converted to a more "friendly" format. - RlpParsed = 9, + RlpRaw = 9, } impl Segment { @@ -37,9 +37,9 @@ impl Segment { Self::Returndata, Self::Metadata, Self::KernelGeneral, + Self::TxnFields, Self::TxnData, Self::RlpRaw, - Self::RlpParsed, ] } @@ -53,9 +53,25 @@ impl Segment { Segment::Returndata => "SEGMENT_RETURNDATA", Segment::Metadata => "SEGMENT_METADATA", Segment::KernelGeneral => "SEGMENT_KERNEL_GENERAL", + Segment::TxnFields => "SEGMENT_NORMALIZED_TXN", Segment::TxnData => "SEGMENT_TXN_DATA", Segment::RlpRaw => "SEGMENT_RLP_RAW", - Segment::RlpParsed => "SEGMENT_RLP_PARSED", + } + } + + #[allow(dead_code)] + pub(crate) fn bit_range(&self) -> usize { + match self { + Segment::Code => 8, + Segment::Stack => 256, + Segment::MainMemory => 8, + Segment::Calldata => 8, + Segment::Returndata => 8, + Segment::Metadata => 256, + Segment::KernelGeneral => 256, + Segment::TxnFields => 256, + Segment::TxnData => 256, + Segment::RlpRaw => 8, } } } diff --git a/plonky2/plonky2.pdf b/plonky2/plonky2.pdf index 299d1724..349b22a6 100644 Binary files a/plonky2/plonky2.pdf and b/plonky2/plonky2.pdf differ diff --git a/plonky2/src/gates/gate.rs b/plonky2/src/gates/gate.rs index 2e6b36ef..781e0cbd 100644 --- a/plonky2/src/gates/gate.rs +++ b/plonky2/src/gates/gate.rs @@ -244,7 +244,7 @@ fn compute_filter(row: usize, group_range: Range, s: K, many_se debug_assert!(group_range.contains(&row)); group_range .filter(|&i| i != row) - .chain(many_selector.then(|| UNUSED_SELECTOR)) + .chain(many_selector.then_some(UNUSED_SELECTOR)) .map(|i| K::from_canonical_usize(i) - s) .product() } @@ -259,7 +259,7 @@ fn compute_filter_circuit, const D: usize>( debug_assert!(group_range.contains(&row)); let v = group_range .filter(|&i| i != row) - .chain(many_selectors.then(|| UNUSED_SELECTOR)) + .chain(many_selectors.then_some(UNUSED_SELECTOR)) .map(|i| { 
let c = builder.constant_extension(F::Extension::from_canonical_usize(i)); builder.sub_extension(c, s) diff --git a/plonky2/src/iop/challenger.rs b/plonky2/src/iop/challenger.rs index 52412409..97d21197 100644 --- a/plonky2/src/iop/challenger.rs +++ b/plonky2/src/iop/challenger.rs @@ -33,8 +33,8 @@ impl> Challenger { pub fn new() -> Challenger { Challenger { sponge_state: [F::ZERO; SPONGE_WIDTH], - input_buffer: Vec::new(), - output_buffer: Vec::new(), + input_buffer: Vec::with_capacity(SPONGE_RATE), + output_buffer: Vec::with_capacity(SPONGE_RATE), _phantom: Default::default(), } } @@ -44,6 +44,10 @@ impl> Challenger { self.output_buffer.clear(); self.input_buffer.push(element); + + if self.input_buffer.len() == SPONGE_RATE { + self.duplexing(); + } } pub fn observe_extension_element(&mut self, element: &F::Extension) @@ -79,12 +83,10 @@ impl> Challenger { } pub fn get_challenge(&mut self) -> F { - self.absorb_buffered_inputs(); - - if self.output_buffer.is_empty() { - // Evaluate the permutation to produce `r` new outputs. - self.sponge_state = H::Permutation::permute(self.sponge_state); - self.output_buffer = self.sponge_state[0..SPONGE_RATE].to_vec(); + // If we have buffered inputs, we must perform a duplexing so that the challenge will + // reflect them. Or if we've run out of outputs, we must perform a duplexing to get more. + if !self.input_buffer.is_empty() || self.output_buffer.is_empty() { + self.duplexing(); } self.output_buffer @@ -125,27 +127,24 @@ impl> Challenger { .collect() } - /// Absorb any buffered inputs. After calling this, the input buffer will be empty. - fn absorb_buffered_inputs(&mut self) { - if self.input_buffer.is_empty() { - return; + /// Absorb any buffered inputs. After calling this, the input buffer will be empty, and the + /// output buffer will be full. + fn duplexing(&mut self) { + assert!(self.input_buffer.len() <= SPONGE_RATE); + + // Overwrite the first r elements with the inputs. This differs from a standard sponge, + // where we would xor or add in the inputs. This is a well-known variant, though, + // sometimes called "overwrite mode". + for (i, input) in self.input_buffer.drain(..).enumerate() { + self.sponge_state[i] = input; } - for input_chunk in self.input_buffer.chunks(SPONGE_RATE) { - // Overwrite the first r elements with the inputs. This differs from a standard sponge, - // where we would xor or add in the inputs. This is a well-known variant, though, - // sometimes called "overwrite mode". - for (i, &input) in input_chunk.iter().enumerate() { - self.sponge_state[i] = input; - } + // Apply the permutation. + self.sponge_state = H::Permutation::permute(self.sponge_state); - // Apply the permutation. - self.sponge_state = H::Permutation::permute(self.sponge_state); - } - - self.output_buffer = self.sponge_state[0..SPONGE_RATE].to_vec(); - - self.input_buffer.clear(); + self.output_buffer.clear(); + self.output_buffer + .extend_from_slice(&self.sponge_state[0..SPONGE_RATE]); } } @@ -155,7 +154,9 @@ impl> Default for Challenger { } } -/// A recursive version of `Challenger`. +/// A recursive version of `Challenger`. The main difference is that `RecursiveChallenger`'s input +/// buffer can grow beyond `SPONGE_RATE`. This is so that `observe_element` etc do not need access +/// to the `CircuitBuilder`. 
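Annotation (not part of the patch): the challenger rework above is easier to follow at toy scale. The sketch below mirrors the new control flow under made-up parameters: duplexing fires eagerly when the input buffer reaches the rate, inputs overwrite the first rate words of the state ("overwrite mode") rather than being added in, stale outputs are discarded when new inputs arrive, and `get_challenge` only permutes when it has to. The permutation is a deliberately silly placeholder, not Poseidon:

```rust
const RATE: usize = 2;
const WIDTH: usize = 3;

// Placeholder mixing function; the real challenger uses the hash's permutation.
fn toy_permutation(mut state: [u64; WIDTH]) -> [u64; WIDTH] {
    for i in 0..WIDTH {
        state[i] = state[i]
            .wrapping_mul(6364136223846793005)
            .rotate_left(17)
            ^ state[(i + 1) % WIDTH];
    }
    state
}

struct ToyChallenger {
    state: [u64; WIDTH],
    input_buffer: Vec<u64>,
    output_buffer: Vec<u64>,
}

impl ToyChallenger {
    fn observe(&mut self, x: u64) {
        self.output_buffer.clear(); // pending outputs are stale once new inputs arrive
        self.input_buffer.push(x);
        if self.input_buffer.len() == RATE {
            self.duplexing();
        }
    }

    fn get_challenge(&mut self) -> u64 {
        // Duplex if there are unabsorbed inputs, or if we've run out of outputs.
        if !self.input_buffer.is_empty() || self.output_buffer.is_empty() {
            self.duplexing();
        }
        self.output_buffer.pop().unwrap()
    }

    fn duplexing(&mut self) {
        // Overwrite mode: inputs replace the first RATE words of the state.
        for (i, input) in self.input_buffer.drain(..).enumerate() {
            self.state[i] = input;
        }
        self.state = toy_permutation(self.state);
        self.output_buffer.clear();
        self.output_buffer.extend_from_slice(&self.state[0..RATE]);
    }
}

fn main() {
    let mut c = ToyChallenger { state: [0; WIDTH], input_buffer: vec![], output_buffer: vec![] };
    c.observe(1);
    c.observe(2); // buffer reaches RATE, so a duplexing happens eagerly here
    assert!(c.input_buffer.is_empty() && c.output_buffer.len() == RATE);
    let _a = c.get_challenge(); // consumes a buffered output; no extra permutation needed
    c.observe(3); // discards the remaining stale output
    let _b = c.get_challenge(); // triggers another duplexing because an input is pending
    assert!(c.input_buffer.is_empty());
}
```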
 pub struct RecursiveChallenger<F: RichField + Extendable<D>, H: AlgebraicHasher<F>, const D: usize>
 {
     sponge_state: [Target; SPONGE_WIDTH],
@@ -248,7 +249,8 @@ impl<F: RichField + Extendable<D>, H: AlgebraicHasher<F>, const D: usize>
         self.get_n_challenges(builder, D).try_into().unwrap()
     }
 
-    /// Absorb any buffered inputs. After calling this, the input buffer will be empty.
+    /// Absorb any buffered inputs. After calling this, the input buffer will be empty, and the
+    /// output buffer will be full.
     fn absorb_buffered_inputs(&mut self, builder: &mut CircuitBuilder<F, D>) {
         if self.input_buffer.is_empty() {
             return;