From b98dd47820f62d2c422bf2c50fa9badbece90913 Mon Sep 17 00:00:00 2001 From: Jacqueline Nabaglo Date: Tue, 16 Aug 2022 09:46:10 -0700 Subject: [PATCH] Permission levels, jumps, traps (#653) * Permission levels, jumps, traps * Tests passing * PR comments + documentation * Docs + minor bugfixes * Tests * Use already-defined `stop` and `exception` (but renamed to `sys_stop`, `fault_exception`) * Daniel comments --- evm/src/all_stark.rs | 254 ++++++++++++- evm/src/cpu/columns/general.rs | 78 ++++ evm/src/cpu/columns/mod.rs | 16 +- evm/src/cpu/control_flow.rs | 37 +- evm/src/cpu/cpu_stark.rs | 6 +- evm/src/cpu/decode.rs | 425 +++++++++++++--------- evm/src/cpu/jumps.rs | 353 ++++++++++++++++++ evm/src/cpu/kernel/asm/core/terminate.asm | 10 +- evm/src/cpu/kernel/asm/exp.asm | 3 + evm/src/cpu/mod.rs | 2 + evm/src/cpu/syscalls.rs | 110 ++++++ 11 files changed, 1110 insertions(+), 184 deletions(-) create mode 100644 evm/src/cpu/jumps.rs create mode 100644 evm/src/cpu/syscalls.rs diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index fbcd2115..4b8c7d0a 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -260,6 +260,7 @@ mod tests { let mut row: cpu::columns::CpuColumnsView = [F::ZERO; CpuStark::::COLUMNS].into(); row.is_cpu_cycle = F::ONE; + row.is_kernel_mode = F::ONE; row.program_counter = F::from_canonical_usize(i); row.opcode = [ (logic::columns::IS_AND, 0x16), @@ -319,12 +320,263 @@ mod tests { } } + // Trap to kernel + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + let last_row: cpu::columns::CpuColumnsView = + cpu_trace_rows[cpu_trace_rows.len() - 1].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x0a); // `EXP` is implemented in software + row.is_kernel_mode = F::ONE; + row.program_counter = last_row.program_counter + F::ONE; + row.general.syscalls_mut().output = [ + row.program_counter, + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `EXIT_KERNEL` (to kernel) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0xf9); + row.is_kernel_mode = F::ONE; + row.program_counter = F::from_canonical_usize(KERNEL.global_labels["sys_exp"]); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(15682), + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `JUMP` (in kernel mode) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x56); + row.is_kernel_mode = F::ONE; + row.program_counter = F::from_canonical_u16(15682); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(15106), + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input1 = [ + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input0_upper_zero = F::ONE; + row.general.jumps_mut().dst_valid_or_kernel = F::ONE; + row.general.jumps_mut().input0_jumpable = F::ONE; + row.general.jumps_mut().input1_sum_inv = F::ONE; + row.general.jumps_mut().should_jump = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `EXIT_KERNEL` (to userspace) + { + let mut 
row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0xf9); + row.is_kernel_mode = F::ONE; + row.program_counter = F::from_canonical_u16(15106); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(63064), + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `JUMP` (taken) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x56); + row.is_kernel_mode = F::ZERO; + row.program_counter = F::from_canonical_u16(63064); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(3754), + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input1 = [ + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input0_upper_zero = F::ONE; + row.general.jumps_mut().dst_valid = F::ONE; + row.general.jumps_mut().dst_valid_or_kernel = F::ONE; + row.general.jumps_mut().input0_jumpable = F::ONE; + row.general.jumps_mut().input1_sum_inv = F::ONE; + row.general.jumps_mut().should_jump = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `JUMPI` (taken) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x57); + row.is_kernel_mode = F::ZERO; + row.program_counter = F::from_canonical_u16(3754); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(37543), + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input1 = [ + F::ZERO, + F::ZERO, + F::ZERO, + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input0_upper_zero = F::ONE; + row.general.jumps_mut().dst_valid = F::ONE; + row.general.jumps_mut().dst_valid_or_kernel = F::ONE; + row.general.jumps_mut().input0_jumpable = F::ONE; + row.general.jumps_mut().input1_sum_inv = F::ONE; + row.general.jumps_mut().should_jump = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `JUMPI` (not taken) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x57); + row.is_kernel_mode = F::ZERO; + row.program_counter = F::from_canonical_u16(37543); + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(37543), + F::ZERO, + F::ZERO, + F::ZERO, + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input0_upper_sum_inv = F::ONE; + row.general.jumps_mut().dst_valid = F::ONE; + row.general.jumps_mut().dst_valid_or_kernel = F::ONE; + row.general.jumps_mut().input0_jumpable = F::ZERO; + row.general.jumps_mut().should_continue = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + + // `JUMP` (trapping) + { + let mut row: cpu::columns::CpuColumnsView = + [F::ZERO; CpuStark::::COLUMNS].into(); + let last_row: cpu::columns::CpuColumnsView = + cpu_trace_rows[cpu_trace_rows.len() - 1].into(); + row.is_cpu_cycle = F::ONE; + row.opcode = F::from_canonical_u8(0x56); + row.is_kernel_mode = F::ZERO; + row.program_counter = last_row.program_counter + F::ONE; + row.general.jumps_mut().input0 = [ + F::from_canonical_u16(37543), + 
F::ZERO, + F::ZERO, + F::ZERO, + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input1 = [ + F::ONE, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + F::ZERO, + ]; + row.general.jumps_mut().input0_upper_sum_inv = F::ONE; + row.general.jumps_mut().dst_valid = F::ONE; + row.general.jumps_mut().dst_valid_or_kernel = F::ONE; + row.general.jumps_mut().input0_jumpable = F::ZERO; + row.general.jumps_mut().input1_sum_inv = F::ONE; + row.general.jumps_mut().should_trap = F::ONE; + cpu_stark.generate(row.borrow_mut()); + cpu_trace_rows.push(row.into()); + } + // Pad to a power of two. for i in 0..cpu_trace_rows.len().next_power_of_two() - cpu_trace_rows.len() { let mut row: cpu::columns::CpuColumnsView = [F::ZERO; CpuStark::::COLUMNS].into(); + row.opcode = F::from_canonical_u8(0xff); row.is_cpu_cycle = F::ONE; - row.program_counter = F::from_canonical_usize(i + num_logic_rows); + row.is_kernel_mode = F::ONE; + row.program_counter = + F::from_canonical_usize(KERNEL.global_labels["fault_exception"] + i); cpu_stark.generate(row.borrow_mut()); cpu_trace_rows.push(row.into()); } diff --git a/evm/src/cpu/columns/general.rs b/evm/src/cpu/columns/general.rs index 600dda87..db7436ba 100644 --- a/evm/src/cpu/columns/general.rs +++ b/evm/src/cpu/columns/general.rs @@ -8,6 +8,8 @@ pub(crate) union CpuGeneralColumnsView { keccak: CpuKeccakView, arithmetic: CpuArithmeticView, logic: CpuLogicView, + jumps: CpuJumpsView, + syscalls: CpuSyscallsView, } impl CpuGeneralColumnsView { @@ -40,6 +42,26 @@ impl CpuGeneralColumnsView { pub(crate) fn logic_mut(&mut self) -> &mut CpuLogicView { unsafe { &mut self.logic } } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn jumps(&self) -> &CpuJumpsView { + unsafe { &self.jumps } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn jumps_mut(&mut self) -> &mut CpuJumpsView { + unsafe { &mut self.jumps } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn syscalls(&self) -> &CpuSyscallsView { + unsafe { &self.syscalls } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn syscalls_mut(&mut self) -> &mut CpuSyscallsView { + unsafe { &mut self.syscalls } + } } impl PartialEq for CpuGeneralColumnsView { @@ -91,5 +113,61 @@ pub(crate) struct CpuLogicView { pub(crate) output: [T; 16], } +#[derive(Copy, Clone)] +pub(crate) struct CpuJumpsView { + /// Assuming a limb size of 32 bits. + /// The top stack value at entry (for jumps, the address; for `EXIT_KERNEL`, the address and new + /// privilege level). + pub(crate) input0: [T; 8], + /// For `JUMPI`, the second stack value (the predicate). For `JUMP`, 1. + pub(crate) input1: [T; 8], + + /// Inverse of `input0[1] + ... + input0[7]`, if one exists; otherwise, an arbitrary value. + /// Needed to prove that `input0` is nonzero. + pub(crate) input0_upper_sum_inv: T, + /// 1 if `input0[1..7]` is zero; else 0. + pub(crate) input0_upper_zero: T, + + /// 1 if `input0[0]` is the address of a valid jump destination (i.e. `JUMPDEST` that is not + /// part of a `PUSH` immediate); else 0. Note that the kernel is allowed to jump anywhere it + /// wants, so this flag is computed but ignored in kernel mode. + /// NOTE: this flag only considers `input0[0]`, the low 32 bits of the 256-bit register. Even if + /// this flag is 1, `input0` will still be an invalid address if the high 224 bits are not 0. 
+    pub(crate) dst_valid: T, // TODO: populate this (check for JUMPDEST)
+    /// 1 if either `dst_valid` is 1 or we are in kernel mode; else 0. (Just a logical OR.)
+    pub(crate) dst_valid_or_kernel: T,
+    /// 1 if `dst_valid_or_kernel` and `input0_upper_zero` are both 1; else 0. In other words, we
+    /// are allowed to jump to `input0[0]` because either it's a valid address or we're in kernel
+    /// mode (`dst_valid_or_kernel`), and also `input0[1..7]` are all 0 so `input0[0]` is in fact
+    /// the whole address (we're not being asked to jump to an address that would overflow).
+    pub(crate) input0_jumpable: T,
+
+    /// Inverse of `input1[0] + ... + input1[7]`, if one exists; otherwise, an arbitrary value.
+    /// Needed to prove that `input1` is nonzero.
+    pub(crate) input1_sum_inv: T,
+
+    /// Note that the below flags are mutually exclusive.
+    /// 1 if the JUMPI falls through (because `input1` is 0); else 0.
+    pub(crate) should_continue: T,
+    /// 1 if the JUMP/JUMPI does in fact jump to `input0`; else 0. This requires `input0` to be a
+    /// valid destination (`input0[0]` is a `JUMPDEST` not in an immediate, or we are in kernel
+    /// mode, and also `input0[1..7]` is 0) and `input1` to be nonzero.
+    pub(crate) should_jump: T,
+    /// 1 if the JUMP/JUMPI faults; else 0. This happens when `input0` is not a valid destination
+    /// (`input0[0]` is not a `JUMPDEST` outside a `PUSH` immediate while we are in user mode, or
+    /// `input0[1..7]` is nonzero) and `input1` is nonzero.
+    pub(crate) should_trap: T,
+}
+
+#[derive(Copy, Clone)]
+pub(crate) struct CpuSyscallsView<T: Copy> {
+    /// Assuming a limb size of 32 bits.
+    /// The output contains the context that is required to return from the system call in
+    /// `EXIT_KERNEL`. `output[0]` contains the program counter at the time the system call was
+    /// made (the address of the syscall instruction). `output[1]` is 1 if we were in kernel mode
+    /// at the time and 0 otherwise. `output[2]`, ..., `output[7]` are zero.
+    pub(crate) output: [T; 8],
+}
+
 // `u8` is guaranteed to have a `size_of` of 1.
 pub const NUM_SHARED_COLUMNS: usize = size_of::<CpuGeneralColumnsView<u8>>();
diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs
index 8168bc63..824ae13d 100644
--- a/evm/src/cpu/columns/mod.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -24,6 +24,9 @@ pub struct CpuColumnsView<T: Copy> {
     /// If CPU cycle: The program counter for the current instruction.
     pub program_counter: T,
 
+    /// If CPU cycle: We're in kernel (privileged) mode.
+    pub is_kernel_mode: T,
+
     /// If CPU cycle: The opcode being decoded, in {0, ..., 255}.
     pub opcode: T,
 
@@ -89,8 +92,8 @@ pub struct CpuColumnsView<T: Copy> {
     pub is_mstore8: T,
     pub is_sload: T,
     pub is_sstore: T,
-    pub is_jump: T,
-    pub is_jumpi: T,
+    pub is_jump: T,  // Note: This column must be 0 when is_cpu_cycle = 0.
+    pub is_jumpi: T, // Note: This column must be 0 when is_cpu_cycle = 0.
     pub is_pc: T,
     pub is_msize: T,
     pub is_gas: T,
@@ -139,8 +142,15 @@ pub struct CpuColumnsView<T: Copy> {
     pub is_invalid_11: T,
     pub is_invalid_12: T,
     pub is_invalid_13: T,
+    pub is_invalid_14: T,
+    pub is_invalid_15: T,
+    pub is_invalid_16: T,
+    pub is_invalid_17: T,
+    pub is_invalid_18: T,
+    pub is_invalid_19: T,
+    pub is_invalid_20: T,
 
-    /// If CPU cycle: the opcode, broken up into bits in **big-endian** order.
+    /// If CPU cycle: the opcode, broken up into bits in little-endian order.
    pub opcode_bits: [T; 8],

    /// Filter. 1 iff a Keccak permutation is computed on this row.
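
The `CpuGeneralColumnsView` union above is what lets mutually exclusive operations (Keccak, arithmetic, logic, and now jumps and syscalls) share one set of physical columns: every view is a plain array of field elements, so reinterpreting the bytes is always valid. A minimal, self-contained sketch of the same pattern; the names (`JumpsDemo`, `GeneralDemo`, ...) are illustrative stand-ins, not items from this patch:

    use std::mem::size_of;

    #[derive(Copy, Clone)]
    #[repr(C)] // the demo pins field offsets explicitly so the overlay is guaranteed
    struct JumpsDemo<T: Copy> {
        dst: [T; 8],
        should_jump: T,
    }

    #[derive(Copy, Clone)]
    #[repr(C)]
    struct SyscallsDemo<T: Copy> {
        output: [T; 8],
        spare: T,
    }

    // Only one view is meaningful on any given row, so they overlay each other.
    #[derive(Copy, Clone)]
    #[repr(C)]
    union GeneralDemo<T: Copy> {
        jumps: JumpsDemo<T>,
        syscalls: SyscallsDemo<T>,
    }

    impl<T: Copy> GeneralDemo<T> {
        // SAFETY: each view is nothing but `T`s, so every interpretation is valid.
        fn jumps(&self) -> &JumpsDemo<T> {
            unsafe { &self.jumps }
        }
        fn syscalls_mut(&mut self) -> &mut SyscallsDemo<T> {
            unsafe { &mut self.syscalls }
        }
    }

    fn main() {
        // The shared-column count is the size of the largest view.
        assert_eq!(size_of::<GeneralDemo<u8>>(), 9);
        let mut g = GeneralDemo { jumps: JumpsDemo { dst: [0u64; 8], should_jump: 0 } };
        g.syscalls_mut().output[0] = 42; // write through one view...
        assert_eq!(g.jumps().dst[0], 42); // ...read through the other
    }
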
diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs index 90a76d46..a157653f 100644 --- a/evm/src/cpu/control_flow.rs +++ b/evm/src/cpu/control_flow.rs @@ -57,18 +57,24 @@ pub fn eval_packed_generic( // If a row is a CPU cycle and executing a native instruction (implemented as a table row; not // microcoded) then the program counter is incremented by 1 to obtain the next row's program - // counter. + // counter. Also, the next row has the same kernel flag. let is_native_instruction: P = NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i]).sum(); yield_constr.constraint_transition( lv.is_cpu_cycle * is_native_instruction * (lv.program_counter - nv.program_counter + P::ONES), ); + yield_constr.constraint_transition( + lv.is_cpu_cycle * is_native_instruction * (lv.is_kernel_mode - nv.is_kernel_mode), + ); // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU - // cycle row is 0. + // cycle row is 0 and it is in kernel mode. yield_constr .constraint_transition((lv.is_cpu_cycle - P::ONES) * nv.is_cpu_cycle * nv.program_counter); + yield_constr.constraint_transition( + (lv.is_cpu_cycle - P::ONES) * nv.is_cpu_cycle * (nv.is_kernel_mode - P::ONES), + ); // The first row has nowhere to continue execution from, so if it's a cycle row, then its // `program_counter` must be 0. @@ -84,6 +90,8 @@ pub fn eval_packed_generic( let (halt_pc0, halt_pc1) = get_halt_pcs::(); yield_constr .constraint_last_row((lv.program_counter - halt_pc0) * (lv.program_counter - halt_pc1)); + // Finally, the last row must be in kernel mode. + yield_constr.constraint_last_row(lv.is_kernel_mode - P::ONES); } pub fn eval_ext_circuit, const D: usize>( @@ -100,22 +108,27 @@ pub fn eval_ext_circuit, const D: usize>( // If a row is a CPU cycle and executing a native instruction (implemented as a table row; not // microcoded) then the program counter is incremented by 1 to obtain the next row's program - // counter. + // counter. Also, the next row has the same kernel flag. { let is_native_instruction = builder.add_many_extension(NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i])); let filter = builder.mul_extension(lv.is_cpu_cycle, is_native_instruction); let pc_diff = builder.sub_extension(lv.program_counter, nv.program_counter); - let constr = builder.mul_add_extension(filter, pc_diff, filter); - yield_constr.constraint_transition(builder, constr); + let pc_constr = builder.mul_add_extension(filter, pc_diff, filter); + yield_constr.constraint_transition(builder, pc_constr); + let kernel_diff = builder.sub_extension(lv.is_kernel_mode, nv.is_kernel_mode); + let kernel_constr = builder.mul_extension(filter, kernel_diff); + yield_constr.constraint_transition(builder, kernel_constr); } // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU - // cycle row is 0. + // cycle row is 0 and it is in kernel mode. 
{ - let constr = builder.mul_extension(nv.is_cpu_cycle, nv.program_counter); - let constr = builder.mul_sub_extension(lv.is_cpu_cycle, constr, constr); - yield_constr.constraint_transition(builder, constr); + let filter = builder.mul_sub_extension(lv.is_cpu_cycle, nv.is_cpu_cycle, nv.is_cpu_cycle); + let pc_constr = builder.mul_extension(filter, nv.program_counter); + yield_constr.constraint_transition(builder, pc_constr); + let kernel_constr = builder.mul_sub_extension(filter, nv.is_kernel_mode, filter); + yield_constr.constraint_transition(builder, kernel_constr); } // The first row has nowhere to continue execution from, so if it's a cycle row, then its @@ -147,4 +160,10 @@ pub fn eval_ext_circuit, const D: usize>( yield_constr.constraint_last_row(builder, constr); } + // Finally, the last row must be in kernel mode. + { + let one = builder.one_extension(); + let constr = builder.sub_extension(lv.is_kernel_mode, one); + yield_constr.constraint_last_row(builder, constr); + } } diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index 6b0bc0fd..918f7d9b 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -9,7 +9,7 @@ use plonky2::hash::hash_types::RichField; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{CpuColumnsView, COL_MAP, NUM_CPU_COLUMNS}; -use crate::cpu::{bootstrap_kernel, control_flow, decode, simple_logic}; +use crate::cpu::{bootstrap_kernel, control_flow, decode, jumps, simple_logic, syscalls}; use crate::cross_table_lookup::Column; use crate::memory::NUM_CHANNELS; use crate::stark::Stark; @@ -94,7 +94,9 @@ impl, const D: usize> Stark for CpuStark, const D: usize> Stark for CpuStark usize { diff --git a/evm/src/cpu/decode.rs b/evm/src/cpu/decode.rs index cf43f909..4faf7925 100644 --- a/evm/src/cpu/decode.rs +++ b/evm/src/cpu/decode.rs @@ -7,123 +7,144 @@ use plonky2::iop::ext_target::ExtensionTarget; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{CpuColumnsView, COL_MAP}; -// List of opcode blocks -// Each block corresponds to exactly one flag, and each flag corresponds to exactly one block. -// Each block of opcodes: -// - is contiguous -// - has a length that is a power of 2 -// - its start index is a multiple of its length (it is aligned) -// These properties permit us to check if an opcode belongs to a block of length 2^n by checking its -// top 8-n bits. 
-const OPCODES: [(u64, usize, usize); 106] = [ - // (start index of block, number of top bits to check (log2), flag column) - (0x00, 0, COL_MAP.is_stop), - (0x01, 0, COL_MAP.is_add), - (0x02, 0, COL_MAP.is_mul), - (0x03, 0, COL_MAP.is_sub), - (0x04, 0, COL_MAP.is_div), - (0x05, 0, COL_MAP.is_sdiv), - (0x06, 0, COL_MAP.is_mod), - (0x07, 0, COL_MAP.is_smod), - (0x08, 0, COL_MAP.is_addmod), - (0x09, 0, COL_MAP.is_mulmod), - (0x0a, 0, COL_MAP.is_exp), - (0x0b, 0, COL_MAP.is_signextend), - (0x0c, 2, COL_MAP.is_invalid_0), // 0x0c-0x0f - (0x10, 0, COL_MAP.is_lt), - (0x11, 0, COL_MAP.is_gt), - (0x12, 0, COL_MAP.is_slt), - (0x13, 0, COL_MAP.is_sgt), - (0x14, 0, COL_MAP.is_eq), - (0x15, 0, COL_MAP.is_iszero), - (0x16, 0, COL_MAP.is_and), - (0x17, 0, COL_MAP.is_or), - (0x18, 0, COL_MAP.is_xor), - (0x19, 0, COL_MAP.is_not), - (0x1a, 0, COL_MAP.is_byte), - (0x1b, 0, COL_MAP.is_shl), - (0x1c, 0, COL_MAP.is_shr), - (0x1d, 0, COL_MAP.is_sar), - (0x1e, 1, COL_MAP.is_invalid_1), // 0x1e-0x1f - (0x20, 0, COL_MAP.is_keccak256), - (0x21, 0, COL_MAP.is_invalid_2), - (0x22, 1, COL_MAP.is_invalid_3), // 0x22-0x23 - (0x24, 2, COL_MAP.is_invalid_4), // 0x24-0x27 - (0x28, 3, COL_MAP.is_invalid_5), // 0x28-0x2f - (0x30, 0, COL_MAP.is_address), - (0x31, 0, COL_MAP.is_balance), - (0x32, 0, COL_MAP.is_origin), - (0x33, 0, COL_MAP.is_caller), - (0x34, 0, COL_MAP.is_callvalue), - (0x35, 0, COL_MAP.is_calldataload), - (0x36, 0, COL_MAP.is_calldatasize), - (0x37, 0, COL_MAP.is_calldatacopy), - (0x38, 0, COL_MAP.is_codesize), - (0x39, 0, COL_MAP.is_codecopy), - (0x3a, 0, COL_MAP.is_gasprice), - (0x3b, 0, COL_MAP.is_extcodesize), - (0x3c, 0, COL_MAP.is_extcodecopy), - (0x3d, 0, COL_MAP.is_returndatasize), - (0x3e, 0, COL_MAP.is_returndatacopy), - (0x3f, 0, COL_MAP.is_extcodehash), - (0x40, 0, COL_MAP.is_blockhash), - (0x41, 0, COL_MAP.is_coinbase), - (0x42, 0, COL_MAP.is_timestamp), - (0x43, 0, COL_MAP.is_number), - (0x44, 0, COL_MAP.is_difficulty), - (0x45, 0, COL_MAP.is_gaslimit), - (0x46, 0, COL_MAP.is_chainid), - (0x47, 0, COL_MAP.is_selfbalance), - (0x48, 0, COL_MAP.is_basefee), - (0x49, 0, COL_MAP.is_prover_input), - (0x4a, 1, COL_MAP.is_invalid_6), // 0x4a-0x4b - (0x4c, 2, COL_MAP.is_invalid_7), // 0x4c-0x4f - (0x50, 0, COL_MAP.is_pop), - (0x51, 0, COL_MAP.is_mload), - (0x52, 0, COL_MAP.is_mstore), - (0x53, 0, COL_MAP.is_mstore8), - (0x54, 0, COL_MAP.is_sload), - (0x55, 0, COL_MAP.is_sstore), - (0x56, 0, COL_MAP.is_jump), - (0x57, 0, COL_MAP.is_jumpi), - (0x58, 0, COL_MAP.is_pc), - (0x59, 0, COL_MAP.is_msize), - (0x5a, 0, COL_MAP.is_gas), - (0x5b, 0, COL_MAP.is_jumpdest), - (0x5c, 0, COL_MAP.is_get_state_root), - (0x5d, 0, COL_MAP.is_set_state_root), - (0x5e, 0, COL_MAP.is_get_receipt_root), - (0x5f, 0, COL_MAP.is_set_receipt_root), - (0x60, 5, COL_MAP.is_push), // 0x60-0x7f - (0x80, 4, COL_MAP.is_dup), // 0x80-0x8f - (0x90, 4, COL_MAP.is_swap), // 0x90-0x9f - (0xa0, 0, COL_MAP.is_log0), - (0xa1, 0, COL_MAP.is_log1), - (0xa2, 0, COL_MAP.is_log2), - (0xa3, 0, COL_MAP.is_log3), - (0xa4, 0, COL_MAP.is_log4), - // Opcode 0xa5 is PANIC. Make the proof unverifiable by giving it no flag to decode to. 
- (0xa6, 1, COL_MAP.is_invalid_8), // 0xa6-0xa7 - (0xa8, 3, COL_MAP.is_invalid_9), // 0xa8-0xaf - (0xb0, 4, COL_MAP.is_invalid_10), // 0xb0-0xbf - (0xc0, 5, COL_MAP.is_invalid_11), // 0xc0-0xdf - (0xe0, 4, COL_MAP.is_invalid_12), // 0xe0-0xef - (0xf0, 0, COL_MAP.is_create), - (0xf1, 0, COL_MAP.is_call), - (0xf2, 0, COL_MAP.is_callcode), - (0xf3, 0, COL_MAP.is_return), - (0xf4, 0, COL_MAP.is_delegatecall), - (0xf5, 0, COL_MAP.is_create2), - (0xf6, 0, COL_MAP.is_get_context), - (0xf7, 0, COL_MAP.is_set_context), - (0xf8, 0, COL_MAP.is_consume_gas), - (0xf9, 0, COL_MAP.is_exit_kernel), - (0xfa, 0, COL_MAP.is_staticcall), - (0xfb, 0, COL_MAP.is_mload_general), - (0xfc, 0, COL_MAP.is_mstore_general), - (0xfd, 0, COL_MAP.is_revert), - (0xfe, 0, COL_MAP.is_invalid_13), - (0xff, 0, COL_MAP.is_selfdestruct), +#[derive(PartialEq, Eq)] +enum Availability { + All, + User, + Kernel, +} +use Availability::{All, Kernel, User}; + +/// List of opcode blocks +/// Each block corresponds to exactly one flag, and each flag corresponds to exactly one block. +/// Each block of opcodes: +/// - is contiguous, +/// - has a length that is a power of 2, and +/// - its start index is a multiple of its length (it is aligned). +/// These properties permit us to check if an opcode belongs to a block of length 2^n by checking +/// its top 8-n bits. +/// Additionally, each block can be made available only to the user, only to the kernel, or to +/// both. This is mainly useful for making some instructions kernel-only, while still decoding to +/// invalid for the user. We do this by making one kernel-only block and another user-only block. +/// The exception is the PANIC instruction which is user-only without a corresponding kernel block. +/// This makes the proof unverifiable when PANIC is executed in kernel mode, which is the intended +/// behavior. 
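
Before the table itself (which follows immediately), it may help to see the membership test that the aligned-block property buys. This standalone check is a hedged sketch, not code from this patch: because each block's start is a multiple of its power-of-two length, clearing the low `log_len` bits and comparing against the start tests exactly the top `8 - log_len` bits.

    /// True iff `opcode` lies in the aligned block of length 2^`log_len` starting at `start`.
    fn in_block(opcode: u8, start: u8, log_len: usize) -> bool {
        let low_mask = (1u8 << log_len).wrapping_sub(1);
        debug_assert_eq!(start & low_mask, 0, "block start must be aligned");
        opcode & !low_mask == start
    }

    fn main() {
        // PUSH1..PUSH32 form the aligned block 0x60-0x7f (length 2^5).
        assert!(in_block(0x60, 0x60, 5));
        assert!(in_block(0x7f, 0x60, 5));
        assert!(!in_block(0x80, 0x60, 5)); // 0x80 is DUP1
        // A singleton block (length 2^0) checks all eight bits, e.g. JUMP = 0x56.
        assert!(in_block(0x56, 0x56, 0));
        assert!(!in_block(0x57, 0x56, 0));
    }
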
+const OPCODES: [(u8, usize, Availability, usize); 113] = [ + // (start index of block, number of top bits to check (log2), availability, flag column) + (0x00, 0, All, COL_MAP.is_stop), + (0x01, 0, All, COL_MAP.is_add), + (0x02, 0, All, COL_MAP.is_mul), + (0x03, 0, All, COL_MAP.is_sub), + (0x04, 0, All, COL_MAP.is_div), + (0x05, 0, All, COL_MAP.is_sdiv), + (0x06, 0, All, COL_MAP.is_mod), + (0x07, 0, All, COL_MAP.is_smod), + (0x08, 0, All, COL_MAP.is_addmod), + (0x09, 0, All, COL_MAP.is_mulmod), + (0x0a, 0, All, COL_MAP.is_exp), + (0x0b, 0, All, COL_MAP.is_signextend), + (0x0c, 2, All, COL_MAP.is_invalid_0), // 0x0c-0x0f + (0x10, 0, All, COL_MAP.is_lt), + (0x11, 0, All, COL_MAP.is_gt), + (0x12, 0, All, COL_MAP.is_slt), + (0x13, 0, All, COL_MAP.is_sgt), + (0x14, 0, All, COL_MAP.is_eq), + (0x15, 0, All, COL_MAP.is_iszero), + (0x16, 0, All, COL_MAP.is_and), + (0x17, 0, All, COL_MAP.is_or), + (0x18, 0, All, COL_MAP.is_xor), + (0x19, 0, All, COL_MAP.is_not), + (0x1a, 0, All, COL_MAP.is_byte), + (0x1b, 0, All, COL_MAP.is_shl), + (0x1c, 0, All, COL_MAP.is_shr), + (0x1d, 0, All, COL_MAP.is_sar), + (0x1e, 1, All, COL_MAP.is_invalid_1), // 0x1e-0x1f + (0x20, 0, All, COL_MAP.is_keccak256), + (0x21, 0, All, COL_MAP.is_invalid_2), + (0x22, 1, All, COL_MAP.is_invalid_3), // 0x22-0x23 + (0x24, 2, All, COL_MAP.is_invalid_4), // 0x24-0x27 + (0x28, 3, All, COL_MAP.is_invalid_5), // 0x28-0x2f + (0x30, 0, All, COL_MAP.is_address), + (0x31, 0, All, COL_MAP.is_balance), + (0x32, 0, All, COL_MAP.is_origin), + (0x33, 0, All, COL_MAP.is_caller), + (0x34, 0, All, COL_MAP.is_callvalue), + (0x35, 0, All, COL_MAP.is_calldataload), + (0x36, 0, All, COL_MAP.is_calldatasize), + (0x37, 0, All, COL_MAP.is_calldatacopy), + (0x38, 0, All, COL_MAP.is_codesize), + (0x39, 0, All, COL_MAP.is_codecopy), + (0x3a, 0, All, COL_MAP.is_gasprice), + (0x3b, 0, All, COL_MAP.is_extcodesize), + (0x3c, 0, All, COL_MAP.is_extcodecopy), + (0x3d, 0, All, COL_MAP.is_returndatasize), + (0x3e, 0, All, COL_MAP.is_returndatacopy), + (0x3f, 0, All, COL_MAP.is_extcodehash), + (0x40, 0, All, COL_MAP.is_blockhash), + (0x41, 0, All, COL_MAP.is_coinbase), + (0x42, 0, All, COL_MAP.is_timestamp), + (0x43, 0, All, COL_MAP.is_number), + (0x44, 0, All, COL_MAP.is_difficulty), + (0x45, 0, All, COL_MAP.is_gaslimit), + (0x46, 0, All, COL_MAP.is_chainid), + (0x47, 0, All, COL_MAP.is_selfbalance), + (0x48, 0, All, COL_MAP.is_basefee), + (0x49, 0, User, COL_MAP.is_invalid_6), + (0x49, 0, Kernel, COL_MAP.is_prover_input), + (0x4a, 1, All, COL_MAP.is_invalid_7), // 0x4a-0x4b + (0x4c, 2, All, COL_MAP.is_invalid_8), // 0x4c-0x4f + (0x50, 0, All, COL_MAP.is_pop), + (0x51, 0, All, COL_MAP.is_mload), + (0x52, 0, All, COL_MAP.is_mstore), + (0x53, 0, All, COL_MAP.is_mstore8), + (0x54, 0, All, COL_MAP.is_sload), + (0x55, 0, All, COL_MAP.is_sstore), + (0x56, 0, All, COL_MAP.is_jump), + (0x57, 0, All, COL_MAP.is_jumpi), + (0x58, 0, All, COL_MAP.is_pc), + (0x59, 0, All, COL_MAP.is_msize), + (0x5a, 0, All, COL_MAP.is_gas), + (0x5b, 0, All, COL_MAP.is_jumpdest), + (0x5c, 2, User, COL_MAP.is_invalid_9), // 0x5c-5f + (0x5c, 0, Kernel, COL_MAP.is_get_state_root), + (0x5d, 0, Kernel, COL_MAP.is_set_state_root), + (0x5e, 0, Kernel, COL_MAP.is_get_receipt_root), + (0x5f, 0, Kernel, COL_MAP.is_set_receipt_root), + (0x60, 5, All, COL_MAP.is_push), // 0x60-0x7f + (0x80, 4, All, COL_MAP.is_dup), // 0x80-0x8f + (0x90, 4, All, COL_MAP.is_swap), // 0x90-0x9f + (0xa0, 0, All, COL_MAP.is_log0), + (0xa1, 0, All, COL_MAP.is_log1), + (0xa2, 0, All, COL_MAP.is_log2), + (0xa3, 0, All, 
COL_MAP.is_log3), + (0xa4, 0, All, COL_MAP.is_log4), + (0xa5, 0, User, COL_MAP.is_invalid_10), + // Opcode 0xa5 is PANIC when Kernel. Make the proof unverifiable by giving it no flag to decode to. + (0xa6, 1, All, COL_MAP.is_invalid_11), // 0xa6-0xa7 + (0xa8, 3, All, COL_MAP.is_invalid_12), // 0xa8-0xaf + (0xb0, 4, All, COL_MAP.is_invalid_13), // 0xb0-0xbf + (0xc0, 5, All, COL_MAP.is_invalid_14), // 0xc0-0xdf + (0xe0, 4, All, COL_MAP.is_invalid_15), // 0xe0-0xef + (0xf0, 0, All, COL_MAP.is_create), + (0xf1, 0, All, COL_MAP.is_call), + (0xf2, 0, All, COL_MAP.is_callcode), + (0xf3, 0, All, COL_MAP.is_return), + (0xf4, 0, All, COL_MAP.is_delegatecall), + (0xf5, 0, All, COL_MAP.is_create2), + (0xf6, 1, User, COL_MAP.is_invalid_16), // 0xf6-0xf7 + (0xf6, 0, Kernel, COL_MAP.is_get_context), + (0xf7, 0, Kernel, COL_MAP.is_set_context), + (0xf8, 1, User, COL_MAP.is_invalid_17), // 0xf8-0xf9 + (0xf8, 0, Kernel, COL_MAP.is_consume_gas), + (0xf9, 0, Kernel, COL_MAP.is_exit_kernel), + (0xfa, 0, All, COL_MAP.is_staticcall), + (0xfb, 0, User, COL_MAP.is_invalid_18), + (0xfb, 0, Kernel, COL_MAP.is_mload_general), + (0xfc, 0, User, COL_MAP.is_invalid_19), + (0xfc, 0, Kernel, COL_MAP.is_mstore_general), + (0xfd, 0, All, COL_MAP.is_revert), + (0xfe, 0, All, COL_MAP.is_invalid_20), + (0xff, 0, All, COL_MAP.is_selfdestruct), ]; pub fn generate(lv: &mut CpuColumnsView) { @@ -139,12 +160,13 @@ pub fn generate(lv: &mut CpuColumnsView) { let opcode = lv.opcode.to_canonical_u64(); assert!(opcode < 256, "opcode should be in {{0, ..., 255}}"); + let opcode = opcode as u8; for (i, bit) in lv.opcode_bits.iter_mut().enumerate() { - *bit = F::from_canonical_u64((opcode >> (7 - i)) & 1); + *bit = F::from_bool(opcode & (1 << i) != 0); } - let top_bits: [u64; 9] = [ + let top_bits: [u8; 9] = [ 0, opcode & 0x80, opcode & 0xc0, @@ -156,54 +178,101 @@ pub fn generate(lv: &mut CpuColumnsView) { opcode, ]; - for (oc, block_length, col) in OPCODES { - lv[col] = F::from_bool(top_bits[8 - block_length] == oc); + let kernel = lv.is_kernel_mode.to_canonical_u64(); + assert!(kernel <= 1); + let kernel = kernel != 0; + + for (oc, block_length, availability, col) in OPCODES { + let available = match availability { + All => true, + User => !kernel, + Kernel => kernel, + }; + let opcode_match = top_bits[8 - block_length] == oc; + lv[col] = F::from_bool(available && opcode_match); } } +/// Break up an opcode (which is 8 bits long) into its eight bits. +const fn bits_from_opcode(opcode: u8) -> [bool; 8] { + [ + opcode & (1 << 0) != 0, + opcode & (1 << 1) != 0, + opcode & (1 << 2) != 0, + opcode & (1 << 3) != 0, + opcode & (1 << 4) != 0, + opcode & (1 << 5) != 0, + opcode & (1 << 6) != 0, + opcode & (1 << 7) != 0, + ] +} + pub fn eval_packed_generic( lv: &CpuColumnsView
<P>,
    yield_constr: &mut ConstraintConsumer<P>
, ) { let cycle_filter = lv.is_cpu_cycle; + // Ensure that the kernel flag is valid (either 0 or 1). + let kernel_mode = lv.is_kernel_mode; + yield_constr.constraint(cycle_filter * kernel_mode * (kernel_mode - P::ONES)); + // Ensure that the opcode bits are valid: each has to be either 0 or 1, and they must match - // the opcode. Note that this also validates that this implicitly range-checks the opcode. + // the opcode. Note that this also implicitly range-checks the opcode. let bits = lv.opcode_bits; // First check that the bits are either 0 or 1. for bit in bits { yield_constr.constraint(cycle_filter * bit * (bit - P::ONES)); } - - // top_bits[i] is the opcode with all but the top i bits cleared. - let top_bits = { - let mut top_bits = [P::ZEROS; 9]; - for i in 0..8 { - top_bits[i + 1] = top_bits[i] + bits[i] * P::Scalar::from_canonical_u64(1 << (7 - i)); - } - top_bits - }; - // Now check that they match the opcode. - let opcode = lv.opcode; - yield_constr.constraint(cycle_filter * (opcode - top_bits[8])); + { + let opcode = lv.opcode; + let reconstructed_opcode: P = bits + .into_iter() + .enumerate() + .map(|(i, bit)| bit * P::Scalar::from_canonical_u64(1 << i)) + .sum(); + yield_constr.constraint(cycle_filter * (opcode - reconstructed_opcode)); + } // Check that the instruction flags are valid. // First, check that they are all either 0 or 1. - for (_, _, flag_col) in OPCODES { + for (_, _, _, flag_col) in OPCODES { let flag = lv[flag_col]; yield_constr.constraint(cycle_filter * flag * (flag - P::ONES)); } // Now check that exactly one is 1. let flag_sum: P = OPCODES .into_iter() - .map(|(_, _, flag_col)| lv[flag_col]) + .map(|(_, _, _, flag_col)| lv[flag_col]) .sum(); yield_constr.constraint(cycle_filter * (P::ONES - flag_sum)); - // Finally, classify all opcodes into blocks - for (oc, block_length, col) in OPCODES { - let constr = lv[col] * (top_bits[8 - block_length] - P::Scalar::from_canonical_u64(oc)); + // Finally, classify all opcodes, together with the kernel flag, into blocks + for (oc, block_length, availability, col) in OPCODES { + // 0 if the block/flag is available to us (is always available, is user-only and we are in + // user mode, or kernel-only and we are in kernel mode) and 1 otherwise. + let unavailable = match availability { + All => P::ZEROS, + User => kernel_mode, + Kernel => P::ONES - kernel_mode, + }; + // 0 if all the opcode bits match, and something in {1, ..., 8}, otherwise. + let opcode_mismatch: P = bits + .into_iter() + .zip(bits_from_opcode(oc)) + .rev() + .take(block_length + 1) + .map(|(row_bit, flag_bit)| match flag_bit { + // 1 if the bit does not match, and 0 otherwise + false => row_bit, + true => P::ONES - row_bit, + }) + .sum(); + + // If unavailable + opcode_mismatch is 0, then the opcode bits all match and we are in the + // correct mode. + let constr = lv[col] * (unavailable + opcode_mismatch); yield_constr.constraint(cycle_filter * constr); } } @@ -213,10 +282,20 @@ pub fn eval_ext_circuit, const D: usize>( lv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { + let one = builder.one_extension(); + let cycle_filter = lv.is_cpu_cycle; + // Ensure that the kernel flag is valid (either 0 or 1). 
+ let kernel_mode = lv.is_kernel_mode; + { + let constr = builder.mul_sub_extension(kernel_mode, kernel_mode, kernel_mode); + let constr = builder.mul_extension(cycle_filter, constr); + yield_constr.constraint(builder, constr); + } + // Ensure that the opcode bits are valid: each has to be either 0 or 1, and they must match - // the opcode. Note that this also validates that this implicitly range-checks the opcode. + // the opcode. Note that this also implicitly range-checks the opcode. let bits = lv.opcode_bits; // First check that the bits are either 0 or 1. for bit in bits { @@ -224,38 +303,32 @@ pub fn eval_ext_circuit, const D: usize>( let constr = builder.mul_extension(cycle_filter, constr); yield_constr.constraint(builder, constr); } - - let top_bits = { - let mut top_bits = [builder.zero_extension(); 9]; - for i in 0..8 { - top_bits[i + 1] = builder.mul_const_add_extension( - F::from_canonical_u64(1 << (7 - i)), - bits[i], - top_bits[i], - ); - } - top_bits - }; - - // Now check that the bits match the opcode. + // Now check that they match the opcode. { - let constr = builder.sub_extension(lv.opcode, top_bits[8]); - let constr = builder.mul_extension(cycle_filter, constr); + let opcode = lv.opcode; + let reconstructed_opcode = + bits.into_iter() + .enumerate() + .fold(builder.zero_extension(), |cumul, (i, bit)| { + builder.mul_const_add_extension(F::from_canonical_u64(1 << i), bit, cumul) + }); + let diff = builder.sub_extension(opcode, reconstructed_opcode); + let constr = builder.mul_extension(cycle_filter, diff); yield_constr.constraint(builder, constr); - }; + } // Check that the instruction flags are valid. // First, check that they are all either 0 or 1. - for (_, _, flag_col) in OPCODES { + for (_, _, _, flag_col) in OPCODES { let flag = lv[flag_col]; let constr = builder.mul_sub_extension(flag, flag, flag); let constr = builder.mul_extension(cycle_filter, constr); yield_constr.constraint(builder, constr); } - // Now check that they sum to 1. + // Now check that exactly one is 1. { let mut constr = builder.one_extension(); - for (_, _, flag_col) in OPCODES { + for (_, _, _, flag_col) in OPCODES { let flag = lv[flag_col]; constr = builder.sub_extension(constr, flag); } @@ -263,11 +336,33 @@ pub fn eval_ext_circuit, const D: usize>( yield_constr.constraint(builder, constr); } - for (oc, block_length, col) in OPCODES { - let flag = lv[col]; - let constr = builder.constant_extension(F::from_canonical_u64(oc).into()); - let constr = builder.sub_extension(top_bits[8 - block_length], constr); - let constr = builder.mul_extension(flag, constr); + // Finally, classify all opcodes, together with the kernel flag, into blocks + for (oc, block_length, availability, col) in OPCODES { + // 0 if the block/flag is available to us (is always available, is user-only and we are in + // user mode, or kernel-only and we are in kernel mode) and 1 otherwise. + let unavailable = match availability { + All => builder.zero_extension(), + User => kernel_mode, + Kernel => builder.sub_extension(one, kernel_mode), + }; + // 0 if all the opcode bits match, and something in {1, ..., 8}, otherwise. 
+ let opcode_mismatch = bits + .into_iter() + .zip(bits_from_opcode(oc)) + .rev() + .take(block_length + 1) + .fold(builder.zero_extension(), |cumul, (row_bit, flag_bit)| { + let to_add = match flag_bit { + false => row_bit, + true => builder.sub_extension(one, row_bit), + }; + builder.add_extension(cumul, to_add) + }); + + // If unavailable + opcode_mismatch is 0, then the opcode bits all match and we are in the + // correct mode. + let constr = builder.add_extension(unavailable, opcode_mismatch); + let constr = builder.mul_extension(lv[col], constr); let constr = builder.mul_extension(cycle_filter, constr); yield_constr.constraint(builder, constr); } diff --git a/evm/src/cpu/jumps.rs b/evm/src/cpu/jumps.rs new file mode 100644 index 00000000..10c9503a --- /dev/null +++ b/evm/src/cpu/jumps.rs @@ -0,0 +1,353 @@ +use once_cell::sync::Lazy; +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::field::types::Field; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; + +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::CpuColumnsView; +use crate::cpu::kernel::aggregator::KERNEL; + +static INVALID_DST_HANDLER_ADDR: Lazy = + Lazy::new(|| KERNEL.global_labels["fault_exception"]); + +pub fn eval_packed_exit_kernel( + lv: &CpuColumnsView
<P>,
+    nv: &CpuColumnsView<P>,
+    yield_constr: &mut ConstraintConsumer<P>
, +) { + let jumps_lv = lv.general.jumps(); + + // If we are executing `EXIT_KERNEL` then we simply restore the program counter and kernel mode + // flag. The top 6 (32-bit) limbs are ignored (this is not part of the spec, but we trust the + // kernel to set them to zero). + yield_constr.constraint_transition( + lv.is_cpu_cycle * lv.is_exit_kernel * (jumps_lv.input0[0] - nv.program_counter), + ); + yield_constr.constraint_transition( + lv.is_cpu_cycle * lv.is_exit_kernel * (jumps_lv.input0[1] - nv.is_kernel_mode), + ); +} + +pub fn eval_ext_circuit_exit_kernel, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + let jumps_lv = lv.general.jumps(); + let filter = builder.mul_extension(lv.is_cpu_cycle, lv.is_exit_kernel); + + // If we are executing `EXIT_KERNEL` then we simply restore the program counter and kernel mode + // flag. The top 6 (32-bit) limbs are ignored (this is not part of the spec, but we trust the + // kernel to set them to zero). + + let pc_constr = builder.sub_extension(jumps_lv.input0[0], nv.program_counter); + let pc_constr = builder.mul_extension(filter, pc_constr); + yield_constr.constraint_transition(builder, pc_constr); + + let kernel_constr = builder.sub_extension(jumps_lv.input0[1], nv.is_kernel_mode); + let kernel_constr = builder.mul_extension(filter, kernel_constr); + yield_constr.constraint_transition(builder, kernel_constr); +} + +pub fn eval_packed_jump_jumpi( + lv: &CpuColumnsView
<P>,
+    nv: &CpuColumnsView<P>,
+    yield_constr: &mut ConstraintConsumer<P>
, +) { + let jumps_lv = lv.general.jumps(); + let filter = lv.is_jump + lv.is_jumpi; // `JUMP` or `JUMPI` + + // If `JUMP`, re-use the `JUMPI` logic, but setting the second input (the predicate) to be 1. + // In other words, we implement `JUMP(addr)` as `JUMPI(addr, cond=1)`. + yield_constr.constraint(lv.is_jump * (jumps_lv.input1[0] - P::ONES)); + for &limb in &jumps_lv.input1[1..] { + // Set all limbs (other than the least-significant limb) to 0. + // NB: Technically, they don't have to be 0, as long as the sum + // `input1[0] + ... + input1[7]` cannot overflow. + yield_constr.constraint(lv.is_jump * limb); + } + + // Check `input0_upper_zero` + // `input0_upper_zero` is either 0 or 1. + yield_constr + .constraint(filter * jumps_lv.input0_upper_zero * (jumps_lv.input0_upper_zero - P::ONES)); + // The below sum cannot overflow due to the limb size. + let input0_upper_sum: P = jumps_lv.input0[1..].iter().copied().sum(); + // `input0_upper_zero` = 1 implies `input0_upper_sum` = 0. + yield_constr.constraint(filter * jumps_lv.input0_upper_zero * input0_upper_sum); + // `input0_upper_zero` = 0 implies `input0_upper_sum_inv * input0_upper_sum` = 1, which can only + // happen when `input0_upper_sum` is nonzero. + yield_constr.constraint( + filter + * (jumps_lv.input0_upper_sum_inv * input0_upper_sum + jumps_lv.input0_upper_zero + - P::ONES), + ); + + // Check `dst_valid_or_kernel` (this is just a logical OR) + yield_constr.constraint( + filter + * (jumps_lv.dst_valid + lv.is_kernel_mode + - jumps_lv.dst_valid * lv.is_kernel_mode + - jumps_lv.dst_valid_or_kernel), + ); + + // Check `input0_jumpable` (this is just `dst_valid_or_kernel` AND `input0_upper_zero`) + yield_constr.constraint( + filter + * (jumps_lv.dst_valid_or_kernel * jumps_lv.input0_upper_zero + - jumps_lv.input0_jumpable), + ); + + // Make sure that `should_continue`, `should_jump`, `should_trap` are all binary and exactly one + // is set. + yield_constr + .constraint(filter * jumps_lv.should_continue * (jumps_lv.should_continue - P::ONES)); + yield_constr.constraint(filter * jumps_lv.should_jump * (jumps_lv.should_jump - P::ONES)); + yield_constr.constraint(filter * jumps_lv.should_trap * (jumps_lv.should_trap - P::ONES)); + yield_constr.constraint( + filter * (jumps_lv.should_continue + jumps_lv.should_jump + jumps_lv.should_trap - P::ONES), + ); + + // Validate `should_continue` + // This sum cannot overflow (due to limb size). + let input1_sum: P = jumps_lv.input1.into_iter().sum(); + // `should_continue` = 1 implies `input1_sum` = 0. + yield_constr.constraint(filter * jumps_lv.should_continue * input1_sum); + // `should_continue` = 0 implies `input1_sum * input1_sum_inv` = 1, which can only happen if + // input1_sum is nonzero. + yield_constr.constraint( + filter * (input1_sum * jumps_lv.input1_sum_inv + jumps_lv.should_continue - P::ONES), + ); + + // Validate `should_jump` and `should_trap` by splitting on `input0_jumpable`. + // Note that `should_jump` = 1 and `should_trap` = 1 both imply that `should_continue` = 0, so + // `input1` is nonzero. 
+ yield_constr.constraint(filter * jumps_lv.should_jump * (jumps_lv.input0_jumpable - P::ONES)); + yield_constr.constraint(filter * jumps_lv.should_trap * jumps_lv.input0_jumpable); + + // Handle trap + // Set program counter and kernel flag + yield_constr + .constraint_transition(filter * jumps_lv.should_trap * (nv.is_kernel_mode - P::ONES)); + yield_constr.constraint_transition( + filter + * jumps_lv.should_trap + * (nv.program_counter - P::Scalar::from_canonical_usize(*INVALID_DST_HANDLER_ADDR)), + ); + + // Handle continue and jump + let continue_or_jump = jumps_lv.should_continue + jumps_lv.should_jump; + // Keep kernel mode. + yield_constr + .constraint_transition(filter * continue_or_jump * (nv.is_kernel_mode - lv.is_kernel_mode)); + // Set program counter depending on whether we're continuing or jumping. + yield_constr.constraint_transition( + filter * jumps_lv.should_continue * (nv.program_counter - lv.program_counter - P::ONES), + ); + yield_constr.constraint_transition( + filter * jumps_lv.should_jump * (nv.program_counter - jumps_lv.input0[0]), + ); +} + +pub fn eval_ext_circuit_jump_jumpi, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + let jumps_lv = lv.general.jumps(); + let filter = builder.add_extension(lv.is_jump, lv.is_jumpi); // `JUMP` or `JUMPI` + + // If `JUMP`, re-use the `JUMPI` logic, but setting the second input (the predicate) to be 1. + // In other words, we implement `JUMP(addr)` as `JUMPI(addr, cond=1)`. + { + let constr = builder.mul_sub_extension(lv.is_jump, jumps_lv.input1[0], lv.is_jump); + yield_constr.constraint(builder, constr); + } + for &limb in &jumps_lv.input1[1..] { + // Set all limbs (other than the least-significant limb) to 0. + // NB: Technically, they don't have to be 0, as long as the sum + // `input1[0] + ... + input1[7]` cannot overflow. + let constr = builder.mul_extension(lv.is_jump, limb); + yield_constr.constraint(builder, constr); + } + + // Check `input0_upper_zero` + // `input0_upper_zero` is either 0 or 1. + { + let constr = builder.mul_sub_extension( + jumps_lv.input0_upper_zero, + jumps_lv.input0_upper_zero, + jumps_lv.input0_upper_zero, + ); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + { + // The below sum cannot overflow due to the limb size. + let input0_upper_sum = builder.add_many_extension(jumps_lv.input0[1..].iter()); + + // `input0_upper_zero` = 1 implies `input0_upper_sum` = 0. + let constr = builder.mul_extension(jumps_lv.input0_upper_zero, input0_upper_sum); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + + // `input0_upper_zero` = 0 implies `input0_upper_sum_inv * input0_upper_sum` = 1, which can + // only happen when `input0_upper_sum` is nonzero. 
+ let constr = builder.mul_add_extension( + jumps_lv.input0_upper_sum_inv, + input0_upper_sum, + jumps_lv.input0_upper_zero, + ); + let constr = builder.mul_sub_extension(filter, constr, filter); + yield_constr.constraint(builder, constr); + }; + + // Check `dst_valid_or_kernel` (this is just a logical OR) + { + let constr = builder.mul_add_extension( + jumps_lv.dst_valid, + lv.is_kernel_mode, + jumps_lv.dst_valid_or_kernel, + ); + let constr = builder.sub_extension(jumps_lv.dst_valid, constr); + let constr = builder.add_extension(lv.is_kernel_mode, constr); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + + // Check `input0_jumpable` (this is just `dst_valid_or_kernel` AND `input0_upper_zero`) + { + let constr = builder.mul_sub_extension( + jumps_lv.dst_valid_or_kernel, + jumps_lv.input0_upper_zero, + jumps_lv.input0_jumpable, + ); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + + // Make sure that `should_continue`, `should_jump`, `should_trap` are all binary and exactly one + // is set. + for flag in [ + jumps_lv.should_continue, + jumps_lv.should_jump, + jumps_lv.should_trap, + ] { + let constr = builder.mul_sub_extension(flag, flag, flag); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + { + let constr = builder.add_extension(jumps_lv.should_continue, jumps_lv.should_jump); + let constr = builder.add_extension(constr, jumps_lv.should_trap); + let constr = builder.mul_sub_extension(filter, constr, filter); + yield_constr.constraint(builder, constr); + } + + // Validate `should_continue` + { + // This sum cannot overflow (due to limb size). + let input1_sum = builder.add_many_extension(jumps_lv.input1.into_iter()); + + // `should_continue` = 1 implies `input1_sum` = 0. + let constr = builder.mul_extension(jumps_lv.should_continue, input1_sum); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + + // `should_continue` = 0 implies `input1_sum * input1_sum_inv` = 1, which can only happen if + // input1_sum is nonzero. + let constr = builder.mul_add_extension( + input1_sum, + jumps_lv.input1_sum_inv, + jumps_lv.should_continue, + ); + let constr = builder.mul_sub_extension(filter, constr, filter); + yield_constr.constraint(builder, constr); + } + + // Validate `should_jump` and `should_trap` by splitting on `input0_jumpable`. + // Note that `should_jump` = 1 and `should_trap` = 1 both imply that `should_continue` = 0, so + // `input1` is nonzero. 
+ { + let constr = builder.mul_sub_extension( + jumps_lv.should_jump, + jumps_lv.input0_jumpable, + jumps_lv.should_jump, + ); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + { + let constr = builder.mul_extension(jumps_lv.should_trap, jumps_lv.input0_jumpable); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint(builder, constr); + } + + // Handle trap + { + let trap_filter = builder.mul_extension(filter, jumps_lv.should_trap); + + // Set kernel flag + let constr = builder.mul_sub_extension(trap_filter, nv.is_kernel_mode, trap_filter); + yield_constr.constraint_transition(builder, constr); + + // Set program counter + let constr = builder.arithmetic_extension( + F::ONE, + -F::from_canonical_usize(*INVALID_DST_HANDLER_ADDR), + trap_filter, + nv.program_counter, + trap_filter, + ); + yield_constr.constraint_transition(builder, constr); + } + + // Handle continue and jump + { + // Keep kernel mode. + let continue_or_jump = + builder.add_extension(jumps_lv.should_continue, jumps_lv.should_jump); + let constr = builder.sub_extension(nv.is_kernel_mode, lv.is_kernel_mode); + let constr = builder.mul_extension(continue_or_jump, constr); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint_transition(builder, constr); + } + // Set program counter depending on whether we're continuing... + { + let constr = builder.sub_extension(nv.program_counter, lv.program_counter); + let constr = + builder.mul_sub_extension(jumps_lv.should_continue, constr, jumps_lv.should_continue); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint_transition(builder, constr); + } + // ...or jumping. + { + let constr = builder.sub_extension(nv.program_counter, jumps_lv.input0[0]); + let constr = builder.mul_extension(jumps_lv.should_jump, constr); + let constr = builder.mul_extension(filter, constr); + yield_constr.constraint_transition(builder, constr); + } +} + +pub fn eval_packed( + lv: &CpuColumnsView
<P>,
+    nv: &CpuColumnsView<P>,
+    yield_constr: &mut ConstraintConsumer<P>
, +) { + eval_packed_exit_kernel(lv, nv, yield_constr); + eval_packed_jump_jumpi(lv, nv, yield_constr); +} + +pub fn eval_ext_circuit, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + eval_ext_circuit_exit_kernel(builder, lv, nv, yield_constr); + eval_ext_circuit_jump_jumpi(builder, lv, nv, yield_constr); +} diff --git a/evm/src/cpu/kernel/asm/core/terminate.asm b/evm/src/cpu/kernel/asm/core/terminate.asm index 3f953522..d035da76 100644 --- a/evm/src/cpu/kernel/asm/core/terminate.asm +++ b/evm/src/cpu/kernel/asm/core/terminate.asm @@ -1,25 +1,25 @@ // Handlers for operations which terminate the current context, namely STOP, // RETURN, SELFDESTRUCT, REVERT, and exceptions such as stack underflow. -global stop: +global sys_stop: // TODO: Set parent context's CTX_METADATA_RETURNDATA_SIZE to 0. // TODO: Refund unused gas to parent. %jump(terminate_common) -global return: +global sys_return: // TODO: Set parent context's CTX_METADATA_RETURNDATA_SIZE. // TODO: Copy returned memory to parent context's RETURNDATA (but not if we're returning from a constructor?) // TODO: Copy returned memory to parent context's memory (as specified in their call instruction) // TODO: Refund unused gas to parent. %jump(terminate_common) -global selfdestruct: +global sys_selfdestruct: %consume_gas_const(@GAS_SELFDESTRUCT) // TODO: Destroy account. // TODO: Refund unused gas to parent. %jump(terminate_common) -global revert: +global sys_revert: // TODO: Refund unused gas to parent. // TODO: Revert state changes. %jump(terminate_common) @@ -31,7 +31,7 @@ global revert: // - a JUMP/JUMPI destination is invalid // - the new stack size would be larger than 1024, or // - state modification is attempted during a static call -global exception: +global fault_exception: // TODO: Revert state changes. %jump(terminate_common) diff --git a/evm/src/cpu/kernel/asm/exp.asm b/evm/src/cpu/kernel/asm/exp.asm index 389f8490..3640b2f6 100644 --- a/evm/src/cpu/kernel/asm/exp.asm +++ b/evm/src/cpu/kernel/asm/exp.asm @@ -74,3 +74,6 @@ recursion_return: pop // stack: retdest, (1 + (x - 1) * (e % 2)) * exp(x * x, e / 2) jump + +global sys_exp: + PANIC diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs index 6c767998..5950c837 100644 --- a/evm/src/cpu/mod.rs +++ b/evm/src/cpu/mod.rs @@ -3,6 +3,8 @@ pub(crate) mod columns; mod control_flow; pub mod cpu_stark; pub(crate) mod decode; +mod jumps; pub mod kernel; pub mod public_inputs; mod simple_logic; +mod syscalls; diff --git a/evm/src/cpu/syscalls.rs b/evm/src/cpu/syscalls.rs new file mode 100644 index 00000000..a676a6a2 --- /dev/null +++ b/evm/src/cpu/syscalls.rs @@ -0,0 +1,110 @@ +//! Handle instructions that are implemented in terms of system calls. +//! +//! These are usually the ones that are too complicated to implement in one CPU table row. 
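
The round trip these syscall constraints enforce, sketched as ordinary Rust: the CPU saves `(program_counter, kernel flag)` into the shared `output` register, enters the kernel at the handler's label, and `EXIT_KERNEL` later restores both. The state struct and handler address below are illustrative stand-ins, not items from this crate:

    struct CpuState {
        pc: usize,
        kernel_mode: bool,
        saved: [u64; 8], // mirrors CpuSyscallsView::output / EXIT_KERNEL's input0
    }

    const SYS_EXP_HANDLER: usize = 15682; // stand-in for KERNEL.global_labels["sys_exp"]

    // Executing a syscalled opcode (e.g. EXP): save the return context, enter the kernel.
    fn syscall(s: &mut CpuState) {
        s.saved = [s.pc as u64, s.kernel_mode as u64, 0, 0, 0, 0, 0, 0];
        s.pc = SYS_EXP_HANDLER;
        s.kernel_mode = true;
    }

    // EXIT_KERNEL: restore the program counter and privilege level from the saved context.
    // (A real handler would bump the saved pc past the syscalled instruction before exiting.)
    fn exit_kernel(s: &mut CpuState) {
        s.pc = s.saved[0] as usize;
        s.kernel_mode = s.saved[1] != 0;
    }

    fn main() {
        let mut s = CpuState { pc: 63064, kernel_mode: false, saved: [0; 8] };
        syscall(&mut s);
        assert!(s.kernel_mode && s.pc == SYS_EXP_HANDLER);
        exit_kernel(&mut s);
        assert!(!s.kernel_mode && s.pc == 63064);
    }
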
+ +use once_cell::sync::Lazy; +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::field::types::Field; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; + +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::{CpuColumnsView, COL_MAP}; +use crate::cpu::kernel::aggregator::KERNEL; + +const NUM_SYSCALLS: usize = 2; + +fn make_syscall_list() -> [(usize, usize); NUM_SYSCALLS] { + let kernel = Lazy::force(&KERNEL); + [(COL_MAP.is_stop, "sys_stop"), (COL_MAP.is_exp, "sys_exp")] + .map(|(col_index, handler_name)| (col_index, kernel.global_labels[handler_name])) +} + +static TRAP_LIST: Lazy<[(usize, usize); NUM_SYSCALLS]> = Lazy::new(make_syscall_list); + +pub fn eval_packed( + lv: &CpuColumnsView
<P>,
+    nv: &CpuColumnsView<P>,
+    yield_constr: &mut ConstraintConsumer<P>,
+) {
+    let lv_syscalls = lv.general.syscalls();
+    let syscall_list = Lazy::force(&TRAP_LIST);
+    // 1 if _any_ syscall, else 0.
+    let should_syscall: P = syscall_list
+        .iter()
+        .map(|&(col_index, _)| lv[col_index])
+        .sum();
+    let filter = lv.is_cpu_cycle * should_syscall;
+
+    // If syscall: set program counter to the handler address
+    // Note that at most one of the `lv[col_index]`s will be 1 and all others will be 0.
+    let syscall_dst: P = syscall_list
+        .iter()
+        .map(|&(col_index, handler_addr)| {
+            lv[col_index] * P::Scalar::from_canonical_usize(handler_addr)
+        })
+        .sum();
+    yield_constr.constraint_transition(filter * (nv.program_counter - syscall_dst));
+    // If syscall: set kernel mode
+    yield_constr.constraint_transition(filter * (nv.is_kernel_mode - P::ONES));
+    // If syscall: push current PC to stack
+    yield_constr.constraint(filter * (lv_syscalls.output[0] - lv.program_counter));
+    // If syscall: push current kernel flag to stack (share register with PC)
+    yield_constr.constraint(filter * (lv_syscalls.output[1] - lv.is_kernel_mode));
+    // If syscall: zero the rest of that register
+    for &limb in &lv_syscalls.output[2..] {
+        yield_constr.constraint(filter * limb);
+    }
+}
+
+pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
+    lv: &CpuColumnsView<ExtensionTarget<D>>,
+    nv: &CpuColumnsView<ExtensionTarget<D>>,
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+) {
+    let lv_syscalls = lv.general.syscalls();
+    let syscall_list = Lazy::force(&TRAP_LIST);
+    // 1 if _any_ syscall, else 0.
+    let should_syscall =
+        builder.add_many_extension(syscall_list.iter().map(|&(col_index, _)| lv[col_index]));
+    let filter = builder.mul_extension(lv.is_cpu_cycle, should_syscall);
+
+    // If syscall: set program counter to the handler address
+    {
+        // Note that at most one of the `lv[col_index]`s will be 1 and all others will be 0.
+        let syscall_dst = syscall_list.iter().fold(
+            builder.zero_extension(),
+            |cumul, &(col_index, handler_addr)| {
+                let handler_addr = F::from_canonical_usize(handler_addr);
+                builder.mul_const_add_extension(handler_addr, lv[col_index], cumul)
+            },
+        );
+        let constr = builder.sub_extension(nv.program_counter, syscall_dst);
+        let constr = builder.mul_extension(filter, constr);
+        yield_constr.constraint_transition(builder, constr);
+    }
+    // If syscall: set kernel mode
+    {
+        let constr = builder.mul_sub_extension(filter, nv.is_kernel_mode, filter);
+        yield_constr.constraint_transition(builder, constr);
+    }
+    // If syscall: push current PC to stack
+    {
+        let constr = builder.sub_extension(lv_syscalls.output[0], lv.program_counter);
+        let constr = builder.mul_extension(filter, constr);
+        yield_constr.constraint(builder, constr);
+    }
+    // If syscall: push current kernel flag to stack (share register with PC)
+    {
+        let constr = builder.sub_extension(lv_syscalls.output[1], lv.is_kernel_mode);
+        let constr = builder.mul_extension(filter, constr);
+        yield_constr.constraint(builder, constr);
+    }
+    // If syscall: zero the rest of that register
+    for &limb in &lv_syscalls.output[2..] {
+        let constr = builder.mul_extension(filter, limb);
+        yield_constr.constraint(builder, constr);
+    }
+}
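
Taken together, the jumps.rs constraints resolve every JUMP/JUMPI into exactly one of `should_continue`, `should_jump`, or `should_trap`, and the `*_sum_inv` columns are the standard hint for proving a field element nonzero: exhibit its inverse, so that `sum * sum_inv + should_continue - 1 = 0` is satisfiable exactly when (sum nonzero, continue = 0) or (sum zero, continue = 1). A hedged sketch of the witness-side logic; the Goldilocks modulus is the field plonky2 uses, but the helper names are illustrative:

    const P: u128 = 0xffff_ffff_0000_0001; // Goldilocks: 2^64 - 2^32 + 1

    fn pow_mod(mut b: u128, mut e: u128) -> u128 {
        let mut acc = 1u128;
        while e > 0 {
            if e & 1 == 1 { acc = acc * b % P; }
            b = b * b % P;
            e >>= 1;
        }
        acc
    }

    /// Inverse by Fermat's little theorem; 0 maps to 0 (the hint column may hold
    /// "an arbitrary value" when the sum is zero).
    fn inv(x: u128) -> u128 { if x == 0 { 0 } else { pow_mod(x % P, P - 2) } }

    #[derive(Debug, PartialEq)]
    enum Outcome { Continue, Jump, Trap }

    fn resolve(input0: [u64; 8], input1: [u64; 8], dst_valid: bool, kernel: bool) -> Outcome {
        let predicate_zero = input1.iter().all(|&l| l == 0);
        let upper_zero = input0[1..].iter().all(|&l| l == 0);
        let jumpable = (dst_valid || kernel) && upper_zero;
        match (predicate_zero, jumpable) {
            (true, _) => Outcome::Continue,  // JUMPI with zero predicate falls through
            (false, true) => Outcome::Jump,  // jump to input0[0]
            (false, false) => Outcome::Trap, // enter fault_exception in kernel mode
        }
    }

    fn main() {
        // The nonzero-sum hint: sum * inv(sum) = 1 in the field.
        let sum: u128 = 5;
        assert_eq!(sum * inv(sum) % P, 1);

        let addr = [3754u64, 0, 0, 0, 0, 0, 0, 0];
        let taken = [1u64, 0, 0, 0, 0, 0, 0, 0];
        assert_eq!(resolve(addr, taken, true, false), Outcome::Jump);
        assert_eq!(resolve(addr, [0; 8], true, false), Outcome::Continue);
        // High limbs nonzero: not jumpable, so a taken jump traps.
        assert_eq!(resolve([3754, 1, 0, 0, 0, 0, 0, 0], taken, true, false), Outcome::Trap);
    }
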