From 3d83d63f0ba5f6ee3f12ca1d6ca5c29db5343341 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 18 Jul 2022 11:50:58 -0700
Subject: [PATCH 01/85] Shared CPU columns

I was thinking we could have two sets of shared columns:
- First, a set of "core" columns which would contain instruction-decoding registers during an execution cycle, or some counter data during a kernel bootstrapping cycle.
- Second, a set of "general" columns which would be more general-purpose. For now it could contain the "looking" columns for most cross-table lookups (CTLs): Keccak, arithmetic, and logic, but NOT memory, since memory can be used simultaneously with the others. It could potentially be reused for other things too, such as the registers used for `EQ` and `IS_ZERO` (but I know it's nontrivial to share those since we would need to use lower-degree constraints, so I wouldn't bother for now).

This PR implements just the latter. If it looks good I'll proceed with the former afterward.
---
 evm/src/all_stark.rs                       | 38 ++++-----
 evm/src/cpu/bootstrap_kernel.rs            | 10 ++-
 evm/src/cpu/{columns.rs => columns/mod.rs} | 31 ++++---
 evm/src/cpu/columns/shared.rs              | 95 ++++++++++++++++++++++
 evm/src/cpu/cpu_stark.rs                   | 12 +--
 evm/src/cpu/simple_logic/eq_iszero.rs      | 34 ++++----
 evm/src/cpu/simple_logic/not.rs            |  9 +-
 7 files changed, 166 insertions(+), 63 deletions(-)
 rename evm/src/cpu/{columns.rs => columns/mod.rs} (87%)
 create mode 100644 evm/src/cpu/columns/shared.rs
diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs
index ba157fc0..58756703 100644
--- a/evm/src/all_stark.rs
+++ b/evm/src/all_stark.rs
@@ -130,7 +130,7 @@ mod tests {
 
     use anyhow::Result;
     use ethereum_types::U256;
-    use itertools::{izip, Itertools};
+    use itertools::Itertools;
     use plonky2::field::polynomial::PolynomialValues;
     use plonky2::field::types::{Field, PrimeField64};
     use plonky2::iop::witness::PartialWitness;
@@ -246,13 +246,10 @@ mod tests {
             let mut row: cpu::columns::CpuColumnsView<F> =
                 [F::ZERO; CpuStark::<F, D>::COLUMNS].into();
             row.is_keccak = F::ONE;
-            for (j, input, output) in izip!(
-                0..2 * NUM_INPUTS,
-                row.keccak_input_limbs.iter_mut(),
-                row.keccak_output_limbs.iter_mut()
-            ) {
-                *input = keccak_input_limbs[i][j];
-                *output = keccak_output_limbs[i][j];
+            let keccak = row.general.keccak_mut();
+            for j in 0..2 * NUM_INPUTS {
+                keccak.input_limbs[j] = keccak_input_limbs[i][j];
+                keccak.output_limbs[j] = keccak_output_limbs[i][j];
             }
             cpu_stark.generate(row.borrow_mut());
             cpu_trace_rows.push(row.into());
@@ -270,21 +267,22 @@ mod tests {
             .into_iter()
             .map(|(col, opcode)| logic_trace[col].values[i] * F::from_canonical_u64(opcode))
             .sum();
-            for (cols_cpu, cols_logic) in [
-                (&mut row.logic_input0, logic::columns::INPUT0),
-                (&mut row.logic_input1, logic::columns::INPUT1),
-            ] {
-                for (col_cpu, limb_cols_logic) in cols_cpu
-                    .iter_mut()
-                    .zip(logic::columns::limb_bit_cols_for_input(cols_logic))
-                {
-                    *col_cpu =
-                        limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i]));
-                }
+            let logic = row.general.logic_mut();
+
+            let input0_bit_cols = logic::columns::limb_bit_cols_for_input(logic::columns::INPUT0);
+            for (col_cpu, limb_cols_logic) in logic.input0.iter_mut().zip(input0_bit_cols) {
+                *col_cpu = limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i]));
             }
-            for (col_cpu, col_logic) in row.logic_output.iter_mut().zip(logic::columns::RESULT) {
+
+            let input1_bit_cols = logic::columns::limb_bit_cols_for_input(logic::columns::INPUT1);
+            for (col_cpu, limb_cols_logic) in logic.input1.iter_mut().zip(input1_bit_cols) {
+                *col_cpu = limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i]));
+            }
+
+            for (col_cpu, col_logic) in logic.output.iter_mut().zip(logic::columns::RESULT) {
                 *col_cpu = logic_trace[col_logic].values[i];
             }
+
             cpu_stark.generate(row.borrow_mut());
             cpu_trace_rows.push(row.into());
         }
diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs
index af307a28..2c6afb51 100644
--- a/evm/src/cpu/bootstrap_kernel.rs
+++ b/evm/src/cpu/bootstrap_kernel.rs
@@ -56,7 +56,8 @@ pub(crate) fn generate_bootstrap_kernel<F: Field>(state: &mut GenerationState<F>
         }
 
         sponge_state[sponge_input_pos] = packed_bytes;
-        state.current_cpu_row.keccak_input_limbs = sponge_state.map(F::from_canonical_u32);
+        let keccak = state.current_cpu_row.general.keccak_mut();
+        keccak.input_limbs = sponge_state.map(F::from_canonical_u32);
         state.commit_cpu_row();
 
         sponge_input_pos = (sponge_input_pos + 1) % KECCAK_RATE_LIMBS;
@@ -65,7 +66,8 @@ pub(crate) fn generate_bootstrap_kernel<F: Field>(state: &mut GenerationState<F>
         if sponge_input_pos == 0 {
             state.current_cpu_row.is_keccak = F::ONE;
             keccakf_u32s(&mut sponge_state);
-            state.current_cpu_row.keccak_output_limbs = sponge_state.map(F::from_canonical_u32);
+            let keccak = state.current_cpu_row.general.keccak_mut();
+            keccak.output_limbs = sponge_state.map(F::from_canonical_u32);
         }
     }
 }
@@ -97,7 +99,7 @@ pub(crate) fn eval_bootstrap_kernel<F: Field, P: PackedField<Scalar = F>>(
     for (&expected, actual) in KERNEL
         .code_hash
         .iter()
-        .zip(local_values.keccak_output_limbs)
+        .zip(local_values.general.keccak().output_limbs)
     {
         let expected = P::from(F::from_canonical_u32(expected));
         let diff = expected - actual;
@@ -137,7 +139,7 @@ pub(crate) fn eval_bootstrap_kernel_circuit<F: RichField + Extendable<D>, const
     for (&expected, actual) in KERNEL
         .code_hash
         .iter()
-        .zip(local_values.keccak_output_limbs)
+        .zip(local_values.general.keccak().output_limbs)
     {
         let expected = builder.constant_extension(F::Extension::from_canonical_u32(expected));
         let diff = builder.sub_extension(expected, actual);
diff --git a/evm/src/cpu/columns.rs b/evm/src/cpu/columns/mod.rs
similarity index 87%
rename from evm/src/cpu/columns.rs
rename to evm/src/cpu/columns/mod.rs
index f3a400c6..51acbfd3 100644
--- a/evm/src/cpu/columns.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -2,14 +2,18 @@
 #![allow(dead_code)]
 
 use std::borrow::{Borrow, BorrowMut};
+use std::fmt::Debug;
 use std::mem::{size_of, transmute, transmute_copy, ManuallyDrop};
 use std::ops::{Index, IndexMut};
 
+use crate::cpu::columns::shared::CpuSharedColumnsView;
 use crate::memory;
 
+mod shared;
+
 #[repr(C)]
 #[derive(Eq, PartialEq, Debug)]
-pub struct CpuColumnsView<T> {
+pub struct CpuColumnsView<T: Copy> {
     /// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise.
     pub is_bootstrap_kernel: T,
 
@@ -136,14 +140,9 @@ pub struct CpuColumnsView<T> {
 
     /// Filter. 1 iff a Keccak permutation is computed on this row.
     pub is_keccak: T,
-    pub keccak_input_limbs: [T; 50],
-    pub keccak_output_limbs: [T; 50],
 
-    // Assuming a limb size of 16 bits. This can be changed, but it must be <= 28 bits.
-    // TODO: These input/output columns can be shared between the logic operations and others.
-    pub logic_input0: [T; 16],
-    pub logic_input1: [T; 16],
-    pub logic_output: [T; 16],
+    pub(crate) general: CpuSharedColumnsView<T>,
+
     pub simple_logic_diff: T,
     pub simple_logic_diff_inv: T,
 
@@ -169,43 +168,43 @@ unsafe fn transmute_no_compile_time_size_checks<T, U>(value: T) -> U {
     transmute_copy(&value)
 }
 
-impl<T> From<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
+impl<T: Copy> From<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
     fn from(value: [T; NUM_CPU_COLUMNS]) -> Self {
         unsafe { transmute_no_compile_time_size_checks(value) }
     }
 }
 
-impl<T> From<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
+impl<T: Copy> From<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
     fn from(value: CpuColumnsView<T>) -> Self {
         unsafe { transmute_no_compile_time_size_checks(value) }
     }
 }
 
-impl<T> Borrow<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
+impl<T: Copy> Borrow<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
     fn borrow(&self) -> &CpuColumnsView<T> {
         unsafe { transmute(self) }
     }
 }
 
-impl<T> BorrowMut<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
+impl<T: Copy> BorrowMut<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
     fn borrow_mut(&mut self) -> &mut CpuColumnsView<T> {
         unsafe { transmute(self) }
     }
 }
 
-impl<T> Borrow<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
+impl<T: Copy> Borrow<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
     fn borrow(&self) -> &[T; NUM_CPU_COLUMNS] {
         unsafe { transmute(self) }
     }
 }
 
-impl<T> BorrowMut<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
+impl<T: Copy> BorrowMut<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
     fn borrow_mut(&mut self) -> &mut [T; NUM_CPU_COLUMNS] {
         unsafe { transmute(self) }
     }
 }
 
-impl<T, I> Index<I> for CpuColumnsView<T>
+impl<T: Copy, I> Index<I> for CpuColumnsView<T>
 where
     [T]: Index<I>,
 {
@@ -217,7 +216,7 @@ where
     }
 }
 
-impl<T, I> IndexMut<I> for CpuColumnsView<T>
+impl<T: Copy, I> IndexMut<I> for CpuColumnsView<T>
 where
     [T]: IndexMut<I>,
 {
diff --git a/evm/src/cpu/columns/shared.rs b/evm/src/cpu/columns/shared.rs
new file mode 100644
index 00000000..34db209d
--- /dev/null
+++ b/evm/src/cpu/columns/shared.rs
@@ -0,0 +1,95 @@
+use std::borrow::{Borrow, BorrowMut};
+use std::fmt::{Debug, Formatter};
+use std::mem::{size_of, transmute};
+
+/// General purpose columns, which can have different meanings depending on what CTL or other
+/// operation is occurring at this row.
+pub(crate) union CpuSharedColumnsView<T: Copy> {
+    keccak: CpuKeccakView<T>,
+    arithmetic: CpuArithmeticView<T>,
+    logic: CpuLogicView<T>,
+}
+
+impl<T: Copy> CpuSharedColumnsView<T> {
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn keccak(&self) -> &CpuKeccakView<T> {
+        unsafe { &self.keccak }
+    }
+
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn keccak_mut(&mut self) -> &mut CpuKeccakView<T> {
+        unsafe { &mut self.keccak }
+    }
+
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn arithmetic(&self) -> &CpuArithmeticView<T> {
+        unsafe { &self.arithmetic }
+    }
+
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn arithmetic_mut(&mut self) -> &mut CpuArithmeticView<T> {
+        unsafe { &mut self.arithmetic }
+    }
+
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn logic(&self) -> &CpuLogicView<T> {
+        unsafe { &self.logic }
+    }
+
+    // SAFETY: Each view is a valid interpretation of the underlying array.
+    pub(crate) fn logic_mut(&mut self) -> &mut CpuLogicView<T> {
+        unsafe { &mut self.logic }
+    }
+}
+
+impl<T: Copy + PartialEq> PartialEq<Self> for CpuSharedColumnsView<T> {
+    fn eq(&self, other: &Self) -> bool {
+        let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
+        let other_arr: &[T; NUM_SHARED_COLUMNS] = other.borrow();
+        self_arr == other_arr
+    }
+}
+
+impl<T: Copy + Eq> Eq for CpuSharedColumnsView<T> {}
+
+impl<T: Copy + Debug> Debug for CpuSharedColumnsView<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
+        Debug::fmt(self_arr, f)
+    }
+}
+
+impl<T: Copy> Borrow<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView<T> {
+    fn borrow(&self) -> &[T; NUM_SHARED_COLUMNS] {
+        unsafe { transmute(self) }
+    }
+}
+
+impl<T: Copy> BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView<T> {
+    fn borrow_mut(&mut self) -> &mut [T; NUM_SHARED_COLUMNS] {
+        unsafe { transmute(self) }
+    }
+}
+
+#[derive(Copy, Clone)]
+pub(crate) struct CpuKeccakView<T: Copy> {
+    pub(crate) input_limbs: [T; 50],
+    pub(crate) output_limbs: [T; 50],
+}
+
+#[derive(Copy, Clone)]
+pub(crate) struct CpuArithmeticView<T: Copy> {
+    // TODO: Add "looking" columns for the arithmetic CTL.
+    tmp: T, // temporary, to suppress errors
+}
+
+#[derive(Copy, Clone)]
+pub(crate) struct CpuLogicView<T: Copy> {
+    // Assuming a limb size of 16 bits. This can be changed, but it must be <= 28 bits.
+    pub(crate) input0: [T; 16],
+    pub(crate) input1: [T; 16],
+    pub(crate) output: [T; 16],
+}
+
+// `u8` is guaranteed to have a `size_of` of 1.
+pub const NUM_SHARED_COLUMNS: usize = size_of::<CpuSharedColumnsView<u8>>();
diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs
index 1e5cc887..0e4d69f2 100644
--- a/evm/src/cpu/cpu_stark.rs
+++ b/evm/src/cpu/cpu_stark.rs
@@ -16,8 +16,9 @@ use crate::stark::Stark;
 use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
 
 pub fn ctl_data_keccak<F: Field>() -> Vec<Column<F>> {
-    let mut res: Vec<_> = Column::singles(COL_MAP.keccak_input_limbs).collect();
-    res.extend(Column::singles(COL_MAP.keccak_output_limbs));
+    let keccak = COL_MAP.general.keccak();
+    let mut res: Vec<_> = Column::singles(keccak.input_limbs).collect();
+    res.extend(Column::singles(keccak.output_limbs));
     res
 }
 
@@ -27,9 +28,10 @@ pub fn ctl_filter_keccak<F: Field>() -> Column<F> {
 
 pub fn ctl_data_logic<F: Field>() -> Vec<Column<F>> {
     let mut res = Column::singles([COL_MAP.is_and, COL_MAP.is_or, COL_MAP.is_xor]).collect_vec();
-    res.extend(Column::singles(COL_MAP.logic_input0));
-    res.extend(Column::singles(COL_MAP.logic_input1));
-    res.extend(Column::singles(COL_MAP.logic_output));
+    let logic = COL_MAP.general.logic();
+    res.extend(Column::singles(logic.input0));
+    res.extend(Column::singles(logic.input1));
+    res.extend(Column::singles(logic.output));
     res
 }
 
diff --git a/evm/src/cpu/simple_logic/eq_iszero.rs b/evm/src/cpu/simple_logic/eq_iszero.rs
index 97e000b6..75bb8bb6 100644
--- a/evm/src/cpu/simple_logic/eq_iszero.rs
+++ b/evm/src/cpu/simple_logic/eq_iszero.rs
@@ -9,6 +9,7 @@ use crate::cpu::columns::CpuColumnsView;
 const LIMB_SIZE: usize = 16;
 
 pub fn generate<F: RichField>(lv: &mut CpuColumnsView<F>) {
+    let logic = lv.general.logic_mut();
     let eq_filter = lv.is_eq.to_canonical_u64();
     let iszero_filter = lv.is_iszero.to_canonical_u64();
     assert!(eq_filter <= 1);
@@ -20,9 +21,10 @@ pub fn generate<F: RichField>(lv: &mut CpuColumnsView<F>) {
     }
 
     let diffs = if eq_filter == 1 {
-        lv.logic_input0
+        logic
+            .input0
             .into_iter()
-            .zip(lv.logic_input1)
+            .zip(logic.input1)
             .map(|(in0, in1)| {
                 assert_eq!(in0.to_canonical_u64() >> LIMB_SIZE, 0);
                 assert_eq!(in1.to_canonical_u64() >> LIMB_SIZE, 0);
@@ -31,7 +33,7 @@ pub fn generate<F: RichField>(lv: &mut CpuColumnsView<F>) {
             })
             .sum()
     } else if iszero_filter == 1 {
-        lv.logic_input0.into_iter().sum()
+        logic.input0.into_iter().sum()
     } else {
         panic!()
     };
@@ -39,8 +41,8 @@ pub fn generate<F: RichField>(lv: &mut CpuColumnsView<F>) {
     lv.simple_logic_diff = diffs;
     lv.simple_logic_diff_inv = diffs.try_inverse().unwrap_or(F::ZERO);
 
-    lv.logic_output[0] = F::from_bool(diffs == F::ZERO);
-    for out_limb_ref in lv.logic_output[1..].iter_mut() {
+    logic.output[0] = F::from_bool(diffs == F::ZERO);
+    for out_limb_ref in logic.output[1..].iter_mut() {
         *out_limb_ref = F::ZERO;
     }
 }
@@ -49,17 +51,18 @@ pub fn eval_packed<P: PackedField>(
     lv: &CpuColumnsView<P>,
     yield_constr: &mut ConstraintConsumer<P>,
 ) {
+    let logic = lv.general.logic();
     let eq_filter = lv.is_eq;
     let iszero_filter = lv.is_iszero;
     let eq_or_iszero_filter = eq_filter + iszero_filter;
 
-    let ls_bit = lv.logic_output[0];
+    let ls_bit = logic.output[0];
 
     // Handle EQ and ISZERO. Most limbs of the output are 0, but the least-significant one is
     // either 0 or 1.
     yield_constr.constraint(eq_or_iszero_filter * ls_bit * (ls_bit - P::ONES));
 
-    for &bit in &lv.logic_output[1..] {
+    for &bit in &logic.output[1..] {
         yield_constr.constraint(eq_or_iszero_filter * bit);
     }
 
@@ -67,13 +70,13 @@ pub fn eval_packed<P: PackedField>(
     let diffs = lv.simple_logic_diff;
     let diffs_inv = lv.simple_logic_diff_inv;
     {
-        let input0_sum: P = lv.logic_input0.into_iter().sum();
+        let input0_sum: P = logic.input0.into_iter().sum();
         yield_constr.constraint(iszero_filter * (diffs - input0_sum));
 
-        let sum_squared_diffs: P = lv
-            .logic_input0
+        let sum_squared_diffs: P = logic
+            .input0
             .into_iter()
-            .zip(lv.logic_input1)
+            .zip(logic.input1)
             .map(|(in0, in1)| (in0 - in1).square())
             .sum();
         yield_constr.constraint(eq_filter * (diffs - sum_squared_diffs));
@@ -90,11 +93,12 @@ pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
     lv: &CpuColumnsView<ExtensionTarget<D>>,
     yield_constr: &mut RecursiveConstraintConsumer<F, D>,
 ) {
+    let logic = lv.general.logic();
     let eq_filter = lv.is_eq;
     let iszero_filter = lv.is_iszero;
     let eq_or_iszero_filter = builder.add_extension(eq_filter, iszero_filter);
 
-    let ls_bit = lv.logic_output[0];
+    let ls_bit = logic.output[0];
 
     // Handle EQ and ISZERO. Most limbs of the output are 0, but the least-significant one is
     // either 0 or 1.
@@ -104,7 +108,7 @@ pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
         yield_constr.constraint(builder, constr);
     }
 
-    for &bit in &lv.logic_output[1..] {
+    for &bit in &logic.output[1..] {
         let constr = builder.mul_extension(eq_or_iszero_filter, bit);
         yield_constr.constraint(builder, constr);
     }
@@ -113,14 +117,14 @@ pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
     let diffs = lv.simple_logic_diff;
     let diffs_inv = lv.simple_logic_diff_inv;
     {
-        let input0_sum = builder.add_many_extension(lv.logic_input0);
+        let input0_sum = builder.add_many_extension(logic.input0);
         {
             let constr = builder.sub_extension(diffs, input0_sum);
             let constr = builder.mul_extension(iszero_filter, constr);
             yield_constr.constraint(builder, constr);
         }
 
-        let sum_squared_diffs = lv.logic_input0.into_iter().zip(lv.logic_input1).fold(
+        let sum_squared_diffs = logic.input0.into_iter().zip(logic.input1).fold(
             builder.zero_extension(),
             |acc, (in0, in1)| {
                 let diff = builder.sub_extension(in0, in1);
diff --git a/evm/src/cpu/simple_logic/not.rs b/evm/src/cpu/simple_logic/not.rs
index d1ba4d46..efbf51a6 100644
--- a/evm/src/cpu/simple_logic/not.rs
+++ b/evm/src/cpu/simple_logic/not.rs
@@ -17,7 +17,8 @@ pub fn generate<F: RichField>(lv: &mut CpuColumnsView<F>) {
     }
     assert_eq!(is_not_filter, 1);
 
-    for (input, output_ref) in lv.logic_input0.into_iter().zip(lv.logic_output.iter_mut()) {
+    let logic = lv.general.logic_mut();
+    for (input, output_ref) in logic.input0.into_iter().zip(logic.output.iter_mut()) {
         let input = input.to_canonical_u64();
         assert_eq!(input >> LIMB_SIZE, 0);
         let output = input ^ ALL_1_LIMB;
@@ -30,10 +31,11 @@ pub fn eval_packed<P: PackedField>(
     yield_constr: &mut ConstraintConsumer<P>,
 ) {
     // This is simple: just do output = 0xffff - input.
+    let logic = lv.general.logic();
     let cycle_filter = lv.is_cpu_cycle;
     let is_not_filter = lv.is_not;
     let filter = cycle_filter * is_not_filter;
-    for (input, output) in lv.logic_input0.into_iter().zip(lv.logic_output) {
+    for (input, output) in logic.input0.into_iter().zip(logic.output) {
         yield_constr
             .constraint(filter * (output + input - P::Scalar::from_canonical_u64(ALL_1_LIMB)));
     }
@@ -44,10 +46,11 @@ pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
     lv: &CpuColumnsView<ExtensionTarget<D>>,
     yield_constr: &mut RecursiveConstraintConsumer<F, D>,
 ) {
+    let logic = lv.general.logic();
     let cycle_filter = lv.is_cpu_cycle;
     let is_not_filter = lv.is_not;
     let filter = builder.mul_extension(cycle_filter, is_not_filter);
-    for (input, output) in lv.logic_input0.into_iter().zip(lv.logic_output) {
+    for (input, output) in logic.input0.into_iter().zip(logic.output) {
         let constr = builder.add_extension(output, input);
         let constr = builder.arithmetic_extension(
             F::ONE,

From 49a785f2bd1b12c12b2b2d902096e1dbdbd6427a Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 18 Jul 2022 12:21:00 -0700
Subject: [PATCH 02/85] rename

---
 evm/src/cpu/columns/mod.rs    |  4 ++--
 evm/src/cpu/columns/shared.rs | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs
index 51acbfd3..bfb3f0a8 100644
--- a/evm/src/cpu/columns/mod.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -6,7 +6,7 @@ use std::fmt::Debug;
 use std::mem::{size_of, transmute, transmute_copy, ManuallyDrop};
 use std::ops::{Index, IndexMut};
 
-use crate::cpu::columns::shared::CpuSharedColumnsView;
+use crate::cpu::columns::shared::CpuGeneralColumnsView;
 use crate::memory;
 
 mod shared;
@@ -141,7 +141,7 @@ pub struct CpuColumnsView<T: Copy> {
     /// Filter. 1 iff a Keccak permutation is computed on this row.
     pub is_keccak: T,
 
-    pub(crate) general: CpuSharedColumnsView<T>,
+    pub(crate) general: CpuGeneralColumnsView<T>,
 
     pub simple_logic_diff: T,
     pub simple_logic_diff_inv: T,
diff --git a/evm/src/cpu/columns/shared.rs b/evm/src/cpu/columns/shared.rs
index 34db209d..600dda87 100644
--- a/evm/src/cpu/columns/shared.rs
+++ b/evm/src/cpu/columns/shared.rs
@@ -4,13 +4,13 @@ use std::mem::{size_of, transmute};
 
 /// General purpose columns, which can have different meanings depending on what CTL or other
 /// operation is occurring at this row.
-pub(crate) union CpuSharedColumnsView<T: Copy> {
+pub(crate) union CpuGeneralColumnsView<T: Copy> {
     keccak: CpuKeccakView<T>,
     arithmetic: CpuArithmeticView<T>,
     logic: CpuLogicView<T>,
 }
 
-impl<T: Copy> CpuSharedColumnsView<T> {
+impl<T: Copy> CpuGeneralColumnsView<T> {
     // SAFETY: Each view is a valid interpretation of the underlying array.
     pub(crate) fn keccak(&self) -> &CpuKeccakView<T> {
         unsafe { &self.keccak }
@@ -42,7 +42,7 @@ impl<T: Copy> CpuSharedColumnsView<T> {
     }
 }
 
-impl<T: Copy + PartialEq> PartialEq<Self> for CpuSharedColumnsView<T> {
+impl<T: Copy + PartialEq> PartialEq<Self> for CpuGeneralColumnsView<T> {
     fn eq(&self, other: &Self) -> bool {
         let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
         let other_arr: &[T; NUM_SHARED_COLUMNS] = other.borrow();
@@ -50,22 +50,22 @@ impl<T: Copy + PartialEq> PartialEq<Self> for CpuSharedColumnsView<T> {
     }
 }
 
-impl<T: Copy + Eq> Eq for CpuSharedColumnsView<T> {}
+impl<T: Copy + Eq> Eq for CpuGeneralColumnsView<T> {}
 
-impl<T: Copy + Debug> Debug for CpuSharedColumnsView<T> {
+impl<T: Copy + Debug> Debug for CpuGeneralColumnsView<T> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
         Debug::fmt(self_arr, f)
     }
 }
 
-impl<T: Copy> Borrow<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView<T> {
+impl<T: Copy> Borrow<[T; NUM_SHARED_COLUMNS]> for CpuGeneralColumnsView<T> {
     fn borrow(&self) -> &[T; NUM_SHARED_COLUMNS] {
         unsafe { transmute(self) }
     }
 }
 
-impl<T: Copy> BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView<T> {
+impl<T: Copy> BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuGeneralColumnsView<T> {
     fn borrow_mut(&mut self) -> &mut [T; NUM_SHARED_COLUMNS] {
         unsafe { transmute(self) }
     }
@@ -92,4 +92,4 @@ pub(crate) struct CpuLogicView<T: Copy> {
 }
 
 // `u8` is guaranteed to have a `size_of` of 1.
-pub const NUM_SHARED_COLUMNS: usize = size_of::<CpuSharedColumnsView<u8>>();
+pub const NUM_SHARED_COLUMNS: usize = size_of::<CpuGeneralColumnsView<u8>>();

From 6ee2e4fcd879952434340dcb6f12882ee2a7c0d9 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 18 Jul 2022 12:21:29 -0700
Subject: [PATCH 03/85] move

---
 evm/src/cpu/columns/{shared.rs => general.rs} | 0
 evm/src/cpu/columns/mod.rs                    | 4 ++--
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename evm/src/cpu/columns/{shared.rs => general.rs} (100%)

diff --git a/evm/src/cpu/columns/shared.rs b/evm/src/cpu/columns/general.rs
similarity index 100%
rename from evm/src/cpu/columns/shared.rs
rename to evm/src/cpu/columns/general.rs
diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs
index bfb3f0a8..63e91085 100644
--- a/evm/src/cpu/columns/mod.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -6,10 +6,10 @@ use std::fmt::Debug;
 use std::mem::{size_of, transmute, transmute_copy, ManuallyDrop};
 use std::ops::{Index, IndexMut};
 
-use crate::cpu::columns::shared::CpuGeneralColumnsView;
+use crate::cpu::columns::general::CpuGeneralColumnsView;
 use crate::memory;
 
-mod shared;
+mod general;
 
 #[repr(C)]
 #[derive(Eq, PartialEq, Debug)]

From a281e28d545734ffae55b3b8a28b95298ee22ffd Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 21 Jul 2022 16:59:13 -0400
Subject: [PATCH 04/85] add rayon shim

---
 Cargo.toml                                |   2 +-
 maybe_rayon/Cargo.toml                    |  11 +
 maybe_rayon/src/lib.rs                    | 250 ++++++++++++++++++++++
 plonky2/Cargo.toml                        |   7 +-
 plonky2/examples/bench_recursion.rs       |   1 -
 plonky2/src/fri/oracle.rs                 |   6 +-
 plonky2/src/fri/prover.rs                 |   4 +-
 plonky2/src/hash/merkle_tree.rs           |   6 +-
 plonky2/src/plonk/permutation_argument.rs |   2 +-
 plonky2/src/plonk/proof.rs                |   2 +-
 plonky2/src/plonk/prover.rs               |   8 +-
 11 files changed, 283 insertions(+), 16 deletions(-)
 create mode 100644 maybe_rayon/Cargo.toml
 create mode 100644 maybe_rayon/src/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
index 8d14c3d0..a78d0a96 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = ["field", "insertion", "plonky2", "starky", "system_zero", "util", "waksman", "ecdsa", "u32", "evm"]
+members = ["field", "insertion", "plonky2", "starky", "system_zero", "util", "waksman", "ecdsa", "u32", "evm", "maybe_rayon"]
 
 [profile.release]
 opt-level = 3
diff --git a/maybe_rayon/Cargo.toml b/maybe_rayon/Cargo.toml
new file mode 100644
index 00000000..f8cc95fb
--- /dev/null
+++ b/maybe_rayon/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "maybe_rayon"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[features]
+parallel = ["rayon"]
+
+[dependencies]
+rayon = { version = "1.5.3", optional = true }
diff --git a/maybe_rayon/src/lib.rs b/maybe_rayon/src/lib.rs
new file mode 100644
index 00000000..6f8dc7a1
--- /dev/null
+++ b/maybe_rayon/src/lib.rs
@@ -0,0 +1,250 @@
+#[cfg(not(feature = "parallel"))]
+use std::{
+	iter::{IntoIterator, Iterator},
+	slice::{Chunks, ChunksExact, ChunksMut, ChunksExactMut},
+};
+
+#[cfg(feature = "parallel")]
+use rayon::{
+	prelude::*,
+	slice::{Chunks as ParChunks, ChunksMut as ParChunksMut, ChunksExact as ParChunksExact, ChunksExactMut as ParChunksExactMut, ParallelSlice, ParallelSliceMut}
+};
+
+#[cfg(feature = "parallel")]
+pub use rayon::prelude::{
+	ParallelIterator,
+	IndexedParallelIterator,
+	ParallelExtend,
+	ParallelDrainFull,
+	ParallelDrainRange
+};
+
+pub trait MaybeParIter<'data> {
+	#[cfg(feature = "parallel")]
+	type Item: Send + 'data;
+
+	#[cfg(feature = "parallel")]
+	type Iter: ParallelIterator<Item = Self::Item>;
+
+	#[cfg(not(feature = "parallel"))]
+	type Item;
+
+	#[cfg(not(feature = "parallel"))]
+	type Iter: Iterator<Item = Self::Item>;
+
+	fn par_iter(&'data self) -> Self::Iter;
+}
+
+#[cfg(feature = "parallel")]
+impl<'data, T> MaybeParIter<'data> for T where T: ?Sized + IntoParallelRefIterator<'data> {
+	type Item = T::Item;
+	type Iter = T::Iter;
+
+	fn par_iter(&'data self) -> Self::Iter {
+		self.par_iter()
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<'data, T: 'data> MaybeParIter<'data> for Vec<T> {
+	type Item = &'data T;
+	type Iter = std::slice::Iter<'data, T>;
+
+	fn par_iter(&'data self) -> Self::Iter {
+		self.iter()
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<'data, T: 'data> MaybeParIter<'data> for [T] {
+	type Item = &'data T;
+	type Iter = std::slice::Iter<'data, T>;
+
+	fn par_iter(&'data self) -> Self::Iter {
+		self.iter()
+	}
+}
+
+pub trait MaybeParIterMut<'data> {
+	#[cfg(feature = "parallel")]
+	type Item: Send + 'data;
+
+	#[cfg(feature = "parallel")]
+	type Iter: ParallelIterator<Item = Self::Item>;
+
+	#[cfg(not(feature = "parallel"))]
+	type Item;
+
+	#[cfg(not(feature = "parallel"))]
+	type Iter: Iterator<Item = Self::Item>;
+
+	fn par_iter_mut(&'data mut self) -> Self::Iter;
+}
+
+#[cfg(feature = "parallel")]
+impl<'data, T> MaybeParIterMut<'data> for T where T: ?Sized + IntoParallelRefMutIterator<'data> {
+	type Item = T::Item;
+	type Iter = T::Iter;
+
+	fn par_iter_mut(&'data mut self) -> Self::Iter {
+		self.par_iter_mut()
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<'data, T: 'data> MaybeParIterMut<'data> for Vec<T> {
+	type Item = &'data mut T;
+	type Iter = std::slice::IterMut<'data, T>;
+
+	fn par_iter_mut(&'data mut self) -> Self::Iter {
+		self.iter_mut()
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<'data, T: 'data> MaybeParIterMut<'data> for [T] {
+	type Item = &'data mut T;
+	type Iter = std::slice::IterMut<'data, T>;
+	
+	fn par_iter_mut(&'data mut self) -> Self::Iter {
+		self.iter_mut()
+	}
+}
+
+pub trait MaybeIntoParIter {
+	#[cfg(feature = "parallel")]
+	type Item: Send;
+
+	#[cfg(feature = "parallel")]
+	type Iter: ParallelIterator<Item = Self::Item>;
+
+	#[cfg(not(feature = "parallel"))]
+	type Item;
+
+	#[cfg(not(feature = "parallel"))]
+	type Iter: Iterator<Item = Self::Item>;
+
+	fn maybe_into_par_iter(self) -> Self::Iter;
+}
+
+#[cfg(feature = "parallel")]
+impl<T> MaybeIntoParIter for T where T: IntoParallelIterator {
+	type Item = T::Item;
+	type Iter = T::Iter;
+
+	fn maybe_into_par_iter(self) -> Self::Iter {
+		self.into_par_iter()
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<T> MaybeIntoParIter for T where T: IntoIterator {
+	type Item = T::Item;
+	type Iter = T::IntoIter;
+
+	fn maybe_into_par_iter(self) -> Self::Iter {
+		self.into_iter()
+	}
+}
+
+#[cfg(feature = "parallel")]
+pub trait MaybeParChunks<T: Sync> {
+	fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, T>;
+	fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, T>;
+}
+
+#[cfg(not(feature = "parallel"))]
+pub trait MaybeParChunks<T> {
+	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T>;
+	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T>;
+}
+
+#[cfg(feature = "parallel")]
+impl<T: ParallelSlice<U> + ?Sized, U: Sync> MaybeParChunks<U> for T {
+	fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, U> {
+		self.par_chunks(chunk_size)
+	}
+	fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, U> {
+		self.par_chunks_exact(chunk_size)
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<T> MaybeParChunks<T> for [T] {
+	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
+		self.chunks(chunk_size)
+	}
+
+	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
+		self.chunks_exact(chunk_size)
+	}
+}
+
+#[cfg(feature = "parallel")]
+pub trait MaybeParChunksMut<T: Send> {
+	fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, T>;
+	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, T>;
+}
+
+#[cfg(not(feature = "parallel"))]
+pub trait MaybeParChunksMut<T: Send> {
+	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T>;
+	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T>;
+}
+
+
+#[cfg(feature = "parallel")]
+impl<T: ?Sized + ParallelSliceMut<U>, U: Send> MaybeParChunksMut<U> for T {
+	fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, U> {
+		self.par_chunks_mut(chunk_size)
+	}
+	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, U> {
+		self.par_chunks_exact_mut(chunk_size)
+	}
+}
+
+#[cfg(not(feature = "parallel"))]
+impl<T: Send> MaybeParChunksMut<T> for [T] {
+	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
+		self.chunks_mut(chunk_size)
+	}
+	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
+		self.chunks_exact_mut(chunk_size)
+	}
+}
+
+pub trait ParallelIteratorMock {
+	type Item;
+	fn find_any<P>(self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send;
+}
+
+impl<T: Iterator> ParallelIteratorMock for T {
+	type Item = T::Item;
+
+	fn find_any<P>(mut self, predicate: P) -> Option<Self::Item>
+    where
+        P: Fn(&Self::Item) -> bool + Sync + Send
+	{
+		self.find(predicate)
+	}
+}
+
+#[cfg(feature = "parallel")]
+pub fn join<A, B, RA, RB>(oper_a: A, oper_b: B) -> (RA, RB)
+    where A: FnOnce() -> RA + Send,
+          B: FnOnce() -> RB + Send,
+          RA: Send,
+          RB: Send
+{
+    rayon::join(oper_a, oper_b)
+}
+
+#[cfg(not(feature = "parallel"))]
+pub fn join<A, B, RA, RB>(oper_a: A, oper_b: B) -> (RA, RB)
+    where A: FnOnce() -> RA,
+          B: FnOnce() -> RB,
+{
+    (oper_a(), oper_b())
+}
diff --git a/plonky2/Cargo.toml b/plonky2/Cargo.toml
index 9c019640..9ee89344 100644
--- a/plonky2/Cargo.toml
+++ b/plonky2/Cargo.toml
@@ -10,6 +10,10 @@ categories = ["cryptography"]
 edition = "2021"
 default-run = "generate_constants"
 
+[features]
+default = ["parallel"]
+parallel = ["maybe_rayon/parallel"]
+
 [dependencies]
 plonky2_field = { path = "../field" }
 plonky2_util = { path = "../util" }
@@ -19,7 +23,7 @@ itertools = "0.10.0"
 num = { version = "0.4", features = [ "rand" ] }
 rand = "0.8.4"
 rand_chacha = "0.3.1"
-rayon = "1.5.1"
+maybe_rayon = { path = "../maybe_rayon" }
 unroll = "0.1.5"
 anyhow = "1.0.40"
 serde = { version = "1.0", features = ["derive"] }
@@ -32,6 +36,7 @@ criterion = "0.3.5"
 tynm = "0.1.6"
 structopt = "0.3.26"
 num_cpus = "1.13.1"
+rayon = "1.5.1" 
 
 [target.'cfg(not(target_env = "msvc"))'.dev-dependencies]
 jemallocator = "0.3.2"
diff --git a/plonky2/examples/bench_recursion.rs b/plonky2/examples/bench_recursion.rs
index 1f2d127f..8073c9dc 100644
--- a/plonky2/examples/bench_recursion.rs
+++ b/plonky2/examples/bench_recursion.rs
@@ -2,7 +2,6 @@
 // custom CLI argument parsing (even with harness disabled). We could also have
 // put it in `src/bin/`, but then we wouldn't have access to
 // `[dev-dependencies]`.
-
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
 
diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index 312b458b..da4e9e80 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -5,7 +5,7 @@ use plonky2_field::packed::PackedField;
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_field::types::Field;
 use plonky2_util::{log2_strict, reverse_index_bits_in_place};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::fri::proof::FriProof;
 use crate::fri::prover::fri_proof;
@@ -52,7 +52,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
         let coeffs = timed!(
             timing,
             "IFFT",
-            values.into_par_iter().map(|v| v.ifft()).collect::<Vec<_>>()
+            values.maybe_into_par_iter().map(|v| v.ifft()).collect::<Vec<_>>()
         );
 
         Self::from_coeffs(
@@ -122,7 +122,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
             })
             .chain(
                 (0..salt_size)
-                    .into_par_iter()
+                    .maybe_into_par_iter()
                     .map(|_| F::rand_vec(degree << rate_bits)),
             )
             .collect()
diff --git a/plonky2/src/fri/prover.rs b/plonky2/src/fri/prover.rs
index 6136a9a1..0f3215a8 100644
--- a/plonky2/src/fri/prover.rs
+++ b/plonky2/src/fri/prover.rs
@@ -2,7 +2,7 @@ use itertools::Itertools;
 use plonky2_field::extension::{flatten, unflatten, Extendable};
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_util::reverse_index_bits_in_place;
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::fri::proof::{FriInitialTreeProof, FriProof, FriQueryRound, FriQueryStep};
 use crate::fri::{FriConfig, FriParams};
@@ -119,7 +119,7 @@ fn fri_proof_of_work<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, c
     config: &FriConfig,
 ) -> F {
     (0..=F::NEG_ONE.to_canonical_u64())
-        .into_par_iter()
+        .maybe_into_par_iter()
         .find_any(|&i| {
             C::InnerHasher::hash_no_pad(
                 &current_hash
diff --git a/plonky2/src/hash/merkle_tree.rs b/plonky2/src/hash/merkle_tree.rs
index 69cf2ef9..f7b6d4a2 100644
--- a/plonky2/src/hash/merkle_tree.rs
+++ b/plonky2/src/hash/merkle_tree.rs
@@ -2,9 +2,9 @@ use std::mem::MaybeUninit;
 use std::slice;
 
 use plonky2_util::log2_strict;
-use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 
+use maybe_rayon::*;
 use crate::hash::hash_types::RichField;
 use crate::hash::merkle_proofs::MerkleProof;
 use crate::plonk::config::GenericHashOut;
@@ -77,10 +77,12 @@ where
         let (right_digest_mem, right_digests_buf) = right_digests_buf.split_first_mut().unwrap();
         // Split `leaves` between both children.
         let (left_leaves, right_leaves) = leaves.split_at(leaves.len() / 2);
-        let (left_digest, right_digest) = rayon::join(
+
+        let (left_digest, right_digest) = maybe_rayon::join(
             || fill_subtree::<F, H>(left_digests_buf, left_leaves),
             || fill_subtree::<F, H>(right_digests_buf, right_leaves),
         );
+
         left_digest_mem.write(left_digest);
         right_digest_mem.write(right_digest);
         H::two_to_one(left_digest, right_digest)
diff --git a/plonky2/src/plonk/permutation_argument.rs b/plonky2/src/plonk/permutation_argument.rs
index 076c2a7a..f9b23796 100644
--- a/plonky2/src/plonk/permutation_argument.rs
+++ b/plonky2/src/plonk/permutation_argument.rs
@@ -2,7 +2,7 @@ use std::collections::HashMap;
 
 use plonky2_field::polynomial::PolynomialValues;
 use plonky2_field::types::Field;
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::iop::target::Target;
 use crate::iop::wire::Wire;
diff --git a/plonky2/src/plonk/proof.rs b/plonky2/src/plonk/proof.rs
index 18af1f73..1cb83b14 100644
--- a/plonky2/src/plonk/proof.rs
+++ b/plonky2/src/plonk/proof.rs
@@ -1,7 +1,7 @@
 use anyhow::ensure;
 use plonky2_field::extension::Extendable;
-use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
+use maybe_rayon::*;
 
 use crate::fri::oracle::PolynomialBatch;
 use crate::fri::proof::{
diff --git a/plonky2/src/plonk/prover.rs b/plonky2/src/plonk/prover.rs
index 26626208..526721f0 100644
--- a/plonky2/src/plonk/prover.rs
+++ b/plonky2/src/plonk/prover.rs
@@ -6,7 +6,7 @@ use plonky2_field::extension::Extendable;
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_field::zero_poly_coset::ZeroPolyOnCoset;
 use plonky2_util::{ceil_div_usize, log2_ceil};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::field::types::Field;
 use crate::fri::oracle::PolynomialBatch;
@@ -142,7 +142,7 @@ where
         timing,
         "split up quotient polys",
         quotient_polys
-            .into_par_iter()
+            .maybe_into_par_iter()
             .flat_map(|mut quotient_poly| {
                 quotient_poly.trim_to_len(quotient_degree).expect(
                     "Quotient has failed, the vanishing polynomial is not divisible by Z_H",
@@ -305,7 +305,7 @@ fn wires_permutation_partial_products_and_zs<
     }
 
     transpose(&all_partial_products_and_zs)
-        .into_par_iter()
+        .maybe_into_par_iter()
         .map(PolynomialValues::new)
         .collect()
 }
@@ -452,7 +452,7 @@ fn compute_quotient_polys<
         .collect();
 
     transpose(&quotient_values)
-        .into_par_iter()
+        .maybe_into_par_iter()
         .map(PolynomialValues::new)
         .map(|values| values.coset_ifft(F::coset_shift()))
         .collect()

From 529add1c0aae0baca0e150777ae774c6a0814d9c Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 21 Jul 2022 17:01:21 -0400
Subject: [PATCH 05/85] switch rest of names back

---
 maybe_rayon/src/lib.rs      | 6 +++---
 plonky2/src/fri/oracle.rs   | 4 ++--
 plonky2/src/fri/prover.rs   | 2 +-
 plonky2/src/plonk/prover.rs | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/maybe_rayon/src/lib.rs b/maybe_rayon/src/lib.rs
index 6f8dc7a1..7c7c685a 100644
--- a/maybe_rayon/src/lib.rs
+++ b/maybe_rayon/src/lib.rs
@@ -124,7 +124,7 @@ pub trait MaybeIntoParIter {
 	#[cfg(not(feature = "parallel"))]
 	type Iter: Iterator<Item = Self::Item>;
 
-	fn maybe_into_par_iter(self) -> Self::Iter;
+	fn into_par_iter(self) -> Self::Iter;
 }
 
 #[cfg(feature = "parallel")]
@@ -132,7 +132,7 @@ impl<T> MaybeIntoParIter for T where T: IntoParallelIterator {
 	type Item = T::Item;
 	type Iter = T::Iter;
 
-	fn maybe_into_par_iter(self) -> Self::Iter {
+	fn into_par_iter(self) -> Self::Iter {
 		self.into_par_iter()
 	}
 }
@@ -142,7 +142,7 @@ impl<T> MaybeIntoParIter for T where T: IntoIterator {
 	type Item = T::Item;
 	type Iter = T::IntoIter;
 
-	fn maybe_into_par_iter(self) -> Self::Iter {
+	fn into_par_iter(self) -> Self::Iter {
 		self.into_iter()
 	}
 }
diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index da4e9e80..47647701 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -52,7 +52,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
         let coeffs = timed!(
             timing,
             "IFFT",
-            values.maybe_into_par_iter().map(|v| v.ifft()).collect::<Vec<_>>()
+            values.into_par_iter().map(|v| v.ifft()).collect::<Vec<_>>()
         );
 
         Self::from_coeffs(
@@ -122,7 +122,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
             })
             .chain(
                 (0..salt_size)
-                    .maybe_into_par_iter()
+                    .into_par_iter()
                     .map(|_| F::rand_vec(degree << rate_bits)),
             )
             .collect()
diff --git a/plonky2/src/fri/prover.rs b/plonky2/src/fri/prover.rs
index 0f3215a8..94389a7e 100644
--- a/plonky2/src/fri/prover.rs
+++ b/plonky2/src/fri/prover.rs
@@ -119,7 +119,7 @@ fn fri_proof_of_work<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, c
     config: &FriConfig,
 ) -> F {
     (0..=F::NEG_ONE.to_canonical_u64())
-        .maybe_into_par_iter()
+        .into_par_iter()
         .find_any(|&i| {
             C::InnerHasher::hash_no_pad(
                 &current_hash
diff --git a/plonky2/src/plonk/prover.rs b/plonky2/src/plonk/prover.rs
index 526721f0..9275e241 100644
--- a/plonky2/src/plonk/prover.rs
+++ b/plonky2/src/plonk/prover.rs
@@ -142,7 +142,7 @@ where
         timing,
         "split up quotient polys",
         quotient_polys
-            .maybe_into_par_iter()
+            .into_par_iter()
             .flat_map(|mut quotient_poly| {
                 quotient_poly.trim_to_len(quotient_degree).expect(
                     "Quotient has failed, the vanishing polynomial is not divisible by Z_H",
@@ -305,7 +305,7 @@ fn wires_permutation_partial_products_and_zs<
     }
 
     transpose(&all_partial_products_and_zs)
-        .maybe_into_par_iter()
+        .into_par_iter()
         .map(PolynomialValues::new)
         .collect()
 }
@@ -452,7 +452,7 @@ fn compute_quotient_polys<
         .collect();
 
     transpose(&quotient_values)
-        .maybe_into_par_iter()
+        .into_par_iter()
         .map(PolynomialValues::new)
         .map(|values| values.coset_ifft(F::coset_shift()))
         .collect()

From 81e14bf5b3b2e9e0791d5f776328280d9eda9b1f Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 21 Jul 2022 17:02:03 -0400
Subject: [PATCH 06/85] fmt

---
 maybe_rayon/src/lib.rs                    | 276 +++++++++++-----------
 plonky2/src/fri/oracle.rs                 |   2 +-
 plonky2/src/fri/prover.rs                 |   2 +-
 plonky2/src/hash/merkle_tree.rs           |   2 +-
 plonky2/src/plonk/permutation_argument.rs |   2 +-
 plonky2/src/plonk/proof.rs                |   2 +-
 plonky2/src/plonk/prover.rs               |   2 +-
 7 files changed, 150 insertions(+), 138 deletions(-)

diff --git a/maybe_rayon/src/lib.rs b/maybe_rayon/src/lib.rs
index 7c7c685a..1a9bd823 100644
--- a/maybe_rayon/src/lib.rs
+++ b/maybe_rayon/src/lib.rs
@@ -1,250 +1,262 @@
 #[cfg(not(feature = "parallel"))]
 use std::{
-	iter::{IntoIterator, Iterator},
-	slice::{Chunks, ChunksExact, ChunksMut, ChunksExactMut},
-};
-
-#[cfg(feature = "parallel")]
-use rayon::{
-	prelude::*,
-	slice::{Chunks as ParChunks, ChunksMut as ParChunksMut, ChunksExact as ParChunksExact, ChunksExactMut as ParChunksExactMut, ParallelSlice, ParallelSliceMut}
+    iter::{IntoIterator, Iterator},
+    slice::{Chunks, ChunksExact, ChunksExactMut, ChunksMut},
 };
 
 #[cfg(feature = "parallel")]
 pub use rayon::prelude::{
-	ParallelIterator,
-	IndexedParallelIterator,
-	ParallelExtend,
-	ParallelDrainFull,
-	ParallelDrainRange
+    IndexedParallelIterator, ParallelDrainFull, ParallelDrainRange, ParallelExtend,
+    ParallelIterator,
+};
+#[cfg(feature = "parallel")]
+use rayon::{
+    prelude::*,
+    slice::{
+        Chunks as ParChunks, ChunksExact as ParChunksExact, ChunksExactMut as ParChunksExactMut,
+        ChunksMut as ParChunksMut, ParallelSlice, ParallelSliceMut,
+    },
 };
 
 pub trait MaybeParIter<'data> {
-	#[cfg(feature = "parallel")]
-	type Item: Send + 'data;
+    #[cfg(feature = "parallel")]
+    type Item: Send + 'data;
 
-	#[cfg(feature = "parallel")]
-	type Iter: ParallelIterator<Item = Self::Item>;
+    #[cfg(feature = "parallel")]
+    type Iter: ParallelIterator<Item = Self::Item>;
 
-	#[cfg(not(feature = "parallel"))]
-	type Item;
+    #[cfg(not(feature = "parallel"))]
+    type Item;
 
-	#[cfg(not(feature = "parallel"))]
-	type Iter: Iterator<Item = Self::Item>;
+    #[cfg(not(feature = "parallel"))]
+    type Iter: Iterator<Item = Self::Item>;
 
-	fn par_iter(&'data self) -> Self::Iter;
+    fn par_iter(&'data self) -> Self::Iter;
 }
 
 #[cfg(feature = "parallel")]
-impl<'data, T> MaybeParIter<'data> for T where T: ?Sized + IntoParallelRefIterator<'data> {
-	type Item = T::Item;
-	type Iter = T::Iter;
+impl<'data, T> MaybeParIter<'data> for T
+where
+    T: ?Sized + IntoParallelRefIterator<'data>,
+{
+    type Item = T::Item;
+    type Iter = T::Iter;
 
-	fn par_iter(&'data self) -> Self::Iter {
-		self.par_iter()
-	}
+    fn par_iter(&'data self) -> Self::Iter {
+        self.par_iter()
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<'data, T: 'data> MaybeParIter<'data> for Vec<T> {
-	type Item = &'data T;
-	type Iter = std::slice::Iter<'data, T>;
+    type Item = &'data T;
+    type Iter = std::slice::Iter<'data, T>;
 
-	fn par_iter(&'data self) -> Self::Iter {
-		self.iter()
-	}
+    fn par_iter(&'data self) -> Self::Iter {
+        self.iter()
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<'data, T: 'data> MaybeParIter<'data> for [T] {
-	type Item = &'data T;
-	type Iter = std::slice::Iter<'data, T>;
+    type Item = &'data T;
+    type Iter = std::slice::Iter<'data, T>;
 
-	fn par_iter(&'data self) -> Self::Iter {
-		self.iter()
-	}
+    fn par_iter(&'data self) -> Self::Iter {
+        self.iter()
+    }
 }
 
 pub trait MaybeParIterMut<'data> {
-	#[cfg(feature = "parallel")]
-	type Item: Send + 'data;
+    #[cfg(feature = "parallel")]
+    type Item: Send + 'data;
 
-	#[cfg(feature = "parallel")]
-	type Iter: ParallelIterator<Item = Self::Item>;
+    #[cfg(feature = "parallel")]
+    type Iter: ParallelIterator<Item = Self::Item>;
 
-	#[cfg(not(feature = "parallel"))]
-	type Item;
+    #[cfg(not(feature = "parallel"))]
+    type Item;
 
-	#[cfg(not(feature = "parallel"))]
-	type Iter: Iterator<Item = Self::Item>;
+    #[cfg(not(feature = "parallel"))]
+    type Iter: Iterator<Item = Self::Item>;
 
-	fn par_iter_mut(&'data mut self) -> Self::Iter;
+    fn par_iter_mut(&'data mut self) -> Self::Iter;
 }
 
 #[cfg(feature = "parallel")]
-impl<'data, T> MaybeParIterMut<'data> for T where T: ?Sized + IntoParallelRefMutIterator<'data> {
-	type Item = T::Item;
-	type Iter = T::Iter;
+impl<'data, T> MaybeParIterMut<'data> for T
+where
+    T: ?Sized + IntoParallelRefMutIterator<'data>,
+{
+    type Item = T::Item;
+    type Iter = T::Iter;
 
-	fn par_iter_mut(&'data mut self) -> Self::Iter {
-		self.par_iter_mut()
-	}
+    fn par_iter_mut(&'data mut self) -> Self::Iter {
+        self.par_iter_mut()
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<'data, T: 'data> MaybeParIterMut<'data> for Vec<T> {
-	type Item = &'data mut T;
-	type Iter = std::slice::IterMut<'data, T>;
+    type Item = &'data mut T;
+    type Iter = std::slice::IterMut<'data, T>;
 
-	fn par_iter_mut(&'data mut self) -> Self::Iter {
-		self.iter_mut()
-	}
+    fn par_iter_mut(&'data mut self) -> Self::Iter {
+        self.iter_mut()
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<'data, T: 'data> MaybeParIterMut<'data> for [T] {
-	type Item = &'data mut T;
-	type Iter = std::slice::IterMut<'data, T>;
-	
-	fn par_iter_mut(&'data mut self) -> Self::Iter {
-		self.iter_mut()
-	}
+    type Item = &'data mut T;
+    type Iter = std::slice::IterMut<'data, T>;
+
+    fn par_iter_mut(&'data mut self) -> Self::Iter {
+        self.iter_mut()
+    }
 }
 
 pub trait MaybeIntoParIter {
-	#[cfg(feature = "parallel")]
-	type Item: Send;
+    #[cfg(feature = "parallel")]
+    type Item: Send;
 
-	#[cfg(feature = "parallel")]
-	type Iter: ParallelIterator<Item = Self::Item>;
+    #[cfg(feature = "parallel")]
+    type Iter: ParallelIterator<Item = Self::Item>;
 
-	#[cfg(not(feature = "parallel"))]
-	type Item;
+    #[cfg(not(feature = "parallel"))]
+    type Item;
 
-	#[cfg(not(feature = "parallel"))]
-	type Iter: Iterator<Item = Self::Item>;
+    #[cfg(not(feature = "parallel"))]
+    type Iter: Iterator<Item = Self::Item>;
 
-	fn into_par_iter(self) -> Self::Iter;
+    fn into_par_iter(self) -> Self::Iter;
 }
 
 #[cfg(feature = "parallel")]
-impl<T> MaybeIntoParIter for T where T: IntoParallelIterator {
-	type Item = T::Item;
-	type Iter = T::Iter;
+impl<T> MaybeIntoParIter for T
+where
+    T: IntoParallelIterator,
+{
+    type Item = T::Item;
+    type Iter = T::Iter;
 
-	fn into_par_iter(self) -> Self::Iter {
-		self.into_par_iter()
-	}
+    fn into_par_iter(self) -> Self::Iter {
+        self.into_par_iter()
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
-impl<T> MaybeIntoParIter for T where T: IntoIterator {
-	type Item = T::Item;
-	type Iter = T::IntoIter;
+impl<T> MaybeIntoParIter for T
+where
+    T: IntoIterator,
+{
+    type Item = T::Item;
+    type Iter = T::IntoIter;
 
-	fn into_par_iter(self) -> Self::Iter {
-		self.into_iter()
-	}
+    fn into_par_iter(self) -> Self::Iter {
+        self.into_iter()
+    }
 }
 
 #[cfg(feature = "parallel")]
 pub trait MaybeParChunks<T: Sync> {
-	fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, T>;
-	fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, T>;
+    fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, T>;
+    fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, T>;
 }
 
 #[cfg(not(feature = "parallel"))]
 pub trait MaybeParChunks<T> {
-	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T>;
-	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T>;
+    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T>;
+    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T>;
 }
 
 #[cfg(feature = "parallel")]
 impl<T: ParallelSlice<U> + ?Sized, U: Sync> MaybeParChunks<U> for T {
-	fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, U> {
-		self.par_chunks(chunk_size)
-	}
-	fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, U> {
-		self.par_chunks_exact(chunk_size)
-	}
+    fn par_chunks(&self, chunk_size: usize) -> ParChunks<'_, U> {
+        self.par_chunks(chunk_size)
+    }
+    fn par_chunks_exact(&self, chunk_size: usize) -> ParChunksExact<'_, U> {
+        self.par_chunks_exact(chunk_size)
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<T> MaybeParChunks<T> for [T] {
-	fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
-		self.chunks(chunk_size)
-	}
+    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
+        self.chunks(chunk_size)
+    }
 
-	fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
-		self.chunks_exact(chunk_size)
-	}
+    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
+        self.chunks_exact(chunk_size)
+    }
 }
 
 #[cfg(feature = "parallel")]
 pub trait MaybeParChunksMut<T: Send> {
-	fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, T>;
-	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, T>;
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, T>;
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, T>;
 }
 
 #[cfg(not(feature = "parallel"))]
 pub trait MaybeParChunksMut<T: Send> {
-	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T>;
-	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T>;
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T>;
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T>;
 }
 
-
 #[cfg(feature = "parallel")]
 impl<T: ?Sized + ParallelSliceMut<U>, U: Send> MaybeParChunksMut<U> for T {
-	fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, U> {
-		self.par_chunks_mut(chunk_size)
-	}
-	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, U> {
-		self.par_chunks_exact_mut(chunk_size)
-	}
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ParChunksMut<'_, U> {
+        self.par_chunks_mut(chunk_size)
+    }
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ParChunksExactMut<'_, U> {
+        self.par_chunks_exact_mut(chunk_size)
+    }
 }
 
 #[cfg(not(feature = "parallel"))]
 impl<T: Send> MaybeParChunksMut<T> for [T] {
-	fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
-		self.chunks_mut(chunk_size)
-	}
-	fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
-		self.chunks_exact_mut(chunk_size)
-	}
+    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
+        self.chunks_mut(chunk_size)
+    }
+    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
+        self.chunks_exact_mut(chunk_size)
+    }
 }
 
 pub trait ParallelIteratorMock {
-	type Item;
-	fn find_any<P>(self, predicate: P) -> Option<Self::Item>
+    type Item;
+    fn find_any<P>(self, predicate: P) -> Option<Self::Item>
     where
         P: Fn(&Self::Item) -> bool + Sync + Send;
 }
 
 impl<T: Iterator> ParallelIteratorMock for T {
-	type Item = T::Item;
+    type Item = T::Item;
 
-	fn find_any<P>(mut self, predicate: P) -> Option<Self::Item>
+    fn find_any<P>(mut self, predicate: P) -> Option<Self::Item>
     where
-        P: Fn(&Self::Item) -> bool + Sync + Send
-	{
-		self.find(predicate)
-	}
+        P: Fn(&Self::Item) -> bool + Sync + Send,
+    {
+        self.find(predicate)
+    }
 }
 
 #[cfg(feature = "parallel")]
 pub fn join<A, B, RA, RB>(oper_a: A, oper_b: B) -> (RA, RB)
-    where A: FnOnce() -> RA + Send,
-          B: FnOnce() -> RB + Send,
-          RA: Send,
-          RB: Send
+where
+    A: FnOnce() -> RA + Send,
+    B: FnOnce() -> RB + Send,
+    RA: Send,
+    RB: Send,
 {
     rayon::join(oper_a, oper_b)
 }
 
 #[cfg(not(feature = "parallel"))]
 pub fn join<A, B, RA, RB>(oper_a: A, oper_b: B) -> (RA, RB)
-    where A: FnOnce() -> RA,
-          B: FnOnce() -> RB,
+where
+    A: FnOnce() -> RA,
+    B: FnOnce() -> RB,
 {
     (oper_a(), oper_b())
 }
diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index 47647701..1f5b648f 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -1,11 +1,11 @@
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2_field::extension::Extendable;
 use plonky2_field::fft::FftRootTable;
 use plonky2_field::packed::PackedField;
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_field::types::Field;
 use plonky2_util::{log2_strict, reverse_index_bits_in_place};
-use maybe_rayon::*;
 
 use crate::fri::proof::FriProof;
 use crate::fri::prover::fri_proof;
diff --git a/plonky2/src/fri/prover.rs b/plonky2/src/fri/prover.rs
index 94389a7e..39e25869 100644
--- a/plonky2/src/fri/prover.rs
+++ b/plonky2/src/fri/prover.rs
@@ -1,8 +1,8 @@
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2_field::extension::{flatten, unflatten, Extendable};
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_util::reverse_index_bits_in_place;
-use maybe_rayon::*;
 
 use crate::fri::proof::{FriInitialTreeProof, FriProof, FriQueryRound, FriQueryStep};
 use crate::fri::{FriConfig, FriParams};
diff --git a/plonky2/src/hash/merkle_tree.rs b/plonky2/src/hash/merkle_tree.rs
index f7b6d4a2..1da66bff 100644
--- a/plonky2/src/hash/merkle_tree.rs
+++ b/plonky2/src/hash/merkle_tree.rs
@@ -1,10 +1,10 @@
 use std::mem::MaybeUninit;
 use std::slice;
 
+use maybe_rayon::*;
 use plonky2_util::log2_strict;
 use serde::{Deserialize, Serialize};
 
-use maybe_rayon::*;
 use crate::hash::hash_types::RichField;
 use crate::hash::merkle_proofs::MerkleProof;
 use crate::plonk::config::GenericHashOut;
diff --git a/plonky2/src/plonk/permutation_argument.rs b/plonky2/src/plonk/permutation_argument.rs
index f9b23796..3658a12d 100644
--- a/plonky2/src/plonk/permutation_argument.rs
+++ b/plonky2/src/plonk/permutation_argument.rs
@@ -1,8 +1,8 @@
 use std::collections::HashMap;
 
+use maybe_rayon::*;
 use plonky2_field::polynomial::PolynomialValues;
 use plonky2_field::types::Field;
-use maybe_rayon::*;
 
 use crate::iop::target::Target;
 use crate::iop::wire::Wire;
diff --git a/plonky2/src/plonk/proof.rs b/plonky2/src/plonk/proof.rs
index 1cb83b14..922a24bb 100644
--- a/plonky2/src/plonk/proof.rs
+++ b/plonky2/src/plonk/proof.rs
@@ -1,7 +1,7 @@
 use anyhow::ensure;
+use maybe_rayon::*;
 use plonky2_field::extension::Extendable;
 use serde::{Deserialize, Serialize};
-use maybe_rayon::*;
 
 use crate::fri::oracle::PolynomialBatch;
 use crate::fri::proof::{
diff --git a/plonky2/src/plonk/prover.rs b/plonky2/src/plonk/prover.rs
index 9275e241..3e81942b 100644
--- a/plonky2/src/plonk/prover.rs
+++ b/plonky2/src/plonk/prover.rs
@@ -2,11 +2,11 @@ use std::mem::swap;
 
 use anyhow::ensure;
 use anyhow::Result;
+use maybe_rayon::*;
 use plonky2_field::extension::Extendable;
 use plonky2_field::polynomial::{PolynomialCoeffs, PolynomialValues};
 use plonky2_field::zero_poly_coset::ZeroPolyOnCoset;
 use plonky2_util::{ceil_div_usize, log2_ceil};
-use maybe_rayon::*;
 
 use crate::field::types::Field;
 use crate::fri::oracle::PolynomialBatch;

From e93235d07e606f8849f032a653623f95e1ed0747 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Fri, 22 Jul 2022 18:26:15 +0200
Subject: [PATCH 07/85] Modify `inverse` asm

---
 evm/src/cpu/kernel/asm/moddiv.asm | 491 ++----------------------------
 1 file changed, 17 insertions(+), 474 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/moddiv.asm
index 891897e5..3340faa9 100644
--- a/evm/src/cpu/kernel/asm/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/moddiv.asm
@@ -11,7 +11,7 @@
 
 %macro mulmodn
     // stack: x, y
-    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
+    %bn_base
     // stack: N, x, y
     SWAP2
     // stack: y, x, N
@@ -25,480 +25,23 @@
     %mulmodn
 %endmacro
 
-// Computes the inverse modulo N using x^-1 = x^(N-2) mod N and square-and-multiply modular exponentiation.
+// Computes the inverse modulo N by providing it non-deterministically.
 %macro inverse
-    DUP1
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
-    %squaremodn
-    %squaremodn
-    DUP2
-    %mulmodn
+    // stack: x
+    PROVER_INPUT
+    // stack: x^-1, x
+    %bn_base
+    // stack: N, x^-1, x
+    DUP3
+    // stack: x, N, x^-1, x
+    DUP3
+    // stack: x^-1, x, N, x^-1, x
+    MULMOD
+    // stack: x^-1 * x, x^-1, x
+    PUSH 1
+    // stack: 1, x^-1 * x, x^-1, x
+    %assert_eq
+    // stack: x^-1, x
     SWAP1
     // stack: x, x^-1
     POP

From 0afe98525b8e54b6cbdae7fc1ebcbc319a62a0e9 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Fri, 22 Jul 2022 19:25:06 +0200
Subject: [PATCH 08/85] Minor

---
 evm/src/cpu/kernel/asm/moddiv.asm | 8 ++------
 evm/src/cpu/kernel/evm_asm.pest   | 2 ++
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/moddiv.asm
index 3340faa9..630dcc54 100644
--- a/evm/src/cpu/kernel/asm/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/moddiv.asm
@@ -28,13 +28,9 @@
 // Computes the inverse modulo N by providing it non-deterministically.
 %macro inverse
     // stack: x
-    PROVER_INPUT
+    PROVER_INPUT(ff::bn254_base::inverse)
     // stack: x^-1, x
-    %bn_base
-    // stack: N, x^-1, x
-    DUP3
-    // stack: x, N, x^-1, x
-    DUP3
+    %stack (inv, x) -> (inv, x, @BN_BASE, inv, x)
     // stack: x^-1, x, N, x^-1, x
     MULMOD
     // stack: x^-1 * x, x^-1, x
diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest
index 78938b64..943e8dae 100644
--- a/evm/src/cpu/kernel/evm_asm.pest
+++ b/evm/src/cpu/kernel/evm_asm.pest
@@ -29,6 +29,8 @@ local_label = { identifier ~ ":" }
 bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* }
 push_instruction = { ^"PUSH " ~ push_target }
 push_target = { literal | identifier | variable | constant }
+prover_input_instruction = { ^"PROVER_INPUT " ~ "(" ~ prover_input_fn ~ ")" } // TODO: Can also support extra arguments.
+prover_input_fn = { identifier ~ ("::" ~ identifier)*}
 nullary_instruction = { identifier }
 
 file = { SOI ~ item* ~ silent_eoi }

From ec97f8497fd736a3aca622ac4316b600e8ad394a Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 11:16:45 +0200
Subject: [PATCH 09/85] Modify parser

---
 evm/src/cpu/kernel/assembler.rs    | 30 ++++++++++++++++++++++++++----
 evm/src/cpu/kernel/ast.rs          |  4 ++++
 evm/src/cpu/kernel/evm_asm.pest    |  4 ++--
 evm/src/cpu/kernel/mod.rs          |  1 +
 evm/src/cpu/kernel/parser.rs       |  9 +++++++++
 evm/src/cpu/kernel/prover_input.rs |  8 ++++++++
 6 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 evm/src/cpu/kernel/prover_input.rs

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 4dbc46ca..334220fd 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -7,6 +7,7 @@ use log::debug;
 use super::ast::PushTarget;
 use crate::cpu::kernel::ast::{Literal, StackReplacement};
 use crate::cpu::kernel::keccak_util::hash_kernel;
+use crate::cpu::kernel::prover_input::ProverInputFn;
 use crate::cpu::kernel::stack_manipulation::expand_stack_manipulation;
 use crate::cpu::kernel::{
     ast::{File, Item},
@@ -27,15 +28,22 @@ pub struct Kernel {
     pub(crate) code_hash: [u32; 8],
 
     pub(crate) global_labels: HashMap<String, usize>,
+
+    pub(crate) prover_inputs: HashMap<usize, ProverInputFn>,
 }
 
 impl Kernel {
-    fn new(code: Vec<u8>, global_labels: HashMap<String, usize>) -> Self {
+    fn new(
+        code: Vec<u8>,
+        global_labels: HashMap<String, usize>,
+        prover_inputs: HashMap<usize, ProverInputFn>,
+    ) -> Self {
         let code_hash = hash_kernel(&code);
         Self {
             code,
             code_hash,
             global_labels,
+            prover_inputs,
         }
     }
 }
@@ -57,6 +65,7 @@ impl Macro {
 pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Kernel {
     let macros = find_macros(&files);
     let mut global_labels = HashMap::new();
+    let mut prover_inputs = HashMap::new();
     let mut offset = 0;
     let mut expanded_files = Vec::with_capacity(files.len());
     let mut local_labels = Vec::with_capacity(files.len());
@@ -65,7 +74,12 @@ pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Ke
         let expanded_file = expand_repeats(expanded_file);
         let expanded_file = inline_constants(expanded_file, &constants);
         let expanded_file = expand_stack_manipulation(expanded_file);
-        local_labels.push(find_labels(&expanded_file, &mut offset, &mut global_labels));
+        local_labels.push(find_labels(
+            &expanded_file,
+            &mut offset,
+            &mut global_labels,
+            &mut prover_inputs,
+        ));
         expanded_files.push(expanded_file);
     }
     let mut code = vec![];
@@ -76,7 +90,7 @@ pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Ke
         debug!("Assembled file size: {} bytes", file_len);
     }
     assert_eq!(code.len(), offset, "Code length doesn't match offset.");
-    Kernel::new(code, global_labels)
+    Kernel::new(code, global_labels, prover_inputs)
 }
 
 fn find_macros(files: &[File]) -> HashMap<String, Macro> {
@@ -217,6 +231,7 @@ fn find_labels(
     body: &[Item],
     offset: &mut usize,
     global_labels: &mut HashMap<String, usize>,
+    prover_inputs: &mut HashMap<usize, ProverInputFn>,
 ) -> HashMap<String, usize> {
     // Discover the offset of each label in this file.
     let mut local_labels = HashMap::<String, usize>::new();
@@ -237,6 +252,10 @@ fn find_labels(
                 assert!(old.is_none(), "Duplicate local label: {}", label);
             }
             Item::Push(target) => *offset += 1 + push_target_size(target) as usize,
+            Item::ProverInput(prover_input_fn) => {
+                prover_inputs.insert(*offset, prover_input_fn.clone());
+                *offset += 1;
+            }
             Item::StandardOp(_) => *offset += 1,
             Item::Bytes(bytes) => *offset += bytes.len(),
         }
@@ -283,6 +302,9 @@ fn assemble_file(
                 code.push(get_push_opcode(target_bytes.len() as u8));
                 code.extend(target_bytes);
             }
+            Item::ProverInput(_) => {
+                code.push(get_opcode("PROVER_INPUT"));
+            }
             Item::StandardOp(opcode) => {
                 code.push(get_opcode(&opcode));
             }
@@ -357,7 +379,7 @@ mod tests {
         expected_global_labels.insert("function_1".to_string(), 0);
         expected_global_labels.insert("function_2".to_string(), 3);
 
-        let expected_kernel = Kernel::new(expected_code, expected_global_labels);
+        let expected_kernel = Kernel::new(expected_code, expected_global_labels, HashMap::new());
 
         let program = vec![file_1, file_2];
         assert_eq!(assemble(program, HashMap::new()), expected_kernel);
diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
index 92728104..9580d9c6 100644
--- a/evm/src/cpu/kernel/ast.rs
+++ b/evm/src/cpu/kernel/ast.rs
@@ -1,6 +1,8 @@
 use ethereum_types::U256;
 use plonky2_util::ceil_div_usize;
 
+use crate::cpu::kernel::prover_input::ProverInputFn;
+
 #[derive(Debug)]
 pub(crate) struct File {
     pub(crate) body: Vec<Item>,
@@ -25,6 +27,8 @@ pub(crate) enum Item {
     LocalLabelDeclaration(String),
     /// A `PUSH` operation.
     Push(PushTarget),
+    /// A `ProverInput` operation.
+    ProverInput(ProverInputFn),
     /// Any opcode besides a PUSH opcode.
     StandardOp(String),
     /// Literal hex data; should contain an even number of hex chars.
diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest
index 943e8dae..b0033391 100644
--- a/evm/src/cpu/kernel/evm_asm.pest
+++ b/evm/src/cpu/kernel/evm_asm.pest
@@ -15,7 +15,7 @@ literal = { literal_hex | literal_decimal }
 variable = ${ "$" ~ identifier }
 constant = ${ "@" ~ identifier }
 
-item = { macro_def | macro_call | repeat | stack | global_label | local_label | bytes_item | push_instruction | nullary_instruction }
+item = { macro_def | macro_call | repeat | stack | global_label | local_label | bytes_item | push_instruction | prover_input_instruction | nullary_instruction }
 macro_def = { ^"%macro" ~ identifier ~ paramlist? ~ item* ~ ^"%endmacro" }
 macro_call = ${ "%" ~ !(^"macro" | ^"endmacro" | ^"rep" | ^"endrep" | ^"stack") ~ identifier ~ macro_arglist? }
 repeat = { ^"%rep" ~ literal ~ item* ~ ^"%endrep" }
@@ -29,7 +29,7 @@ local_label = { identifier ~ ":" }
 bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* }
 push_instruction = { ^"PUSH " ~ push_target }
 push_target = { literal | identifier | variable | constant }
-prover_input_instruction = { ^"PROVER_INPUT " ~ "(" ~ prover_input_fn ~ ")" } // TODO: Can also support extra arguments.
+prover_input_instruction = { ^"PROVER_INPUT" ~ "(" ~ prover_input_fn ~ ")" } // TODO: Could also support extra arguments.
 prover_input_fn = { identifier ~ ("::" ~ identifier)*}
 nullary_instruction = { identifier }
 
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index 1f13a042..a79e17e9 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -4,6 +4,7 @@ mod ast;
 pub(crate) mod keccak_util;
 mod opcodes;
 mod parser;
+mod prover_input;
 mod stack_manipulation;
 
 #[cfg(test)]
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
index aa84ee05..f7acc96c 100644
--- a/evm/src/cpu/kernel/parser.rs
+++ b/evm/src/cpu/kernel/parser.rs
@@ -33,6 +33,15 @@ fn parse_item(item: Pair<Rule>) -> Item {
         }
         Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal).collect()),
         Rule::push_instruction => Item::Push(parse_push_target(item.into_inner().next().unwrap())),
+        Rule::prover_input_instruction => Item::ProverInput(
+            item.into_inner()
+                .next()
+                .unwrap()
+                .into_inner()
+                .map(|x| x.as_str().into())
+                .collect::<Vec<_>>()
+                .into(),
+        ),
         Rule::nullary_instruction => Item::StandardOp(item.as_str().into()),
         _ => panic!("Unexpected {:?}", item.as_rule()),
     }
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
new file mode 100644
index 00000000..1251f7d2
--- /dev/null
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -0,0 +1,8 @@
+#[derive(PartialEq, Eq, Debug, Clone)]
+pub(crate) struct ProverInputFn(Vec<String>);
+
+impl From<Vec<String>> for ProverInputFn {
+    fn from(v: Vec<String>) -> Self {
+        Self(v)
+    }
+}

From 0c539795fab75f218a35647577087865f17a03e1 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 11:47:10 +0200
Subject: [PATCH 10/85] Implement prover input fns

---
 evm/src/cpu/kernel/prover_input.rs | 104 +++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index 1251f7d2..79a8483d 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -1,3 +1,12 @@
+use std::str::FromStr;
+
+use ethereum_types::U256;
+
+use crate::cpu::kernel::prover_input::Field::{
+    Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
+};
+use crate::cpu::kernel::prover_input::FieldOp::{Inverse, Sqrt};
+
 #[derive(PartialEq, Eq, Debug, Clone)]
 pub(crate) struct ProverInputFn(Vec<String>);
 
@@ -6,3 +15,98 @@ impl From<Vec<String>> for ProverInputFn {
         Self(v)
     }
 }
+
+impl ProverInputFn {
+    pub(crate) fn run(&self, mut stack: Vec<U256>) -> U256 {
+        match self.0[0].as_str() {
+            "ff" => self.run_ff(stack),
+            "storage" => todo!(),
+            _ => panic!("Unrecognized prover input function."),
+        }
+    }
+
+    fn run_ff(&self, mut stack: Vec<U256>) -> U256 {
+        let field = Field::from_str(self.0[1].as_str()).unwrap();
+        let op = FieldOp::from_str(self.0[2].as_str()).unwrap();
+        let x = stack.pop().expect("Empty stack");
+        field.op(op, x)
+    }
+}
+
+enum Field {
+    Bn254Base,
+    Bn254Scalar,
+    Secp256k1Base,
+    Secp256k1Scalar,
+}
+
+enum FieldOp {
+    Inverse,
+    Sqrt,
+}
+
+impl FromStr for Field {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "bn254_base" => Bn254Base,
+            "bn254_scalar" => Bn254Scalar,
+            "secp256k1_base" => Secp256k1Base,
+            "secp256k1_scalar" => Secp256k1Scalar,
+            _ => panic!("Unrecognized field."),
+        })
+    }
+}
+
+impl FromStr for FieldOp {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "inverse" => Inverse,
+            "sqrt" => Sqrt,
+            _ => panic!("Unrecognized field operation."),
+        })
+    }
+}
+
+impl Field {
+    fn order(&self) -> U256 {
+        match self {
+            Field::Bn254Base => {
+                U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
+                    .unwrap()
+            }
+            Field::Bn254Scalar => todo!(),
+            Field::Secp256k1Base => todo!(),
+            Field::Secp256k1Scalar => todo!(),
+        }
+    }
+
+    fn op(&self, op: FieldOp, x: U256) -> U256 {
+        match op {
+            FieldOp::Inverse => self.inverse(x),
+            FieldOp::Sqrt => todo!(),
+        }
+    }
+
+    fn inverse(&self, x: U256) -> U256 {
+        let n = self.order();
+        assert!(x < n);
+        modexp(x, n - 2, n)
+    }
+}
+
+fn modexp(x: U256, e: U256, n: U256) -> U256 {
+    let mut current = x;
+    let mut product = U256::one();
+
+    for j in 0..256 {
+        if !(e >> j & U256::one()).is_zero() {
+            product = U256::try_from(product.full_mul(current) % n).unwrap();
+        }
+        current = U256::try_from(current.full_mul(current) % n).unwrap();
+    }
+    product
+}

From 19e6725cfae3c0f0debea414c01d8aa48fa1eab3 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 11:56:52 +0200
Subject: [PATCH 11/85] Working

---
 evm/src/cpu/kernel/interpreter.rs     | 46 ++++++++---------
 evm/src/cpu/kernel/prover_input.rs    |  2 +-
 evm/src/cpu/kernel/tests/curve_ops.rs | 74 ++++++++++++++++-----------
 evm/src/cpu/kernel/tests/ecrecover.rs | 16 +++++-
 evm/src/cpu/kernel/tests/exp.rs       | 12 ++---
 5 files changed, 86 insertions(+), 64 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index f2fb276a..c9d52cff 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -1,7 +1,11 @@
+use std::collections::HashMap;
+
 use anyhow::{anyhow, bail};
 use ethereum_types::{BigEndianHash, U256, U512};
 use keccak_hash::keccak;
 
+use crate::cpu::kernel::prover_input::ProverInputFn;
+
 /// Halt interpreter execution whenever a jump to this offset is done.
 const HALT_OFFSET: usize = 0xdeadbeef;
 
@@ -55,29 +59,16 @@ pub(crate) struct Interpreter<'a> {
     offset: usize,
     pub(crate) stack: Vec<U256>,
     pub(crate) memory: EvmMemory,
-    /// Non-deterministic prover inputs, stored backwards so that popping the last item gives the
-    /// next prover input.
-    prover_inputs: Vec<U256>,
+    prover_inputs: &'a HashMap<usize, ProverInputFn>,
     running: bool,
 }
 
-pub(crate) fn run(
-    code: &[u8],
+pub(crate) fn run<'a>(
+    code: &'a [u8],
     initial_offset: usize,
     initial_stack: Vec<U256>,
-) -> anyhow::Result<Interpreter> {
-    run_with_input(code, initial_offset, initial_stack, vec![])
-}
-
-pub(crate) fn run_with_input(
-    code: &[u8],
-    initial_offset: usize,
-    initial_stack: Vec<U256>,
-    mut prover_inputs: Vec<U256>,
-) -> anyhow::Result<Interpreter> {
-    // Prover inputs are stored backwards, so that popping the last item gives the next input.
-    prover_inputs.reverse();
-
+    prover_inputs: &'a HashMap<usize, ProverInputFn>,
+) -> anyhow::Result<Interpreter<'a>> {
     let mut interpreter = Interpreter {
         code,
         jumpdests: find_jumpdests(code),
@@ -337,11 +328,12 @@ impl<'a> Interpreter<'a> {
     }
 
     fn run_prover_input(&mut self) -> anyhow::Result<()> {
-        let input = self
+        let prover_input_fn = self
             .prover_inputs
-            .pop()
-            .ok_or_else(|| anyhow!("Out of prover inputs"))?;
-        self.stack.push(input);
+            .get(&(self.offset - 1))
+            .ok_or_else(|| anyhow!("Offset not in prover inputs."))?;
+        let output = prover_input_fn.run(self.stack.clone());
+        self.stack.push(output);
         Ok(())
     }
 
@@ -424,6 +416,8 @@ fn find_jumpdests(code: &[u8]) -> Vec<usize> {
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+
     use hex_literal::hex;
 
     use crate::cpu::kernel::interpreter::{run, Interpreter};
@@ -433,7 +427,10 @@ mod tests {
         let code = vec![
             0x60, 0x1, 0x60, 0x2, 0x1, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56,
         ]; // PUSH1, 1, PUSH1, 2, ADD, PUSH4 deadbeef, JUMP
-        assert_eq!(run(&code, 0, vec![])?.stack, vec![0x3.into()]);
+        assert_eq!(
+            run(&code, 0, vec![], &HashMap::new())?.stack,
+            vec![0x3.into()],
+        );
         Ok(())
     }
 
@@ -456,7 +453,8 @@ mod tests {
             0x60, 0xff, 0x60, 0x0, 0x52, 0x60, 0, 0x51, 0x60, 0x1, 0x51, 0x60, 0x42, 0x60, 0x27,
             0x53,
         ];
-        let run = run(&code, 0, vec![])?;
+        let pis = HashMap::new();
+        let run = run(&code, 0, vec![], &pis)?;
         let Interpreter { stack, memory, .. } = run;
         assert_eq!(stack, vec![0xff.into(), 0xff00.into()]);
         assert_eq!(&memory.memory, &hex!("00000000000000000000000000000000000000000000000000000000000000ff0000000000000042000000000000000000000000000000000000000000000000"));
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index 79a8483d..c9cb8821 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -17,7 +17,7 @@ impl From<Vec<String>> for ProverInputFn {
 }
 
 impl ProverInputFn {
-    pub(crate) fn run(&self, mut stack: Vec<U256>) -> U256 {
+    pub(crate) fn run(&self, stack: Vec<U256>) -> U256 {
         match self.0[0].as_str() {
             "ff" => self.run_ff(stack),
             "storage" => todo!(),
diff --git a/evm/src/cpu/kernel/tests/curve_ops.rs b/evm/src/cpu/kernel/tests/curve_ops.rs
index 6d8c6696..72e4169b 100644
--- a/evm/src/cpu/kernel/tests/curve_ops.rs
+++ b/evm/src/cpu/kernel/tests/curve_ops.rs
@@ -43,76 +43,82 @@ mod bn {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_double, initial_stack)?.stack;
+        let stack = run(
+            &kernel.code,
+            ec_double,
+            initial_stack,
+            &kernel.prover_inputs,
+        )?
+        .stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Addition with invalid point(s) #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #2
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #3
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #4
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #5
         let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Multiple calls
@@ -126,7 +132,7 @@ mod bn {
             point0.1,
             point0.0,
         ])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())
@@ -176,55 +182,61 @@ mod secp {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_double, initial_stack)?.stack;
+        let stack = run(
+            &kernel.code,
+            ec_double,
+            initial_stack,
+            &kernel.prover_inputs,
+        )?
+        .stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Multiple calls
@@ -238,7 +250,7 @@ mod secp {
             point0.1,
             point0.0,
         ])?;
-        let stack = run(&kernel.code, ec_add, initial_stack)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())
diff --git a/evm/src/cpu/kernel/tests/ecrecover.rs b/evm/src/cpu/kernel/tests/ecrecover.rs
index 78bdea3e..790a4a2c 100644
--- a/evm/src/cpu/kernel/tests/ecrecover.rs
+++ b/evm/src/cpu/kernel/tests/ecrecover.rs
@@ -18,7 +18,13 @@ fn test_valid_ecrecover(
 ) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run(&kernel.code, ecrecover, initial_stack)?.stack;
+    let stack = run(
+        &kernel.code,
+        ecrecover,
+        initial_stack,
+        &kernel.prover_inputs,
+    )?
+    .stack;
     assert_eq!(stack[0], U256::from_str(expected).unwrap());
 
     Ok(())
@@ -27,7 +33,13 @@ fn test_valid_ecrecover(
 fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run(&kernel.code, ecrecover, initial_stack)?.stack;
+    let stack = run(
+        &kernel.code,
+        ecrecover,
+        initial_stack,
+        &kernel.prover_inputs,
+    )?
+    .stack;
     assert_eq!(stack, vec![U256::MAX]);
 
     Ok(())
diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs
index 25c88623..0858c37c 100644
--- a/evm/src/cpu/kernel/tests/exp.rs
+++ b/evm/src/cpu/kernel/tests/exp.rs
@@ -18,26 +18,26 @@ fn test_exp() -> Result<()> {
 
     // Random input
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack;
+    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
     let initial_stack = vec![b, a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 base
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack;
+    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
     let initial_stack = vec![b, U256::zero()];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 exponent
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack)?.stack;
+    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
     let initial_stack = vec![U256::zero(), a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     Ok(())

From cafae8b818d0a7ec53f81be8536d89b97f18a948 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 12:36:03 +0200
Subject: [PATCH 12/85] Add `run_with_kernel` fn

---
 evm/src/cpu/kernel/asm/moddiv.asm     |  2 +-
 evm/src/cpu/kernel/interpreter.rs     | 18 ++++++-
 evm/src/cpu/kernel/mod.rs             |  3 +-
 evm/src/cpu/kernel/prover_input.rs    |  2 +-
 evm/src/cpu/kernel/tests/curve_ops.rs | 76 +++++++++++----------------
 5 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/moddiv.asm
index 630dcc54..2b76d054 100644
--- a/evm/src/cpu/kernel/asm/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/moddiv.asm
@@ -25,7 +25,7 @@
     %mulmodn
 %endmacro
 
-// Computes the inverse modulo N by providing it non-deterministically.
+// Non-deterministically provide the inverse modulo N.
 %macro inverse
     // stack: x
     PROVER_INPUT(ff::bn254_base::inverse)
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index c9d52cff..b5f44103 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -4,6 +4,7 @@ use anyhow::{anyhow, bail};
 use ethereum_types::{BigEndianHash, U256, U512};
 use keccak_hash::keccak;
 
+use crate::cpu::kernel::assembler::Kernel;
 use crate::cpu::kernel::prover_input::ProverInputFn;
 
 /// Halt interpreter execution whenever a jump to this offset is done.
@@ -53,7 +54,7 @@ impl EvmMemory {
     }
 }
 
-pub(crate) struct Interpreter<'a> {
+pub struct Interpreter<'a> {
     code: &'a [u8],
     jumpdests: Vec<usize>,
     offset: usize,
@@ -63,7 +64,20 @@ pub(crate) struct Interpreter<'a> {
     running: bool,
 }
 
-pub(crate) fn run<'a>(
+pub fn run_with_kernel(
+    kernel: &Kernel,
+    initial_offset: usize,
+    initial_stack: Vec<U256>,
+) -> anyhow::Result<Interpreter> {
+    run(
+        &kernel.code,
+        initial_offset,
+        initial_stack,
+        &kernel.prover_inputs,
+    )
+}
+
+pub fn run<'a>(
     code: &'a [u8],
     initial_offset: usize,
     initial_stack: Vec<U256>,
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index a79e17e9..67a22fc1 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -1,14 +1,13 @@
 pub mod aggregator;
 pub mod assembler;
 mod ast;
+pub mod interpreter;
 pub(crate) mod keccak_util;
 mod opcodes;
 mod parser;
 mod prover_input;
 mod stack_manipulation;
 
-#[cfg(test)]
-mod interpreter;
 #[cfg(test)]
 mod tests;
 
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index c9cb8821..5f3ecd42 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -8,7 +8,7 @@ use crate::cpu::kernel::prover_input::Field::{
 use crate::cpu::kernel::prover_input::FieldOp::{Inverse, Sqrt};
 
 #[derive(PartialEq, Eq, Debug, Clone)]
-pub(crate) struct ProverInputFn(Vec<String>);
+pub struct ProverInputFn(Vec<String>);
 
 impl From<Vec<String>> for ProverInputFn {
     fn from(v: Vec<String>) -> Self {
diff --git a/evm/src/cpu/kernel/tests/curve_ops.rs b/evm/src/cpu/kernel/tests/curve_ops.rs
index 72e4169b..44609f21 100644
--- a/evm/src/cpu/kernel/tests/curve_ops.rs
+++ b/evm/src/cpu/kernel/tests/curve_ops.rs
@@ -4,7 +4,7 @@ mod bn {
     use ethereum_types::U256;
 
     use crate::cpu::kernel::aggregator::combined_kernel;
-    use crate::cpu::kernel::interpreter::run;
+    use crate::cpu::kernel::interpreter::run_with_kernel;
     use crate::cpu::kernel::tests::u256ify;
 
     #[test]
@@ -43,82 +43,76 @@ mod bn {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run(
-            &kernel.code,
-            ec_double,
-            initial_stack,
-            &kernel.prover_inputs,
-        )?
-        .stack;
+        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Addition with invalid point(s) #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #2
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #3
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #4
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #5
         let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Multiple calls
@@ -132,7 +126,7 @@ mod bn {
             point0.1,
             point0.0,
         ])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())
@@ -144,7 +138,7 @@ mod secp {
     use anyhow::Result;
 
     use crate::cpu::kernel::aggregator::combined_kernel;
-    use crate::cpu::kernel::interpreter::run;
+    use crate::cpu::kernel::interpreter::{run, run_with_kernel};
     use crate::cpu::kernel::tests::u256ify;
 
     #[test]
@@ -182,7 +176,7 @@ mod secp {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
@@ -191,52 +185,46 @@ mod secp {
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run(
-            &kernel.code,
-            ec_double,
-            initial_stack,
-            &kernel.prover_inputs,
-        )?
-        .stack;
+        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run(&kernel.code, ec_mul, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Multiple calls
@@ -250,7 +238,7 @@ mod secp {
             point0.1,
             point0.0,
         ])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())

From 9dacbe0ff61b17aefdcaa496635b92adaac282be Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 12:52:45 +0200
Subject: [PATCH 13/85] Comments

---
 evm/src/cpu/kernel/assembler.rs       |  1 +
 evm/src/cpu/kernel/evm_asm.pest       |  2 +-
 evm/src/cpu/kernel/prover_input.rs    | 11 ++++++++++-
 evm/src/cpu/kernel/tests/ecrecover.rs | 18 +++---------------
 evm/src/cpu/kernel/tests/exp.rs       |  8 ++++----
 5 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 334220fd..6d2b0ff2 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -29,6 +29,7 @@ pub struct Kernel {
 
     pub(crate) global_labels: HashMap<String, usize>,
 
+    /// Map from `PROVER_INPUT` offsets to their corresponding `ProverInputFn`.
     pub(crate) prover_inputs: HashMap<usize, ProverInputFn>,
 }
 
diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest
index b0033391..0703798e 100644
--- a/evm/src/cpu/kernel/evm_asm.pest
+++ b/evm/src/cpu/kernel/evm_asm.pest
@@ -29,7 +29,7 @@ local_label = { identifier ~ ":" }
 bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* }
 push_instruction = { ^"PUSH " ~ push_target }
 push_target = { literal | identifier | variable | constant }
-prover_input_instruction = { ^"PROVER_INPUT" ~ "(" ~ prover_input_fn ~ ")" } // TODO: Could also support extra arguments.
+prover_input_instruction = { ^"PROVER_INPUT" ~ "(" ~ prover_input_fn ~ ")" }
 prover_input_fn = { identifier ~ ("::" ~ identifier)*}
 nullary_instruction = { identifier }
 
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index 5f3ecd42..733cbef7 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -7,6 +7,8 @@ use crate::cpu::kernel::prover_input::Field::{
 };
 use crate::cpu::kernel::prover_input::FieldOp::{Inverse, Sqrt};
 
+/// Prover input function represented as a scoped function name.
+/// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`.
 #[derive(PartialEq, Eq, Debug, Clone)]
 pub struct ProverInputFn(Vec<String>);
 
@@ -17,20 +19,27 @@ impl From<Vec<String>> for ProverInputFn {
 }
 
 impl ProverInputFn {
+    /// Run the function on the stack.
     pub(crate) fn run(&self, stack: Vec<U256>) -> U256 {
         match self.0[0].as_str() {
             "ff" => self.run_ff(stack),
-            "storage" => todo!(),
+            "mpt" => todo!(),
             _ => panic!("Unrecognized prover input function."),
         }
     }
 
+    // Finite field operations.
     fn run_ff(&self, mut stack: Vec<U256>) -> U256 {
         let field = Field::from_str(self.0[1].as_str()).unwrap();
         let op = FieldOp::from_str(self.0[2].as_str()).unwrap();
         let x = stack.pop().expect("Empty stack");
         field.op(op, x)
     }
+
+    // MPT operations.
+    fn run_mpt(&self, mut stack: Vec<U256>) -> U256 {
+        todo!()
+    }
 }
 
 enum Field {
diff --git a/evm/src/cpu/kernel/tests/ecrecover.rs b/evm/src/cpu/kernel/tests/ecrecover.rs
index 790a4a2c..b105cf47 100644
--- a/evm/src/cpu/kernel/tests/ecrecover.rs
+++ b/evm/src/cpu/kernel/tests/ecrecover.rs
@@ -5,7 +5,7 @@ use ethereum_types::U256;
 
 use crate::cpu::kernel::aggregator::combined_kernel;
 use crate::cpu::kernel::assembler::Kernel;
-use crate::cpu::kernel::interpreter::run;
+use crate::cpu::kernel::interpreter::run_with_kernel;
 use crate::cpu::kernel::tests::u256ify;
 
 fn test_valid_ecrecover(
@@ -18,13 +18,7 @@ fn test_valid_ecrecover(
 ) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run(
-        &kernel.code,
-        ecrecover,
-        initial_stack,
-        &kernel.prover_inputs,
-    )?
-    .stack;
+    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?.stack;
     assert_eq!(stack[0], U256::from_str(expected).unwrap());
 
     Ok(())
@@ -33,13 +27,7 @@ fn test_valid_ecrecover(
 fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run(
-        &kernel.code,
-        ecrecover,
-        initial_stack,
-        &kernel.prover_inputs,
-    )?
-    .stack;
+    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?.stack;
     assert_eq!(stack, vec![U256::MAX]);
 
     Ok(())
diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs
index 0858c37c..049fd23a 100644
--- a/evm/src/cpu/kernel/tests/exp.rs
+++ b/evm/src/cpu/kernel/tests/exp.rs
@@ -5,7 +5,7 @@ use ethereum_types::U256;
 use rand::{thread_rng, Rng};
 
 use crate::cpu::kernel::aggregator::combined_kernel;
-use crate::cpu::kernel::interpreter::run;
+use crate::cpu::kernel::interpreter::{run, run_with_kernel};
 
 #[test]
 fn test_exp() -> Result<()> {
@@ -18,7 +18,7 @@ fn test_exp() -> Result<()> {
 
     // Random input
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
     let initial_stack = vec![b, a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
     let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
@@ -26,7 +26,7 @@ fn test_exp() -> Result<()> {
 
     // 0 base
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
     let initial_stack = vec![b, U256::zero()];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
     let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
@@ -34,7 +34,7 @@ fn test_exp() -> Result<()> {
 
     // 0 exponent
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a];
-    let stack_with_kernel = run(&kernel.code, exp, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
     let initial_stack = vec![U256::zero(), a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
     let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;

From 1e02fd0236cf68ffd1418b54bea05f0393444692 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 12:58:29 +0200
Subject: [PATCH 14/85] Oh Clippy...

---
 evm/src/cpu/kernel/prover_input.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index 733cbef7..21645340 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -37,7 +37,8 @@ impl ProverInputFn {
     }
 
     // MPT operations.
-    fn run_mpt(&self, mut stack: Vec<U256>) -> U256 {
+    #[allow(dead_code)]
+    fn run_mpt(&self, _stack: Vec<U256>) -> U256 {
         todo!()
     }
 }

From 927cad3acd7dd9aa282a6f98f4c55dda213e33c2 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Sat, 23 Jul 2022 13:03:43 +0200
Subject: [PATCH 15/85] Collect prover inputs

---
 evm/src/cpu/kernel/interpreter.rs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index b5f44103..e179d713 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -60,7 +60,8 @@ pub struct Interpreter<'a> {
     offset: usize,
     pub(crate) stack: Vec<U256>,
     pub(crate) memory: EvmMemory,
-    prover_inputs: &'a HashMap<usize, ProverInputFn>,
+    prover_inputs_map: &'a HashMap<usize, ProverInputFn>,
+    prover_inputs: Vec<U256>,
     running: bool,
 }
 
@@ -89,7 +90,8 @@ pub fn run<'a>(
         offset: initial_offset,
         stack: initial_stack,
         memory: EvmMemory::default(),
-        prover_inputs,
+        prover_inputs_map: prover_inputs,
+        prover_inputs: Vec::new(),
         running: true,
     };
 
@@ -343,11 +345,12 @@ impl<'a> Interpreter<'a> {
 
     fn run_prover_input(&mut self) -> anyhow::Result<()> {
         let prover_input_fn = self
-            .prover_inputs
+            .prover_inputs_map
             .get(&(self.offset - 1))
             .ok_or_else(|| anyhow!("Offset not in prover inputs."))?;
         let output = prover_input_fn.run(self.stack.clone());
         self.stack.push(output);
+        self.prover_inputs.push(output);
         Ok(())
     }
 

From c9d610ec10686a3bf6465ad50815d69d62dfbabe Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Sun, 24 Jul 2022 17:47:14 -0400
Subject: [PATCH 16/85] use maybe_rayon in starky and evm

---
 evm/Cargo.toml                 | 4 +++-
 evm/src/memory/memory_stark.rs | 2 +-
 evm/src/permutation.rs         | 2 +-
 evm/src/proof.rs               | 2 +-
 evm/src/prover.rs              | 2 +-
 starky/Cargo.toml              | 6 +++++-
 starky/src/permutation.rs      | 2 +-
 starky/src/proof.rs            | 2 +-
 starky/src/prover.rs           | 2 +-
 9 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/evm/Cargo.toml b/evm/Cargo.toml
index c10ab104..48ef12d7 100644
--- a/evm/Cargo.toml
+++ b/evm/Cargo.toml
@@ -17,7 +17,7 @@ log = "0.4.14"
 once_cell = "1.13.0"
 pest = "2.1.3"
 pest_derive = "2.1.0"
-rayon = "1.5.1"
+maybe_rayon = { path = "../maybe_rayon" }
 rand = "0.8.5"
 rand_chacha = "0.3.1"
 rlp = "0.5.1"
@@ -28,7 +28,9 @@ keccak-hash = "0.9.0"
 hex = "0.4.3"
 
 [features]
+default = ["parallel"]
 asmtools = ["hex"]
+parallel = ["maybe_rayon/parallel"]
 
 [[bin]]
 name = "assemble"
diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index 82e10869..c16ef2be 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -10,7 +10,7 @@ use plonky2::hash::hash_types::RichField;
 use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::cross_table_lookup::Column;
diff --git a/evm/src/permutation.rs b/evm/src/permutation.rs
index a4039ad2..42400a94 100644
--- a/evm/src/permutation.rs
+++ b/evm/src/permutation.rs
@@ -16,7 +16,7 @@ use plonky2::plonk::plonk_common::{
     reduce_with_powers, reduce_with_powers_circuit, reduce_with_powers_ext_circuit,
 };
 use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
diff --git a/evm/src/proof.rs b/evm/src/proof.rs
index b1275844..97c27bd4 100644
--- a/evm/src/proof.rs
+++ b/evm/src/proof.rs
@@ -12,7 +12,7 @@ use plonky2::hash::merkle_tree::MerkleCap;
 use plonky2::iop::ext_target::ExtensionTarget;
 use plonky2::iop::target::Target;
 use plonky2::plonk::config::GenericConfig;
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::permutation::GrandProductChallengeSet;
diff --git a/evm/src/prover.rs b/evm/src/prover.rs
index 346224a5..b4073c29 100644
--- a/evm/src/prover.rs
+++ b/evm/src/prover.rs
@@ -15,7 +15,7 @@ use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
 use plonky2_util::{log2_ceil, log2_strict};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::all_stark::{AllStark, Table};
 use crate::config::StarkConfig;
diff --git a/starky/Cargo.toml b/starky/Cargo.toml
index 4e67856d..80a26bfc 100644
--- a/starky/Cargo.toml
+++ b/starky/Cargo.toml
@@ -4,6 +4,10 @@ description = "Implementation of STARKs"
 version = "0.1.0"
 edition = "2021"
 
+[features]
+default = ["parallel"]
+parallel = ["maybe_rayon/parallel"]
+
 [dependencies]
 plonky2 = { path = "../plonky2" }
 plonky2_util = { path = "../util" }
@@ -11,4 +15,4 @@ anyhow = "1.0.40"
 env_logger = "0.9.0"
 itertools = "0.10.0"
 log = "0.4.14"
-rayon = "1.5.1"
+maybe_rayon = { path = "../maybe_rayon"}
diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs
index 88361003..e1e1c2af 100644
--- a/starky/src/permutation.rs
+++ b/starky/src/permutation.rs
@@ -13,7 +13,7 @@ use plonky2::iop::target::Target;
 use plonky2::plonk::circuit_builder::CircuitBuilder;
 use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher};
 use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
diff --git a/starky/src/proof.rs b/starky/src/proof.rs
index c321b484..fad8a7f1 100644
--- a/starky/src/proof.rs
+++ b/starky/src/proof.rs
@@ -12,7 +12,7 @@ use plonky2::hash::merkle_tree::MerkleCap;
 use plonky2::iop::ext_target::ExtensionTarget;
 use plonky2::iop::target::Target;
 use plonky2::plonk::config::GenericConfig;
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::permutation::PermutationChallengeSet;
diff --git a/starky/src/prover.rs b/starky/src/prover.rs
index 6cdb1384..d446dabb 100644
--- a/starky/src/prover.rs
+++ b/starky/src/prover.rs
@@ -16,7 +16,7 @@ use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
 use plonky2_util::{log2_ceil, log2_strict};
-use rayon::prelude::*;
+use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::ConstraintConsumer;

From e48bfa837fe327bdf8a293b1bf13535c85409301 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Sun, 24 Jul 2022 18:06:03 -0400
Subject: [PATCH 17/85] fmt

---
 evm/src/memory/memory_stark.rs | 2 +-
 evm/src/permutation.rs         | 2 +-
 evm/src/proof.rs               | 2 +-
 evm/src/prover.rs              | 2 +-
 starky/src/permutation.rs      | 2 +-
 starky/src/proof.rs            | 2 +-
 starky/src/prover.rs           | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index c16ef2be..398c2c15 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -2,6 +2,7 @@ use std::marker::PhantomData;
 
 use ethereum_types::U256;
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::field::packed::PackedField;
 use plonky2::field::polynomial::PolynomialValues;
@@ -10,7 +11,6 @@ use plonky2::hash::hash_types::RichField;
 use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
-use maybe_rayon::*;
 
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::cross_table_lookup::Column;
diff --git a/evm/src/permutation.rs b/evm/src/permutation.rs
index 42400a94..c21a06de 100644
--- a/evm/src/permutation.rs
+++ b/evm/src/permutation.rs
@@ -1,6 +1,7 @@
 //! Permutation arguments.
 
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::batch_util::batch_multiply_inplace;
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::field::packed::PackedField;
@@ -16,7 +17,6 @@ use plonky2::plonk::plonk_common::{
     reduce_with_powers, reduce_with_powers_circuit, reduce_with_powers_ext_circuit,
 };
 use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget};
-use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
diff --git a/evm/src/proof.rs b/evm/src/proof.rs
index 97c27bd4..4f81308d 100644
--- a/evm/src/proof.rs
+++ b/evm/src/proof.rs
@@ -1,4 +1,5 @@
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::fri::oracle::PolynomialBatch;
 use plonky2::fri::proof::{
@@ -12,7 +13,6 @@ use plonky2::hash::merkle_tree::MerkleCap;
 use plonky2::iop::ext_target::ExtensionTarget;
 use plonky2::iop::target::Target;
 use plonky2::plonk::config::GenericConfig;
-use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::permutation::GrandProductChallengeSet;
diff --git a/evm/src/prover.rs b/evm/src/prover.rs
index b4073c29..8be39b6c 100644
--- a/evm/src/prover.rs
+++ b/evm/src/prover.rs
@@ -1,6 +1,7 @@
 use std::any::type_name;
 
 use anyhow::{ensure, Result};
+use maybe_rayon::*;
 use plonky2::field::extension::Extendable;
 use plonky2::field::packable::Packable;
 use plonky2::field::packed::PackedField;
@@ -15,7 +16,6 @@ use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
 use plonky2_util::{log2_ceil, log2_strict};
-use maybe_rayon::*;
 
 use crate::all_stark::{AllStark, Table};
 use crate::config::StarkConfig;
diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs
index e1e1c2af..7d422171 100644
--- a/starky/src/permutation.rs
+++ b/starky/src/permutation.rs
@@ -1,6 +1,7 @@
 //! Permutation arguments.
 
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::batch_util::batch_multiply_inplace;
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::field::packed::PackedField;
@@ -13,7 +14,6 @@ use plonky2::iop::target::Target;
 use plonky2::plonk::circuit_builder::CircuitBuilder;
 use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher};
 use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget};
-use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
diff --git a/starky/src/proof.rs b/starky/src/proof.rs
index fad8a7f1..c9900c08 100644
--- a/starky/src/proof.rs
+++ b/starky/src/proof.rs
@@ -1,4 +1,5 @@
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::fri::oracle::PolynomialBatch;
 use plonky2::fri::proof::{
@@ -12,7 +13,6 @@ use plonky2::hash::merkle_tree::MerkleCap;
 use plonky2::iop::ext_target::ExtensionTarget;
 use plonky2::iop::target::Target;
 use plonky2::plonk::config::GenericConfig;
-use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::permutation::PermutationChallengeSet;
diff --git a/starky/src/prover.rs b/starky/src/prover.rs
index d446dabb..24593b45 100644
--- a/starky/src/prover.rs
+++ b/starky/src/prover.rs
@@ -2,6 +2,7 @@ use std::iter::once;
 
 use anyhow::{ensure, Result};
 use itertools::Itertools;
+use maybe_rayon::*;
 use plonky2::field::extension::Extendable;
 use plonky2::field::packable::Packable;
 use plonky2::field::packed::PackedField;
@@ -16,7 +17,6 @@ use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use plonky2::util::transpose;
 use plonky2_util::{log2_ceil, log2_strict};
-use maybe_rayon::*;
 
 use crate::config::StarkConfig;
 use crate::constraint_consumer::ConstraintConsumer;

From ce23d4377a5202f67c25ff2313ed03f495603311 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Wed, 27 Jul 2022 11:27:04 +0200
Subject: [PATCH 18/85] Minor

---
 evm/src/cpu/kernel/interpreter.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index ebd4b949..0634b864 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -36,7 +36,7 @@ impl InterpreterMemory {
 }
 
 // TODO: Remove `code` and `stack` fields as they are contained in `memory`.
-pub(crate) struct Interpreter<'a> {
+pub struct Interpreter<'a> {
     code: &'a [u8],
     jumpdests: Vec<usize>,
     offset: usize,
@@ -468,8 +468,6 @@ fn find_jumpdests(code: &[u8]) -> Vec<usize> {
 mod tests {
     use std::collections::HashMap;
 
-    use hex_literal::hex;
-
     use crate::cpu::kernel::interpreter::{run, Interpreter};
     use crate::memory::segments::Segment;
 

From 805321584193f18dbc2411e92327f00696389edc Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Wed, 27 Jul 2022 16:49:26 +0200
Subject: [PATCH 19/85] Inverse for other fields

---
 evm/src/cpu/kernel/aggregator.rs              |  24 +-
 .../cpu/kernel/asm/secp256k1/curve_mul.asm    |   4 +
 .../kernel/asm/secp256k1/inverse_scalar.asm   | 659 +--------------
 evm/src/cpu/kernel/asm/secp256k1/moddiv.asm   | 765 +-----------------
 evm/src/cpu/kernel/prover_input.rs            |  10 +-
 5 files changed, 54 insertions(+), 1408 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 1f8ba0da..114a3d83 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -14,14 +14,26 @@ use crate::memory::segments::Segment;
 
 pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
 
+const EC_CONSTANTS: [(&str, [u8; 32]); 3] = [
+    (
+        "BN_BASE",
+        hex!("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"),
+    ),
+    (
+        "SECP_BASE",
+        hex!("fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"),
+    ),
+    (
+        "SECP_SCALAR",
+        hex!("fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141"),
+    ),
+];
+
 pub fn evm_constants() -> HashMap<String, U256> {
     let mut c = HashMap::new();
-    c.insert(
-        "BN_BASE".into(),
-        U256::from_big_endian(&hex!(
-            "30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"
-        )),
-    );
+    for (name, value) in EC_CONSTANTS {
+        c.insert(name.into(), U256::from_big_endian(&value));
+    }
     for segment in Segment::all() {
         c.insert(segment.var_name().into(), (segment as u32).into());
     }
diff --git a/evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm b/evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm
index 7ad2dd71..f0825e88 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm
@@ -2,6 +2,10 @@
 global ec_mul_valid_point_secp:
     JUMPDEST
     // stack: x, y, s, retdest
+    %stack (x,y) -> (x,y,x,y)
+    %ec_isidentity
+    // stack: (x,y)==(0,0), x, y, s, retdest
+    %jumpi(ret_zero_ec_mul)
     DUP3
     // stack: s, x, y, s, retdest
     %jumpi(step_case)
diff --git a/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm b/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
index ce0af757..7b859b4f 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
@@ -17,654 +17,19 @@
     %mulmodn_secp_scalar
 %endmacro
 
-// Computes the inverse modulo N using x^-1 = x^(N-2) mod N and square-and-multiply modular exponentiation.
+// Non-deterministically provide the inverse modulo N.
 %macro inverse_secp_scalar
-    DUP1
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
-    %squaremodn_secp_scalar
-    DUP2
-    %mulmodn_secp_scalar
+    // stack: x
+    PROVER_INPUT(ff::secp256k1_scalar::inverse)
+    // stack: x^-1, x
+    %stack (inv, x) -> (inv, x, @SECP_SCALAR, inv, x)
+    // stack: x^-1, x, N, x^-1, x
+    MULMOD
+    // stack: x^-1 * x, x^-1, x
+    PUSH 1
+    // stack: 1, x^-1 * x, x^-1, x
+    %assert_eq
+    // stack: x^-1, x
     SWAP1
     // stack: x, x^-1
     POP
diff --git a/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm b/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
index 941fa33a..fd077b11 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
@@ -25,760 +25,19 @@
     %mulmodn_secp_base
 %endmacro
 
-// Computes the inverse modulo N using x^-1 = x^(N-2) mod N and square-and-multiply modular exponentiation.
+// Non-deterministically provide the inverse modulo N.
 %macro inverse_secp_base
-    DUP1
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
+    // stack: x
+    PROVER_INPUT(ff::secp256k1_base::inverse)
+    // stack: x^-1, x
+    %stack (inv, x) -> (inv, x, @SECP_BASE, inv, x)
+    // stack: x^-1, x, N, x^-1, x
+    MULMOD
+    // stack: x^-1 * x, x^-1, x
+    PUSH 1
+    // stack: 1, x^-1 * x, x^-1, x
+    %assert_eq
+    // stack: x^-1, x
     SWAP1
     // stack: x, x^-1
     POP
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index 21645340..ac83257a 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -89,8 +89,14 @@ impl Field {
                     .unwrap()
             }
             Field::Bn254Scalar => todo!(),
-            Field::Secp256k1Base => todo!(),
-            Field::Secp256k1Scalar => todo!(),
+            Field::Secp256k1Base => {
+                U256::from_str("0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f")
+                    .unwrap()
+            }
+            Field::Secp256k1Scalar => {
+                U256::from_str("0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141")
+                    .unwrap()
+            }
         }
     }
 

From bb2ee9d543809c14c51c16cc8ee5f2c28c845892 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Wed, 27 Jul 2022 17:06:16 +0200
Subject: [PATCH 20/85] Implement sqrt

---
 evm/src/cpu/kernel/asm/secp256k1/lift_x.asm | 756 +-------------------
 evm/src/cpu/kernel/prover_input.rs          |  13 +-
 2 files changed, 17 insertions(+), 752 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
index cd392b61..4bef700e 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
@@ -60,759 +60,13 @@
     ADDMOD
 %endmacro
 
-// Returns a square root of x if one exists, otherwise an undefined value.
-// Computed as x^(q+1)/4, with q the Secp base field order.
-// To replace with more efficient method using non-determinism later.
+// Non-deterministically provide the square root modulo N.
 %macro sqrt_secp_base
     // stack: x
-    DUP1
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    DUP2
-    %mulmodn_secp_base
-    %squaremodn_secp_base
-    %squaremodn_secp_base
+    PROVER_INPUT(ff::secp256k1_base::sqrt)
+    // stack: √x, x
     SWAP1
-    // stack: x, x^-1
+    // stack: x, √x
     POP
-    // stack: x^-1
+    // stack: √x
 %endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index ac83257a..b27eb561 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -103,7 +103,7 @@ impl Field {
     fn op(&self, op: FieldOp, x: U256) -> U256 {
         match op {
             FieldOp::Inverse => self.inverse(x),
-            FieldOp::Sqrt => todo!(),
+            FieldOp::Sqrt => self.sqrt(x),
         }
     }
 
@@ -112,6 +112,17 @@ impl Field {
         assert!(x < n);
         modexp(x, n - 2, n)
     }
+
+    fn sqrt(&self, x: U256) -> U256 {
+        let n = self.order();
+        assert!(x < n);
+        let (q, r) = (n + 1).div_mod(4.into());
+        assert!(
+            r.is_zero(),
+            "Only naive sqrt implementation for now. If needed implement Tonelli-Shanks."
+        );
+        modexp(x, q, n)
+    }
 }
 
 fn modexp(x: U256, e: U256, n: U256) -> U256 {

From c160c4032d42e62c6019c39773f61ee2726ad880 Mon Sep 17 00:00:00 2001
From: Jacqueline Nabaglo <jakub@mirprotocol.org>
Date: Thu, 28 Jul 2022 04:36:33 +1000
Subject: [PATCH 21/85] Inter-row program counter constraints (#639)

* Beginning of control flow support

* Fixes to halt spin loop
---
 evm/src/all_stark.rs             |  22 +++++-
 evm/src/cpu/columns.rs           |   8 ++-
 evm/src/cpu/control_flow.rs      | 112 +++++++++++++++++++++++++++++++
 evm/src/cpu/cpu_stark.rs         |   6 +-
 evm/src/cpu/kernel/aggregator.rs |   1 +
 evm/src/cpu/kernel/asm/halt.asm  |   6 ++
 evm/src/cpu/mod.rs               |   1 +
 7 files changed, 153 insertions(+), 3 deletions(-)
 create mode 100644 evm/src/cpu/control_flow.rs
 create mode 100644 evm/src/cpu/kernel/asm/halt.asm

diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs
index ba157fc0..9f520019 100644
--- a/evm/src/all_stark.rs
+++ b/evm/src/all_stark.rs
@@ -143,6 +143,7 @@ mod tests {
     use crate::all_stark::AllStark;
     use crate::config::StarkConfig;
     use crate::cpu::cpu_stark::CpuStark;
+    use crate::cpu::kernel::aggregator::KERNEL;
     use crate::cross_table_lookup::testutils::check_ctls;
     use crate::keccak::keccak_stark::{KeccakStark, NUM_INPUTS, NUM_ROUNDS};
     use crate::logic::{self, LogicStark, Operation};
@@ -321,8 +322,27 @@ mod tests {
 
         // Pad to a power of two.
         for _ in cpu_trace_rows.len()..cpu_trace_rows.len().next_power_of_two() {
-            cpu_trace_rows.push([F::ZERO; CpuStark::<F, D>::COLUMNS]);
+            let mut row: cpu::columns::CpuColumnsView<F> =
+                [F::ZERO; CpuStark::<F, D>::COLUMNS].into();
+            row.is_cpu_cycle = F::ONE;
+            cpu_stark.generate(row.borrow_mut());
+            cpu_trace_rows.push(row.into());
         }
+
+        // Ensure we finish in a halted state.
+        {
+            let num_rows = cpu_trace_rows.len();
+            let halt_label = F::from_canonical_usize(KERNEL.global_labels["halt_pc0"]);
+
+            let last_row: &mut cpu::columns::CpuColumnsView<F> =
+                cpu_trace_rows[num_rows - 1].borrow_mut();
+            last_row.program_counter = halt_label;
+
+            let second_last_row: &mut cpu::columns::CpuColumnsView<F> =
+                cpu_trace_rows[num_rows - 2].borrow_mut();
+            second_last_row.next_program_counter = halt_label;
+        }
+
         trace_rows_to_poly_values(cpu_trace_rows)
     }
 
diff --git a/evm/src/cpu/columns.rs b/evm/src/cpu/columns.rs
index ae6872df..970f0279 100644
--- a/evm/src/cpu/columns.rs
+++ b/evm/src/cpu/columns.rs
@@ -17,9 +17,12 @@ pub struct CpuColumnsView<T> {
     pub is_bootstrap_contract: T,
 
     /// Filter. 1 if the row corresponds to a cycle of execution and 0 otherwise.
-    /// Lets us re-use decode columns in non-cycle rows.
+    /// Lets us re-use columns in non-cycle rows.
     pub is_cpu_cycle: T,
 
+    /// If CPU cycle: The program counter for the current instruction.
+    pub program_counter: T,
+
     /// If CPU cycle: The opcode being decoded, in {0, ..., 255}.
     pub opcode: T,
 
@@ -139,6 +142,9 @@ pub struct CpuColumnsView<T> {
     /// If CPU cycle: the opcode, broken up into bits in **big-endian** order.
     pub opcode_bits: [T; 8],
 
+    /// If CPU cycle: The program counter for the next instruction.
+    pub next_program_counter: T,
+
     /// Filter. 1 iff a Keccak permutation is computed on this row.
     pub is_keccak: T,
     pub keccak_input_limbs: [T; 50],
diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs
new file mode 100644
index 00000000..cf24afca
--- /dev/null
+++ b/evm/src/cpu/control_flow.rs
@@ -0,0 +1,112 @@
+use plonky2::field::extension::Extendable;
+use plonky2::field::packed::PackedField;
+use plonky2::field::types::Field;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::cpu::columns::CpuColumnsView;
+use crate::cpu::kernel::aggregator::KERNEL;
+
+fn get_halt_pcs<F: Field>() -> (F, F) {
+    let halt_pc0 = KERNEL.global_labels["halt_pc0"];
+    let halt_pc1 = KERNEL.global_labels["halt_pc1"];
+
+    (
+        F::from_canonical_usize(halt_pc0),
+        F::from_canonical_usize(halt_pc1),
+    )
+}
+
+pub fn eval_packed_generic<P: PackedField>(
+    lv: &CpuColumnsView<P>,
+    nv: &CpuColumnsView<P>,
+    yield_constr: &mut ConstraintConsumer<P>,
+) {
+    // Once we start executing instructions, then we continue until the end of the table.
+    yield_constr.constraint_transition(lv.is_cpu_cycle * (nv.is_cpu_cycle - P::ONES));
+
+    // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the
+    // next row.
+    yield_constr
+        .constraint_transition(lv.is_cpu_cycle * (nv.program_counter - lv.next_program_counter));
+
+    // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU
+    // cycle row is 0.
+    yield_constr
+        .constraint_transition((lv.is_cpu_cycle - P::ONES) * nv.is_cpu_cycle * nv.program_counter);
+
+    // The first row has nowhere to continue execution from, so if it's a cycle row, then its
+    // `program_counter` must be 0.
+    // NB: I know the first few rows will be used for initialization and will not be CPU cycle rows.
+    // Once that's done, then this constraint can be removed. Until then, it is needed to ensure
+    // that execution starts at 0 and not at any arbitrary offset.
+    yield_constr.constraint_first_row(lv.is_cpu_cycle * lv.program_counter);
+
+    // The last row must be a CPU cycle row.
+    yield_constr.constraint_last_row(lv.is_cpu_cycle - P::ONES);
+    // Also, the last row's `program_counter` must be inside the `halt` infinite loop. Note that
+    // the loop consists of two instructions, so we must check the `halt_pc0` and `halt_pc1` labels.
+    let (halt_pc0, halt_pc1) = get_halt_pcs::<P::Scalar>();
+    yield_constr
+        .constraint_last_row((lv.program_counter - halt_pc0) * (lv.program_counter - halt_pc1));
+}
+
+pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
+    lv: &CpuColumnsView<ExtensionTarget<D>>,
+    nv: &CpuColumnsView<ExtensionTarget<D>>,
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+) {
+    // Once we start executing instructions, then we continue until the end of the table.
+    {
+        let constr = builder.mul_sub_extension(lv.is_cpu_cycle, nv.is_cpu_cycle, lv.is_cpu_cycle);
+        yield_constr.constraint_transition(builder, constr);
+    }
+
+    // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the
+    // next row.
+    {
+        let constr = builder.sub_extension(nv.program_counter, lv.next_program_counter);
+        let constr = builder.mul_extension(lv.is_cpu_cycle, constr);
+        yield_constr.constraint_transition(builder, constr);
+    }
+
+    // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU
+    // cycle row is 0.
+    {
+        let constr = builder.mul_extension(nv.is_cpu_cycle, nv.program_counter);
+        let constr = builder.mul_sub_extension(lv.is_cpu_cycle, constr, constr);
+        yield_constr.constraint_transition(builder, constr);
+    }
+
+    // The first row has nowhere to continue execution from, so if it's a cycle row, then its
+    // `program_counter` must be 0.
+    // NB: I know the first few rows will be used for initialization and will not be CPU cycle rows.
+    // Once that's done, then this constraint can be removed. Until then, it is needed to ensure
+    // that execution starts at 0 and not at any arbitrary offset.
+    {
+        let constr = builder.mul_extension(lv.is_cpu_cycle, lv.program_counter);
+        yield_constr.constraint_first_row(builder, constr);
+    }
+
+    // The last row must be a CPU cycle row.
+    {
+        let one = builder.one_extension();
+        let constr = builder.sub_extension(lv.is_cpu_cycle, one);
+        yield_constr.constraint_last_row(builder, constr);
+    }
+    // Also, the last row's `program_counter` must be inside the `halt` infinite loop. Note that
+    // the loop consists of two instructions, so we must check the `halt_pc0` and `halt_pc1` labels.
+    {
+        let (halt_pc0, halt_pc1) = get_halt_pcs();
+        let halt_pc0_target = builder.constant_extension(halt_pc0);
+        let halt_pc1_target = builder.constant_extension(halt_pc1);
+
+        let halt_pc0_offset = builder.sub_extension(lv.program_counter, halt_pc0_target);
+        let halt_pc1_offset = builder.sub_extension(lv.program_counter, halt_pc1_target);
+        let constr = builder.mul_extension(halt_pc0_offset, halt_pc1_offset);
+
+        yield_constr.constraint_last_row(builder, constr);
+    }
+}
diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs
index 1e5cc887..6eb3154e 100644
--- a/evm/src/cpu/cpu_stark.rs
+++ b/evm/src/cpu/cpu_stark.rs
@@ -9,7 +9,7 @@ use plonky2::hash::hash_types::RichField;
 
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::cpu::columns::{CpuColumnsView, COL_MAP, NUM_CPU_COLUMNS};
-use crate::cpu::{bootstrap_kernel, decode, simple_logic};
+use crate::cpu::{bootstrap_kernel, control_flow, decode, simple_logic};
 use crate::cross_table_lookup::Column;
 use crate::memory::NUM_CHANNELS;
 use crate::stark::Stark;
@@ -88,7 +88,9 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for CpuStark<F, D
         P: PackedField<Scalar = FE>,
     {
         let local_values = vars.local_values.borrow();
+        let next_values = vars.next_values.borrow();
         bootstrap_kernel::eval_bootstrap_kernel(vars, yield_constr);
+        control_flow::eval_packed_generic(local_values, next_values, yield_constr);
         decode::eval_packed_generic(local_values, yield_constr);
         simple_logic::eval_packed(local_values, yield_constr);
     }
@@ -100,7 +102,9 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for CpuStark<F, D
         yield_constr: &mut RecursiveConstraintConsumer<F, D>,
     ) {
         let local_values = vars.local_values.borrow();
+        let next_values = vars.next_values.borrow();
         bootstrap_kernel::eval_bootstrap_kernel_circuit(builder, vars, yield_constr);
+        control_flow::eval_ext_circuit(builder, local_values, next_values, yield_constr);
         decode::eval_ext_circuit(builder, local_values, yield_constr);
         simple_logic::eval_ext_circuit(builder, local_values, yield_constr);
     }
diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 1f8ba0da..ec9e34e8 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -39,6 +39,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/exp.asm"),
         include_str!("asm/curve_mul.asm"),
         include_str!("asm/curve_add.asm"),
+        include_str!("asm/halt.asm"),
         include_str!("asm/memory.asm"),
         include_str!("asm/moddiv.asm"),
         include_str!("asm/secp256k1/curve_mul.asm"),
diff --git a/evm/src/cpu/kernel/asm/halt.asm b/evm/src/cpu/kernel/asm/halt.asm
new file mode 100644
index 00000000..906ce51a
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/halt.asm
@@ -0,0 +1,6 @@
+global halt:
+    PUSH halt_pc0
+global halt_pc0:
+    DUP1
+global halt_pc1:
+    JUMP
diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs
index 8da8a125..6c767998 100644
--- a/evm/src/cpu/mod.rs
+++ b/evm/src/cpu/mod.rs
@@ -1,5 +1,6 @@
 pub(crate) mod bootstrap_kernel;
 pub(crate) mod columns;
+mod control_flow;
 pub mod cpu_stark;
 pub(crate) mod decode;
 pub mod kernel;

From 16ddfcb94cf8a1364aba6f8ebe4174f08c030e6b Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 21 Jul 2022 17:10:40 -0400
Subject: [PATCH 22/85] make env_logger dev-dependency

---
 plonky2/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plonky2/Cargo.toml b/plonky2/Cargo.toml
index 9c019640..6f491e1d 100644
--- a/plonky2/Cargo.toml
+++ b/plonky2/Cargo.toml
@@ -13,7 +13,6 @@ default-run = "generate_constants"
 [dependencies]
 plonky2_field = { path = "../field" }
 plonky2_util = { path = "../util" }
-env_logger = "0.9.0"
 log = "0.4.14"
 itertools = "0.10.0"
 num = { version = "0.4", features = [ "rand" ] }
@@ -29,6 +28,7 @@ static_assertions = "1.1.0"
 
 [dev-dependencies]
 criterion = "0.3.5"
+env_logger = "0.9.0"
 tynm = "0.1.6"
 structopt = "0.3.26"
 num_cpus = "1.13.1"

From 585495d314defc65914d57ae5c1f950576564bd8 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Wed, 27 Jul 2022 23:53:26 -0400
Subject: [PATCH 23/85] feature-gate stub TimingTree

---
 plonky2/Cargo.toml         |  4 ++++
 plonky2/src/fri/oracle.rs  |  6 +++---
 plonky2/src/fri/prover.rs  |  6 +++---
 plonky2/src/util/timing.rs | 43 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/plonky2/Cargo.toml b/plonky2/Cargo.toml
index 6f491e1d..8542f307 100644
--- a/plonky2/Cargo.toml
+++ b/plonky2/Cargo.toml
@@ -10,6 +10,10 @@ categories = ["cryptography"]
 edition = "2021"
 default-run = "generate_constants"
 
+[features]
+default = ["timing"]
+timing = []
+
 [dependencies]
 plonky2_field = { path = "../field" }
 plonky2_util = { path = "../util" }
diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index 312b458b..7499e696 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -71,7 +71,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
         rate_bits: usize,
         blinding: bool,
         cap_height: usize,
-        timing: &mut TimingTree,
+        _timing: &mut TimingTree,
         fft_root_table: Option<&FftRootTable<F>>,
     ) -> Self
     where
@@ -79,7 +79,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
     {
         let degree = polynomials[0].len();
         let lde_values = timed!(
-            timing,
+            _timing,
             "FFT + blinding",
             Self::lde_values(&polynomials, rate_bits, blinding, fft_root_table)
         );
@@ -87,7 +87,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
         let mut leaves = timed!(timing, "transpose LDEs", transpose(&lde_values));
         reverse_index_bits_in_place(&mut leaves);
         let merkle_tree = timed!(
-            timing,
+            _timing,
             "build Merkle tree",
             MerkleTree::new(leaves, cap_height)
         );
diff --git a/plonky2/src/fri/prover.rs b/plonky2/src/fri/prover.rs
index 6136a9a1..f8467d2b 100644
--- a/plonky2/src/fri/prover.rs
+++ b/plonky2/src/fri/prover.rs
@@ -23,7 +23,7 @@ pub fn fri_proof<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const
     lde_polynomial_values: PolynomialValues<F::Extension>,
     challenger: &mut Challenger<F, C::Hasher>,
     fri_params: &FriParams,
-    timing: &mut TimingTree,
+    _timing: &mut TimingTree,
 ) -> FriProof<F, C::Hasher, D>
 where
     [(); C::Hasher::HASH_SIZE]:,
@@ -33,7 +33,7 @@ where
 
     // Commit phase
     let (trees, final_coeffs) = timed!(
-        timing,
+        _timing,
         "fold codewords in the commitment phase",
         fri_committed_trees::<F, C, D>(
             lde_polynomial_coeffs,
@@ -46,7 +46,7 @@ where
     // PoW phase
     let current_hash = challenger.get_hash();
     let pow_witness = timed!(
-        timing,
+        _timing,
         "find proof-of-work witness",
         fri_proof_of_work::<F, C, D>(current_hash, &fri_params.config)
     );
diff --git a/plonky2/src/util/timing.rs b/plonky2/src/util/timing.rs
index 4250d688..70f34306 100644
--- a/plonky2/src/util/timing.rs
+++ b/plonky2/src/util/timing.rs
@@ -1,8 +1,10 @@
+#[cfg(feature = "timing")]
 use std::time::{Duration, Instant};
 
 use log::{log, Level};
 
 /// The hierarchy of scopes, and the time consumed by each one. Useful for profiling.
+#[cfg(feature = "timing")]
 pub struct TimingTree {
     /// The name of this scope.
     name: String,
@@ -16,13 +18,25 @@ pub struct TimingTree {
     children: Vec<TimingTree>,
 }
 
+#[cfg(not(feature = "timing"))]
+pub struct TimingTree(Level);
+
+#[cfg(feature = "timing")]
 impl Default for TimingTree {
     fn default() -> Self {
         TimingTree::new("root", Level::Debug)
     }
 }
 
+#[cfg(not(feature = "timing"))]
+impl Default for TimingTree {
+    fn default() -> Self {
+        TimingTree::new("", Level::Debug)
+    }
+}
+
 impl TimingTree {
+    #[cfg(feature = "timing")]
     pub fn new(root_name: &str, level: Level) -> Self {
         Self {
             name: root_name.to_string(),
@@ -33,18 +47,26 @@ impl TimingTree {
         }
     }
 
+    #[cfg(not(feature = "timing"))]
+    pub fn new(_root_name: &str, level: Level) -> Self {
+        Self(level)
+    }
+
     /// Whether this scope is still in scope.
+    #[cfg(feature = "timing")]
     fn is_open(&self) -> bool {
         self.exit_time.is_none()
     }
 
     /// A description of the stack of currently-open scopes.
+    #[cfg(feature = "timing")]
     pub fn open_stack(&self) -> String {
         let mut stack = Vec::new();
         self.open_stack_helper(&mut stack);
         stack.join(" > ")
     }
 
+    #[cfg(feature = "timing")]
     fn open_stack_helper(&self, stack: &mut Vec<String>) {
         if self.is_open() {
             stack.push(self.name.clone());
@@ -54,6 +76,7 @@ impl TimingTree {
         }
     }
 
+    #[cfg(feature = "timing")]
     pub fn push(&mut self, ctx: &str, mut level: log::Level) {
         assert!(self.is_open());
 
@@ -77,6 +100,7 @@ impl TimingTree {
     }
 
     /// Close the deepest open scope from this tree.
+    #[cfg(feature = "timing")]
     pub fn pop(&mut self) {
         assert!(self.is_open());
 
@@ -90,6 +114,7 @@ impl TimingTree {
         self.exit_time = Some(Instant::now());
     }
 
+    #[cfg(feature = "timing")]
     fn duration(&self) -> Duration {
         self.exit_time
             .unwrap_or_else(Instant::now)
@@ -97,6 +122,7 @@ impl TimingTree {
     }
 
     /// Filter out children with a low duration.
+    #[cfg(feature = "timing")]
     pub fn filter(&self, min_delta: Duration) -> Self {
         Self {
             name: self.name.clone(),
@@ -112,10 +138,17 @@ impl TimingTree {
         }
     }
 
+    #[cfg(feature = "timing")]
     pub fn print(&self) {
         self.print_helper(0);
     }
 
+    #[cfg(not(feature = "timing"))]
+    pub fn print(&self) {
+        log!(self.0, "TimingTree is not supported without the 'timing' feature enabled");
+    }
+
+    #[cfg(feature = "timing")]
     fn print_helper(&self, depth: usize) {
         let prefix = "| ".repeat(depth);
         log!(
@@ -135,16 +168,26 @@ impl TimingTree {
 #[macro_export]
 macro_rules! timed {
     ($timing_tree:expr, $level:expr, $ctx:expr, $exp:expr) => {{
+        #[cfg(feature = "timing")]
         $timing_tree.push($ctx, $level);
+
         let res = $exp;
+
+        #[cfg(feature = "timing")]
         $timing_tree.pop();
+
         res
     }};
     // If no context is specified, default to Debug.
     ($timing_tree:expr, $ctx:expr, $exp:expr) => {{
+        #[cfg(feature = "timing")]
         $timing_tree.push($ctx, log::Level::Debug);
+
         let res = $exp;
+
+        #[cfg(feature = "timing")]
         $timing_tree.pop();
+
         res
     }};
 }

From a6931d4555de46940ebb223196c18949ace5aa7f Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Wed, 27 Jul 2022 23:53:33 -0400
Subject: [PATCH 24/85] fmt

---
 plonky2/src/util/timing.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/plonky2/src/util/timing.rs b/plonky2/src/util/timing.rs
index 70f34306..d16ceead 100644
--- a/plonky2/src/util/timing.rs
+++ b/plonky2/src/util/timing.rs
@@ -145,7 +145,10 @@ impl TimingTree {
 
     #[cfg(not(feature = "timing"))]
     pub fn print(&self) {
-        log!(self.0, "TimingTree is not supported without the 'timing' feature enabled");
+        log!(
+            self.0,
+            "TimingTree is not supported without the 'timing' feature enabled"
+        );
     }
 
     #[cfg(feature = "timing")]

From 85111b0f02c9e8753f78583992d2017c7761f9a2 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Wed, 27 Jul 2022 23:58:16 -0400
Subject: [PATCH 25/85] fix missing underscore

---
 plonky2/src/fri/oracle.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index 7499e696..c6e3a1ff 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -84,7 +84,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
             Self::lde_values(&polynomials, rate_bits, blinding, fft_root_table)
         );
 
-        let mut leaves = timed!(timing, "transpose LDEs", transpose(&lde_values));
+        let mut leaves = timed!(_timing, "transpose LDEs", transpose(&lde_values));
         reverse_index_bits_in_place(&mut leaves);
         let merkle_tree = timed!(
             _timing,

From b7fa5e81c4b08e51e4aad37a26a2342de05848e7 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 28 Jul 2022 00:08:51 -0400
Subject: [PATCH 26/85] add timing to starky, evm, and system_zero

---
 system_zero/Cargo.toml         | 2 +-
 system_zero/src/system_zero.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml
index f1cb5729..458ce27a 100644
--- a/system_zero/Cargo.toml
+++ b/system_zero/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2" }
+plonky2 = { path = "../plonky2", features = ["timing"] }
 plonky2_util = { path = "../util" }
 starky = { path = "../starky" }
 anyhow = "1.0.40"
diff --git a/system_zero/src/system_zero.rs b/system_zero/src/system_zero.rs
index 19c2df8c..41d39cf7 100644
--- a/system_zero/src/system_zero.rs
+++ b/system_zero/src/system_zero.rs
@@ -69,7 +69,7 @@ impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
     }
 
     pub fn generate_trace(&self) -> Vec<PolynomialValues<F>> {
-        let mut timing = TimingTree::new("generate trace", log::Level::Debug);
+        let timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate the witness, except for permuted columns in the lookup argument.
         let trace_rows = timed!(

From 9f2fa07e1249852eacb0d8ae5617bf03d17729a6 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 28 Jul 2022 00:09:11 -0400
Subject: [PATCH 27/85] add rest of files

---
 evm/Cargo.toml    | 2 +-
 starky/Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/Cargo.toml b/evm/Cargo.toml
index c10ab104..c5ec7f0b 100644
--- a/evm/Cargo.toml
+++ b/evm/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2" }
+plonky2 = { path = "../plonky2", features = ["timing"] }
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 env_logger = "0.9.0"
diff --git a/starky/Cargo.toml b/starky/Cargo.toml
index 4e67856d..3ce62c56 100644
--- a/starky/Cargo.toml
+++ b/starky/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2" }
+plonky2 = { path = "../plonky2", features = ["timing"]}
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 env_logger = "0.9.0"

From fd0af3fa3ebebea54d5e14ee5570e490bbddbcd5 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 28 Jul 2022 00:21:21 -0400
Subject: [PATCH 28/85] allow unused mut when feature disabled

---
 evm/src/keccak/keccak_stark.rs | 1 +
 evm/src/lib.rs                 | 1 +
 evm/src/memory/memory_stark.rs | 1 +
 system_zero/src/system_zero.rs | 3 ++-
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/evm/src/keccak/keccak_stark.rs b/evm/src/keccak/keccak_stark.rs
index 53dd66ab..de80bf44 100644
--- a/evm/src/keccak/keccak_stark.rs
+++ b/evm/src/keccak/keccak_stark.rs
@@ -194,6 +194,7 @@ impl<F: RichField + Extendable<D>, const D: usize> KeccakStark<F, D> {
     }
 
     pub fn generate_trace(&self, inputs: Vec<[u64; NUM_INPUTS]>) -> Vec<PolynomialValues<F>> {
+        #[allow(unused_mut)]
         let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate the witness, except for permuted columns in the lookup argument.
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 47335db2..1248b3e0 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -2,6 +2,7 @@
 #![allow(clippy::needless_range_loop)]
 #![allow(clippy::too_many_arguments)]
 #![allow(clippy::type_complexity)]
+#![feature(let_chains)]
 #![feature(generic_const_exprs)]
 
 pub mod all_stark;
diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index 82e10869..bc38e66d 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -192,6 +192,7 @@ impl<F: RichField + Extendable<D>, const D: usize> MemoryStark<F, D> {
     }
 
     pub(crate) fn generate_trace(&self, memory_ops: Vec<MemoryOp>) -> Vec<PolynomialValues<F>> {
+        #[allow(unused_mut)]
         let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate most of the trace in row-major form.
diff --git a/system_zero/src/system_zero.rs b/system_zero/src/system_zero.rs
index 41d39cf7..ce44c283 100644
--- a/system_zero/src/system_zero.rs
+++ b/system_zero/src/system_zero.rs
@@ -69,7 +69,8 @@ impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
     }
 
     pub fn generate_trace(&self) -> Vec<PolynomialValues<F>> {
-        let timing = TimingTree::new("generate trace", log::Level::Debug);
+        #[allow(unused_mut)]
+        let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate the witness, except for permuted columns in the lookup argument.
         let trace_rows = timed!(

From 8ad0924bbb26a6fde6dbc488b877fb6d196eab3c Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Thu, 28 Jul 2022 00:24:54 -0400
Subject: [PATCH 29/85] apparently i need to update rust

---
 evm/src/lib.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 1248b3e0..47335db2 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -2,7 +2,6 @@
 #![allow(clippy::needless_range_loop)]
 #![allow(clippy::too_many_arguments)]
 #![allow(clippy::type_complexity)]
-#![feature(let_chains)]
 #![feature(generic_const_exprs)]
 
 pub mod all_stark;

From 87640d7e98683f7b6ec92e09d0564c43765c3522 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Thu, 28 Jul 2022 10:35:53 +0200
Subject: [PATCH 30/85] PR feedback

---
 evm/src/cpu/kernel/aggregator.rs                   |  1 +
 evm/src/cpu/kernel/asm/moddiv.asm                  | 14 ++++----------
 .../cpu/kernel/asm/secp256k1/inverse_scalar.asm    | 14 ++++----------
 evm/src/cpu/kernel/asm/secp256k1/lift_x.asm        |  5 +++--
 evm/src/cpu/kernel/asm/secp256k1/moddiv.asm        | 14 ++++----------
 evm/src/cpu/kernel/interpreter.rs                  |  2 +-
 evm/src/cpu/kernel/mod.rs                          |  5 +++--
 evm/src/cpu/kernel/prover_input.rs                 |  8 ++++----
 8 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 114a3d83..81be7f96 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -90,5 +90,6 @@ mod tests {
         // Make sure we can parse and assemble the entire kernel.
         let kernel = combined_kernel();
         debug!("Total kernel size: {} bytes", kernel.code.len());
+        dbg!("Total kernel size: {} bytes", kernel.code.len());
     }
 }
diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/moddiv.asm
index 2b76d054..780473b9 100644
--- a/evm/src/cpu/kernel/asm/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/moddiv.asm
@@ -30,16 +30,10 @@
     // stack: x
     PROVER_INPUT(ff::bn254_base::inverse)
     // stack: x^-1, x
-    %stack (inv, x) -> (inv, x, @BN_BASE, inv, x)
-    // stack: x^-1, x, N, x^-1, x
+    %stack (inv, x) -> (inv, x, @BN_BASE, inv)
+    // stack: x^-1, x, N, x^-1
     MULMOD
-    // stack: x^-1 * x, x^-1, x
-    PUSH 1
-    // stack: 1, x^-1 * x, x^-1, x
-    %assert_eq
-    // stack: x^-1, x
-    SWAP1
-    // stack: x, x^-1
-    POP
+    // stack: x^-1 * x, x^-1
+    %assert_eq_const(1)
     // stack: x^-1
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm b/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
index 7b859b4f..6e1563e2 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
@@ -22,16 +22,10 @@
     // stack: x
     PROVER_INPUT(ff::secp256k1_scalar::inverse)
     // stack: x^-1, x
-    %stack (inv, x) -> (inv, x, @SECP_SCALAR, inv, x)
-    // stack: x^-1, x, N, x^-1, x
+    %stack (inv, x) -> (inv, x, @SECP_SCALAR, inv)
+    // stack: x^-1, x, N, x^-1
     MULMOD
-    // stack: x^-1 * x, x^-1, x
-    PUSH 1
-    // stack: 1, x^-1 * x, x^-1, x
-    %assert_eq
-    // stack: x^-1, x
-    SWAP1
-    // stack: x, x^-1
-    POP
+    // stack: x^-1 * x, x^-1
+    %assert_eq_const(1)
     // stack: x^-1
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
index 4bef700e..dc765518 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
@@ -10,7 +10,7 @@
     // stack: x^3+7, x, parity
     DUP1
     // stack: x^3+7, x^3+7, parity
-    %sqrt_secp_base
+    %sqrt_secp_base_unsafe
     // stack: y, x^3+7, x, parity
     SWAP1
     // stack: x^3+7, y, parity
@@ -61,7 +61,8 @@
 %endmacro
 
 // Non-deterministically provide the square root modulo N.
-%macro sqrt_secp_base
+// Note: The square root is not checked and the macro does not panic if `x` is not a square.
+%macro sqrt_secp_base_unsafe
     // stack: x
     PROVER_INPUT(ff::secp256k1_base::sqrt)
     // stack: √x, x
diff --git a/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm b/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
index fd077b11..d878dc14 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
@@ -30,16 +30,10 @@
     // stack: x
     PROVER_INPUT(ff::secp256k1_base::inverse)
     // stack: x^-1, x
-    %stack (inv, x) -> (inv, x, @SECP_BASE, inv, x)
-    // stack: x^-1, x, N, x^-1, x
+    %stack (inv, x) -> (inv, x, @SECP_BASE, inv)
+    // stack: x^-1, x, N, x^-1
     MULMOD
-    // stack: x^-1 * x, x^-1, x
-    PUSH 1
-    // stack: 1, x^-1 * x, x^-1, x
-    %assert_eq
-    // stack: x^-1, x
-    SWAP1
-    // stack: x, x^-1
-    POP
+    // stack: x^-1 * x, x^-1
+    %assert_eq_const(1)
     // stack: x^-1
 %endmacro
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 0634b864..61272c24 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -336,7 +336,7 @@ impl<'a> Interpreter<'a> {
             .prover_inputs_map
             .get(&(self.offset - 1))
             .ok_or_else(|| anyhow!("Offset not in prover inputs."))?;
-        let output = prover_input_fn.run(self.stack.clone());
+        let output = prover_input_fn.run(&self.stack);
         self.stack.push(output);
         self.prover_inputs.push(output);
         Ok(())
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index d146884a..5b9b1b4a 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -1,14 +1,15 @@
 pub mod aggregator;
 pub mod assembler;
 mod ast;
-pub mod interpreter;
 pub(crate) mod keccak_util;
 mod opcodes;
 mod parser;
-mod prover_input;
+pub mod prover_input;
 mod stack_manipulation;
 mod txn_fields;
 
+#[cfg(test)]
+mod interpreter;
 #[cfg(test)]
 mod tests;
 
diff --git a/evm/src/cpu/kernel/prover_input.rs b/evm/src/cpu/kernel/prover_input.rs
index b27eb561..38e1914e 100644
--- a/evm/src/cpu/kernel/prover_input.rs
+++ b/evm/src/cpu/kernel/prover_input.rs
@@ -20,7 +20,7 @@ impl From<Vec<String>> for ProverInputFn {
 
 impl ProverInputFn {
     /// Run the function on the stack.
-    pub(crate) fn run(&self, stack: Vec<U256>) -> U256 {
+    pub fn run(&self, stack: &[U256]) -> U256 {
         match self.0[0].as_str() {
             "ff" => self.run_ff(stack),
             "mpt" => todo!(),
@@ -29,10 +29,10 @@ impl ProverInputFn {
     }
 
     // Finite field operations.
-    fn run_ff(&self, mut stack: Vec<U256>) -> U256 {
+    fn run_ff(&self, stack: &[U256]) -> U256 {
         let field = Field::from_str(self.0[1].as_str()).unwrap();
         let op = FieldOp::from_str(self.0[2].as_str()).unwrap();
-        let x = stack.pop().expect("Empty stack");
+        let x = *stack.last().expect("Empty stack");
         field.op(op, x)
     }
 
@@ -130,7 +130,7 @@ fn modexp(x: U256, e: U256, n: U256) -> U256 {
     let mut product = U256::one();
 
     for j in 0..256 {
-        if !(e >> j & U256::one()).is_zero() {
+        if e.bit(j) {
             product = U256::try_from(product.full_mul(current) % n).unwrap();
         }
         current = U256::try_from(current.full_mul(current) % n).unwrap();

From 55d0eddecb261207ccdc3f22f5e687a7bf5a2785 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Thu, 28 Jul 2022 14:11:06 -0700
Subject: [PATCH 31/85] profiling

---
 evm/src/recursive_verifier.rs | 80 ++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 30 deletions(-)

diff --git a/evm/src/recursive_verifier.rs b/evm/src/recursive_verifier.rs
index 17db048b..b69a5519 100644
--- a/evm/src/recursive_verifier.rs
+++ b/evm/src/recursive_verifier.rs
@@ -71,46 +71,66 @@ pub fn verify_proof_circuit<
         &nums_permutation_zs,
     );
 
-    verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+    with_context!(
         builder,
-        cpu_stark,
-        &all_proof.stark_proofs[Table::Cpu as usize],
-        &stark_challenges[Table::Cpu as usize],
-        &ctl_vars_per_table[Table::Cpu as usize],
-        inner_config,
+        "verify CPU proof",
+        verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+            builder,
+            cpu_stark,
+            &all_proof.stark_proofs[Table::Cpu as usize],
+            &stark_challenges[Table::Cpu as usize],
+            &ctl_vars_per_table[Table::Cpu as usize],
+            inner_config,
+        )
     );
-    verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+    with_context!(
         builder,
-        keccak_stark,
-        &all_proof.stark_proofs[Table::Keccak as usize],
-        &stark_challenges[Table::Keccak as usize],
-        &ctl_vars_per_table[Table::Keccak as usize],
-        inner_config,
+        "verify Keccak proof",
+        verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+            builder,
+            keccak_stark,
+            &all_proof.stark_proofs[Table::Keccak as usize],
+            &stark_challenges[Table::Keccak as usize],
+            &ctl_vars_per_table[Table::Keccak as usize],
+            inner_config,
+        )
     );
-    verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+    with_context!(
         builder,
-        logic_stark,
-        &all_proof.stark_proofs[Table::Logic as usize],
-        &stark_challenges[Table::Logic as usize],
-        &ctl_vars_per_table[Table::Logic as usize],
-        inner_config,
+        "verify logic proof",
+        verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+            builder,
+            logic_stark,
+            &all_proof.stark_proofs[Table::Logic as usize],
+            &stark_challenges[Table::Logic as usize],
+            &ctl_vars_per_table[Table::Logic as usize],
+            inner_config,
+        )
     );
-    verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+    with_context!(
         builder,
-        memory_stark,
-        &all_proof.stark_proofs[Table::Memory as usize],
-        &stark_challenges[Table::Memory as usize],
-        &ctl_vars_per_table[Table::Memory as usize],
-        inner_config,
+        "verify memory proof",
+        verify_stark_proof_with_challenges_circuit::<F, C, _, D>(
+            builder,
+            memory_stark,
+            &all_proof.stark_proofs[Table::Memory as usize],
+            &stark_challenges[Table::Memory as usize],
+            &ctl_vars_per_table[Table::Memory as usize],
+            inner_config,
+        )
     );
 
-    verify_cross_table_lookups_circuit::<F, C, D>(
+    with_context!(
         builder,
-        cross_table_lookups,
-        &all_proof.stark_proofs,
-        ctl_challenges,
-        inner_config,
-    )
+        "verify cross-table lookups",
+        verify_cross_table_lookups_circuit::<F, C, D>(
+            builder,
+            cross_table_lookups,
+            &all_proof.stark_proofs,
+            ctl_challenges,
+            inner_config,
+        )
+    );
 }
 
 /// Recursively verifies an inner proof.

From 563de9e1c5b02f7dab6ccf5db319cff0a666a65d Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Thu, 28 Jul 2022 15:51:33 -0700
Subject: [PATCH 32/85] Small fix for type 0 txns

Always parse "to" as a scalar. No need for a branch; it's left over from when I was trying to enforce canonical RLP (in which case "to" must be 0 or 20 bytes).

The old code would be wrong if we had multiple txns per proof: when to=0 we wouldn't write that field to memory, so it could retain a stale value from a previous txn.
---
 evm/src/cpu/kernel/asm/transactions/type_0.asm | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/transactions/type_0.asm b/evm/src/cpu/kernel/asm/transactions/type_0.asm
index 543095a7..4e39f6c3 100644
--- a/evm/src/cpu/kernel/asm/transactions/type_0.asm
+++ b/evm/src/cpu/kernel/asm/transactions/type_0.asm
@@ -57,16 +57,7 @@ store_gas_limit:
     %stack (pos, gas_limit) -> (@TXN_FIELD_GAS_LIMIT, gas_limit, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
-    // Peak at the RLP to see if the next byte is zero.
-    // If so, there is no value field, so skip the store_to step.
     // stack: pos
-    DUP1
-    %mload_current(@SEGMENT_RLP_RAW)
-    ISZERO
-    // stack: to_empty, pos
-    %jumpi(parse_value)
-
-    // If we got here, there is a "to" field.
     PUSH store_to
     SWAP1
     // stack: pos, store_to

From 16c2bee4b9e7aa3413b89900b94a5babf2caee3e Mon Sep 17 00:00:00 2001
From: Jacqueline Nabaglo <jakub@mirprotocol.org>
Date: Thu, 28 Jul 2022 17:30:20 -0700
Subject: [PATCH 33/85] Increment program counter on native instructions (#641)

---
 evm/src/all_stark.rs        |  8 ++----
 evm/src/cpu/columns/mod.rs  |  3 --
 evm/src/cpu/control_flow.rs | 56 +++++++++++++++++++++++++++++++------
 3 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs
index 038c1b93..fbcd2115 100644
--- a/evm/src/all_stark.rs
+++ b/evm/src/all_stark.rs
@@ -260,6 +260,7 @@ mod tests {
             let mut row: cpu::columns::CpuColumnsView<F> =
                 [F::ZERO; CpuStark::<F, D>::COLUMNS].into();
             row.is_cpu_cycle = F::ONE;
+            row.program_counter = F::from_canonical_usize(i);
             row.opcode = [
                 (logic::columns::IS_AND, 0x16),
                 (logic::columns::IS_OR, 0x17),
@@ -319,10 +320,11 @@ mod tests {
         }
 
         // Pad to a power of two.
-        for _ in cpu_trace_rows.len()..cpu_trace_rows.len().next_power_of_two() {
+        for i in 0..cpu_trace_rows.len().next_power_of_two() - cpu_trace_rows.len() {
             let mut row: cpu::columns::CpuColumnsView<F> =
                 [F::ZERO; CpuStark::<F, D>::COLUMNS].into();
             row.is_cpu_cycle = F::ONE;
+            row.program_counter = F::from_canonical_usize(i + num_logic_rows);
             cpu_stark.generate(row.borrow_mut());
             cpu_trace_rows.push(row.into());
         }
@@ -335,10 +337,6 @@ mod tests {
             let last_row: &mut cpu::columns::CpuColumnsView<F> =
                 cpu_trace_rows[num_rows - 1].borrow_mut();
             last_row.program_counter = halt_label;
-
-            let second_last_row: &mut cpu::columns::CpuColumnsView<F> =
-                cpu_trace_rows[num_rows - 2].borrow_mut();
-            second_last_row.next_program_counter = halt_label;
         }
 
         trace_rows_to_poly_values(cpu_trace_rows)
diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs
index fbf20af2..1f0eff2d 100644
--- a/evm/src/cpu/columns/mod.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -146,9 +146,6 @@ pub struct CpuColumnsView<T: Copy> {
     /// If CPU cycle: the opcode, broken up into bits in **big-endian** order.
     pub opcode_bits: [T; 8],
 
-    /// If CPU cycle: The program counter for the next instruction.
-    pub next_program_counter: T,
-
     /// Filter. 1 iff a Keccak permutation is computed on this row.
     pub is_keccak: T,
 
diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs
index cf24afca..90a76d46 100644
--- a/evm/src/cpu/control_flow.rs
+++ b/evm/src/cpu/control_flow.rs
@@ -5,9 +5,38 @@ use plonky2::hash::hash_types::RichField;
 use plonky2::iop::ext_target::ExtensionTarget;
 
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
-use crate::cpu::columns::CpuColumnsView;
+use crate::cpu::columns::{CpuColumnsView, COL_MAP};
 use crate::cpu::kernel::aggregator::KERNEL;
 
+// TODO: This list is incomplete.
+const NATIVE_INSTRUCTIONS: [usize; 25] = [
+    COL_MAP.is_add,
+    COL_MAP.is_mul,
+    COL_MAP.is_sub,
+    COL_MAP.is_div,
+    COL_MAP.is_sdiv,
+    COL_MAP.is_mod,
+    COL_MAP.is_smod,
+    COL_MAP.is_addmod,
+    COL_MAP.is_mulmod,
+    COL_MAP.is_signextend,
+    COL_MAP.is_lt,
+    COL_MAP.is_gt,
+    COL_MAP.is_slt,
+    COL_MAP.is_sgt,
+    COL_MAP.is_eq,
+    COL_MAP.is_iszero,
+    COL_MAP.is_and,
+    COL_MAP.is_or,
+    COL_MAP.is_xor,
+    COL_MAP.is_not,
+    COL_MAP.is_byte,
+    COL_MAP.is_shl,
+    COL_MAP.is_shr,
+    COL_MAP.is_sar,
+    COL_MAP.is_pop,
+];
+
 fn get_halt_pcs<F: Field>() -> (F, F) {
     let halt_pc0 = KERNEL.global_labels["halt_pc0"];
     let halt_pc1 = KERNEL.global_labels["halt_pc1"];
@@ -26,10 +55,15 @@ pub fn eval_packed_generic<P: PackedField>(
     // Once we start executing instructions, then we continue until the end of the table.
     yield_constr.constraint_transition(lv.is_cpu_cycle * (nv.is_cpu_cycle - P::ONES));
 
-    // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the
-    // next row.
-    yield_constr
-        .constraint_transition(lv.is_cpu_cycle * (nv.program_counter - lv.next_program_counter));
+    // If a row is a CPU cycle and executing a native instruction (implemented as a table row; not
+    // microcoded) then the program counter is incremented by 1 to obtain the next row's program
+    // counter.
+    let is_native_instruction: P = NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i]).sum();
+    yield_constr.constraint_transition(
+        lv.is_cpu_cycle
+            * is_native_instruction
+            * (lv.program_counter - nv.program_counter + P::ONES),
+    );
 
     // If a non-CPU cycle row is followed by a CPU cycle row, then the `program_counter` of the CPU
     // cycle row is 0.
@@ -64,11 +98,15 @@ pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
         yield_constr.constraint_transition(builder, constr);
     }
 
-    // If a row is a CPU cycle, then its `next_program_counter` becomes the `program_counter` of the
-    // next row.
+    // If a row is a CPU cycle and executing a native instruction (implemented as a table row; not
+    // microcoded) then the program counter is incremented by 1 to obtain the next row's program
+    // counter.
     {
-        let constr = builder.sub_extension(nv.program_counter, lv.next_program_counter);
-        let constr = builder.mul_extension(lv.is_cpu_cycle, constr);
+        let is_native_instruction =
+            builder.add_many_extension(NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i]));
+        let filter = builder.mul_extension(lv.is_cpu_cycle, is_native_instruction);
+        let pc_diff = builder.sub_extension(lv.program_counter, nv.program_counter);
+        let constr = builder.mul_add_extension(filter, pc_diff, filter);
         yield_constr.constraint_transition(builder, constr);
     }
 

From eb96216278fadc4f06d425a048abcaf1ba18b09b Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Fri, 29 Jul 2022 11:32:55 +0200
Subject: [PATCH 34/85] Typo

---
 evm/src/cpu/kernel/asm/secp256k1/lift_x.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
index dc765518..77e484be 100644
--- a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
+++ b/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
@@ -61,7 +61,7 @@
 %endmacro
 
 // Non-deterministically provide the square root modulo N.
-// Note: The square root is not checked and the macro doesn't not panic if `x` is not a square.
+// Note: The square root is not checked and the macro doesn't panic if `x` is not a square.
 %macro sqrt_secp_base_unsafe
     // stack: x
     PROVER_INPUT(ff::secp256k1_base::sqrt)

From 209dc26dc95969e7e269999875e01c1cbd02f9e1 Mon Sep 17 00:00:00 2001
From: wborgeaud <williamborgeaud@gmail.com>
Date: Fri, 29 Jul 2022 14:29:10 +0200
Subject: [PATCH 35/85] Remove stack and code in interpreter

---
 evm/src/cpu/kernel/interpreter.rs     |  73 +++++++++------
 evm/src/cpu/kernel/tests/curve_ops.rs | 124 +++++++++++++++++++-------
 evm/src/cpu/kernel/tests/ecrecover.rs |   8 +-
 evm/src/cpu/kernel/tests/exp.rs       |  24 +++--
 4 files changed, 164 insertions(+), 65 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 61272c24..5c2f6514 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -6,7 +6,7 @@ use keccak_hash::keccak;
 
 use crate::cpu::kernel::assembler::Kernel;
 use crate::cpu::kernel::prover_input::ProverInputFn;
-use crate::generation::memory::MemoryContextState;
+use crate::generation::memory::{MemoryContextState, MemorySegmentState};
 use crate::memory::segments::Segment;
 
 /// Halt interpreter execution whenever a jump to this offset is done.
@@ -25,6 +25,18 @@ impl Default for InterpreterMemory {
     }
 }
 
+impl InterpreterMemory {
+    fn with_code_and_stack(code: &[u8], stack: Vec<U256>) -> Self {
+        let mut mem = Self::default();
+        for (i, b) in code.iter().copied().enumerate() {
+            mem.context_memory[0].segments[Segment::Code as usize].set(i, b.into());
+        }
+        mem.context_memory[0].segments[Segment::Stack as usize].content = stack;
+
+        mem
+    }
+}
+
 impl InterpreterMemory {
     fn mload_general(&self, context: usize, segment: Segment, offset: usize) -> U256 {
         self.context_memory[context].segments[segment as usize].get(offset)
@@ -35,12 +47,9 @@ impl InterpreterMemory {
     }
 }
 
-// TODO: Remove `code` and `stack` fields as they are contained in `memory`.
 pub struct Interpreter<'a> {
-    code: &'a [u8],
     jumpdests: Vec<usize>,
     offset: usize,
-    pub(crate) stack: Vec<U256>,
     context: usize,
     memory: InterpreterMemory,
     prover_inputs_map: &'a HashMap<usize, ProverInputFn>,
@@ -68,11 +77,9 @@ pub fn run<'a>(
     prover_inputs: &'a HashMap<usize, ProverInputFn>,
 ) -> anyhow::Result<Interpreter<'a>> {
     let mut interpreter = Interpreter {
-        code,
         jumpdests: find_jumpdests(code),
         offset: initial_offset,
-        stack: initial_stack,
-        memory: InterpreterMemory::default(),
+        memory: InterpreterMemory::with_code_and_stack(code, initial_stack),
         prover_inputs_map: prover_inputs,
         prover_inputs: Vec::new(),
         context: 0,
@@ -87,28 +94,43 @@ pub fn run<'a>(
 }
 
 impl<'a> Interpreter<'a> {
-    fn slice(&self, n: usize) -> &[u8] {
-        &self.code[self.offset..self.offset + n]
+    fn code(&self) -> &MemorySegmentState {
+        &self.memory.context_memory[self.context].segments[Segment::Code as usize]
+    }
+
+    fn code_slice(&self, n: usize) -> Vec<u8> {
+        self.code().content[self.offset..self.offset + n]
+            .iter()
+            .map(|u256| u256.byte(0))
+            .collect::<Vec<_>>()
     }
 
     fn incr(&mut self, n: usize) {
         self.offset += n;
     }
 
+    pub(crate) fn stack(&self) -> &[U256] {
+        &self.memory.context_memory[self.context].segments[Segment::Stack as usize].content
+    }
+
+    fn stack_mut(&mut self) -> &mut Vec<U256> {
+        &mut self.memory.context_memory[self.context].segments[Segment::Stack as usize].content
+    }
+
     fn push(&mut self, x: U256) {
-        self.stack.push(x);
+        self.stack_mut().push(x);
     }
 
     fn push_bool(&mut self, x: bool) {
-        self.stack.push(if x { U256::one() } else { U256::zero() });
+        self.push(if x { U256::one() } else { U256::zero() });
     }
 
     fn pop(&mut self) -> U256 {
-        self.stack.pop().expect("Pop on empty stack.")
+        self.stack_mut().pop().expect("Pop on empty stack.")
     }
 
     fn run_opcode(&mut self) -> anyhow::Result<()> {
-        let opcode = self.code.get(self.offset).copied().unwrap_or_default();
+        let opcode = self.code().get(self.offset).byte(0);
         self.incr(1);
         match opcode {
             0x00 => self.run_stop(),                                   // "STOP",
@@ -336,8 +358,8 @@ impl<'a> Interpreter<'a> {
             .prover_inputs_map
             .get(&(self.offset - 1))
             .ok_or_else(|| anyhow!("Offset not in prover inputs."))?;
-        let output = prover_input_fn.run(&self.stack);
-        self.stack.push(output);
+        let output = prover_input_fn.run(self.stack());
+        self.push(output);
         self.prover_inputs.push(output);
         Ok(())
     }
@@ -406,18 +428,18 @@ impl<'a> Interpreter<'a> {
     }
 
     fn run_push(&mut self, num_bytes: u8) {
-        let x = U256::from_big_endian(self.slice(num_bytes as usize));
+        let x = U256::from_big_endian(&self.code_slice(num_bytes as usize));
         self.incr(num_bytes as usize);
         self.push(x);
     }
 
     fn run_dup(&mut self, n: u8) {
-        self.push(self.stack[self.stack.len() - n as usize]);
+        self.push(self.stack()[self.stack().len() - n as usize]);
     }
 
     fn run_swap(&mut self, n: u8) {
-        let len = self.stack.len();
-        self.stack.swap(len - 1, len - n as usize - 1);
+        let len = self.stack().len();
+        self.stack_mut().swap(len - 1, len - n as usize - 1);
     }
 
     fn run_get_context(&mut self) {
@@ -468,7 +490,7 @@ fn find_jumpdests(code: &[u8]) -> Vec<usize> {
 mod tests {
     use std::collections::HashMap;
 
-    use crate::cpu::kernel::interpreter::{run, Interpreter};
+    use crate::cpu::kernel::interpreter::run;
     use crate::memory::segments::Segment;
 
     #[test]
@@ -477,8 +499,8 @@ mod tests {
             0x60, 0x1, 0x60, 0x2, 0x1, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56,
         ]; // PUSH1, 1, PUSH1, 2, ADD, PUSH4 deadbeef, JUMP
         assert_eq!(
-            run(&code, 0, vec![], &HashMap::new())?.stack,
-            vec![0x3.into()],
+            run(&code, 0, vec![], &HashMap::new())?.stack(),
+            &[0x3.into()],
         );
         Ok(())
     }
@@ -504,14 +526,13 @@ mod tests {
         ];
         let pis = HashMap::new();
         let run = run(&code, 0, vec![], &pis)?;
-        let Interpreter { stack, memory, .. } = run;
-        assert_eq!(stack, vec![0xff.into(), 0xff00.into()]);
+        assert_eq!(run.stack(), &[0xff.into(), 0xff00.into()]);
         assert_eq!(
-            memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x27),
+            run.memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x27),
             0x42.into()
         );
         assert_eq!(
-            memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x1f),
+            run.memory.context_memory[0].segments[Segment::MainMemory as usize].get(0x1f),
             0xff.into()
         );
         Ok(())
diff --git a/evm/src/cpu/kernel/tests/curve_ops.rs b/evm/src/cpu/kernel/tests/curve_ops.rs
index 44609f21..0aaa94ea 100644
--- a/evm/src/cpu/kernel/tests/curve_ops.rs
+++ b/evm/src/cpu/kernel/tests/curve_ops.rs
@@ -43,76 +43,110 @@ mod bn {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Addition with invalid point(s) #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #2
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #3
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
         // Addition with invalid point(s) #4
         let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #5
         let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, vec![U256::MAX, U256::MAX]);
 
         // Multiple calls
@@ -126,7 +160,9 @@ mod bn {
             point0.1,
             point0.0,
         ])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())
@@ -176,55 +212,79 @@ mod secp {
 
         // Standard addition #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
         // Standard addition #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?;
-        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?.stack;
+        let stack = run(&kernel.code, ec_add, initial_stack, &kernel.prover_inputs)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point2.1, point2.0])?);
 
         // Standard doubling #1
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #2
         let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_double, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
         // Standard doubling #3
         let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point3.1, point3.0])?);
 
         // Addition with identity #1
         let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #2
         let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point1.1, point1.0])?);
         // Addition with identity #3
         let initial_stack =
             u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Scalar multiplication #1
         let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
         // Scalar multiplication #2
         let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
         // Scalar multiplication #3
         let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point0.1, point0.0])?);
         // Scalar multiplication #4
         let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?;
-        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_mul, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([identity.1, identity.0])?);
 
         // Multiple calls
@@ -238,7 +298,9 @@ mod secp {
             point0.1,
             point0.0,
         ])?;
-        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?.stack;
+        let stack = run_with_kernel(&kernel, ec_add, initial_stack)?
+            .stack()
+            .to_vec();
         assert_eq!(stack, u256ify([point4.1, point4.0])?);
 
         Ok(())
diff --git a/evm/src/cpu/kernel/tests/ecrecover.rs b/evm/src/cpu/kernel/tests/ecrecover.rs
index b105cf47..c01adc53 100644
--- a/evm/src/cpu/kernel/tests/ecrecover.rs
+++ b/evm/src/cpu/kernel/tests/ecrecover.rs
@@ -18,7 +18,9 @@ fn test_valid_ecrecover(
 ) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?.stack;
+    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?
+        .stack()
+        .to_vec();
     assert_eq!(stack[0], U256::from_str(expected).unwrap());
 
     Ok(())
@@ -27,7 +29,9 @@ fn test_valid_ecrecover(
 fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel) -> Result<()> {
     let ecrecover = kernel.global_labels["ecrecover"];
     let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?;
-    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?.stack;
+    let stack = run_with_kernel(kernel, ecrecover, initial_stack)?
+        .stack()
+        .to_vec();
     assert_eq!(stack, vec![U256::MAX]);
 
     Ok(())
diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs
index 049fd23a..388a1ac3 100644
--- a/evm/src/cpu/kernel/tests/exp.rs
+++ b/evm/src/cpu/kernel/tests/exp.rs
@@ -18,26 +18,38 @@ fn test_exp() -> Result<()> {
 
     // Random input
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a];
-    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
+        .stack()
+        .to_vec();
     let initial_stack = vec![b, a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?
+        .stack()
+        .to_vec();
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 base
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()];
-    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
+        .stack()
+        .to_vec();
     let initial_stack = vec![b, U256::zero()];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?
+        .stack()
+        .to_vec();
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 exponent
     let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a];
-    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?.stack;
+    let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
+        .stack()
+        .to_vec();
     let initial_stack = vec![U256::zero(), a];
     let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP
-    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?.stack;
+    let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)?
+        .stack()
+        .to_vec();
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     Ok(())

From 24bb6323584804089c0108131dc5d7c37d4c3f0a Mon Sep 17 00:00:00 2001
From: Jacqueline Nabaglo <jakub@mirprotocol.org>
Date: Fri, 29 Jul 2022 12:04:42 -0700
Subject: [PATCH 36/85] Implement PANIC instruction (#644)

* Implement PANIC instruction

* Minor: comments
---
 evm/src/cpu/columns/mod.rs | 2 +-
 evm/src/cpu/decode.rs      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs
index 1f0eff2d..01260ec2 100644
--- a/evm/src/cpu/columns/mod.rs
+++ b/evm/src/cpu/columns/mod.rs
@@ -110,7 +110,7 @@ pub struct CpuColumnsView<T: Copy> {
     pub is_log2: T,
     pub is_log3: T,
     pub is_log4: T,
-    pub is_panic: T,
+    // PANIC does not get a flag; it fails at the decode stage.
     pub is_create: T,
     pub is_call: T,
     pub is_callcode: T,
diff --git a/evm/src/cpu/decode.rs b/evm/src/cpu/decode.rs
index 233c01c4..cf43f909 100644
--- a/evm/src/cpu/decode.rs
+++ b/evm/src/cpu/decode.rs
@@ -15,7 +15,7 @@ use crate::cpu::columns::{CpuColumnsView, COL_MAP};
 // - its start index is a multiple of its length (it is aligned)
 // These properties permit us to check if an opcode belongs to a block of length 2^n by checking its
 // top 8-n bits.
-const OPCODES: [(u64, usize, usize); 107] = [
+const OPCODES: [(u64, usize, usize); 106] = [
     // (start index of block, number of top bits to check (log2), flag column)
     (0x00, 0, COL_MAP.is_stop),
     (0x01, 0, COL_MAP.is_add),
@@ -102,7 +102,7 @@ const OPCODES: [(u64, usize, usize); 107] = [
     (0xa2, 0, COL_MAP.is_log2),
     (0xa3, 0, COL_MAP.is_log3),
     (0xa4, 0, COL_MAP.is_log4),
-    (0xa5, 0, COL_MAP.is_panic),
+    // Opcode 0xa5 is PANIC. Make the proof unverifiable by giving it no flag to decode to.
     (0xa6, 1, COL_MAP.is_invalid_8),  // 0xa6-0xa7
     (0xa8, 3, COL_MAP.is_invalid_9),  // 0xa8-0xaf
     (0xb0, 4, COL_MAP.is_invalid_10), // 0xb0-0xbf

From 86a797b1db1fd2ed18147c17f50518ee1a9cbfdf Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sat, 30 Jul 2022 22:24:11 -0700
Subject: [PATCH 37/85] Add an asm/curve/ directory

---
 evm/src/cpu/kernel/aggregator.rs              | 20 +++++++++----------
 .../asm/{ => curve/bn254}/curve_add.asm       |  0
 .../asm/{ => curve/bn254}/curve_mul.asm       | 13 ------------
 .../kernel/asm/{ => curve/bn254}/moddiv.asm   |  0
 evm/src/cpu/kernel/asm/curve/common.asm       | 12 +++++++++++
 .../asm/{ => curve}/secp256k1/curve_add.asm   |  0
 .../asm/{ => curve}/secp256k1/curve_mul.asm   |  0
 .../{ => curve}/secp256k1/inverse_scalar.asm  |  0
 .../asm/{ => curve}/secp256k1/lift_x.asm      |  0
 .../asm/{ => curve}/secp256k1/moddiv.asm      |  0
 10 files changed, 22 insertions(+), 23 deletions(-)
 rename evm/src/cpu/kernel/asm/{ => curve/bn254}/curve_add.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => curve/bn254}/curve_mul.asm (92%)
 rename evm/src/cpu/kernel/asm/{ => curve/bn254}/moddiv.asm (100%)
 create mode 100644 evm/src/cpu/kernel/asm/curve/common.asm
 rename evm/src/cpu/kernel/asm/{ => curve}/secp256k1/curve_add.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => curve}/secp256k1/curve_mul.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => curve}/secp256k1/inverse_scalar.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => curve}/secp256k1/lift_x.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => curve}/secp256k1/moddiv.asm (100%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 75e7e0ac..e1efeaa1 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -48,17 +48,18 @@ pub(crate) fn combined_kernel() -> Kernel {
     let files = vec![
         include_str!("asm/assertions.asm"),
         include_str!("asm/basic_macros.asm"),
+        include_str!("asm/curve/bn254/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_add.asm"),
+        include_str!("asm/curve/bn254/moddiv.asm"),
+        include_str!("asm/curve/common.asm"),
+        include_str!("asm/curve/secp256k1/curve_mul.asm"),
+        include_str!("asm/curve/secp256k1/curve_add.asm"),
+        include_str!("asm/curve/secp256k1/moddiv.asm"),
+        include_str!("asm/curve/secp256k1/lift_x.asm"),
+        include_str!("asm/curve/secp256k1/inverse_scalar.asm"),
         include_str!("asm/exp.asm"),
-        include_str!("asm/curve_mul.asm"),
-        include_str!("asm/curve_add.asm"),
         include_str!("asm/halt.asm"),
         include_str!("asm/memory.asm"),
-        include_str!("asm/moddiv.asm"),
-        include_str!("asm/secp256k1/curve_mul.asm"),
-        include_str!("asm/secp256k1/curve_add.asm"),
-        include_str!("asm/secp256k1/moddiv.asm"),
-        include_str!("asm/secp256k1/lift_x.asm"),
-        include_str!("asm/secp256k1/inverse_scalar.asm"),
         include_str!("asm/ecrecover.asm"),
         include_str!("asm/rlp/encode.asm"),
         include_str!("asm/rlp/decode.asm"),
@@ -90,7 +91,6 @@ mod tests {
 
         // Make sure we can parse and assemble the entire kernel.
         let kernel = combined_kernel();
-        debug!("Total kernel size: {} bytes", kernel.code.len());
-        dbg!("Total kernel size: {} bytes", kernel.code.len());
+        info!("Total kernel size: {} bytes", kernel.code.len());
     }
 }
diff --git a/evm/src/cpu/kernel/asm/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve_add.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
diff --git a/evm/src/cpu/kernel/asm/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
similarity index 92%
rename from evm/src/cpu/kernel/asm/curve_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
index d976d9d5..62cf2235 100644
--- a/evm/src/cpu/kernel/asm/curve_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
@@ -99,16 +99,3 @@ odd_scalar:
     JUMPDEST
     // stack: x', y', x, y, retdest
     %jump(ec_add_valid_points)
-
-global ret_zero_ec_mul:
-    JUMPDEST
-    // stack: x, y, s, retdest
-    %pop3
-    // stack: retdest
-    PUSH 0
-    // stack: 0, retdest
-    PUSH 0
-    // stack: 0, 0, retdest
-    SWAP2
-    // stack: retdest, 0, 0
-    JUMP
diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/moddiv.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm
diff --git a/evm/src/cpu/kernel/asm/curve/common.asm b/evm/src/cpu/kernel/asm/curve/common.asm
new file mode 100644
index 00000000..107dc63c
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/common.asm
@@ -0,0 +1,12 @@
+global ret_zero_ec_mul:
+    JUMPDEST
+    // stack: x, y, s, retdest
+    %pop3
+    // stack: retdest
+    PUSH 0
+    // stack: 0, retdest
+    PUSH 0
+    // stack: 0, 0, retdest
+    SWAP2
+    // stack: retdest, 0, 0
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/secp256k1/curve_add.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/secp256k1/curve_add.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/curve_add.asm
diff --git a/evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/curve_mul.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/secp256k1/curve_mul.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/curve_mul.asm
diff --git a/evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/inverse_scalar.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/secp256k1/inverse_scalar.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/inverse_scalar.asm
diff --git a/evm/src/cpu/kernel/asm/secp256k1/lift_x.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/lift_x.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/secp256k1/lift_x.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/lift_x.asm
diff --git a/evm/src/cpu/kernel/asm/secp256k1/moddiv.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/secp256k1/moddiv.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm

From f49170a8b8c3c0b87080fb4a8d851aa0833f4f4e Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sat, 30 Jul 2022 22:31:07 -0700
Subject: [PATCH 38/85] fix

---
 evm/src/cpu/kernel/aggregator.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index e1efeaa1..960ac371 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -85,12 +85,12 @@ mod tests {
 
     #[test]
     fn make_kernel() {
-        let _ = env_logger::Builder::from_default_env()
-            .format_timestamp(None)
-            .try_init();
+        env_logger::init_from_env(
+            env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "debug"),
+        );
 
         // Make sure we can parse and assemble the entire kernel.
         let kernel = combined_kernel();
-        info!("Total kernel size: {} bytes", kernel.code.len());
+        debug!("Total kernel size: {} bytes", kernel.code.len());
     }
 }

From 56d814e467f51b4b6da3e5c5339aed82db54b5eb Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 09:22:13 -0700
Subject: [PATCH 39/85] fix

---
 evm/src/cpu/kernel/aggregator.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 960ac371..7f45b272 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -79,15 +79,14 @@ pub(crate) fn combined_kernel() -> Kernel {
 
 #[cfg(test)]
 mod tests {
+    use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV};
     use log::debug;
 
     use crate::cpu::kernel::aggregator::combined_kernel;
 
     #[test]
     fn make_kernel() {
-        env_logger::init_from_env(
-            env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "debug"),
-        );
+        let _ = try_init_from_env(Env::default().filter_or(DEFAULT_FILTER_ENV, "debug"));
 
         // Make sure we can parse and assemble the entire kernel.
         let kernel = combined_kernel();

From 718b3c0961279c9e244712addcba8a86eb5a7b2e Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 09:24:04 -0700
Subject: [PATCH 40/85] Move ecrecover

---
 evm/src/cpu/kernel/aggregator.rs                          | 8 ++++----
 .../cpu/kernel/asm/{ => curve/secp256k1}/ecrecover.asm    | 0
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename evm/src/cpu/kernel/asm/{ => curve/secp256k1}/ecrecover.asm (100%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 7f45b272..196341f9 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -48,19 +48,19 @@ pub(crate) fn combined_kernel() -> Kernel {
     let files = vec![
         include_str!("asm/assertions.asm"),
         include_str!("asm/basic_macros.asm"),
-        include_str!("asm/curve/bn254/curve_mul.asm"),
         include_str!("asm/curve/bn254/curve_add.asm"),
+        include_str!("asm/curve/bn254/curve_mul.asm"),
         include_str!("asm/curve/bn254/moddiv.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
-        include_str!("asm/curve/secp256k1/moddiv.asm"),
-        include_str!("asm/curve/secp256k1/lift_x.asm"),
+        include_str!("asm/curve/secp256k1/ecrecover.asm"),
         include_str!("asm/curve/secp256k1/inverse_scalar.asm"),
+        include_str!("asm/curve/secp256k1/lift_x.asm"),
+        include_str!("asm/curve/secp256k1/moddiv.asm"),
         include_str!("asm/exp.asm"),
         include_str!("asm/halt.asm"),
         include_str!("asm/memory.asm"),
-        include_str!("asm/ecrecover.asm"),
         include_str!("asm/rlp/encode.asm"),
         include_str!("asm/rlp/decode.asm"),
         include_str!("asm/rlp/read_to_memory.asm"),
diff --git a/evm/src/cpu/kernel/asm/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/ecrecover.asm
rename to evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm

From 7a6d996fe70cbaa5cdab70f443d078074e0b9871 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 09:28:16 -0700
Subject: [PATCH 41/85] Move couple asm files

---
 evm/src/cpu/kernel/aggregator.rs                   | 4 ++--
 evm/src/cpu/kernel/asm/{ => util}/assertions.asm   | 0
 evm/src/cpu/kernel/asm/{ => util}/basic_macros.asm | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename evm/src/cpu/kernel/asm/{ => util}/assertions.asm (100%)
 rename evm/src/cpu/kernel/asm/{ => util}/basic_macros.asm (100%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 196341f9..e5e1f29f 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -46,8 +46,6 @@ pub fn evm_constants() -> HashMap<String, U256> {
 #[allow(dead_code)] // TODO: Should be used once witness generation is done.
 pub(crate) fn combined_kernel() -> Kernel {
     let files = vec![
-        include_str!("asm/assertions.asm"),
-        include_str!("asm/basic_macros.asm"),
         include_str!("asm/curve/bn254/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_mul.asm"),
         include_str!("asm/curve/bn254/moddiv.asm"),
@@ -71,6 +69,8 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/transactions/type_0.asm"),
         include_str!("asm/transactions/type_1.asm"),
         include_str!("asm/transactions/type_2.asm"),
+        include_str!("asm/util/assertions.asm"),
+        include_str!("asm/util/basic_macros.asm"),
     ];
 
     let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
diff --git a/evm/src/cpu/kernel/asm/assertions.asm b/evm/src/cpu/kernel/asm/util/assertions.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/assertions.asm
rename to evm/src/cpu/kernel/asm/util/assertions.asm
diff --git a/evm/src/cpu/kernel/asm/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/basic_macros.asm
rename to evm/src/cpu/kernel/asm/util/basic_macros.asm

From bd6847e8fc6013af7a18480dd50d997ac778af80 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sat, 30 Jul 2022 21:51:55 -0700
Subject: [PATCH 42/85] Allow `%stack` to work with labels

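There's no syntax to distinguish named stack items from labels, so an identifier in a `%stack` replacement is first looked up among the named stack items and is treated as a label only if no item has that name. I.e. labels can be shadowed by stack items.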
---
 evm/src/cpu/kernel/assembler.rs          | 14 ++++-
 evm/src/cpu/kernel/ast.rs                |  5 +-
 evm/src/cpu/kernel/parser.rs             |  2 +-
 evm/src/cpu/kernel/stack_manipulation.rs | 74 +++++++++++++++---------
 4 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 6d2b0ff2..5582b8e5 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -17,7 +17,7 @@ use crate::cpu::kernel::{
 /// The number of bytes to push when pushing an offset within the code (i.e. when assembling jumps).
 /// Ideally we would automatically use the minimal number of bytes required, but that would be
 /// nontrivial given the circular dependency between an offset and its size.
-const BYTES_PER_OFFSET: u8 = 3;
+pub(crate) const BYTES_PER_OFFSET: u8 = 3;
 
 #[derive(PartialEq, Eq, Debug)]
 pub struct Kernel {
@@ -505,8 +505,13 @@ mod tests {
     #[test]
     fn stack_manipulation() {
         let pop = get_opcode("POP");
+        let dup1 = get_opcode("DUP1");
         let swap1 = get_opcode("SWAP1");
         let swap2 = get_opcode("SWAP2");
+        let push_label = get_push_opcode(BYTES_PER_OFFSET);
+
+        let kernel = parse_and_assemble(&["%stack (a) -> (a)"]);
+        assert_eq!(kernel.code, vec![]);
 
         let kernel = parse_and_assemble(&["%stack (a, b, c) -> (c, b, a)"]);
         assert_eq!(kernel.code, vec![swap2]);
@@ -518,6 +523,13 @@ mod tests {
         consts.insert("LIFE".into(), 42.into());
         parse_and_assemble_with_constants(&["%stack (a, b) -> (b, @LIFE)"], consts);
         // We won't check the code since there are two equally efficient implementations.
+
+        let kernel = parse_and_assemble(&["start: %stack (a, b) -> (start)"]);
+        assert_eq!(kernel.code, vec![pop, pop, push_label, 0, 0, 0]);
+
+        // The "start" label gets shadowed by the "start" named stack item.
+        let kernel = parse_and_assemble(&["start: %stack (start) -> (start, start)"]);
+        assert_eq!(kernel.code, vec![dup1]);
     }
 
     fn parse_and_assemble(files: &[&str]) -> Kernel {
diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
index 9580d9c6..b9d7286a 100644
--- a/evm/src/cpu/kernel/ast.rs
+++ b/evm/src/cpu/kernel/ast.rs
@@ -37,14 +37,15 @@ pub(crate) enum Item {
 
 #[derive(Clone, Debug)]
 pub(crate) enum StackReplacement {
-    NamedItem(String),
+    /// Can be either a named item or a label.
+    Identifier(String),
     Literal(Literal),
     MacroVar(String),
     Constant(String),
 }
 
 /// The target of a `PUSH` operation.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq, Hash)]
 pub(crate) enum PushTarget {
     Literal(Literal),
     Label(String),
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
index f7acc96c..3c7b5fe2 100644
--- a/evm/src/cpu/kernel/parser.rs
+++ b/evm/src/cpu/kernel/parser.rs
@@ -112,7 +112,7 @@ fn parse_stack_replacement(target: Pair<Rule>) -> StackReplacement {
     assert_eq!(target.as_rule(), Rule::stack_replacement);
     let inner = target.into_inner().next().unwrap();
     match inner.as_rule() {
-        Rule::identifier => StackReplacement::NamedItem(inner.as_str().into()),
+        Rule::identifier => StackReplacement::Identifier(inner.as_str().into()),
         Rule::literal => StackReplacement::Literal(parse_literal(inner)),
         Rule::variable => {
             StackReplacement::MacroVar(inner.into_inner().next().unwrap().as_str().into())
diff --git a/evm/src/cpu/kernel/stack_manipulation.rs b/evm/src/cpu/kernel/stack_manipulation.rs
index 63d0566c..a659fd35 100644
--- a/evm/src/cpu/kernel/stack_manipulation.rs
+++ b/evm/src/cpu/kernel/stack_manipulation.rs
@@ -5,7 +5,8 @@ use std::collections::{BinaryHeap, HashMap};
 use itertools::Itertools;
 
 use crate::cpu::columns::NUM_CPU_COLUMNS;
-use crate::cpu::kernel::ast::{Item, Literal, PushTarget, StackReplacement};
+use crate::cpu::kernel::assembler::BYTES_PER_OFFSET;
+use crate::cpu::kernel::ast::{Item, PushTarget, StackReplacement};
 use crate::cpu::kernel::stack_manipulation::StackOp::Pop;
 use crate::memory;
 
@@ -22,22 +23,24 @@ pub(crate) fn expand_stack_manipulation(body: Vec<Item>) -> Vec<Item> {
 }
 
 fn expand(names: Vec<String>, replacements: Vec<StackReplacement>) -> Vec<Item> {
-    let mut src = names.into_iter().map(StackItem::NamedItem).collect_vec();
-
-    let unique_literals = replacements
+    let mut src = names
         .iter()
-        .filter_map(|item| match item {
-            StackReplacement::Literal(n) => Some(n.clone()),
-            _ => None,
-        })
-        .unique()
+        .cloned()
+        .map(StackItem::NamedItem)
         .collect_vec();
 
     let mut dst = replacements
         .into_iter()
         .map(|item| match item {
-            StackReplacement::NamedItem(name) => StackItem::NamedItem(name),
-            StackReplacement::Literal(n) => StackItem::Literal(n),
+            StackReplacement::Identifier(name) => {
+                // May be either a named item or a label. Named items have precedence.
+                if names.contains(&name) {
+                    StackItem::NamedItem(name)
+                } else {
+                    StackItem::PushTarget(PushTarget::Label(name))
+                }
+            }
+            StackReplacement::Literal(n) => StackItem::PushTarget(PushTarget::Literal(n)),
             StackReplacement::MacroVar(_) | StackReplacement::Constant(_) => {
                 panic!("Should have been expanded already: {:?}", item)
             }
@@ -49,7 +52,16 @@ fn expand(names: Vec<String>, replacements: Vec<StackReplacement>) -> Vec<Item>
     src.reverse();
     dst.reverse();
 
-    let path = shortest_path(src, dst, unique_literals);
+    let unique_push_targets = dst
+        .iter()
+        .filter_map(|item| match item {
+            StackItem::PushTarget(target) => Some(target.clone()),
+            _ => None,
+        })
+        .unique()
+        .collect_vec();
+
+    let path = shortest_path(src, dst, unique_push_targets);
     path.into_iter().map(StackOp::into_item).collect()
 }
 
@@ -58,7 +70,7 @@ fn expand(names: Vec<String>, replacements: Vec<StackReplacement>) -> Vec<Item>
 fn shortest_path(
     src: Vec<StackItem>,
     dst: Vec<StackItem>,
-    unique_literals: Vec<Literal>,
+    unique_push_targets: Vec<PushTarget>,
 ) -> Vec<StackOp> {
     // Nodes to visit, starting with the lowest-cost node.
     let mut queue = BinaryHeap::new();
@@ -93,7 +105,7 @@ fn shortest_path(
             continue;
         }
 
-        for op in next_ops(&node.stack, &dst, &unique_literals) {
+        for op in next_ops(&node.stack, &dst, &unique_push_targets) {
             let neighbor = match op.apply_to(node.stack.clone()) {
                 Some(n) => n,
                 None => continue,
@@ -151,19 +163,23 @@ impl Ord for Node {
 #[derive(Eq, PartialEq, Hash, Clone, Debug)]
 enum StackItem {
     NamedItem(String),
-    Literal(Literal),
+    PushTarget(PushTarget),
 }
 
 #[derive(Clone, Debug)]
 enum StackOp {
-    Push(Literal),
+    Push(PushTarget),
     Pop,
     Dup(u8),
     Swap(u8),
 }
 
 /// A set of candidate operations to consider for the next step in the path from `src` to `dst`.
-fn next_ops(src: &[StackItem], dst: &[StackItem], unique_literals: &[Literal]) -> Vec<StackOp> {
+fn next_ops(
+    src: &[StackItem],
+    dst: &[StackItem],
+    unique_push_targets: &[PushTarget],
+) -> Vec<StackOp> {
     if let Some(top) = src.last() && !dst.contains(top) {
         // If the top of src doesn't appear in dst, don't bother with anything other than a POP.
         return vec![StackOp::Pop]
@@ -172,12 +188,12 @@ fn next_ops(src: &[StackItem], dst: &[StackItem], unique_literals: &[Literal]) -
     let mut ops = vec![StackOp::Pop];
 
     ops.extend(
-        unique_literals
+        unique_push_targets
             .iter()
-            // Only consider pushing this literal if we need more occurrences of it, otherwise swaps
+            // Only consider pushing this target if we need more occurrences of it, otherwise swaps
             // will be a better way to rearrange the existing occurrences as needed.
-            .filter(|lit| {
-                let item = StackItem::Literal((*lit).clone());
+            .filter(|push_target| {
+                let item = StackItem::PushTarget((*push_target).clone());
                 let src_count = src.iter().filter(|x| **x == item).count();
                 let dst_count = dst.iter().filter(|x| **x == item).count();
                 src_count < dst_count
@@ -209,8 +225,14 @@ fn next_ops(src: &[StackItem], dst: &[StackItem], unique_literals: &[Literal]) -
 impl StackOp {
     fn cost(&self) -> u32 {
         let (cpu_rows, memory_rows) = match self {
-            StackOp::Push(n) => {
-                let bytes = n.to_trimmed_be_bytes().len() as u32;
+            StackOp::Push(target) => {
+                let bytes = match target {
+                    PushTarget::Literal(n) => n.to_trimmed_be_bytes().len() as u32,
+                    PushTarget::Label(_) => BYTES_PER_OFFSET as u32,
+                    PushTarget::MacroVar(_) | PushTarget::Constant(_) => {
+                        panic!("Target should have been expanded already: {:?}", target)
+                    }
+                };
                 // This is just a rough estimate; we can update it after implementing PUSH.
                 (bytes, bytes)
             }
@@ -232,8 +254,8 @@ impl StackOp {
     fn apply_to(&self, mut stack: Vec<StackItem>) -> Option<Vec<StackItem>> {
         let len = stack.len();
         match self {
-            StackOp::Push(n) => {
-                stack.push(StackItem::Literal(n.clone()));
+            StackOp::Push(target) => {
+                stack.push(StackItem::PushTarget(target.clone()));
             }
             Pop => {
                 stack.pop()?;
@@ -253,7 +275,7 @@ impl StackOp {
 
     fn into_item(self) -> Item {
         match self {
-            StackOp::Push(n) => Item::Push(PushTarget::Literal(n)),
+            StackOp::Push(target) => Item::Push(target),
             Pop => Item::StandardOp("POP".into()),
             StackOp::Dup(n) => Item::StandardOp(format!("DUP{}", n)),
             StackOp::Swap(n) => Item::StandardOp(format!("SWAP{}", n)),

From 7e9172008828189fde7ee78535bd9fa879959c98 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 11:58:41 -0700
Subject: [PATCH 43/85] Store literals as `U256` (or `u8` for `BYTES`)

Literals are now parsed into numeric values up front instead of being kept as the original strings. This will make optimizations simpler.
---
 evm/src/cpu/kernel/assembler.rs          | 31 ++++-------
 evm/src/cpu/kernel/ast.rs                | 70 ++----------------------
 evm/src/cpu/kernel/mod.rs                |  1 +
 evm/src/cpu/kernel/parser.rs             | 36 +++++++++---
 evm/src/cpu/kernel/stack_manipulation.rs |  3 +-
 evm/src/cpu/kernel/utils.rs              | 24 ++++++++
 6 files changed, 69 insertions(+), 96 deletions(-)
 create mode 100644 evm/src/cpu/kernel/utils.rs

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 5582b8e5..6e98b22c 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -5,10 +5,11 @@ use itertools::izip;
 use log::debug;
 
 use super::ast::PushTarget;
-use crate::cpu::kernel::ast::{Literal, StackReplacement};
+use crate::cpu::kernel::ast::StackReplacement;
 use crate::cpu::kernel::keccak_util::hash_kernel;
 use crate::cpu::kernel::prover_input::ProverInputFn;
 use crate::cpu::kernel::stack_manipulation::expand_stack_manipulation;
+use crate::cpu::kernel::utils::u256_to_trimmed_be_bytes;
 use crate::cpu::kernel::{
     ast::{File, Item},
     opcodes::{get_opcode, get_push_opcode},
@@ -184,7 +185,7 @@ fn expand_repeats(body: Vec<Item>) -> Vec<Item> {
     let mut expanded = vec![];
     for item in body {
         if let Item::Repeat(count, block) = item {
-            let reps = count.to_u256().as_usize();
+            let reps = count.as_usize();
             for _ in 0..reps {
                 expanded.extend(block.clone());
             }
@@ -197,12 +198,9 @@ fn expand_repeats(body: Vec<Item>) -> Vec<Item> {
 
 fn inline_constants(body: Vec<Item>, constants: &HashMap<String, U256>) -> Vec<Item> {
     let resolve_const = |c| {
-        Literal::Decimal(
-            constants
-                .get(&c)
-                .unwrap_or_else(|| panic!("No such constant: {}", c))
-                .to_string(),
-        )
+        *constants
+            .get(&c)
+            .unwrap_or_else(|| panic!("No such constant: {}", c))
     };
 
     body.into_iter()
@@ -284,7 +282,7 @@ fn assemble_file(
             }
             Item::Push(target) => {
                 let target_bytes: Vec<u8> = match target {
-                    PushTarget::Literal(literal) => literal.to_trimmed_be_bytes(),
+                    PushTarget::Literal(n) => u256_to_trimmed_be_bytes(&n),
                     PushTarget::Label(label) => {
                         let offset = local_labels
                             .get(&label)
@@ -309,7 +307,7 @@ fn assemble_file(
             Item::StandardOp(opcode) => {
                 code.push(get_opcode(&opcode));
             }
-            Item::Bytes(bytes) => code.extend(bytes.iter().map(|b| b.to_u8())),
+            Item::Bytes(bytes) => code.extend(bytes),
         }
     }
 }
@@ -317,7 +315,7 @@ fn assemble_file(
 /// The size of a `PushTarget`, in bytes.
 fn push_target_size(target: &PushTarget) -> u8 {
     match target {
-        PushTarget::Literal(lit) => lit.to_trimmed_be_bytes().len() as u8,
+        PushTarget::Literal(n) => u256_to_trimmed_be_bytes(n).len() as u8,
         PushTarget::Label(_) => BYTES_PER_OFFSET,
         PushTarget::MacroVar(v) => panic!("Variable not in a macro: {}", v),
         PushTarget::Constant(c) => panic!("Constant wasn't inlined: {}", c),
@@ -421,16 +419,7 @@ mod tests {
     #[test]
     fn literal_bytes() {
         let file = File {
-            body: vec![
-                Item::Bytes(vec![
-                    Literal::Hex("12".to_string()),
-                    Literal::Decimal("42".to_string()),
-                ]),
-                Item::Bytes(vec![
-                    Literal::Hex("fe".to_string()),
-                    Literal::Decimal("255".to_string()),
-                ]),
-            ],
+            body: vec![Item::Bytes(vec![0x12, 42]), Item::Bytes(vec![0xFE, 255])],
         };
         let code = assemble(vec![file], HashMap::new()).code;
         assert_eq!(code, vec![0x12, 42, 0xfe, 255]);
diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
index b9d7286a..bc2a3ec2 100644
--- a/evm/src/cpu/kernel/ast.rs
+++ b/evm/src/cpu/kernel/ast.rs
@@ -1,5 +1,4 @@
 use ethereum_types::U256;
-use plonky2_util::ceil_div_usize;
 
 use crate::cpu::kernel::prover_input::ProverInputFn;
 
@@ -15,7 +14,7 @@ pub(crate) enum Item {
     /// Calls a macro: name, args.
     MacroCall(String, Vec<PushTarget>),
     /// Repetition, like `%rep` in NASM.
-    Repeat(Literal, Vec<Item>),
+    Repeat(U256, Vec<Item>),
     /// A directive to manipulate the stack according to a specified pattern.
     /// The first list gives names to items on the top of the stack.
     /// The second list specifies replacement items.
@@ -32,14 +31,14 @@ pub(crate) enum Item {
     /// Any opcode besides a PUSH opcode.
     StandardOp(String),
     /// Literal hex data; should contain an even number of hex chars.
-    Bytes(Vec<Literal>),
+    Bytes(Vec<u8>),
 }
 
 #[derive(Clone, Debug)]
 pub(crate) enum StackReplacement {
     /// Can be either a named item or a label.
     Identifier(String),
-    Literal(Literal),
+    Literal(U256),
     MacroVar(String),
     Constant(String),
 }
@@ -47,69 +46,8 @@ pub(crate) enum StackReplacement {
 /// The target of a `PUSH` operation.
 #[derive(Clone, Debug, Eq, PartialEq, Hash)]
 pub(crate) enum PushTarget {
-    Literal(Literal),
+    Literal(U256),
     Label(String),
     MacroVar(String),
     Constant(String),
 }
-
-#[derive(Clone, Debug, Eq, PartialEq, Hash)]
-pub(crate) enum Literal {
-    Decimal(String),
-    Hex(String),
-}
-
-impl Literal {
-    pub(crate) fn to_trimmed_be_bytes(&self) -> Vec<u8> {
-        let u256 = self.to_u256();
-        let num_bytes = ceil_div_usize(u256.bits(), 8).max(1);
-        // `byte` is little-endian, so we manually reverse it.
-        (0..num_bytes).rev().map(|i| u256.byte(i)).collect()
-    }
-
-    pub(crate) fn to_u256(&self) -> U256 {
-        let (src, radix) = match self {
-            Literal::Decimal(s) => (s, 10),
-            Literal::Hex(s) => (s, 16),
-        };
-        U256::from_str_radix(src, radix)
-            .unwrap_or_else(|_| panic!("Not a valid u256 literal: {:?}", self))
-    }
-
-    pub(crate) fn to_u8(&self) -> u8 {
-        let (src, radix) = match self {
-            Literal::Decimal(s) => (s, 10),
-            Literal::Hex(s) => (s, 16),
-        };
-        u8::from_str_radix(src, radix)
-            .unwrap_or_else(|_| panic!("Not a valid u8 literal: {:?}", self))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::cpu::kernel::ast::*;
-
-    #[test]
-    fn literal_to_be_bytes() {
-        assert_eq!(
-            Literal::Decimal("0".into()).to_trimmed_be_bytes(),
-            vec![0x00]
-        );
-
-        assert_eq!(
-            Literal::Decimal("768".into()).to_trimmed_be_bytes(),
-            vec![0x03, 0x00]
-        );
-
-        assert_eq!(
-            Literal::Hex("a1b2".into()).to_trimmed_be_bytes(),
-            vec![0xa1, 0xb2]
-        );
-
-        assert_eq!(
-            Literal::Hex("1b2".into()).to_trimmed_be_bytes(),
-            vec![0x1, 0xb2]
-        );
-    }
-}
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index 5b9b1b4a..f0247f93 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -7,6 +7,7 @@ mod parser;
 pub mod prover_input;
 mod stack_manipulation;
 mod txn_fields;
+mod utils;
 
 #[cfg(test)]
 mod interpreter;
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
index 3c7b5fe2..860dc19d 100644
--- a/evm/src/cpu/kernel/parser.rs
+++ b/evm/src/cpu/kernel/parser.rs
@@ -1,7 +1,10 @@
+use std::str::FromStr;
+
+use ethereum_types::U256;
 use pest::iterators::Pair;
 use pest::Parser;
 
-use crate::cpu::kernel::ast::{File, Item, Literal, PushTarget, StackReplacement};
+use crate::cpu::kernel::ast::{File, Item, PushTarget, StackReplacement};
 
 /// Parses EVM assembly code.
 #[derive(pest_derive::Parser)]
@@ -31,7 +34,7 @@ fn parse_item(item: Pair<Rule>) -> Item {
         Rule::local_label => {
             Item::LocalLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
         }
-        Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal).collect()),
+        Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal_u8).collect()),
         Rule::push_instruction => Item::Push(parse_push_target(item.into_inner().next().unwrap())),
         Rule::prover_input_instruction => Item::ProverInput(
             item.into_inner()
@@ -84,7 +87,7 @@ fn parse_macro_call(item: Pair<Rule>) -> Item {
 fn parse_repeat(item: Pair<Rule>) -> Item {
     assert_eq!(item.as_rule(), Rule::repeat);
     let mut inner = item.into_inner().peekable();
-    let count = parse_literal(inner.next().unwrap());
+    let count = parse_literal_u256(inner.next().unwrap());
     Item::Repeat(count, inner.map(parse_item).collect())
 }
 
@@ -113,7 +116,7 @@ fn parse_stack_replacement(target: Pair<Rule>) -> StackReplacement {
     let inner = target.into_inner().next().unwrap();
     match inner.as_rule() {
         Rule::identifier => StackReplacement::Identifier(inner.as_str().into()),
-        Rule::literal => StackReplacement::Literal(parse_literal(inner)),
+        Rule::literal => StackReplacement::Literal(parse_literal_u256(inner)),
         Rule::variable => {
             StackReplacement::MacroVar(inner.into_inner().next().unwrap().as_str().into())
         }
@@ -128,7 +131,7 @@ fn parse_push_target(target: Pair<Rule>) -> PushTarget {
     assert_eq!(target.as_rule(), Rule::push_target);
     let inner = target.into_inner().next().unwrap();
     match inner.as_rule() {
-        Rule::literal => PushTarget::Literal(parse_literal(inner)),
+        Rule::literal => PushTarget::Literal(parse_literal_u256(inner)),
         Rule::identifier => PushTarget::Label(inner.as_str().into()),
         Rule::variable => PushTarget::MacroVar(inner.into_inner().next().unwrap().as_str().into()),
         Rule::constant => PushTarget::Constant(inner.into_inner().next().unwrap().as_str().into()),
@@ -136,11 +139,28 @@ fn parse_push_target(target: Pair<Rule>) -> PushTarget {
     }
 }
 
-fn parse_literal(literal: Pair<Rule>) -> Literal {
+fn parse_literal_u8(literal: Pair<Rule>) -> u8 {
     let literal = literal.into_inner().next().unwrap();
     match literal.as_rule() {
-        Rule::literal_decimal => Literal::Decimal(literal.as_str().into()),
-        Rule::literal_hex => Literal::Hex(parse_hex(literal)),
+        Rule::literal_decimal => {
+            u8::from_str(literal.as_str()).expect("Failed to parse literal decimal byte")
+        }
+        Rule::literal_hex => {
+            u8::from_str_radix(&parse_hex(literal), 16).expect("Failed to parse literal hex byte")
+        }
+        _ => panic!("Unexpected {:?}", literal.as_rule()),
+    }
+}
+
+fn parse_literal_u256(literal: Pair<Rule>) -> U256 {
+    let literal = literal.into_inner().next().unwrap();
+    match literal.as_rule() {
+        Rule::literal_decimal => {
+            U256::from_dec_str(literal.as_str()).expect("Failed to parse literal decimal")
+        }
+        Rule::literal_hex => {
+            U256::from_str_radix(&parse_hex(literal), 16).expect("Failed to parse literal hex")
+        }
         _ => panic!("Unexpected {:?}", literal.as_rule()),
     }
 }
diff --git a/evm/src/cpu/kernel/stack_manipulation.rs b/evm/src/cpu/kernel/stack_manipulation.rs
index a659fd35..71746f16 100644
--- a/evm/src/cpu/kernel/stack_manipulation.rs
+++ b/evm/src/cpu/kernel/stack_manipulation.rs
@@ -8,6 +8,7 @@ use crate::cpu::columns::NUM_CPU_COLUMNS;
 use crate::cpu::kernel::assembler::BYTES_PER_OFFSET;
 use crate::cpu::kernel::ast::{Item, PushTarget, StackReplacement};
 use crate::cpu::kernel::stack_manipulation::StackOp::Pop;
+use crate::cpu::kernel::utils::u256_to_trimmed_be_bytes;
 use crate::memory;
 
 pub(crate) fn expand_stack_manipulation(body: Vec<Item>) -> Vec<Item> {
@@ -227,7 +228,7 @@ impl StackOp {
         let (cpu_rows, memory_rows) = match self {
             StackOp::Push(target) => {
                 let bytes = match target {
-                    PushTarget::Literal(n) => n.to_trimmed_be_bytes().len() as u32,
+                    PushTarget::Literal(n) => u256_to_trimmed_be_bytes(n).len() as u32,
                     PushTarget::Label(_) => BYTES_PER_OFFSET as u32,
                     PushTarget::MacroVar(_) | PushTarget::Constant(_) => {
                         panic!("Target should have been expanded already: {:?}", target)
diff --git a/evm/src/cpu/kernel/utils.rs b/evm/src/cpu/kernel/utils.rs
new file mode 100644
index 00000000..d9682679
--- /dev/null
+++ b/evm/src/cpu/kernel/utils.rs
@@ -0,0 +1,24 @@
+use ethereum_types::U256;
+use plonky2_util::ceil_div_usize;
+
+pub(crate) fn u256_to_trimmed_be_bytes(u256: &U256) -> Vec<u8> {
+    let num_bytes = ceil_div_usize(u256.bits(), 8).max(1);
+    // `byte` is little-endian, so we manually reverse it.
+    (0..num_bytes).rev().map(|i| u256.byte(i)).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn literal_to_be_bytes() {
+        assert_eq!(u256_to_trimmed_be_bytes(&0.into()), vec![0x00]);
+
+        assert_eq!(u256_to_trimmed_be_bytes(&768.into()), vec![0x03, 0x00]);
+
+        assert_eq!(u256_to_trimmed_be_bytes(&0xa1b2.into()), vec![0xa1, 0xb2]);
+
+        assert_eq!(u256_to_trimmed_be_bytes(&0x1b2.into()), vec![0x1, 0xb2]);
+    }
+}

From 497b26dee6a219b7b28df037fd35c59708d07400 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 25 Jul 2022 09:36:26 -0700
Subject: [PATCH 44/85] Some simple optimization rules

Depends on #647.
---
 evm/src/cpu/kernel/aggregator.rs             |  2 +-
 evm/src/cpu/kernel/asm/util/basic_macros.asm |  8 ++-
 evm/src/cpu/kernel/assembler.rs              | 22 ++++--
 evm/src/cpu/kernel/mod.rs                    |  3 +-
 evm/src/cpu/kernel/optimizer.rs              | 71 ++++++++++++++++++++
 evm/src/cpu/kernel/parser.rs                 |  2 +-
 evm/src/cpu/kernel/utils.rs                  | 39 +++++++++++
 7 files changed, 135 insertions(+), 12 deletions(-)
 create mode 100644 evm/src/cpu/kernel/optimizer.rs

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index e5e1f29f..73d1797b 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -74,7 +74,7 @@ pub(crate) fn combined_kernel() -> Kernel {
     ];
 
     let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
-    assemble(parsed_files, evm_constants())
+    assemble(parsed_files, evm_constants(), true)
 }
 
 #[cfg(test)]
diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index e266b2cb..58ab5d2c 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -120,7 +120,9 @@
     // stack: input, ...
     PUSH $c
     // stack: c, input, ...
-    GE // Check it backwards: (input <= c) == (c >= input)
+    %add_const(1) // This will be optimized out.
+    // stack: c + 1, input, ...
+    GE // Check it backwards: (input <= c) == (c + 1 > input)
     // stack: input <= c, ...
 %endmacro
 
@@ -136,7 +138,9 @@
     // stack: input, ...
     PUSH $c
     // stack: c, input, ...
-    LE // Check it backwards: (input >= c) == (c <= input)
+    %sub_const(1) // This will be optimized out.
+    // stack: c - 1, input, ...
+    LT // Check it backwards: (input >= c) == (c - 1 < input)
     // stack: input >= c, ...
 %endmacro
 
diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 6e98b22c..0a1232e1 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -7,6 +7,7 @@ use log::debug;
 use super::ast::PushTarget;
 use crate::cpu::kernel::ast::StackReplacement;
 use crate::cpu::kernel::keccak_util::hash_kernel;
+use crate::cpu::kernel::optimizer::optimize_asm;
 use crate::cpu::kernel::prover_input::ProverInputFn;
 use crate::cpu::kernel::stack_manipulation::expand_stack_manipulation;
 use crate::cpu::kernel::utils::u256_to_trimmed_be_bytes;
@@ -64,7 +65,11 @@ impl Macro {
     }
 }
 
-pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Kernel {
+pub(crate) fn assemble(
+    files: Vec<File>,
+    constants: HashMap<String, U256>,
+    optimize: bool,
+) -> Kernel {
     let macros = find_macros(&files);
     let mut global_labels = HashMap::new();
     let mut prover_inputs = HashMap::new();
@@ -75,7 +80,10 @@ pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Ke
         let expanded_file = expand_macros(file.body, &macros);
         let expanded_file = expand_repeats(expanded_file);
         let expanded_file = inline_constants(expanded_file, &constants);
-        let expanded_file = expand_stack_manipulation(expanded_file);
+        let mut expanded_file = expand_stack_manipulation(expanded_file);
+        if optimize {
+            optimize_asm(&mut expanded_file);
+        }
         local_labels.push(find_labels(
             &expanded_file,
             &mut offset,
@@ -381,7 +389,7 @@ mod tests {
         let expected_kernel = Kernel::new(expected_code, expected_global_labels, HashMap::new());
 
         let program = vec![file_1, file_2];
-        assert_eq!(assemble(program, HashMap::new()), expected_kernel);
+        assert_eq!(assemble(program, HashMap::new(), false), expected_kernel);
     }
 
     #[test]
@@ -399,7 +407,7 @@ mod tests {
                 Item::StandardOp("JUMPDEST".to_string()),
             ],
         };
-        assemble(vec![file_1, file_2], HashMap::new());
+        assemble(vec![file_1, file_2], HashMap::new(), false);
     }
 
     #[test]
@@ -413,7 +421,7 @@ mod tests {
                 Item::StandardOp("ADD".to_string()),
             ],
         };
-        assemble(vec![file], HashMap::new());
+        assemble(vec![file], HashMap::new(), false);
     }
 
     #[test]
@@ -421,7 +429,7 @@ mod tests {
         let file = File {
             body: vec![Item::Bytes(vec![0x12, 42]), Item::Bytes(vec![0xFE, 255])],
         };
-        let code = assemble(vec![file], HashMap::new()).code;
+        let code = assemble(vec![file], HashMap::new(), false).code;
         assert_eq!(code, vec![0x12, 42, 0xfe, 255]);
     }
 
@@ -530,6 +538,6 @@ mod tests {
         constants: HashMap<String, U256>,
     ) -> Kernel {
         let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
-        assemble(parsed_files, constants)
+        assemble(parsed_files, constants, false)
     }
 }
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index f0247f93..d87c1e13 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -3,6 +3,7 @@ pub mod assembler;
 mod ast;
 pub(crate) mod keccak_util;
 mod opcodes;
+mod optimizer;
 mod parser;
 pub mod prover_input;
 mod stack_manipulation;
@@ -23,6 +24,6 @@ use crate::cpu::kernel::aggregator::evm_constants;
 /// This is for debugging the kernel only.
 pub fn assemble_to_bytes(files: &[String]) -> Vec<u8> {
     let parsed_files: Vec<_> = files.iter().map(|f| parse(f)).collect();
-    let kernel = assemble(parsed_files, evm_constants());
+    let kernel = assemble(parsed_files, evm_constants(), true);
     kernel.code
 }
diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
new file mode 100644
index 00000000..2b162792
--- /dev/null
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -0,0 +1,71 @@
+use ethereum_types::U256;
+use Item::{Push, StandardOp};
+use PushTarget::Literal;
+
+use crate::cpu::kernel::ast::Item::LocalLabelDeclaration;
+use crate::cpu::kernel::ast::PushTarget::Label;
+use crate::cpu::kernel::ast::{Item, PushTarget};
+use crate::cpu::kernel::utils::replace_windows;
+
+pub(crate) fn optimize_asm(code: &mut Vec<Item>) {
+    constant_propagation(code);
+
+    // Remove no-op jumps: [PUSH label, JUMP, label:] -> [label:]
+    replace_windows(code, |window| {
+        if let [Push(Label(l1)), StandardOp(jump), LocalLabelDeclaration(l2)] = window
+            && l1 == l2
+            && &jump == "JUMP"
+        {
+            Some(vec![LocalLabelDeclaration(l2)])
+        } else {
+            None
+        }
+    });
+
+    // Remove swaps: [PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]
+    replace_windows(code, |window| {
+        if let [Push(Literal(x)), Push(Literal(y)), StandardOp(swap1)] = window
+                && &swap1 == "SWAP1" {
+            Some(vec![Push(Literal(y)), Push(Literal(x))])
+        } else {
+            None
+        }
+    });
+}
+
+fn constant_propagation(code: &mut Vec<Item>) {
+    // Constant propagation for unary ops: [PUSH x, UNARYOP] -> [PUSH UNARYOP(x)]
+    replace_windows(code, |window| {
+        if let [Push(Literal(x)), StandardOp(op)] = window {
+            match op.as_str() {
+                "ISZERO" => Some(vec![Push(Literal(if x.is_zero() {
+                    U256::one()
+                } else {
+                    U256::zero()
+                }))]),
+                "NOT" => Some(vec![Push(Literal(!x))]),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    });
+
+    // Constant propagation for binary ops: [PUSH x, PUSH y, BINOP] -> [PUSH BINOP(x, y)]
+    replace_windows(code, |window| {
+        if let [Push(Literal(x)), Push(Literal(y)), StandardOp(op)] = window {
+            match op.as_str() {
+                "ADD" => Some(vec![Push(Literal(x + y))]),
+                "SUB" => Some(vec![Push(Literal(x - y))]),
+                "MUL" => Some(vec![Push(Literal(x * y))]),
+                "DIV" => Some(vec![Push(Literal(x / y))]),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    });
+}
+
+#[cfg(test)]
+mod tests {}
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
index 860dc19d..66bf0757 100644
--- a/evm/src/cpu/kernel/parser.rs
+++ b/evm/src/cpu/kernel/parser.rs
@@ -45,7 +45,7 @@ fn parse_item(item: Pair<Rule>) -> Item {
                 .collect::<Vec<_>>()
                 .into(),
         ),
-        Rule::nullary_instruction => Item::StandardOp(item.as_str().into()),
+        Rule::nullary_instruction => Item::StandardOp(item.as_str().to_uppercase()),
         _ => panic!("Unexpected {:?}", item.as_rule()),
     }
 }
diff --git a/evm/src/cpu/kernel/utils.rs b/evm/src/cpu/kernel/utils.rs
index d9682679..cff10430 100644
--- a/evm/src/cpu/kernel/utils.rs
+++ b/evm/src/cpu/kernel/utils.rs
@@ -1,6 +1,30 @@
+use std::fmt::Debug;
+
 use ethereum_types::U256;
 use plonky2_util::ceil_div_usize;
 
+/// Enumerate the length `W` windows of `vec`, and run `maybe_replace` on each one.
+///
+/// Whenever `maybe_replace` returns `Some(replacement)`, the given replacement will be applied.
+pub(crate) fn replace_windows<const W: usize, T, F>(vec: &mut Vec<T>, maybe_replace: F)
+where
+    T: Clone + Debug,
+    F: Fn([T; W]) -> Option<Vec<T>>,
+{
+    let mut start = 0;
+    while start + W <= vec.len() {
+        let range = start..start + W;
+        let window = vec[range.clone()].to_vec().try_into().unwrap();
+        if let Some(replacement) = maybe_replace(window) {
+            vec.splice(range, replacement);
+            // Go back to the earliest window that changed.
+            start = start.saturating_sub(W - 1);
+        } else {
+            start += 1;
+        }
+    }
+}
+
 pub(crate) fn u256_to_trimmed_be_bytes(u256: &U256) -> Vec<u8> {
     let num_bytes = ceil_div_usize(u256.bits(), 8).max(1);
     // `byte` is little-endian, so we manually reverse it.
@@ -11,6 +35,21 @@ pub(crate) fn u256_to_trimmed_be_bytes(u256: &U256) -> Vec<u8> {
 mod tests {
     use super::*;
 
+    #[test]
+    fn test_replace_windows() {
+        // This replacement function adds pairs of integers together.
+        let mut vec = vec![1, 2, 3, 4, 5];
+        replace_windows(&mut vec, |[x, y]| Some(vec![x + y]));
+        assert_eq!(vec, vec![15u32]);
+
+        // This replacement function splits each composite integer into two factors.
+        let mut vec = vec![9, 1, 6, 8, 15, 7, 9];
+        replace_windows(&mut vec, |[n]| {
+            (2..n).filter(|d| n % d == 0).next().map(|d| vec![d, n / d])
+        });
+        assert_eq!(vec, vec![3, 3, 1, 2, 3, 2, 2, 2, 3, 5, 7, 3, 3]);
+    }
+
     #[test]
     fn literal_to_be_bytes() {
         assert_eq!(u256_to_trimmed_be_bytes(&0.into()), vec![0x00]);

From a34a4c81842ff459920fa972dae95113da7eac06 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 13:03:07 -0700
Subject: [PATCH 45/85] fix

---
 evm/src/cpu/kernel/asm/util/basic_macros.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index 58ab5d2c..a91feb05 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -122,7 +122,7 @@
     // stack: c, input, ...
     %add_const(1) // This will be optimized out.
     // stack: c + 1, input, ...
-    GE // Check it backwards: (input <= c) == (c + 1 > input)
+    GT // Check it backwards: (input <= c) == (c + 1 > input)
     // stack: input <= c, ...
 %endmacro
 

From 2b9600e50c0ba377c05819bd776d0192af59cdbc Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 13:13:39 -0700
Subject: [PATCH 46/85] Misc

---
 evm/src/cpu/kernel/ast.rs       |   4 +-
 evm/src/cpu/kernel/optimizer.rs | 116 +++++++++++++++++++++++++-------
 2 files changed, 92 insertions(+), 28 deletions(-)

diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
index bc2a3ec2..a0de748a 100644
--- a/evm/src/cpu/kernel/ast.rs
+++ b/evm/src/cpu/kernel/ast.rs
@@ -7,7 +7,7 @@ pub(crate) struct File {
     pub(crate) body: Vec<Item>,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Eq, PartialEq, Clone, Debug)]
 pub(crate) enum Item {
     /// Defines a new macro: name, params, body.
     MacroDef(String, Vec<String>, Vec<Item>),
@@ -34,7 +34,7 @@ pub(crate) enum Item {
     Bytes(Vec<u8>),
 }
 
-#[derive(Clone, Debug)]
+#[derive(Eq, PartialEq, Clone, Debug)]
 pub(crate) enum StackReplacement {
     /// Can be either a named item or a label.
     Identifier(String),
diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 2b162792..5daf541f 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -2,39 +2,32 @@ use ethereum_types::U256;
 use Item::{Push, StandardOp};
 use PushTarget::Literal;
 
-use crate::cpu::kernel::ast::Item::LocalLabelDeclaration;
+use crate::cpu::kernel::ast::Item::{GlobalLabelDeclaration, LocalLabelDeclaration};
 use crate::cpu::kernel::ast::PushTarget::Label;
 use crate::cpu::kernel::ast::{Item, PushTarget};
 use crate::cpu::kernel::utils::replace_windows;
 
 pub(crate) fn optimize_asm(code: &mut Vec<Item>) {
-    constant_propagation(code);
-
-    // Remove no-op jumps: [PUSH label, JUMP, label:] -> [label:]
-    replace_windows(code, |window| {
-        if let [Push(Label(l1)), StandardOp(jump), LocalLabelDeclaration(l2)] = window
-            && l1 == l2
-            && &jump == "JUMP"
-        {
-            Some(vec![LocalLabelDeclaration(l2)])
-        } else {
-            None
+    // Run the optimizer until nothing changes.
+    loop {
+        let old_code = code.clone();
+        optimize_asm_once(code);
+        if code == &old_code {
+            break;
         }
-    });
-
-    // Remove swaps: [PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]
-    replace_windows(code, |window| {
-        if let [Push(Literal(x)), Push(Literal(y)), StandardOp(swap1)] = window
-                && &swap1 == "SWAP1" {
-            Some(vec![Push(Literal(y)), Push(Literal(x))])
-        } else {
-            None
-        }
-    });
+    }
 }
 
+/// A single optimization pass.
+fn optimize_asm_once(code: &mut Vec<Item>) {
+    constant_propagation(code);
+    no_op_jumps(code);
+    remove_swaps(code);
+}
+
+/// Constant propagation.
 fn constant_propagation(code: &mut Vec<Item>) {
-    // Constant propagation for unary ops: [PUSH x, UNARYOP] -> [PUSH UNARYOP(x)]
+    // Constant propagation for unary ops: `[PUSH x, UNARYOP] -> [PUSH UNARYOP(x)]`
     replace_windows(code, |window| {
         if let [Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
@@ -51,7 +44,7 @@ fn constant_propagation(code: &mut Vec<Item>) {
         }
     });
 
-    // Constant propagation for binary ops: [PUSH x, PUSH y, BINOP] -> [PUSH BINOP(x, y)]
+    // Constant propagation for binary ops: `[PUSH x, PUSH y, BINOP] -> [PUSH BINOP(x, y)]`
     replace_windows(code, |window| {
         if let [Push(Literal(x)), Push(Literal(y)), StandardOp(op)] = window {
             match op.as_str() {
@@ -67,5 +60,76 @@ fn constant_propagation(code: &mut Vec<Item>) {
     });
 }
 
+/// Remove no-op jumps: `[PUSH label, JUMP, label:] -> [label:]`, keeping the label's kind.
+fn no_op_jumps(code: &mut Vec<Item>) {
+    replace_windows(code, |window| {
+        if let [Push(Label(l)), StandardOp(jump), decl] = window
+            && &jump == "JUMP"
+            && (decl == LocalLabelDeclaration(l.clone()) || decl == GlobalLabelDeclaration(l.clone()))
+        {
+            Some(vec![decl])
+        } else {
+            None
+        }
+    });
+}
+
+/// Remove swaps: `[PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]`
+fn remove_swaps(code: &mut Vec<Item>) {
+    replace_windows(code, |window| {
+        if let [Push(x), Push(y), StandardOp(swap1)] = window
+            && &swap1 == "SWAP1" {
+            Some(vec![Push(y), Push(x)])
+        } else {
+            None
+        }
+    });
+}
+
 #[cfg(test)]
-mod tests {}
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_constant_propagation_iszero() {
+        let mut code = vec![Push(Literal(3.into())), StandardOp("ISZERO".into())];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(0.into()))]);
+    }
+
+    #[test]
+    fn test_constant_propagation_mul() {
+        let mut code = vec![
+            Push(Literal(3.into())),
+            Push(Literal(4.into())),
+            StandardOp("MUL".into()),
+        ];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(12.into()))]);
+    }
+
+    #[test]
+    fn test_no_op_jump() {
+        let mut code = vec![
+            Push(Label("mylabel".into())),
+            StandardOp("JUMP".into()),
+            LocalLabelDeclaration("mylabel".into()),
+        ];
+        no_op_jumps(&mut code);
+        assert_eq!(code, vec![LocalLabelDeclaration("mylabel".into())]);
+    }
+
+    #[test]
+    fn test_remove_swap() {
+        let mut code = vec![
+            Push(Literal("42".into())),
+            Push(Label("mylabel".into())),
+            StandardOp("SWAP1".into()),
+        ];
+        remove_swaps(&mut code);
+        assert_eq!(
+            code,
+            vec![Push(Label("mylabel".into())), Push(Literal("42".into()))]
+        );
+    }
+}

From d639d0b31129ac9bae9d3594f47563e1668cc3fe Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 15:55:02 -0700
Subject: [PATCH 47/85] clippy

---
 evm/src/cpu/kernel/utils.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/utils.rs b/evm/src/cpu/kernel/utils.rs
index cff10430..e2156259 100644
--- a/evm/src/cpu/kernel/utils.rs
+++ b/evm/src/cpu/kernel/utils.rs
@@ -45,7 +45,7 @@ mod tests {
         // This replacement function splits each composite integer into two factors.
         let mut vec = vec![9, 1, 6, 8, 15, 7, 9];
         replace_windows(&mut vec, |[n]| {
-            (2..n).filter(|d| n % d == 0).next().map(|d| vec![d, n / d])
+            (2..n).find(|d| n % d == 0).map(|d| vec![d, n / d])
         });
         assert_eq!(vec, vec![3, 3, 1, 2, 3, 2, 2, 2, 3, 5, 7, 3, 3]);
     }

From d6b5193c9b706f64f8670dac5799de9e72da1ed8 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Fri, 29 Jul 2022 13:50:16 -0700
Subject: [PATCH 48/85] RLP decoding tests

---
 evm/src/cpu/kernel/asm/rlp/decode.asm |   2 +
 evm/src/cpu/kernel/interpreter.rs     |  63 +++++++++----
 evm/src/cpu/kernel/tests/mod.rs       |   1 +
 evm/src/cpu/kernel/tests/rlp.rs       | 129 ++++++++++++++++++++++++++
 4 files changed, 178 insertions(+), 17 deletions(-)
 create mode 100644 evm/src/cpu/kernel/tests/rlp.rs

diff --git a/evm/src/cpu/kernel/asm/rlp/decode.asm b/evm/src/cpu/kernel/asm/rlp/decode.asm
index 76daec1a..24d8d5a7 100644
--- a/evm/src/cpu/kernel/asm/rlp/decode.asm
+++ b/evm/src/cpu/kernel/asm/rlp/decode.asm
@@ -32,6 +32,7 @@ global decode_rlp_string_len:
     JUMP
 
 decode_rlp_string_len_medium:
+    JUMPDEST
     // String is 0-55 bytes long. First byte contains the len.
     // stack: first_byte, pos, retdest
     %sub_const(0x80)
@@ -43,6 +44,7 @@ decode_rlp_string_len_medium:
     JUMP
 
 decode_rlp_string_len_large:
+    JUMPDEST
     // String is >55 bytes long. First byte contains the len of the len.
     // stack: first_byte, pos, retdest
     %sub_const(0xb7)
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 5c2f6514..6a9d31fa 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -4,6 +4,7 @@ use anyhow::{anyhow, bail};
 use ethereum_types::{BigEndianHash, U256, U512};
 use keccak_hash::keccak;
 
+use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::assembler::Kernel;
 use crate::cpu::kernel::prover_input::ProverInputFn;
 use crate::generation::memory::{MemoryContextState, MemorySegmentState};
@@ -14,7 +15,7 @@ const HALT_OFFSET: usize = 0xdeadbeef;
 
 #[derive(Debug)]
 pub(crate) struct InterpreterMemory {
-    context_memory: Vec<MemoryContextState>,
+    pub(crate) context_memory: Vec<MemoryContextState>,
 }
 
 impl Default for InterpreterMemory {
@@ -51,13 +52,14 @@ pub struct Interpreter<'a> {
     jumpdests: Vec<usize>,
     offset: usize,
     context: usize,
-    memory: InterpreterMemory,
+    pub(crate) memory: InterpreterMemory,
     prover_inputs_map: &'a HashMap<usize, ProverInputFn>,
     prover_inputs: Vec<U256>,
     running: bool,
 }
 
 pub fn run_with_kernel(
+    // TODO: Remove param and just use KERNEL.
     kernel: &Kernel,
     initial_offset: usize,
     initial_stack: Vec<U256>,
@@ -76,24 +78,45 @@ pub fn run<'a>(
     initial_stack: Vec<U256>,
     prover_inputs: &'a HashMap<usize, ProverInputFn>,
 ) -> anyhow::Result<Interpreter<'a>> {
-    let mut interpreter = Interpreter {
-        jumpdests: find_jumpdests(code),
-        offset: initial_offset,
-        memory: InterpreterMemory::with_code_and_stack(code, initial_stack),
-        prover_inputs_map: prover_inputs,
-        prover_inputs: Vec::new(),
-        context: 0,
-        running: true,
-    };
-
-    while interpreter.running {
-        interpreter.run_opcode()?;
-    }
-
+    let mut interpreter = Interpreter::new(code, initial_offset, initial_stack, prover_inputs);
+    interpreter.run()?;
     Ok(interpreter)
 }
 
 impl<'a> Interpreter<'a> {
+    pub(crate) fn new_with_kernel(initial_offset: usize, initial_stack: Vec<U256>) -> Self {
+        Self::new(
+            &KERNEL.code,
+            initial_offset,
+            initial_stack,
+            &KERNEL.prover_inputs,
+        )
+    }
+
+    pub(crate) fn new(
+        code: &'a [u8],
+        initial_offset: usize,
+        initial_stack: Vec<U256>,
+        prover_inputs: &'a HashMap<usize, ProverInputFn>,
+    ) -> Self {
+        Self {
+            jumpdests: find_jumpdests(code),
+            offset: initial_offset,
+            memory: InterpreterMemory::with_code_and_stack(code, initial_stack),
+            prover_inputs_map: prover_inputs,
+            prover_inputs: Vec::new(),
+            context: 0,
+            running: true,
+        }
+    }
+
+    pub(crate) fn run(&mut self) -> anyhow::Result<()> {
+        while self.running {
+            self.run_opcode()?;
+        }
+        Ok(())
+    }
+
     fn code(&self) -> &MemorySegmentState {
         &self.memory.context_memory[self.context].segments[Segment::Code as usize]
     }
@@ -156,7 +179,7 @@ impl<'a> Interpreter<'a> {
             0x18 => self.run_xor(),                                    // "XOR",
             0x19 => self.run_not(),                                    // "NOT",
             0x1a => todo!(),                                           // "BYTE",
-            0x1b => todo!(),                                           // "SHL",
+            0x1b => self.run_shl(),                                    // "SHL",
             0x1c => todo!(),                                           // "SHR",
             0x1d => todo!(),                                           // "SAR",
             0x20 => self.run_keccak256(),                              // "KECCAK256",
@@ -339,6 +362,12 @@ impl<'a> Interpreter<'a> {
         self.push(!x);
     }
 
+    fn run_shl(&mut self) {
+        let shift = self.pop().min(256.into()).as_usize(); // A shift >= 256 yields 0.
+        let x = self.pop();
+        self.push(x << shift);
+    }
+
     fn run_keccak256(&mut self) {
         let offset = self.pop().as_usize();
         let size = self.pop().as_usize();
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 100ef377..73eb3ada 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -1,6 +1,7 @@
 mod curve_ops;
 mod ecrecover;
 mod exp;
+mod rlp;
 
 use std::str::FromStr;
 
diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
new file mode 100644
index 00000000..e74213da
--- /dev/null
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -0,0 +1,129 @@
+use std::str::FromStr;
+
+use anyhow::Result;
+use ethereum_types::U256;
+
+use crate::cpu::kernel::aggregator::KERNEL;
+use crate::cpu::kernel::interpreter::Interpreter;
+use crate::memory::segments::Segment;
+
+#[test]
+fn test_decode_rlp_string_len_short() -> Result<()> {
+    let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
+
+    // A couple dummy bytes, followed by "0x70" which is its own encoding.
+    set_rlp_memory(&mut interpreter, vec![123, 234, 0x70]);
+
+    interpreter.run()?;
+    let expected_stack = vec![1.into(), 2.into()]; // len, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+#[test]
+fn test_decode_rlp_string_len_medium() -> Result<()> {
+    let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
+
+    // A couple dummy bytes, followed by the RLP encoding of "1 2 3 4 5".
+    set_rlp_memory(&mut interpreter, vec![123, 234, 0x85, 1, 2, 3, 4, 5]);
+
+    interpreter.run()?;
+    let expected_stack = vec![5.into(), 3.into()]; // len, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+#[test]
+fn test_decode_rlp_string_len_long() -> Result<()> {
+    let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
+
+    // The RLP encoding of the string "1 2 3 ... 56".
+    set_rlp_memory(
+        &mut interpreter,
+        vec![
+            123, 234, 0xb8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+            20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+            42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+        ],
+    );
+
+    interpreter.run()?;
+    let expected_stack = vec![56.into(), 4.into()]; // len, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+#[test]
+fn test_decode_rlp_list_len_short() -> Result<()> {
+    let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
+
+    // The RLP encoding of [1, 2, [3, 4]].
+    set_rlp_memory(&mut interpreter, vec![0xc5, 1, 2, 0xc2, 3, 4]);
+
+    interpreter.run()?;
+    let expected_stack = vec![5.into(), 1.into()]; // len, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+#[test]
+fn test_decode_rlp_list_len_long() -> Result<()> {
+    let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
+
+    // The RLP encoding of [1, ..., 56].
+    set_rlp_memory(
+        &mut interpreter,
+        vec![
+            0xf8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+            22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+            44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+        ],
+    );
+
+    interpreter.run()?;
+    let expected_stack = vec![56.into(), 2.into()]; // len, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+#[test]
+fn test_decode_rlp_scalar() -> Result<()> {
+    let decode_rlp_scalar = KERNEL.global_labels["decode_rlp_scalar"];
+
+    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let mut interpreter = Interpreter::new_with_kernel(decode_rlp_scalar, initial_stack);
+
+    // The RLP encoding of "12 34 56".
+    set_rlp_memory(&mut interpreter, vec![0x83, 0x12, 0x34, 0x56]);
+
+    interpreter.run()?;
+    let expected_stack = vec![0x123456.into(), 4.into()]; // scalar, pos
+    assert_eq!(interpreter.stack(), expected_stack);
+
+    Ok(())
+}
+
+fn set_rlp_memory(interpreter: &mut Interpreter, rlp: Vec<u8>) {
+    interpreter.memory.context_memory[0].segments[Segment::RlpRaw as usize].content =
+        rlp.into_iter().map(U256::from).collect();
+}

From 61a9839f2fe78337d2d1ead9bfb0343dcb6647a7 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 20:37:05 -0700
Subject: [PATCH 49/85] Feedback

---
 evm/src/cpu/kernel/optimizer.rs | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 5daf541f..9683e160 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -44,14 +44,16 @@ fn constant_propagation(code: &mut Vec<Item>) {
         }
     });
 
-    // Constant propagation for binary ops: `[PUSH x, PUSH y, BINOP] -> [PUSH BINOP(x, y)]`
+    // Constant propagation for binary ops: `[PUSH y, PUSH x, BINOP] -> [PUSH BINOP(x, y)]`
     replace_windows(code, |window| {
-        if let [Push(Literal(x)), Push(Literal(y)), StandardOp(op)] = window {
+        if let [Push(Literal(y)), Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
                 "ADD" => Some(vec![Push(Literal(x + y))]),
                 "SUB" => Some(vec![Push(Literal(x - y))]),
                 "MUL" => Some(vec![Push(Literal(x * y))]),
-                "DIV" => Some(vec![Push(Literal(x / y))]),
+                "DIV" => Some(vec![Push(Literal(
+                    x.checked_div(y).unwrap_or(U256::zero()),
+                ))]),
                 _ => None,
             }
         } else {
@@ -108,6 +110,28 @@ mod tests {
         assert_eq!(code, vec![Push(Literal(12.into()))]);
     }
 
+    #[test]
+    fn test_constant_propagation_div() {
+        let mut code = vec![
+            Push(Literal(3.into())),
+            Push(Literal(8.into())),
+            StandardOp("DIV".into()),
+        ];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(2.into()))]);
+    }
+
+    #[test]
+    fn test_constant_propagation_div_zero() {
+        let mut code = vec![
+            Push(Literal(0.into())),
+            Push(Literal(1.into())),
+            StandardOp("DIV".into()),
+        ];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(0.into()))]);
+    }
+
     #[test]
     fn test_no_op_jump() {
         let mut code = vec![

From 94183f723c57a434e7f9b5465464357d7fa682cb Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sun, 31 Jul 2022 20:43:58 -0700
Subject: [PATCH 50/85] fixes

---
 evm/src/cpu/kernel/optimizer.rs | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 9683e160..66307aec 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -48,9 +48,9 @@ fn constant_propagation(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(Literal(y)), Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
-                "ADD" => Some(vec![Push(Literal(x + y))]),
-                "SUB" => Some(vec![Push(Literal(x - y))]),
-                "MUL" => Some(vec![Push(Literal(x * y))]),
+                "ADD" => Some(vec![Push(Literal(x.overflowing_add(y).0))]),
+                "SUB" => Some(vec![Push(Literal(x.overflowing_sub(y).0))]),
+                "MUL" => Some(vec![Push(Literal(x.overflowing_mul(y).0))]),
                 "DIV" => Some(vec![Push(Literal(
                     x.checked_div(y).unwrap_or(U256::zero()),
                 ))]),
@@ -99,6 +99,28 @@ mod tests {
         assert_eq!(code, vec![Push(Literal(0.into()))]);
     }
 
+    #[test]
+    fn test_constant_propagation_add_overflowing() {
+        let mut code = vec![
+            Push(Literal(U256::max_value())),
+            Push(Literal(U256::max_value())),
+            StandardOp("ADD".into()),
+        ];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(U256::max_value() - 1))]);
+    }
+
+    #[test]
+    fn test_constant_propagation_sub_underflowing() {
+        let mut code = vec![
+            Push(Literal(U256::one())),
+            Push(Literal(U256::zero())),
+            StandardOp("SUB".into()),
+        ];
+        constant_propagation(&mut code);
+        assert_eq!(code, vec![Push(Literal(U256::max_value()))]);
+    }
+
     #[test]
     fn test_constant_propagation_mul() {
         let mut code = vec![

From 63c8568b17e067eae0b254cf0c7d7f8ccbaa8dd2 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 08:47:07 -0700
Subject: [PATCH 51/85] remove_ignored_values

---
 evm/src/cpu/kernel/optimizer.rs | 34 +++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 66307aec..cc595191 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -23,6 +23,7 @@ fn optimize_asm_once(code: &mut Vec<Item>) {
     constant_propagation(code);
     no_op_jumps(code);
     remove_swaps(code);
+    remove_ignored_values(code);
 }
 
 /// Constant propagation.
@@ -62,7 +63,7 @@ fn constant_propagation(code: &mut Vec<Item>) {
     });
 }
 
-/// Remove no-op jumps: `[PUSH label, JUMP, label:] -> [label:]`
+/// Remove no-op jumps: `[PUSH label, JUMP, label:] -> [label:]`.
 fn no_op_jumps(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(Label(l)), StandardOp(jump), decl] = window
@@ -76,7 +77,7 @@ fn no_op_jumps(code: &mut Vec<Item>) {
     });
 }
 
-/// Remove swaps: `[PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]`
+/// Remove swaps: `[PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]`.
 fn remove_swaps(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(x), Push(y), StandardOp(swap1)] = window
@@ -88,6 +89,21 @@ fn remove_swaps(code: &mut Vec<Item>) {
     });
 }
 
+/// Remove push-pop type patterns, such as: `[DUP1, POP]`.
+fn remove_ignored_values(code: &mut Vec<Item>) {
+    replace_windows(code, |[a, b]| {
+        if let StandardOp(pop) = b && &pop == "POP" {
+            match a {
+                Push(_) => Some(vec![]),
+                StandardOp(dup) if dup.starts_with("DUP") => Some(vec![]),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    });
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -178,4 +194,18 @@ mod tests {
             vec![Push(Label("mylabel".into())), Push(Literal("42".into()))]
         );
     }
+
+    #[test]
+    fn test_remove_push_pop() {
+        let mut code = vec![Push(Literal("42".into())), StandardOp("POP".into())];
+        remove_ignored_values(&mut code);
+        assert_eq!(code, vec![]);
+    }
+
+    #[test]
+    fn test_remove_dup_pop() {
+        let mut code = vec![StandardOp("DUP5".into()), StandardOp("POP".into())];
+        remove_ignored_values(&mut code);
+        assert_eq!(code, vec![]);
+    }
 }

From d1c9e150b37e9b8483d321631e24967823c2f3e1 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 09:02:39 -0700
Subject: [PATCH 52/85] remove_swaps_commutative

---
 evm/src/cpu/kernel/optimizer.rs | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index cc595191..811454e1 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -22,7 +22,8 @@ pub(crate) fn optimize_asm(code: &mut Vec<Item>) {
 fn optimize_asm_once(code: &mut Vec<Item>) {
     constant_propagation(code);
     no_op_jumps(code);
-    remove_swaps(code);
+    remove_swapped_pushes(code);
+    remove_swaps_commutative(code);
     remove_ignored_values(code);
 }
 
@@ -78,7 +79,7 @@ fn no_op_jumps(code: &mut Vec<Item>) {
 }
 
 /// Remove swaps: `[PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]`.
-fn remove_swaps(code: &mut Vec<Item>) {
+fn remove_swapped_pushes(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(x), Push(y), StandardOp(swap1)] = window
             && &swap1 == "SWAP1" {
@@ -89,6 +90,22 @@ fn remove_swaps(code: &mut Vec<Item>) {
     });
 }
 
+/// Remove SWAP1 before a commutative function.
+fn remove_swaps_commutative(code: &mut Vec<Item>) {
+    replace_windows(code, |window| {
+        if let [StandardOp(swap1), StandardOp(f)] = window && &swap1 == "SWAP1" {
+            let commutative = match f.as_str() {
+                "ADD" => true,
+                "MUL" => true,
+                _ => false,
+            };
+            commutative.then_some(vec![StandardOp(f)])
+        } else {
+            None
+        }
+    });
+}
+
 /// Remove push-pop type patterns, such as: `[DUP1, POP]`.
 fn remove_ignored_values(code: &mut Vec<Item>) {
     replace_windows(code, |[a, b]| {
@@ -182,19 +199,26 @@ mod tests {
     }
 
     #[test]
-    fn test_remove_swap() {
+    fn test_remove_swapped_pushes() {
         let mut code = vec![
             Push(Literal("42".into())),
             Push(Label("mylabel".into())),
             StandardOp("SWAP1".into()),
         ];
-        remove_swaps(&mut code);
+        remove_swapped_pushes(&mut code);
         assert_eq!(
             code,
             vec![Push(Label("mylabel".into())), Push(Literal("42".into()))]
         );
     }
 
+    #[test]
+    fn test_remove_swap_mul() {
+        let mut code = vec![StandardOp("SWAP1".into()), StandardOp("MUL".into())];
+        remove_swaps_commutative(&mut code);
+        assert_eq!(code, vec![StandardOp("MUL".into())]);
+    }
+
     #[test]
     fn test_remove_push_pop() {
         let mut code = vec![Push(Literal("42".into())), StandardOp("POP".into())];

From 98c4a372fb784351b19b80ccf9c235a893b3812e Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 09:22:01 -0700
Subject: [PATCH 53/85] More binops

---
 evm/src/cpu/kernel/optimizer.rs | 33 ++++++++++++++++-----------------
 evm/src/cpu/kernel/utils.rs     |  8 ++++++++
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 811454e1..6d77f0fa 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -5,7 +5,7 @@ use PushTarget::Literal;
 use crate::cpu::kernel::ast::Item::{GlobalLabelDeclaration, LocalLabelDeclaration};
 use crate::cpu::kernel::ast::PushTarget::Label;
 use crate::cpu::kernel::ast::{Item, PushTarget};
-use crate::cpu::kernel::utils::replace_windows;
+use crate::cpu::kernel::utils::{replace_windows, u256_from_bool};
 
 pub(crate) fn optimize_asm(code: &mut Vec<Item>) {
     // Run the optimizer until nothing changes.
@@ -33,11 +33,7 @@ fn constant_propagation(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
-                "ISZERO" => Some(vec![Push(Literal(if x.is_zero() {
-                    U256::one()
-                } else {
-                    U256::zero()
-                }))]),
+                "ISZERO" => Some(vec![Push(Literal(u256_from_bool(x.is_zero())))]),
                 "NOT" => Some(vec![Push(Literal(!x))]),
                 _ => None,
             }
@@ -50,14 +46,21 @@ fn constant_propagation(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(Literal(y)), Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
-                "ADD" => Some(vec![Push(Literal(x.overflowing_add(y).0))]),
-                "SUB" => Some(vec![Push(Literal(x.overflowing_sub(y).0))]),
-                "MUL" => Some(vec![Push(Literal(x.overflowing_mul(y).0))]),
-                "DIV" => Some(vec![Push(Literal(
-                    x.checked_div(y).unwrap_or(U256::zero()),
-                ))]),
+                "ADD" => Some(x.overflowing_add(y).0),
+                "SUB" => Some(x.overflowing_sub(y).0),
+                "MUL" => Some(x.overflowing_mul(y).0),
+                "DIV" => Some(x.checked_div(y).unwrap_or(U256::zero())),
+                "SHL" => Some(x << y),
+                "SHR" => Some(x >> y),
+                "AND" => Some(x & y),
+                "OR" => Some(x | y),
+                "XOR" => Some(x ^ y),
+                "LT" => Some(u256_from_bool(x < y)),
+                "GT" => Some(u256_from_bool(x > y)),
+                "EQ" => Some(u256_from_bool(x == y)),
                 _ => None,
             }
+            .map(|res| vec![Push(Literal(res))])
         } else {
             None
         }
@@ -94,11 +97,7 @@ fn remove_swapped_pushes(code: &mut Vec<Item>) {
 fn remove_swaps_commutative(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [StandardOp(swap1), StandardOp(f)] = window && &swap1 == "SWAP1" {
-            let commutative = match f.as_str() {
-                "ADD" => true,
-                "MUL" => true,
-                _ => false,
-            };
+            let commutative = matches!(f.as_str(), "ADD" | "MUL");
             commutative.then_some(vec![StandardOp(f)])
         } else {
             None
diff --git a/evm/src/cpu/kernel/utils.rs b/evm/src/cpu/kernel/utils.rs
index e2156259..8900b8e2 100644
--- a/evm/src/cpu/kernel/utils.rs
+++ b/evm/src/cpu/kernel/utils.rs
@@ -31,6 +31,14 @@ pub(crate) fn u256_to_trimmed_be_bytes(u256: &U256) -> Vec<u8> {
     (0..num_bytes).rev().map(|i| u256.byte(i)).collect()
 }
 
+pub(crate) fn u256_from_bool(b: bool) -> U256 {
+    if b {
+        U256::one()
+    } else {
+        U256::zero()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 36187937060d467eb27d0b37876d21e10c8e1e22 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 11:00:12 -0700
Subject: [PATCH 54/85] Test for parsing type 0 transactions

I made a couple of related changes to `Interpreter`:
- Only enforce the `JUMPDEST` rule if we're not in kernel mode.
- Convenience methods for dealing with the RLP and txn field segments of memory.
---
 evm/src/cpu/kernel/interpreter.rs      | 43 ++++++++++++------
 evm/src/cpu/kernel/tests/mod.rs        |  1 +
 evm/src/cpu/kernel/tests/rlp.rs        | 39 ++++++----------
 evm/src/cpu/kernel/tests/type_0_txn.rs | 63 ++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 38 deletions(-)
 create mode 100644 evm/src/cpu/kernel/tests/type_0_txn.rs

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 6a9d31fa..3b753f0a 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -7,11 +7,12 @@ use keccak_hash::keccak;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::assembler::Kernel;
 use crate::cpu::kernel::prover_input::ProverInputFn;
+use crate::cpu::kernel::txn_fields::NormalizedTxnField;
 use crate::generation::memory::{MemoryContextState, MemorySegmentState};
 use crate::memory::segments::Segment;
 
 /// Halt interpreter execution whenever a jump to this offset is done.
-const HALT_OFFSET: usize = 0xdeadbeef;
+const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef;
 
 #[derive(Debug)]
 pub(crate) struct InterpreterMemory {
@@ -49,12 +50,14 @@ impl InterpreterMemory {
 }
 
 pub struct Interpreter<'a> {
+    kernel_mode: bool,
     jumpdests: Vec<usize>,
     offset: usize,
     context: usize,
     pub(crate) memory: InterpreterMemory,
     prover_inputs_map: &'a HashMap<usize, ProverInputFn>,
     prover_inputs: Vec<U256>,
+    pub(crate) halt_offsets: Vec<usize>,
     running: bool,
 }
 
@@ -100,12 +103,14 @@ impl<'a> Interpreter<'a> {
         prover_inputs: &'a HashMap<usize, ProverInputFn>,
     ) -> Self {
         Self {
+            kernel_mode: true,
             jumpdests: find_jumpdests(code),
             offset: initial_offset,
             memory: InterpreterMemory::with_code_and_stack(code, initial_stack),
             prover_inputs_map: prover_inputs,
             prover_inputs: Vec::new(),
             context: 0,
+            halt_offsets: vec![DEFAULT_HALT_OFFSET],
             running: true,
         }
     }
@@ -128,6 +133,15 @@ impl<'a> Interpreter<'a> {
             .collect::<Vec<_>>()
     }
 
+    pub(crate) fn get_txn_field(&self, field: NormalizedTxnField) -> U256 {
+        self.memory.context_memory[0].segments[Segment::TxnFields as usize].content[field as usize]
+    }
+
+    pub(crate) fn set_rlp_memory(&mut self, rlp: Vec<u8>) {
+        self.memory.context_memory[0].segments[Segment::RlpRaw as usize].content =
+            rlp.into_iter().map(U256::from).collect();
+    }
+
     fn incr(&mut self, n: usize) {
         self.offset += n;
     }
@@ -435,24 +449,27 @@ impl<'a> Interpreter<'a> {
 
     fn run_jump(&mut self) {
         let x = self.pop().as_usize();
-        self.offset = x;
-        if self.offset == HALT_OFFSET {
-            self.running = false;
-        } else if self.jumpdests.binary_search(&self.offset).is_err() {
-            panic!("Destination is not a JUMPDEST.");
-        }
+        self.jump_to(x);
     }
 
     fn run_jumpi(&mut self) {
         let x = self.pop().as_usize();
         let b = self.pop();
         if !b.is_zero() {
-            self.offset = x;
-            if self.offset == HALT_OFFSET {
-                self.running = false;
-            } else if self.jumpdests.binary_search(&self.offset).is_err() {
-                panic!("Destination is not a JUMPDEST.");
-            }
+            self.jump_to(x);
+        }
+    }
+
+    fn jump_to(&mut self, offset: usize) {
+        // The JUMPDEST rule is not enforced in kernel mode.
+        if !self.kernel_mode && self.jumpdests.binary_search(&offset).is_err() {
+            panic!("Destination is not a JUMPDEST.");
+        }
+
+        self.offset = offset;
+
+        if self.halt_offsets.contains(&offset) {
+            self.running = false;
         }
     }
 
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 73eb3ada..e912388b 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -2,6 +2,7 @@ mod curve_ops;
 mod ecrecover;
 mod exp;
 mod rlp;
+mod type_0_txn;
 
 use std::str::FromStr;
 
diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
index e74213da..fd790dcb 100644
--- a/evm/src/cpu/kernel/tests/rlp.rs
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -15,7 +15,7 @@ fn test_decode_rlp_string_len_short() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by "0x70" which is its own encoding.
-    set_rlp_memory(&mut interpreter, vec![123, 234, 0x70]);
+    interpreter.set_rlp_memory(vec![123, 234, 0x70]);
 
     interpreter.run()?;
     let expected_stack = vec![1.into(), 2.into()]; // len, pos
@@ -32,7 +32,7 @@ fn test_decode_rlp_string_len_medium() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by the RLP encoding of "1 2 3 4 5".
-    set_rlp_memory(&mut interpreter, vec![123, 234, 0x85, 1, 2, 3, 4, 5]);
+    interpreter.set_rlp_memory(vec![123, 234, 0x85, 1, 2, 3, 4, 5]);
 
     interpreter.run()?;
     let expected_stack = vec![5.into(), 3.into()]; // len, pos
@@ -49,14 +49,11 @@ fn test_decode_rlp_string_len_long() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // The RLP encoding of the string "1 2 3 ... 56".
-    set_rlp_memory(
-        &mut interpreter,
-        vec![
-            123, 234, 0xb8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
-            20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
-            42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
-        ],
-    );
+    interpreter.set_rlp_memory(vec![
+        123, 234, 0xb8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+        44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+    ]);
 
     interpreter.run()?;
     let expected_stack = vec![56.into(), 4.into()]; // len, pos
@@ -73,7 +70,7 @@ fn test_decode_rlp_list_len_short() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, 2, [3, 4]].
-    set_rlp_memory(&mut interpreter, vec![0xc5, 1, 2, 0xc2, 3, 4]);
+    interpreter.set_rlp_memory(vec![0xc5, 1, 2, 0xc2, 3, 4]);
 
     interpreter.run()?;
     let expected_stack = vec![5.into(), 1.into()]; // len, pos
@@ -90,14 +87,11 @@ fn test_decode_rlp_list_len_long() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, ..., 56].
-    set_rlp_memory(
-        &mut interpreter,
-        vec![
-            0xf8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-            22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-            44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
-        ],
-    );
+    interpreter.set_rlp_memory(vec![
+        0xf8, 56, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+        23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+        46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+    ]);
 
     interpreter.run()?;
     let expected_stack = vec![56.into(), 2.into()]; // len, pos
@@ -114,7 +108,7 @@ fn test_decode_rlp_scalar() -> Result<()> {
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_scalar, initial_stack);
 
     // The RLP encoding of "12 34 56".
-    set_rlp_memory(&mut interpreter, vec![0x83, 0x12, 0x34, 0x56]);
+    interpreter.set_rlp_memory(vec![0x83, 0x12, 0x34, 0x56]);
 
     interpreter.run()?;
     let expected_stack = vec![0x123456.into(), 4.into()]; // scalar, pos
@@ -122,8 +116,3 @@ fn test_decode_rlp_scalar() -> Result<()> {
 
     Ok(())
 }
-
-fn set_rlp_memory(interpreter: &mut Interpreter, rlp: Vec<u8>) {
-    interpreter.memory.context_memory[0].segments[Segment::RlpRaw as usize].content =
-        rlp.into_iter().map(U256::from).collect();
-}
diff --git a/evm/src/cpu/kernel/tests/type_0_txn.rs b/evm/src/cpu/kernel/tests/type_0_txn.rs
new file mode 100644
index 00000000..437795e9
--- /dev/null
+++ b/evm/src/cpu/kernel/tests/type_0_txn.rs
@@ -0,0 +1,63 @@
+use std::str::FromStr;
+
+use anyhow::Result;
+use ethereum_types::U256;
+use hex_literal::hex;
+use NormalizedTxnField::*;
+
+use crate::cpu::kernel::aggregator::KERNEL;
+use crate::cpu::kernel::interpreter::Interpreter;
+use crate::cpu::kernel::tests::rlp::set_rlp_memory;
+use crate::cpu::kernel::txn_fields::NormalizedTxnField;
+
+#[test]
+fn process_type_0_txn() -> Result<()> {
+    let process_type_0_txn = KERNEL.global_labels["process_type_0_txn"];
+    let process_normalized_txn = KERNEL.global_labels["process_normalized_txn"];
+
+    let mut interpreter = Interpreter::new_with_kernel(process_type_0_txn, vec![]);
+
+    // When we reach process_normalized_txn, we're done with parsing and normalizing.
+    // Processing normalized transactions is outside the scope of this test.
+    interpreter.halt_offsets.push(process_normalized_txn);
+
+    // Generated with py-evm:
+    // unsigned_txn = chain.create_unsigned_transaction(
+    //     nonce=5,
+    //     gas_price=10,
+    //     gas=22_000,
+    //     to=constants.ZERO_ADDRESS,
+    //     value=100,
+    //     data=b'\x42\x42',
+    // )
+    // my_txn = unsigned_txn.as_signed_transaction(my_sk)
+    // rlp.encode(my_txn)
+    interpreter.set_rlp_memory(hex!("f861050a8255f0940000000000000000000000000000000000000000648242421ca07c5c61ed975ebd286f6b027b8c504842e50a47d318e1e801719dd744fe93e6c6a01e7b5119b57dd54e175ff2f055c91f3ab1b53eba0b2c184f347cdff0e745aca2").to_vec());
+
+    interpreter.run()?;
+
+    assert_eq!(interpreter.get_txn_field(ChainIdPresent), 0.into());
+    assert_eq!(interpreter.get_txn_field(ChainId), 0.into());
+    assert_eq!(interpreter.get_txn_field(Nonce), 5.into());
+    assert_eq!(interpreter.get_txn_field(MaxPriorityFeePerGas), 10.into());
+    assert_eq!(interpreter.get_txn_field(MaxPriorityFeePerGas), 10.into());
+    assert_eq!(interpreter.get_txn_field(MaxFeePerGas), 10.into());
+    assert_eq!(interpreter.get_txn_field(To), 0.into());
+    assert_eq!(interpreter.get_txn_field(Value), 100.into());
+    assert_eq!(interpreter.get_txn_field(DataLen), 2.into());
+    assert_eq!(interpreter.get_txn_field(YParity), 1.into());
+    assert_eq!(
+        interpreter.get_txn_field(R),
+        U256::from_big_endian(&hex!(
+            "7c5c61ed975ebd286f6b027b8c504842e50a47d318e1e801719dd744fe93e6c6"
+        ))
+    );
+    assert_eq!(
+        interpreter.get_txn_field(S),
+        U256::from_big_endian(&hex!(
+            "1e7b5119b57dd54e175ff2f055c91f3ab1b53eba0b2c184f347cdff0e745aca2"
+        ))
+    );
+
+    Ok(())
+}

From b737aeaf0325e161f81acd46d321ae5d4cc0e448 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 11:13:33 -0700
Subject: [PATCH 55/85] Tweak py-evm code

---
 evm/src/cpu/kernel/tests/type_0_txn.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/type_0_txn.rs b/evm/src/cpu/kernel/tests/type_0_txn.rs
index 437795e9..01e26423 100644
--- a/evm/src/cpu/kernel/tests/type_0_txn.rs
+++ b/evm/src/cpu/kernel/tests/type_0_txn.rs
@@ -22,16 +22,20 @@ fn process_type_0_txn() -> Result<()> {
     interpreter.halt_offsets.push(process_normalized_txn);
 
     // Generated with py-evm:
+    // import eth, eth_keys, eth_utils, rlp
+    // genesis_params = { 'difficulty': eth.constants.GENESIS_DIFFICULTY }
+    // chain = eth.chains.mainnet.MainnetChain.from_genesis(eth.db.atomic.AtomicDB(), genesis_params, {})
     // unsigned_txn = chain.create_unsigned_transaction(
     //     nonce=5,
     //     gas_price=10,
     //     gas=22_000,
-    //     to=constants.ZERO_ADDRESS,
+    //     to=eth.constants.ZERO_ADDRESS,
     //     value=100,
     //     data=b'\x42\x42',
     // )
-    // my_txn = unsigned_txn.as_signed_transaction(my_sk)
-    // rlp.encode(my_txn)
+    // sk = eth_keys.keys.PrivateKey(eth_utils.decode_hex('4c0883a69102937d6231471b5dbb6204fe5129617082792ae468d01a3f362318'))
+    // signed_txn = unsigned_txn.as_signed_transaction(sk)
+    // rlp.encode(signed_txn).hex()
     interpreter.set_rlp_memory(hex!("f861050a8255f0940000000000000000000000000000000000000000648242421ca07c5c61ed975ebd286f6b027b8c504842e50a47d318e1e801719dd744fe93e6c6a01e7b5119b57dd54e175ff2f055c91f3ab1b53eba0b2c184f347cdff0e745aca2").to_vec());
 
     interpreter.run()?;

From 94c9b1b09c1c5f3d02ac4d8f7ed42f63892268c2 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 11:20:56 -0700
Subject: [PATCH 56/85] Misc

---
 evm/src/cpu/kernel/tests/mod.rs                                | 2 +-
 evm/src/cpu/kernel/tests/rlp.rs                                | 1 -
 evm/src/cpu/kernel/tests/transaction_parsing/mod.rs            | 1 +
 .../{type_0_txn.rs => transaction_parsing/parse_type_0_txn.rs} | 3 ---
 4 files changed, 2 insertions(+), 5 deletions(-)
 create mode 100644 evm/src/cpu/kernel/tests/transaction_parsing/mod.rs
 rename evm/src/cpu/kernel/tests/{type_0_txn.rs => transaction_parsing/parse_type_0_txn.rs} (97%)

diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index e912388b..ab92c5a0 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -2,7 +2,7 @@ mod curve_ops;
 mod ecrecover;
 mod exp;
 mod rlp;
-mod type_0_txn;
+mod transaction_parsing;
 
 use std::str::FromStr;
 
diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
index fd790dcb..55dfc7e8 100644
--- a/evm/src/cpu/kernel/tests/rlp.rs
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -5,7 +5,6 @@ use ethereum_types::U256;
 
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
-use crate::memory::segments::Segment;
 
 #[test]
 fn test_decode_rlp_string_len_short() -> Result<()> {
diff --git a/evm/src/cpu/kernel/tests/transaction_parsing/mod.rs b/evm/src/cpu/kernel/tests/transaction_parsing/mod.rs
new file mode 100644
index 00000000..fb50625f
--- /dev/null
+++ b/evm/src/cpu/kernel/tests/transaction_parsing/mod.rs
@@ -0,0 +1 @@
+mod parse_type_0_txn;
diff --git a/evm/src/cpu/kernel/tests/type_0_txn.rs b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
similarity index 97%
rename from evm/src/cpu/kernel/tests/type_0_txn.rs
rename to evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
index 01e26423..8b725479 100644
--- a/evm/src/cpu/kernel/tests/type_0_txn.rs
+++ b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
@@ -1,5 +1,3 @@
-use std::str::FromStr;
-
 use anyhow::Result;
 use ethereum_types::U256;
 use hex_literal::hex;
@@ -7,7 +5,6 @@ use NormalizedTxnField::*;
 
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
-use crate::cpu::kernel::tests::rlp::set_rlp_memory;
 use crate::cpu::kernel::txn_fields::NormalizedTxnField;
 
 #[test]

From b34ace4c9017927b92e20ff4170aa678ee95e66d Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 17:40:16 -0700
Subject: [PATCH 57/85] More succinct deadbeef

---
 evm/src/cpu/kernel/tests/exp.rs |  8 +++-----
 evm/src/cpu/kernel/tests/rlp.rs | 14 ++++++--------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs
index 388a1ac3..25bc5ad3 100644
--- a/evm/src/cpu/kernel/tests/exp.rs
+++ b/evm/src/cpu/kernel/tests/exp.rs
@@ -1,5 +1,3 @@
-use std::str::FromStr;
-
 use anyhow::Result;
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
@@ -17,7 +15,7 @@ fn test_exp() -> Result<()> {
     let b = U256([0; 4].map(|_| rng.gen()));
 
     // Random input
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a];
+    let initial_stack = vec![0xDEADBEEFu32.into(), b, a];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
@@ -29,7 +27,7 @@ fn test_exp() -> Result<()> {
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 base
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), b, U256::zero()];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
@@ -41,7 +39,7 @@ fn test_exp() -> Result<()> {
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 exponent
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a];
+    let initial_stack = vec![0xDEADBEEFu32.into(), U256::zero(), a];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
index e74213da..79407f1b 100644
--- a/evm/src/cpu/kernel/tests/rlp.rs
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -1,5 +1,3 @@
-use std::str::FromStr;
-
 use anyhow::Result;
 use ethereum_types::U256;
 
@@ -11,7 +9,7 @@ use crate::memory::segments::Segment;
 fn test_decode_rlp_string_len_short() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by "0x70" which is its own encoding.
@@ -28,7 +26,7 @@ fn test_decode_rlp_string_len_short() -> Result<()> {
 fn test_decode_rlp_string_len_medium() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by the RLP encoding of "1 2 3 4 5".
@@ -45,7 +43,7 @@ fn test_decode_rlp_string_len_medium() -> Result<()> {
 fn test_decode_rlp_string_len_long() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // The RLP encoding of the string "1 2 3 ... 56".
@@ -69,7 +67,7 @@ fn test_decode_rlp_string_len_long() -> Result<()> {
 fn test_decode_rlp_list_len_short() -> Result<()> {
     let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, 2, [3, 4]].
@@ -86,7 +84,7 @@ fn test_decode_rlp_list_len_short() -> Result<()> {
 fn test_decode_rlp_list_len_long() -> Result<()> {
     let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, ..., 56].
@@ -110,7 +108,7 @@ fn test_decode_rlp_list_len_long() -> Result<()> {
 fn test_decode_rlp_scalar() -> Result<()> {
     let decode_rlp_scalar = KERNEL.global_labels["decode_rlp_scalar"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_scalar, initial_stack);
 
     // The RLP encoding of "12 34 56".

From 3f08cca1164d422dce8959cc27747e40de118172 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 14:49:28 -0700
Subject: [PATCH 58/85] More constants for kernel ASM

- `GlobalMetadata` - offsets for global kernel variables in memory
- `ContextMetadata` - offsets for context-specific kernel variables in memory
- `GAS_CONSTANTS`, based on the yellowpaper

Also move constants to a separate module since `aggregator` was getting long.
---
 evm/src/cpu/kernel/aggregator.rs       | 37 +----------
 evm/src/cpu/kernel/constants.rs        | 87 ++++++++++++++++++++++++++
 evm/src/cpu/kernel/context_metadata.rs | 33 ++++++++++
 evm/src/cpu/kernel/global_metadata.rs  | 23 +++++++
 evm/src/cpu/kernel/mod.rs              |  5 +-
 evm/src/memory/segments.rs             | 22 ++++---
 6 files changed, 161 insertions(+), 46 deletions(-)
 create mode 100644 evm/src/cpu/kernel/constants.rs
 create mode 100644 evm/src/cpu/kernel/context_metadata.rs
 create mode 100644 evm/src/cpu/kernel/global_metadata.rs

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index e5e1f29f..8d45a9a2 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -1,49 +1,14 @@
 //! Loads each kernel assembly file and concatenates them.
 
-use std::collections::HashMap;
-
-use ethereum_types::U256;
-use hex_literal::hex;
 use itertools::Itertools;
 use once_cell::sync::Lazy;
 
 use super::assembler::{assemble, Kernel};
+use crate::cpu::kernel::constants::evm_constants;
 use crate::cpu::kernel::parser::parse;
-use crate::cpu::kernel::txn_fields::NormalizedTxnField;
-use crate::memory::segments::Segment;
 
 pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
 
-const EC_CONSTANTS: [(&str, [u8; 32]); 3] = [
-    (
-        "BN_BASE",
-        hex!("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"),
-    ),
-    (
-        "SECP_BASE",
-        hex!("fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"),
-    ),
-    (
-        "SECP_SCALAR",
-        hex!("fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141"),
-    ),
-];
-
-pub fn evm_constants() -> HashMap<String, U256> {
-    let mut c = HashMap::new();
-    for (name, value) in EC_CONSTANTS {
-        c.insert(name.into(), U256::from_big_endian(&value));
-    }
-    for segment in Segment::all() {
-        c.insert(segment.var_name().into(), (segment as u32).into());
-    }
-    for txn_field in NormalizedTxnField::all() {
-        c.insert(txn_field.var_name().into(), (txn_field as u32).into());
-    }
-    c
-}
-
-#[allow(dead_code)] // TODO: Should be used once witness generation is done.
 pub(crate) fn combined_kernel() -> Kernel {
     let files = vec![
         include_str!("asm/curve/bn254/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/constants.rs b/evm/src/cpu/kernel/constants.rs
new file mode 100644
index 00000000..5bc5908e
--- /dev/null
+++ b/evm/src/cpu/kernel/constants.rs
@@ -0,0 +1,87 @@
+use std::collections::HashMap;
+
+use ethereum_types::U256;
+use hex_literal::hex;
+
+use crate::cpu::kernel::context_metadata::ContextMetadata;
+use crate::cpu::kernel::global_metadata::GlobalMetadata;
+use crate::cpu::kernel::txn_fields::NormalizedTxnField;
+use crate::memory::segments::Segment;
+
+/// Constants that are accessible to our kernel assembly code.
+pub fn evm_constants() -> HashMap<String, U256> {
+    let mut c = HashMap::new();
+    for (name, value) in EC_CONSTANTS {
+        c.insert(name.into(), U256::from_big_endian(&value));
+    }
+    for (name, value) in GAS_CONSTANTS {
+        c.insert(name.into(), U256::from(value));
+    }
+    for segment in Segment::all() {
+        c.insert(segment.var_name().into(), (segment as u32).into());
+    }
+    for txn_field in NormalizedTxnField::all() {
+        c.insert(txn_field.var_name().into(), (txn_field as u32).into());
+    }
+    for txn_field in GlobalMetadata::all() {
+        c.insert(txn_field.var_name().into(), (txn_field as u32).into());
+    }
+    for txn_field in ContextMetadata::all() {
+        c.insert(txn_field.var_name().into(), (txn_field as u32).into());
+    }
+    c
+}
+
+const EC_CONSTANTS: [(&str, [u8; 32]); 3] = [
+    (
+        "BN_BASE",
+        hex!("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"),
+    ),
+    (
+        "SECP_BASE",
+        hex!("fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f"),
+    ),
+    (
+        "SECP_SCALAR",
+        hex!("fffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141"),
+    ),
+];
+
+const GAS_CONSTANTS: [(&str, u16); 36] = [
+    ("GAS_ZERO", 0),
+    ("GAS_JUMPDEST", 1),
+    ("GAS_BASE", 2),
+    ("GAS_VERYLOW", 3),
+    ("GAS_LOW", 5),
+    ("GAS_MID", 8),
+    ("GAS_HIGH", 10),
+    ("GAS_WARMACCESS", 100),
+    ("GAS_ACCESSLISTADDRESS", 2_400),
+    ("GAS_ACCESSLISTSTORAGE", 1_900),
+    ("GAS_COLDACCOUNTACCESS", 2_600),
+    ("GAS_COLDSLOAD", 2_100),
+    ("GAS_SSET", 20_000),
+    ("GAS_SRESET", 2_900),
+    ("REFUND_SCLEAR", 15_000),
+    ("REFUND_SELFDESTRUCT", 24_000),
+    ("GAS_SELFDESTRUCT", 5_000),
+    ("GAS_CREATE", 32_000),
+    ("GAS_CODEDEPOSIT", 200),
+    ("GAS_CALLVALUE", 9_000),
+    ("GAS_CALLSTIPEND", 2_300),
+    ("GAS_NEWACCOUNT", 25_000),
+    ("GAS_EXP", 10),
+    ("GAS_EXPBYTE", 50),
+    ("GAS_MEMORY", 3),
+    ("GAS_TXCREATE", 32_000),
+    ("GAS_TXDATAZERO", 4),
+    ("GAS_TXDATANONZERO", 16),
+    ("GAS_TRANSACTION", 21_000),
+    ("GAS_LOG", 375),
+    ("GAS_LOGDATA", 8),
+    ("GAS_LOGTOPIC", 375),
+    ("GAS_KECCAK256", 30),
+    ("GAS_KECCAK256WORD", 6),
+    ("GAS_COPY", 3),
+    ("GAS_BLOCKHASH", 20),
+];
diff --git a/evm/src/cpu/kernel/context_metadata.rs b/evm/src/cpu/kernel/context_metadata.rs
new file mode 100644
index 00000000..cf0bfc23
--- /dev/null
+++ b/evm/src/cpu/kernel/context_metadata.rs
@@ -0,0 +1,33 @@
+/// These metadata fields contain VM state specific to a particular context.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
+pub(crate) enum ContextMetadata {
+    /// The ID of the context which created this one.
+    ParentContext = 0,
+    /// The program counter to return to when we return to the parent context.
+    ParentProgramCounter = 1,
+    CalldataSize = 2,
+    ReturndataSize = 3,
+}
+
+impl ContextMetadata {
+    pub(crate) const COUNT: usize = 4;
+
+    pub(crate) fn all() -> [Self; Self::COUNT] {
+        [
+            Self::ParentContext,
+            Self::ParentProgramCounter,
+            Self::CalldataSize,
+            Self::ReturndataSize,
+        ]
+    }
+
+    /// The variable name that gets passed into kernel assembly code.
+    pub(crate) fn var_name(&self) -> &'static str {
+        match self {
+            ContextMetadata::ParentContext => "CTX_METADATA_PARENT_CONTEXT",
+            ContextMetadata::ParentProgramCounter => "CTX_METADATA_PARENT_PC",
+            ContextMetadata::CalldataSize => "CTX_METADATA_CALLDATA_SIZE",
+            ContextMetadata::ReturndataSize => "CTX_METADATA_RETURNDATA_SIZE",
+        }
+    }
+}
diff --git a/evm/src/cpu/kernel/global_metadata.rs b/evm/src/cpu/kernel/global_metadata.rs
new file mode 100644
index 00000000..50b54ec3
--- /dev/null
+++ b/evm/src/cpu/kernel/global_metadata.rs
@@ -0,0 +1,23 @@
+/// These metadata fields contain global VM state, stored in the `Segment::GlobalMetadata` segment of the
+/// kernel's context (which is zero).
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
+pub(crate) enum GlobalMetadata {
+    /// The larger context ID that has been used so far in this execution. Tracking this allows us
+    /// give each new context a unique ID, so that its memory will be zero-initialized.
+    LargestContext = 0,
+}
+
+impl GlobalMetadata {
+    pub(crate) const COUNT: usize = 1;
+
+    pub(crate) fn all() -> [Self; Self::COUNT] {
+        [Self::LargestContext]
+    }
+
+    /// The variable name that gets passed into kernel assembly code.
+    pub(crate) fn var_name(&self) -> &'static str {
+        match self {
+            GlobalMetadata::LargestContext => "GLOBAL_METADATA_LARGEST_CONTEXT",
+        }
+    }
+}
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index f0247f93..641ee529 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -1,6 +1,9 @@
 pub mod aggregator;
 pub mod assembler;
 mod ast;
+mod constants;
+mod context_metadata;
+mod global_metadata;
 pub(crate) mod keccak_util;
 mod opcodes;
 mod parser;
@@ -17,7 +20,7 @@ mod tests;
 use assembler::assemble;
 use parser::parse;
 
-use crate::cpu::kernel::aggregator::evm_constants;
+use crate::cpu::kernel::constants::evm_constants;
 
 /// Assemble files, outputting bytes.
 /// This is for debugging the kernel only.
diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs
index 15545ea0..712db644 100644
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@@ -13,20 +13,21 @@ pub(crate) enum Segment {
     Returndata = 4,
     /// A segment which contains a few fixed-size metadata fields, such as the caller's context, or the
     /// size of `CALLDATA` and `RETURNDATA`.
-    Metadata = 5,
+    GlobalMetadata = 5,
+    ContextMetadata = 6,
     /// General purpose kernel memory, used by various kernel functions.
     /// In general, calling a helper function can result in this memory being clobbered.
-    KernelGeneral = 6,
+    KernelGeneral = 7,
     /// Contains normalized transaction fields; see `TxnField`.
-    TxnFields = 7,
+    TxnFields = 8,
     /// Contains the data field of a transaction.
-    TxnData = 8,
+    TxnData = 9,
     /// Raw RLP data.
-    RlpRaw = 9,
+    RlpRaw = 10,
 }
 
 impl Segment {
-    pub(crate) const COUNT: usize = 10;
+    pub(crate) const COUNT: usize = 11;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
@@ -35,7 +36,8 @@ impl Segment {
             Self::MainMemory,
             Self::Calldata,
             Self::Returndata,
-            Self::Metadata,
+            Self::GlobalMetadata,
+            Self::ContextMetadata,
             Self::KernelGeneral,
             Self::TxnFields,
             Self::TxnData,
@@ -51,7 +53,8 @@ impl Segment {
             Segment::MainMemory => "SEGMENT_MAIN_MEMORY",
             Segment::Calldata => "SEGMENT_CALLDATA",
             Segment::Returndata => "SEGMENT_RETURNDATA",
-            Segment::Metadata => "SEGMENT_METADATA",
+            Segment::GlobalMetadata => "SEGMENT_GLOBAL_METADATA",
+            Segment::ContextMetadata => "SEGMENT_CONTEXT_METADATA",
             Segment::KernelGeneral => "SEGMENT_KERNEL_GENERAL",
             Segment::TxnFields => "SEGMENT_NORMALIZED_TXN",
             Segment::TxnData => "SEGMENT_TXN_DATA",
@@ -67,7 +70,8 @@ impl Segment {
             Segment::MainMemory => 8,
             Segment::Calldata => 8,
             Segment::Returndata => 8,
-            Segment::Metadata => 256,
+            Segment::GlobalMetadata => 256,
+            Segment::ContextMetadata => 256,
             Segment::KernelGeneral => 256,
             Segment::TxnFields => 256,
             Segment::TxnData => 256,

From be0a5269ab2f5ea4e8d8f659902491916aae127c Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 21:21:35 -0700
Subject: [PATCH 59/85] UserspaceProgramCounter

---
 evm/src/cpu/kernel/context_metadata.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/context_metadata.rs b/evm/src/cpu/kernel/context_metadata.rs
index cf0bfc23..ff075814 100644
--- a/evm/src/cpu/kernel/context_metadata.rs
+++ b/evm/src/cpu/kernel/context_metadata.rs
@@ -5,17 +5,20 @@ pub(crate) enum ContextMetadata {
     ParentContext = 0,
     /// The program counter to return to when we return to the parent context.
     ParentProgramCounter = 1,
-    CalldataSize = 2,
-    ReturndataSize = 3,
+    /// If we're in a system call, this holds the userspace program counter to return to.
+    UserspaceProgramCounter = 2,
+    CalldataSize = 3,
+    ReturndataSize = 4,
 }
 
 impl ContextMetadata {
-    pub(crate) const COUNT: usize = 4;
+    pub(crate) const COUNT: usize = 5;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
             Self::ParentContext,
             Self::ParentProgramCounter,
+            Self::UserspaceProgramCounter,
             Self::CalldataSize,
             Self::ReturndataSize,
         ]
@@ -26,6 +29,7 @@ impl ContextMetadata {
         match self {
             ContextMetadata::ParentContext => "CTX_METADATA_PARENT_CONTEXT",
             ContextMetadata::ParentProgramCounter => "CTX_METADATA_PARENT_PC",
+            ContextMetadata::UserspaceProgramCounter => "CTX_METADATA_USERSPACE_PC",
             ContextMetadata::CalldataSize => "CTX_METADATA_CALLDATA_SIZE",
             ContextMetadata::ReturndataSize => "CTX_METADATA_RETURNDATA_SIZE",
         }

From 215be25cf8e1364092ea35d285c9bea1188a21ed Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 09:08:24 -0700
Subject: [PATCH 60/85] Feedback

---
 evm/src/cpu/kernel/interpreter.rs                             | 4 ++++
 .../cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 3b753f0a..6a5b794f 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -137,6 +137,10 @@ impl<'a> Interpreter<'a> {
         self.memory.context_memory[0].segments[Segment::TxnFields as usize].content[field as usize]
     }
 
+    pub(crate) fn get_txn_data(&self) -> &[U256] {
+        &self.memory.context_memory[0].segments[Segment::TxnData as usize].content
+    }
+
     pub(crate) fn set_rlp_memory(&mut self, rlp: Vec<u8>) {
         self.memory.context_memory[0].segments[Segment::RlpRaw as usize].content =
             rlp.into_iter().map(U256::from).collect();
diff --git a/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
index 8b725479..c01474ce 100644
--- a/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
+++ b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
@@ -46,6 +46,7 @@ fn process_type_0_txn() -> Result<()> {
     assert_eq!(interpreter.get_txn_field(To), 0.into());
     assert_eq!(interpreter.get_txn_field(Value), 100.into());
     assert_eq!(interpreter.get_txn_field(DataLen), 2.into());
+    assert_eq!(interpreter.get_txn_data(), &[0x42.into(), 0x42.into()]);
     assert_eq!(interpreter.get_txn_field(YParity), 1.into());
     assert_eq!(
         interpreter.get_txn_field(R),

From f95134555621288cb4e003b4adcd6cd1bc31b7ae Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 09:09:48 -0700
Subject: [PATCH 61/85] Update evm/src/cpu/kernel/global_metadata.rs

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 evm/src/cpu/kernel/global_metadata.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/global_metadata.rs b/evm/src/cpu/kernel/global_metadata.rs
index 50b54ec3..23c5a4c7 100644
--- a/evm/src/cpu/kernel/global_metadata.rs
+++ b/evm/src/cpu/kernel/global_metadata.rs
@@ -2,7 +2,7 @@
 /// kernel's context (which is zero).
 #[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
 pub(crate) enum GlobalMetadata {
-    /// The larger context ID that has been used so far in this execution. Tracking this allows us
+    /// The largest context ID that has been used so far in this execution. Tracking this allows us to
     /// give each new context a unique ID, so that its memory will be zero-initialized.
     LargestContext = 0,
 }

From 8bb45203f9dfe90047f866cd5729e02edb19e35e Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Mon, 1 Aug 2022 17:40:16 -0700
Subject: [PATCH 62/85] More succinct deadbeef

---
 evm/src/cpu/kernel/tests/exp.rs |  8 +++-----
 evm/src/cpu/kernel/tests/rlp.rs | 14 ++++++--------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs
index 388a1ac3..25bc5ad3 100644
--- a/evm/src/cpu/kernel/tests/exp.rs
+++ b/evm/src/cpu/kernel/tests/exp.rs
@@ -1,5 +1,3 @@
-use std::str::FromStr;
-
 use anyhow::Result;
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
@@ -17,7 +15,7 @@ fn test_exp() -> Result<()> {
     let b = U256([0; 4].map(|_| rng.gen()));
 
     // Random input
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, a];
+    let initial_stack = vec![0xDEADBEEFu32.into(), b, a];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
@@ -29,7 +27,7 @@ fn test_exp() -> Result<()> {
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 base
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, b, U256::zero()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), b, U256::zero()];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
@@ -41,7 +39,7 @@ fn test_exp() -> Result<()> {
     assert_eq!(stack_with_kernel, stack_with_opcode);
 
     // 0 exponent
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, U256::zero(), a];
+    let initial_stack = vec![0xDEADBEEFu32.into(), U256::zero(), a];
     let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)?
         .stack()
         .to_vec();
diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
index 55dfc7e8..cc311b4f 100644
--- a/evm/src/cpu/kernel/tests/rlp.rs
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -1,5 +1,3 @@
-use std::str::FromStr;
-
 use anyhow::Result;
 use ethereum_types::U256;
 
@@ -10,7 +8,7 @@ use crate::cpu::kernel::interpreter::Interpreter;
 fn test_decode_rlp_string_len_short() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by "0x70" which is its own encoding.
@@ -27,7 +25,7 @@ fn test_decode_rlp_string_len_short() -> Result<()> {
 fn test_decode_rlp_string_len_medium() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // A couple dummy bytes, followed by the RLP encoding of "1 2 3 4 5".
@@ -44,7 +42,7 @@ fn test_decode_rlp_string_len_medium() -> Result<()> {
 fn test_decode_rlp_string_len_long() -> Result<()> {
     let decode_rlp_string_len = KERNEL.global_labels["decode_rlp_string_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 2.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 2.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_string_len, initial_stack);
 
     // The RLP encoding of the string "1 2 3 ... 56".
@@ -65,7 +63,7 @@ fn test_decode_rlp_string_len_long() -> Result<()> {
 fn test_decode_rlp_list_len_short() -> Result<()> {
     let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, 2, [3, 4]].
@@ -82,7 +80,7 @@ fn test_decode_rlp_list_len_short() -> Result<()> {
 fn test_decode_rlp_list_len_long() -> Result<()> {
     let decode_rlp_list_len = KERNEL.global_labels["decode_rlp_list_len"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_list_len, initial_stack);
 
     // The RLP encoding of [1, ..., 56].
@@ -103,7 +101,7 @@ fn test_decode_rlp_list_len_long() -> Result<()> {
 fn test_decode_rlp_scalar() -> Result<()> {
     let decode_rlp_scalar = KERNEL.global_labels["decode_rlp_scalar"];
 
-    let initial_stack = vec![U256::from_str("0xdeadbeef")?, 0.into()];
+    let initial_stack = vec![0xDEADBEEFu32.into(), 0.into()];
     let mut interpreter = Interpreter::new_with_kernel(decode_rlp_scalar, initial_stack);
 
     // The RLP encoding of "12 34 56".

From 3b54ec398618ed891e5474afcfc52288b35dec0d Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 09:08:24 -0700
Subject: [PATCH 63/85] Feedback

---
 evm/src/cpu/kernel/interpreter.rs                             | 4 ++++
 .../cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs  | 1 +
 2 files changed, 5 insertions(+)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 3b753f0a..6a5b794f 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -137,6 +137,10 @@ impl<'a> Interpreter<'a> {
         self.memory.context_memory[0].segments[Segment::TxnFields as usize].content[field as usize]
     }
 
+    pub(crate) fn get_txn_data(&self) -> &[U256] {
+        &self.memory.context_memory[0].segments[Segment::TxnData as usize].content
+    }
+
     pub(crate) fn set_rlp_memory(&mut self, rlp: Vec<u8>) {
         self.memory.context_memory[0].segments[Segment::RlpRaw as usize].content =
             rlp.into_iter().map(U256::from).collect();
diff --git a/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
index 8b725479..c01474ce 100644
--- a/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
+++ b/evm/src/cpu/kernel/tests/transaction_parsing/parse_type_0_txn.rs
@@ -46,6 +46,7 @@ fn process_type_0_txn() -> Result<()> {
     assert_eq!(interpreter.get_txn_field(To), 0.into());
     assert_eq!(interpreter.get_txn_field(Value), 100.into());
     assert_eq!(interpreter.get_txn_field(DataLen), 2.into());
+    assert_eq!(interpreter.get_txn_data(), &[0x42.into(), 0x42.into()]);
     assert_eq!(interpreter.get_txn_field(YParity), 1.into());
     assert_eq!(
         interpreter.get_txn_field(R),

From c167da8cbea201f6b219db73119126e87b6e2f1c Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 09:10:21 -0700
Subject: [PATCH 64/85] Revert "UserspaceProgramCounter"

This reverts commit 05beaab661a8aaa2f84b6f121b5ac2f29f2ed836.
---
 evm/src/cpu/kernel/context_metadata.rs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/evm/src/cpu/kernel/context_metadata.rs b/evm/src/cpu/kernel/context_metadata.rs
index ff075814..cf0bfc23 100644
--- a/evm/src/cpu/kernel/context_metadata.rs
+++ b/evm/src/cpu/kernel/context_metadata.rs
@@ -5,20 +5,17 @@ pub(crate) enum ContextMetadata {
     ParentContext = 0,
     /// The program counter to return to when we return to the parent context.
     ParentProgramCounter = 1,
-    /// If we're in a system call, this holds the userspace program counter to return to.
-    UserspaceProgramCounter = 2,
-    CalldataSize = 3,
-    ReturndataSize = 4,
+    CalldataSize = 2,
+    ReturndataSize = 3,
 }
 
 impl ContextMetadata {
-    pub(crate) const COUNT: usize = 5;
+    pub(crate) const COUNT: usize = 4;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
             Self::ParentContext,
             Self::ParentProgramCounter,
-            Self::UserspaceProgramCounter,
             Self::CalldataSize,
             Self::ReturndataSize,
         ]
@@ -29,7 +26,6 @@ impl ContextMetadata {
         match self {
             ContextMetadata::ParentContext => "CTX_METADATA_PARENT_CONTEXT",
             ContextMetadata::ParentProgramCounter => "CTX_METADATA_PARENT_PC",
-            ContextMetadata::UserspaceProgramCounter => "CTX_METADATA_USERSPACE_PC",
             ContextMetadata::CalldataSize => "CTX_METADATA_CALLDATA_SIZE",
             ContextMetadata::ReturndataSize => "CTX_METADATA_RETURNDATA_SIZE",
         }

From cb2df9fa03dd334e0e3373d2fff857893f084da5 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 09:22:06 -0700
Subject: [PATCH 65/85] More commutative fns

---
 evm/src/cpu/kernel/optimizer.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 6d77f0fa..ac46011a 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -97,7 +97,7 @@ fn remove_swapped_pushes(code: &mut Vec<Item>) {
 fn remove_swaps_commutative(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [StandardOp(swap1), StandardOp(f)] = window && &swap1 == "SWAP1" {
-            let commutative = matches!(f.as_str(), "ADD" | "MUL");
+            let commutative = matches!(f.as_str(), "ADD" | "MUL" | "AND" | "OR" | "XOR" | "EQ");
             commutative.then_some(vec![StandardOp(f)])
         } else {
             None

From 002b568a12654d6d1b7804f1308bc0825dfce91d Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 10:17:34 -0700
Subject: [PATCH 66/85] fix

---
 evm/src/cpu/kernel/tests/rlp.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/evm/src/cpu/kernel/tests/rlp.rs b/evm/src/cpu/kernel/tests/rlp.rs
index cc311b4f..a1ca3609 100644
--- a/evm/src/cpu/kernel/tests/rlp.rs
+++ b/evm/src/cpu/kernel/tests/rlp.rs
@@ -1,5 +1,4 @@
 use anyhow::Result;
-use ethereum_types::U256;
 
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;

From 5563176bad191dc8a7034d1e9c3cc7de742b0e46 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Tue, 2 Aug 2022 16:20:58 -0400
Subject: [PATCH 67/85] make rest of gates public

---
 plonky2/src/gates/exponentiation.rs           | 2 +-
 plonky2/src/gates/interpolation.rs            | 2 +-
 plonky2/src/gates/low_degree_interpolation.rs | 2 +-
 plonky2/src/gates/random_access.rs            | 2 +-
 plonky2/src/gates/selectors.rs                | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/plonky2/src/gates/exponentiation.rs b/plonky2/src/gates/exponentiation.rs
index db12d697..aa977308 100644
--- a/plonky2/src/gates/exponentiation.rs
+++ b/plonky2/src/gates/exponentiation.rs
@@ -23,7 +23,7 @@ use crate::plonk::vars::{
 
 /// A gate for raising a value to a power.
 #[derive(Clone, Debug)]
-pub(crate) struct ExponentiationGate<F: RichField + Extendable<D>, const D: usize> {
+pub struct ExponentiationGate<F: RichField + Extendable<D>, const D: usize> {
     pub num_power_bits: usize,
     pub _phantom: PhantomData<F>,
 }
diff --git a/plonky2/src/gates/interpolation.rs b/plonky2/src/gates/interpolation.rs
index c98f7fe3..1983e5aa 100644
--- a/plonky2/src/gates/interpolation.rs
+++ b/plonky2/src/gates/interpolation.rs
@@ -22,7 +22,7 @@ use crate::plonk::vars::{EvaluationTargets, EvaluationVars, EvaluationVarsBase};
 /// Interpolation gate with constraints of degree at most `1<<subgroup_bits`.
 /// `eval_unfiltered_recursively` uses less gates than `LowDegreeInterpolationGate`.
 #[derive(Copy, Clone, Debug)]
-pub(crate) struct HighDegreeInterpolationGate<F: RichField + Extendable<D>, const D: usize> {
+pub struct HighDegreeInterpolationGate<F: RichField + Extendable<D>, const D: usize> {
     pub subgroup_bits: usize,
     _phantom: PhantomData<F>,
 }
diff --git a/plonky2/src/gates/low_degree_interpolation.rs b/plonky2/src/gates/low_degree_interpolation.rs
index 4852792a..217f4f0a 100644
--- a/plonky2/src/gates/low_degree_interpolation.rs
+++ b/plonky2/src/gates/low_degree_interpolation.rs
@@ -23,7 +23,7 @@ use crate::plonk::vars::{EvaluationTargets, EvaluationVars, EvaluationVarsBase};
 /// Interpolation gate with constraints of degree 2.
 /// `eval_unfiltered_recursively` uses more gates than `HighDegreeInterpolationGate`.
 #[derive(Copy, Clone, Debug)]
-pub(crate) struct LowDegreeInterpolationGate<F: RichField + Extendable<D>, const D: usize> {
+pub struct LowDegreeInterpolationGate<F: RichField + Extendable<D>, const D: usize> {
     pub subgroup_bits: usize,
     _phantom: PhantomData<F>,
 }
diff --git a/plonky2/src/gates/random_access.rs b/plonky2/src/gates/random_access.rs
index b1f1d529..2df392bc 100644
--- a/plonky2/src/gates/random_access.rs
+++ b/plonky2/src/gates/random_access.rs
@@ -23,7 +23,7 @@ use crate::plonk::vars::{
 
 /// A gate for checking that a particular element of a list matches a given value.
 #[derive(Copy, Clone, Debug)]
-pub(crate) struct RandomAccessGate<F: RichField + Extendable<D>, const D: usize> {
+pub struct RandomAccessGate<F: RichField + Extendable<D>, const D: usize> {
     pub bits: usize,
     pub num_copies: usize,
     pub num_extra_constants: usize,
diff --git a/plonky2/src/gates/selectors.rs b/plonky2/src/gates/selectors.rs
index fff5d967..f1214a93 100644
--- a/plonky2/src/gates/selectors.rs
+++ b/plonky2/src/gates/selectors.rs
@@ -10,7 +10,7 @@ use crate::hash::hash_types::RichField;
 pub(crate) const UNUSED_SELECTOR: usize = u32::MAX as usize;
 
 #[derive(Debug, Clone)]
-pub(crate) struct SelectorsInfo {
+pub struct SelectorsInfo {
     pub(crate) selector_indices: Vec<usize>,
     pub(crate) groups: Vec<Range<usize>>,
 }

From 243bc09293edb630422c070670dabd03350b4fd7 Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Tue, 2 Aug 2022 16:42:37 -0400
Subject: [PATCH 68/85] make modules public

---
 plonky2/src/gates/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/plonky2/src/gates/mod.rs b/plonky2/src/gates/mod.rs
index d02f2978..e78e1e9d 100644
--- a/plonky2/src/gates/mod.rs
+++ b/plonky2/src/gates/mod.rs
@@ -14,12 +14,12 @@ pub mod multiplication_extension;
 pub mod noop;
 pub mod packed_util;
 pub mod poseidon;
-pub(crate) mod poseidon_mds;
-pub(crate) mod public_input;
+pub mod poseidon_mds;
+pub mod public_input;
 pub mod random_access;
 pub mod reducing;
 pub mod reducing_extension;
-pub(crate) mod selectors;
+pub mod selectors;
 pub mod util;
 
 // Can't use #[cfg(test)] here because it needs to be visible to other crates.

From 8c515b4f2c133edb00b888c4e5a78958d393e39e Mon Sep 17 00:00:00 2001
From: Sebastien La Duca <sladuca777@gmail.com>
Date: Tue, 2 Aug 2022 17:12:27 -0400
Subject: [PATCH 69/85] selectors can stay pub(crate)

---
 plonky2/src/gates/mod.rs       | 2 +-
 plonky2/src/gates/selectors.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/plonky2/src/gates/mod.rs b/plonky2/src/gates/mod.rs
index e78e1e9d..786ba12c 100644
--- a/plonky2/src/gates/mod.rs
+++ b/plonky2/src/gates/mod.rs
@@ -19,7 +19,7 @@ pub mod public_input;
 pub mod random_access;
 pub mod reducing;
 pub mod reducing_extension;
-pub mod selectors;
+pub(crate) mod selectors;
 pub mod util;
 
 // Can't use #[cfg(test)] here because it needs to be visible to other crates.
diff --git a/plonky2/src/gates/selectors.rs b/plonky2/src/gates/selectors.rs
index f1214a93..fff5d967 100644
--- a/plonky2/src/gates/selectors.rs
+++ b/plonky2/src/gates/selectors.rs
@@ -10,7 +10,7 @@ use crate::hash::hash_types::RichField;
 pub(crate) const UNUSED_SELECTOR: usize = u32::MAX as usize;
 
 #[derive(Debug, Clone)]
-pub struct SelectorsInfo {
+pub(crate) struct SelectorsInfo {
     pub(crate) selector_indices: Vec<usize>,
     pub(crate) groups: Vec<Range<usize>>,
 }

From 6416826643ae906bd1ec0b59510e0922e092d6b6 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 15:44:50 -0700
Subject: [PATCH 70/85] Feedback

---
 evm/src/cpu/kernel/asm/util/basic_macros.asm | 8 ++------
 evm/src/cpu/kernel/optimizer.rs              | 7 +++++++
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index a91feb05..4dd93d14 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -120,9 +120,7 @@
     // stack: input, ...
     PUSH $c
     // stack: c, input, ...
-    %add_const(1) // This will be optimized out.
-    // stack: c + 1, input, ...
-    GT // Check it backwards: (input <= c) == (c + 1 > input)
+    LT ISZERO // Check it backwards: (input <= c) == !(c < input)
     // stack: input <= c, ...
 %endmacro
 
@@ -138,9 +136,7 @@
     // stack: input, ...
     PUSH $c
     // stack: c, input, ...
-    %sub_const(1) // This will be optimized out.
-    // stack: c - 1, input, ...
-    LT // Check it backwards: (input >= c) == (c - 1 < input)
+    GT ISZERO // Check it backwards: (input >= c) == !(c > input)
     // stack: input >= c, ...
 %endmacro
 
diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index ac46011a..a334f2d8 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -50,6 +50,8 @@ fn constant_propagation(code: &mut Vec<Item>) {
                 "SUB" => Some(x.overflowing_sub(y).0),
                 "MUL" => Some(x.overflowing_mul(y).0),
                 "DIV" => Some(x.checked_div(y).unwrap_or(U256::zero())),
+                "MOD" => Some(x.checked_rem(y).unwrap_or(U256::zero())),
+                "EXP" => Some(x.overflowing_pow(y).0),
                 "SHL" => Some(x << y),
                 "SHR" => Some(x >> y),
                 "AND" => Some(x & y),
@@ -58,6 +60,11 @@ fn constant_propagation(code: &mut Vec<Item>) {
                 "LT" => Some(u256_from_bool(x < y)),
                 "GT" => Some(u256_from_bool(x > y)),
                 "EQ" => Some(u256_from_bool(x == y)),
+                "BYTE" => Some(if x < 32.into() {
+                    y.byte(31 - x.as_usize()).into() // EVM BYTE index 0 is the MSB.
+                } else {
+                    U256::zero()
+                }),
                 _ => None,
             }
             .map(|res| vec![Push(Literal(res))])

From 8aad0b07465f6043dc1d9a609b3d3ab2cc410e83 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Tue, 2 Aug 2022 15:57:06 -0700
Subject: [PATCH 71/85] Feedback

---
 evm/src/cpu/kernel/assembler.rs | 2 +-
 evm/src/cpu/kernel/optimizer.rs | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 0a1232e1..4e3381b5 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -538,6 +538,6 @@ mod tests {
         constants: HashMap<String, U256>,
     ) -> Kernel {
         let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
-        assemble(parsed_files, constants, false)
+        assemble(parsed_files, constants, true)
     }
 }
diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index a334f2d8..6fe9d496 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -89,6 +89,7 @@ fn no_op_jumps(code: &mut Vec<Item>) {
 }
 
 /// Remove swaps: `[PUSH x, PUSH y, SWAP1] -> [PUSH y, PUSH x]`.
+// Could be generalized to recognize more than two pushes.
 fn remove_swapped_pushes(code: &mut Vec<Item>) {
     replace_windows(code, |window| {
         if let [Push(x), Push(y), StandardOp(swap1)] = window
@@ -113,6 +114,7 @@ fn remove_swaps_commutative(code: &mut Vec<Item>) {
 }
 
 /// Remove push-pop type patterns, such as: `[DUP1, POP]`.
+// Could be extended to other non-side-effecting operations, e.g. [DUP1, ADD, POP] -> [POP].
 fn remove_ignored_values(code: &mut Vec<Item>) {
     replace_windows(code, |[a, b]| {
         if let StandardOp(pop) = b && &pop == "POP" {

From 9b5b77d3e9d0b6d830c2277e297ba6df23e5e116 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 09:57:40 -0700
Subject: [PATCH 72/85] Check if suggested code is actually better

---
 evm/src/cpu/kernel/cost_estimator.rs | 37 ++++++++++++++++++++++++++++
 evm/src/cpu/kernel/mod.rs            |  1 +
 evm/src/cpu/kernel/optimizer.rs      | 25 ++++++++++++++++---
 3 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 evm/src/cpu/kernel/cost_estimator.rs

diff --git a/evm/src/cpu/kernel/cost_estimator.rs b/evm/src/cpu/kernel/cost_estimator.rs
new file mode 100644
index 00000000..3dfcf63a
--- /dev/null
+++ b/evm/src/cpu/kernel/cost_estimator.rs
@@ -0,0 +1,37 @@
+use crate::cpu::kernel::assembler::BYTES_PER_OFFSET;
+use crate::cpu::kernel::ast::Item;
+use crate::cpu::kernel::ast::Item::*;
+use crate::cpu::kernel::ast::PushTarget::*;
+use crate::cpu::kernel::utils::u256_to_trimmed_be_bytes;
+
+pub(crate) fn is_code_improved(before: &[Item], after: &[Item]) -> bool {
+    cost_estimate(after) < cost_estimate(before)
+}
+
+fn cost_estimate(code: &[Item]) -> u32 {
+    code.iter().map(cost_estimate_item).sum()
+}
+
+fn cost_estimate_item(item: &Item) -> u32 {
+    match item {
+        MacroDef(_, _, _) => 0,
+        GlobalLabelDeclaration(_) => 0,
+        LocalLabelDeclaration(_) => 0,
+        Push(Literal(n)) => cost_estimate_push(u256_to_trimmed_be_bytes(n).len()),
+        Push(Label(_)) => cost_estimate_push(BYTES_PER_OFFSET as usize),
+        ProverInput(_) => 1,
+        StandardOp(op) => cost_estimate_standard_op(op.as_str()),
+        _ => panic!("Unexpected item: {:?}", item),
+    }
+}
+
+fn cost_estimate_standard_op(_op: &str) -> u32 {
+    // For now we just treat any standard operation as having the same cost. This is pretty naive,
+    // but should work fine with our current set of simple optimization rules.
+    1
+}
+
+fn cost_estimate_push(num_bytes: usize) -> u32 {
+    // TODO: Once PUSH is actually implemented, check if this needs to be revised.
+    num_bytes as u32
+}
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
index d87c1e13..59caff76 100644
--- a/evm/src/cpu/kernel/mod.rs
+++ b/evm/src/cpu/kernel/mod.rs
@@ -1,6 +1,7 @@
 pub mod aggregator;
 pub mod assembler;
 mod ast;
+mod cost_estimator;
 pub(crate) mod keccak_util;
 mod opcodes;
 mod optimizer;
diff --git a/evm/src/cpu/kernel/optimizer.rs b/evm/src/cpu/kernel/optimizer.rs
index 6fe9d496..2a1db6d3 100644
--- a/evm/src/cpu/kernel/optimizer.rs
+++ b/evm/src/cpu/kernel/optimizer.rs
@@ -5,6 +5,7 @@ use PushTarget::Literal;
 use crate::cpu::kernel::ast::Item::{GlobalLabelDeclaration, LocalLabelDeclaration};
 use crate::cpu::kernel::ast::PushTarget::Label;
 use crate::cpu::kernel::ast::{Item, PushTarget};
+use crate::cpu::kernel::cost_estimator::is_code_improved;
 use crate::cpu::kernel::utils::{replace_windows, u256_from_bool};
 
 pub(crate) fn optimize_asm(code: &mut Vec<Item>) {
@@ -30,7 +31,7 @@ fn optimize_asm_once(code: &mut Vec<Item>) {
 /// Constant propagation.
 fn constant_propagation(code: &mut Vec<Item>) {
     // Constant propagation for unary ops: `[PUSH x, UNARYOP] -> [PUSH UNARYOP(x)]`
-    replace_windows(code, |window| {
+    replace_windows_if_better(code, |window| {
         if let [Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
                 "ISZERO" => Some(vec![Push(Literal(u256_from_bool(x.is_zero())))]),
@@ -43,7 +44,7 @@ fn constant_propagation(code: &mut Vec<Item>) {
     });
 
     // Constant propagation for binary ops: `[PUSH y, PUSH x, BINOP] -> [PUSH BINOP(x, y)]`
-    replace_windows(code, |window| {
+    replace_windows_if_better(code, |window| {
         if let [Push(Literal(y)), Push(Literal(x)), StandardOp(op)] = window {
             match op.as_str() {
                 "ADD" => Some(x.overflowing_add(y).0),
@@ -129,6 +130,17 @@ fn remove_ignored_values(code: &mut Vec<Item>) {
     });
 }
 
+/// Like `replace_windows`, but specifically for code, and only makes replacements if our cost
+/// estimator thinks that the new code is more efficient.
+fn replace_windows_if_better<const W: usize, F>(code: &mut Vec<Item>, maybe_replace: F)
+where
+    F: Fn([Item; W]) -> Option<Vec<Item>>,
+{
+    replace_windows(code, |window| {
+        maybe_replace(window.clone()).filter(|suggestion| is_code_improved(&window, suggestion))
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -153,13 +165,18 @@ mod tests {
 
     #[test]
     fn test_constant_propagation_sub_underflowing() {
-        let mut code = vec![
+        let original = vec![
             Push(Literal(U256::one())),
             Push(Literal(U256::zero())),
             StandardOp("SUB".into()),
         ];
+        let mut code = original.clone();
         constant_propagation(&mut code);
-        assert_eq!(code, vec![Push(Literal(U256::max_value()))]);
+        // Constant propagation could replace the code with [PUSH U256::MAX], but that's actually
+        // more expensive, so the code shouldn't be changed.
+        // (The code could also be replaced with [PUSH 0; NOT], which would be an improvement, but
+        // our optimizer isn't smart enough yet.)
+        assert_eq!(code, original);
     }
 
     #[test]

From b4d83f8db2e4d331a0077aef993bbf85576fada8 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 13:40:43 -0700
Subject: [PATCH 73/85] More metadata fields

---
 evm/src/cpu/kernel/context_metadata.rs | 20 +++++++++++++++++++-
 evm/src/cpu/kernel/global_metadata.rs  | 10 ++++++++--
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/context_metadata.rs b/evm/src/cpu/kernel/context_metadata.rs
index cf0bfc23..5b6ce303 100644
--- a/evm/src/cpu/kernel/context_metadata.rs
+++ b/evm/src/cpu/kernel/context_metadata.rs
@@ -7,10 +7,20 @@ pub(crate) enum ContextMetadata {
     ParentProgramCounter = 1,
     CalldataSize = 2,
     ReturndataSize = 3,
+    /// The address of the account associated with this context.
+    Address = 4,
+    /// The size of the code under the account associated with this context.
+    /// While this information could be obtained from the state trie, it is best to cache it since
+    /// the `CODESIZE` instruction is very cheap.
+    CodeSize = 5,
+    /// The address of the caller who spawned this context.
+    Caller = 6,
+    /// The value (in wei) deposited by the caller.
+    CallValue = 7,
 }
 
 impl ContextMetadata {
-    pub(crate) const COUNT: usize = 4;
+    pub(crate) const COUNT: usize = 8;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
@@ -18,6 +28,10 @@ impl ContextMetadata {
             Self::ParentProgramCounter,
             Self::CalldataSize,
             Self::ReturndataSize,
+            Self::Address,
+            Self::CodeSize,
+            Self::Caller,
+            Self::CallValue,
         ]
     }
 
@@ -28,6 +42,10 @@ impl ContextMetadata {
             ContextMetadata::ParentProgramCounter => "CTX_METADATA_PARENT_PC",
             ContextMetadata::CalldataSize => "CTX_METADATA_CALLDATA_SIZE",
             ContextMetadata::ReturndataSize => "CTX_METADATA_RETURNDATA_SIZE",
+            ContextMetadata::Address => "CTX_METADATA_ADDRESS",
+            ContextMetadata::CodeSize => "CTX_METADATA_CODE_SIZE",
+            ContextMetadata::Caller => "CTX_METADATA_CALLER",
+            ContextMetadata::CallValue => "CTX_METADATA_CALL_VALUE",
         }
     }
 }
diff --git a/evm/src/cpu/kernel/global_metadata.rs b/evm/src/cpu/kernel/global_metadata.rs
index 23c5a4c7..6343a2e6 100644
--- a/evm/src/cpu/kernel/global_metadata.rs
+++ b/evm/src/cpu/kernel/global_metadata.rs
@@ -5,19 +5,25 @@ pub(crate) enum GlobalMetadata {
     /// The largest context ID that has been used so far in this execution. Tracking this allows us
     /// give each new context a unique ID, so that its memory will be zero-initialized.
     LargestContext = 0,
+    /// The address of the sender of the transaction.
+    Origin = 1,
+    /// The size of active memory, in bytes.
+    MemorySize = 2,
 }
 
 impl GlobalMetadata {
-    pub(crate) const COUNT: usize = 1;
+    pub(crate) const COUNT: usize = 3;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
-        [Self::LargestContext]
+        [Self::LargestContext, Self::Origin, Self::MemorySize]
     }
 
     /// The variable name that gets passed into kernel assembly code.
     pub(crate) fn var_name(&self) -> &'static str {
         match self {
             GlobalMetadata::LargestContext => "GLOBAL_METADATA_LARGEST_CONTEXT",
+            GlobalMetadata::Origin => "GLOBAL_METADATA_ORIGIN",
+            GlobalMetadata::MemorySize => "GLOBAL_METADATA_MEMORY_SIZE",
         }
     }
 }

From f58990160e3df8694b718af83d1f937a411b27e2 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 13:44:44 -0700
Subject: [PATCH 74/85] min, max macros

Will be used later for things like updating `MemorySize`.
---
 evm/src/cpu/kernel/asm/util/basic_macros.asm | 27 ++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index e266b2cb..3b2e482b 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -140,6 +140,11 @@
     // stack: input >= c, ...
 %endmacro
 
+%macro consume_gas_const(c)
+    PUSH $c
+    CONSUME_GAS
+%endmacro
+
 // If pred is zero, yields z; otherwise, yields nz
 %macro select
     // stack: pred, nz, z
@@ -188,3 +193,25 @@
     mul
     // stack: x^2
 %endmacro
+
+%macro min
+    // stack: x, y
+    DUP2
+    DUP2
+    // stack: x, y, x, y
+    LT
+    // stack: x < y, x, y
+    %select_bool
+    // stack: min
+%endmacro
+
+%macro max
+    // stack: x, y
+    DUP2
+    DUP2
+    // stack: x, y, x, y
+    GT
+    // stack: x > y, x, y
+    %select_bool
+    // stack: max
+%endmacro

From dfd715fafb73ded564c11f559755943947070952 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 13:52:52 -0700
Subject: [PATCH 75/85] Fix case where a valid constant propagation broke a
 test

---
 evm/src/cpu/kernel/assembler.rs | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 4e3381b5..636251a3 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -446,10 +446,11 @@ mod tests {
 
     #[test]
     fn macro_with_vars() {
-        let kernel = parse_and_assemble(&[
+        let files = &[
             "%macro add(x, y) PUSH $x PUSH $y ADD %endmacro",
             "%add(2, 3)",
-        ]);
+        ];
+        let kernel = parse_and_assemble_ext(files, HashMap::new(), false);
         let push1 = get_push_opcode(1);
         let add = get_opcode("ADD");
         assert_eq!(kernel.code, vec![push1, 2, push1, 3, add]);
@@ -487,7 +488,7 @@ mod tests {
         let mut constants = HashMap::new();
         constants.insert("DEAD_BEEF".into(), 0xDEADBEEFu64.into());
 
-        let kernel = parse_and_assemble_with_constants(code, constants);
+        let kernel = parse_and_assemble_ext(code, constants, true);
         let push4 = get_push_opcode(4);
         assert_eq!(kernel.code, vec![push4, 0xDE, 0xAD, 0xBE, 0xEF]);
     }
@@ -518,7 +519,7 @@ mod tests {
 
         let mut consts = HashMap::new();
         consts.insert("LIFE".into(), 42.into());
-        parse_and_assemble_with_constants(&["%stack (a, b) -> (b, @LIFE)"], consts);
+        parse_and_assemble_ext(&["%stack (a, b) -> (b, @LIFE)"], consts, true);
         // We won't check the code since there are two equally efficient implementations.
 
         let kernel = parse_and_assemble(&["start: %stack (a, b) -> (start)"]);
@@ -530,14 +531,15 @@ mod tests {
     }
 
     fn parse_and_assemble(files: &[&str]) -> Kernel {
-        parse_and_assemble_with_constants(files, HashMap::new())
+        parse_and_assemble_ext(files, HashMap::new(), true)
     }
 
-    fn parse_and_assemble_with_constants(
+    fn parse_and_assemble_ext(
         files: &[&str],
         constants: HashMap<String, U256>,
+        optimize: bool,
     ) -> Kernel {
         let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
-        assemble(parsed_files, constants, true)
+        assemble(parsed_files, constants, optimize)
     }
 }

From fae653da2abacf10131cffc83a91ed7672559fd1 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 21:56:25 -0700
Subject: [PATCH 76/85] Missing retdest

---
 evm/src/cpu/kernel/asm/rlp/encode.asm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/rlp/encode.asm b/evm/src/cpu/kernel/asm/rlp/encode.asm
index b2446c37..58cb9230 100644
--- a/evm/src/cpu/kernel/asm/rlp/encode.asm
+++ b/evm/src/cpu/kernel/asm/rlp/encode.asm
@@ -1,17 +1,17 @@
 // RLP-encode a scalar, i.e. a variable-length integer.
-// Pre stack: pos, scalar
+// Pre stack: pos, scalar, retdest
 // Post stack: (empty)
 global encode_rlp_scalar:
     PANIC // TODO: implement
 
 // RLP-encode a fixed-length 160-bit string. Assumes string < 2^160.
-// Pre stack: pos, string
+// Pre stack: pos, string, retdest
 // Post stack: (empty)
 global encode_rlp_160:
     PANIC // TODO: implement
 
 // RLP-encode a fixed-length 256-bit string.
-// Pre stack: pos, string
+// Pre stack: pos, string, retdest
 // Post stack: (empty)
 global encode_rlp_256:
     PANIC // TODO: implement

From 7423124e36245aa26886c2cf330ec9a33494d665 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Wed, 3 Aug 2022 22:09:36 -0700
Subject: [PATCH 77/85] Split up memory asm and add more helper functions

---
 evm/src/cpu/kernel/aggregator.rs              |  4 +-
 .../asm/{memory.asm => memory/core.asm}       | 81 +++++++------------
 evm/src/cpu/kernel/asm/memory/memcpy.asm      | 52 ++++++++++++
 evm/src/cpu/kernel/asm/memory/metadata.asm    | 35 ++++++++
 evm/src/cpu/kernel/asm/memory/txn_fields.asm  | 17 ++++
 5 files changed, 134 insertions(+), 55 deletions(-)
 rename evm/src/cpu/kernel/asm/{memory.asm => memory/core.asm} (64%)
 create mode 100644 evm/src/cpu/kernel/asm/memory/memcpy.asm
 create mode 100644 evm/src/cpu/kernel/asm/memory/metadata.asm
 create mode 100644 evm/src/cpu/kernel/asm/memory/txn_fields.asm

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 7f34f90b..4c8a1173 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -23,7 +23,9 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/secp256k1/moddiv.asm"),
         include_str!("asm/exp.asm"),
         include_str!("asm/halt.asm"),
-        include_str!("asm/memory.asm"),
+        include_str!("asm/memory/core.asm"),
+        include_str!("asm/memory/memcpy.asm"),
+        include_str!("asm/memory/txn_fields.asm"),
         include_str!("asm/rlp/encode.asm"),
         include_str!("asm/rlp/decode.asm"),
         include_str!("asm/rlp/read_to_memory.asm"),
diff --git a/evm/src/cpu/kernel/asm/memory.asm b/evm/src/cpu/kernel/asm/memory/core.asm
similarity index 64%
rename from evm/src/cpu/kernel/asm/memory.asm
rename to evm/src/cpu/kernel/asm/memory/core.asm
index 81474d12..2c896345 100644
--- a/evm/src/cpu/kernel/asm/memory.asm
+++ b/evm/src/cpu/kernel/asm/memory/core.asm
@@ -26,10 +26,10 @@
     // stack: (empty)
 %endmacro
 
-// Load a single byte from kernel code.
-%macro mload_kernel_code
+// Load a single value from the given segment of kernel (context 0) memory.
+%macro mload_kernel(segment)
     // stack: offset
-    PUSH @SEGMENT_CODE
+    PUSH $segment
     // stack: segment, offset
     PUSH 0 // kernel has context 0
     // stack: context, segment, offset
@@ -37,6 +37,24 @@
     // stack: value
 %endmacro
 
+// Store a single value to the given segment of kernel (context 0) memory.
+%macro mstore_kernel(segment)
+    // stack: offset, value
+    PUSH $segment
+    // stack: segment, offset, value
+    PUSH 0 // kernel has context 0
+    // stack: context, segment, offset, value
+    MSTORE_GENERAL
+    // stack: (empty)
+%endmacro
+
+// Load a single byte from kernel code.
+%macro mload_kernel_code
+    // stack: offset
+    %mload_kernel(@SEGMENT_CODE)
+    // stack: value
+%endmacro
+
 // Load a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0),
 // from kernel code.
 %macro mload_kernel_code_u32
@@ -67,54 +85,9 @@
     // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0
 %endmacro
 
-// Copies `count` values from
-//     SRC = (src_ctx, src_segment, src_addr)
-// to
-//     DST = (dst_ctx, dst_segment, dst_addr).
-// These tuple definitions are used for brevity in the stack comments below.
-global memcpy:
-    JUMPDEST
-    // stack: DST, SRC, count, retdest
-    DUP7
-    // stack: count, DST, SRC, count, retdest
-    ISZERO
-    // stack: count == 0, DST, SRC, count, retdest
-    %jumpi(memcpy_finish)
-    // stack: DST, SRC, count, retdest
-
-    // Copy the next value.
-    DUP6
-    DUP6
-    DUP6
-    // stack: SRC, DST, SRC, count, retdest
-    MLOAD_GENERAL
-    // stack: value, DST, SRC, count, retdest
-    DUP4
-    DUP4
-    DUP4
-    // stack: DST, value, DST, SRC, count, retdest
-    MSTORE_GENERAL
-    // stack: DST, SRC, count, retdest
-
-    // Increment dst_addr.
-    SWAP2
-    %add_const(1)
-    SWAP2
-    // Increment src_addr.
-    SWAP5
-    %add_const(1)
-    SWAP5
-    // Decrement count.
-    SWAP6
-    %sub_const(1)
-    SWAP6
-
-    // Continue the loop.
-    %jump(memcpy)
-
-memcpy_finish:
-    JUMPDEST
-    // stack: DST, SRC, count, retdest
-    %pop7
-    // stack: retdest
-    JUMP
+// Store a single byte to kernel code.
+%macro mstore_kernel_code
+    // stack: offset, value
+    %mstore_kernel(@SEGMENT_CODE)
+    // stack: (empty)
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/memory/memcpy.asm b/evm/src/cpu/kernel/asm/memory/memcpy.asm
new file mode 100644
index 00000000..f319d962
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/memory/memcpy.asm
@@ -0,0 +1,52 @@
+
+// Copies `count` values from
+//     SRC = (src_ctx, src_segment, src_addr)
+// to
+//     DST = (dst_ctx, dst_segment, dst_addr).
+// These tuple definitions are used for brevity in the stack comments below.
+global memcpy:
+    JUMPDEST
+    // stack: DST, SRC, count, retdest
+    DUP7
+    // stack: count, DST, SRC, count, retdest
+    ISZERO
+    // stack: count == 0, DST, SRC, count, retdest
+    %jumpi(memcpy_finish)
+    // stack: DST, SRC, count, retdest
+
+    // Copy the next value.
+    DUP6
+    DUP6
+    DUP6
+    // stack: SRC, DST, SRC, count, retdest
+    MLOAD_GENERAL
+    // stack: value, DST, SRC, count, retdest
+    DUP4
+    DUP4
+    DUP4
+    // stack: DST, value, DST, SRC, count, retdest
+    MSTORE_GENERAL
+    // stack: DST, SRC, count, retdest
+
+    // Increment dst_addr.
+    SWAP2
+    %add_const(1)
+    SWAP2
+    // Increment src_addr.
+    SWAP5
+    %add_const(1)
+    SWAP5
+    // Decrement count.
+    SWAP6
+    %sub_const(1)
+    SWAP6
+
+    // Continue the loop.
+    %jump(memcpy)
+
+memcpy_finish:
+    JUMPDEST
+    // stack: DST, SRC, count, retdest
+    %pop7
+    // stack: retdest
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/memory/metadata.asm b/evm/src/cpu/kernel/asm/memory/metadata.asm
new file mode 100644
index 00000000..22eb853f
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/memory/metadata.asm
@@ -0,0 +1,35 @@
+// Load the given global metadata field from memory.
+%macro mload_global_metadata(field)
+    // stack: (empty)
+    PUSH $field
+    // stack: offset
+    %mload_kernel(@SEGMENT_GLOBAL_METADATA)
+    // stack: value
+%endmacro
+
+// Store the value on top of the stack to the given global metadata field in kernel memory.
+%macro mstore_global_metadata(field)
+    // stack: value
+    PUSH $field
+    // stack: offset, value
+    %mstore_kernel(@SEGMENT_GLOBAL_METADATA)
+    // stack: (empty)
+%endmacro
+
+// Load the given context metadata field from memory.
+%macro mload_context_metadata(field)
+    // stack: (empty)
+    PUSH $field
+    // stack: offset
+    %mload_current(@SEGMENT_CONTEXT_METADATA)
+    // stack: value
+%endmacro
+
+// Store the value on top of the stack to the given context metadata field.
+%macro mstore_context_metadata(field)
+    // stack: value
+    PUSH $field
+    // stack: offset, value
+    %mstore_current(@SEGMENT_CONTEXT_METADATA)
+    // stack: (empty)
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/memory/txn_fields.asm b/evm/src/cpu/kernel/asm/memory/txn_fields.asm
new file mode 100644
index 00000000..d15b7264
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/memory/txn_fields.asm
@@ -0,0 +1,17 @@
+// Load the given normalized transaction field from memory.
+%macro mload_txn_field(field)
+    // stack: (empty)
+    PUSH $field
+    // stack: offset
+    %mload_kernel(@SEGMENT_NORMALIZED_TXN)
+    // stack: value
+%endmacro
+
+// Store the given normalized transaction field to memory.
+%macro mstore_txn_field(field)
+    // stack: value
+    PUSH $field
+    // stack: offset, value
+    %mstore_kernel(@SEGMENT_NORMALIZED_TXN)
+    // stack: (empty)
+%endmacro

From 1e6cf4c4ab5b9bff03806b9c0bdb7fb8b917f258 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Thu, 4 Aug 2022 09:34:46 -0700
Subject: [PATCH 78/85] newline

---
 evm/src/cpu/kernel/asm/memory/memcpy.asm | 1 -
 1 file changed, 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/memory/memcpy.asm b/evm/src/cpu/kernel/asm/memory/memcpy.asm
index f319d962..0a390736 100644
--- a/evm/src/cpu/kernel/asm/memory/memcpy.asm
+++ b/evm/src/cpu/kernel/asm/memory/memcpy.asm
@@ -1,4 +1,3 @@
-
 // Copies `count` values from
 //     SRC = (src_ctx, src_segment, src_addr)
 // to

From bf4cf1c64f6ea055a9ff3922f59bce65202e187c Mon Sep 17 00:00:00 2001
From: Nicholas Ward <npward@berkeley.edu>
Date: Thu, 4 Aug 2022 14:40:34 -0400
Subject: [PATCH 79/85] fix

---
 evm/src/memory/memory_stark.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index 398c2c15..6c3896e7 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -499,7 +499,11 @@ pub(crate) mod tests {
 
                 let (context, segment, virt, vals) = if is_read {
                     let written: Vec<_> = current_memory_values.keys().collect();
-                    let &(context, segment, virt) = written[rng.gen_range(0..written.len())];
+                    let &(mut context, mut segment, mut virt) = written[rng.gen_range(0..written.len())];
+                    while new_writes_this_cycle.contains_key(&(context, segment, virt)) {
+                        (context, segment, virt) = *written[rng.gen_range(0..written.len())];
+                    }
+                    
                     let &vals = current_memory_values
                         .get(&(context, segment, virt))
                         .unwrap();

From bbcb4195215a1e2635eacc6117229420e516082e Mon Sep 17 00:00:00 2001
From: Nicholas Ward <npward@berkeley.edu>
Date: Thu, 4 Aug 2022 14:56:16 -0400
Subject: [PATCH 80/85] fmt

---
 evm/src/memory/memory_stark.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index 6c3896e7..5a17ed20 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -499,11 +499,12 @@ pub(crate) mod tests {
 
                 let (context, segment, virt, vals) = if is_read {
                     let written: Vec<_> = current_memory_values.keys().collect();
-                    let &(mut context, mut segment, mut virt) = written[rng.gen_range(0..written.len())];
+                    let &(mut context, mut segment, mut virt) =
+                        written[rng.gen_range(0..written.len())];
                     while new_writes_this_cycle.contains_key(&(context, segment, virt)) {
                         (context, segment, virt) = *written[rng.gen_range(0..written.len())];
                     }
-                    
+
                     let &vals = current_memory_values
                         .get(&(context, segment, virt))
                         .unwrap();

From 616eb618f22a0f4a037dd897eab47e9a57ce44b4 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Thu, 4 Aug 2022 12:23:48 -0700
Subject: [PATCH 81/85] Support macro-local labels

Again borrowing syntax from NASM. Example from the test:

    %macro spin
    %%start:
        PUSH %%start
        JUMP
    %endmacro

One thing this lets us do is create "wrapper" macros which call a function, then return to the code immediately following the macro call, such as

    %macro decode_rlp_scalar
        %stack (pos) -> (pos, %%after)
        %jump(decode_rlp_scalar)
    %%after:
    %endmacro

I used this to clean up `type_0.asm`.

However, since such macros need to insert `%%after` beneath any arguments in the stack, using them will be suboptimal in some cases. I wouldn't worry about it generally, but we might want to avoid them in performance-critical code, or functions with many arguments like `memcpy`.
---
 evm/src/cpu/kernel/asm/rlp/decode.asm         | 21 +++++
 .../cpu/kernel/asm/transactions/type_0.asm    | 81 +++++--------------
 evm/src/cpu/kernel/assembler.rs               | 69 +++++++++++++---
 evm/src/cpu/kernel/ast.rs                     |  4 +
 evm/src/cpu/kernel/evm_asm.pest               | 12 +--
 evm/src/cpu/kernel/parser.rs                  | 13 ++-
 evm/src/cpu/kernel/stack_manipulation.rs      |  8 +-
 7 files changed, 125 insertions(+), 83 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/rlp/decode.asm b/evm/src/cpu/kernel/asm/rlp/decode.asm
index 24d8d5a7..0388276a 100644
--- a/evm/src/cpu/kernel/asm/rlp/decode.asm
+++ b/evm/src/cpu/kernel/asm/rlp/decode.asm
@@ -54,6 +54,13 @@ decode_rlp_string_len_large:
     // stack: pos', len_of_len, retdest
     %jump(decode_int_given_len)
 
+// Convenience macro to call decode_rlp_string_len and return where we left off.
+%macro decode_rlp_string_len
+    %stack (pos) -> (pos, %%after)
+    %jump(decode_rlp_string_len)
+%%after:
+%endmacro
+
 // Parse a scalar from RLP memory.
 // Pre stack: pos, retdest
 // Post stack: pos', scalar
@@ -73,6 +80,13 @@ global decode_rlp_scalar:
     // to decode_int_given_len.
     %jump(decode_rlp_string_len)
 
+// Convenience macro to call decode_rlp_scalar and return where we left off.
+%macro decode_rlp_scalar
+    %stack (pos) -> (pos, %%after)
+    %jump(decode_rlp_scalar)
+%%after:
+%endmacro
+
 // Parse the length of an RLP list from memory.
 // Pre stack: pos, retdest
 // Post stack: pos', len
@@ -111,6 +125,13 @@ decode_rlp_list_len_big:
     // stack: pos', len_of_len, retdest
     %jump(decode_int_given_len)
 
+// Convenience macro to call decode_rlp_list_len and return where we left off.
+%macro decode_rlp_list_len
+    %stack (pos) -> (pos, %%after)
+    %jump(decode_rlp_list_len)
+%%after:
+%endmacro
+
 // Parse an integer of the given length. It is assumed that the integer will
 // fit in a single (256-bit) word on the stack.
 // Pre stack: pos, len, retdest
diff --git a/evm/src/cpu/kernel/asm/transactions/type_0.asm b/evm/src/cpu/kernel/asm/transactions/type_0.asm
index 4e39f6c3..8711790d 100644
--- a/evm/src/cpu/kernel/asm/transactions/type_0.asm
+++ b/evm/src/cpu/kernel/asm/transactions/type_0.asm
@@ -14,78 +14,50 @@
 global process_type_0_txn:
     JUMPDEST
     // stack: (empty)
-    PUSH process_txn_with_len
     PUSH 0 // initial pos
-    // stack: pos, process_txn_with_len
-    %jump(decode_rlp_list_len)
-
-process_txn_with_len:
+    // stack: pos
+    %decode_rlp_list_len
     // We don't actually need the length.
     %stack (pos, len) -> (pos)
 
-    PUSH store_nonce
-    SWAP1
-    // stack: pos, store_nonce
-    %jump(decode_rlp_scalar)
-
-store_nonce:
+    // Decode the nonce and store it.
+    // stack: pos
+    %decode_rlp_scalar
     %stack (pos, nonce) -> (@TXN_FIELD_NONCE, nonce, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
-    // stack: pos
-    PUSH store_gas_price
-    SWAP1
-    // stack: pos, store_gas_price
-    %jump(decode_rlp_scalar)
-
-store_gas_price:
+    // Decode the gas price and store it.
     // For legacy transactions, we set both the
     // TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS and TXN_FIELD_MAX_FEE_PER_GAS
     // fields to gas_price.
+    // stack: pos
+    %decode_rlp_scalar
     %stack (pos, gas_price) -> (@TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS, gas_price,
                                 @TXN_FIELD_MAX_FEE_PER_GAS, gas_price, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
+    // Decode the gas limit and store it.
     // stack: pos
-    PUSH store_gas_limit
-    SWAP1
-    // stack: pos, store_gas_limit
-    %jump(decode_rlp_scalar)
-
-store_gas_limit:
+    %decode_rlp_scalar
     %stack (pos, gas_limit) -> (@TXN_FIELD_GAS_LIMIT, gas_limit, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
+    // Decode the "to" field and store it.
     // stack: pos
-    PUSH store_to
-    SWAP1
-    // stack: pos, store_to
-    %jump(decode_rlp_scalar)
-
-store_to:
+    %decode_rlp_scalar
     %stack (pos, to) -> (@TXN_FIELD_TO, to, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
-    // stack: pos
 
-parse_value:
+    // Decode the value field and store it.
     // stack: pos
-    PUSH store_value
-    SWAP1
-    // stack: pos, store_value
-    %jump(decode_rlp_scalar)
-
-store_value:
+    %decode_rlp_scalar
     %stack (pos, value) -> (@TXN_FIELD_VALUE, value, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
+    // Decode the data length, store it, and compute new_pos after any data.
     // stack: pos
-    PUSH store_data_len
-    SWAP1
-    // stack: pos, store_data_len
-    %jump(decode_rlp_string_len)
-
-store_data_len:
+    %decode_rlp_string_len
     %stack (pos, data_len) -> (@TXN_FIELD_DATA_LEN, data_len, pos, data_len, pos, data_len)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
     // stack: pos, data_len, pos, data_len
@@ -105,12 +77,7 @@ store_data_len:
 
 parse_v:
     // stack: pos
-    PUSH process_v
-    SWAP1
-    // stack: pos, process_v
-    %jump(decode_rlp_scalar)
-
-process_v:
+    %decode_rlp_scalar
     // stack: pos, v
     SWAP1
     // stack: v, pos
@@ -154,22 +121,12 @@ process_v_new_style:
 
 parse_r:
     // stack: pos
-    PUSH store_r
-    SWAP1
-    // stack: pos, store_r
-    %jump(decode_rlp_scalar)
-
-store_r:
+    %decode_rlp_scalar
     %stack (pos, r) -> (@TXN_FIELD_R, r, pos)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
 
     // stack: pos
-    PUSH store_s
-    SWAP1
-    // stack: pos, store_s
-    %jump(decode_rlp_scalar)
-
-store_s:
+    %decode_rlp_scalar
     %stack (pos, s) -> (@TXN_FIELD_S, s)
     %mstore_current(@SEGMENT_NORMALIZED_TXN)
     // stack: (empty)
diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
index 636251a3..14ec9aa0 100644
--- a/evm/src/cpu/kernel/assembler.rs
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -5,6 +5,7 @@ use itertools::izip;
 use log::debug;
 
 use super::ast::PushTarget;
+use crate::cpu::kernel::ast::Item::LocalLabelDeclaration;
 use crate::cpu::kernel::ast::StackReplacement;
 use crate::cpu::kernel::keccak_util::hash_kernel;
 use crate::cpu::kernel::optimizer::optimize_asm;
@@ -76,8 +77,9 @@ pub(crate) fn assemble(
     let mut offset = 0;
     let mut expanded_files = Vec::with_capacity(files.len());
     let mut local_labels = Vec::with_capacity(files.len());
+    let mut macro_counter = 0;
     for file in files {
-        let expanded_file = expand_macros(file.body, &macros);
+        let expanded_file = expand_macros(file.body, &macros, &mut macro_counter);
         let expanded_file = expand_repeats(expanded_file);
         let expanded_file = inline_constants(expanded_file, &constants);
         let mut expanded_file = expand_stack_manipulation(expanded_file);
@@ -120,7 +122,11 @@ fn find_macros(files: &[File]) -> HashMap<String, Macro> {
     macros
 }
 
-fn expand_macros(body: Vec<Item>, macros: &HashMap<String, Macro>) -> Vec<Item> {
+fn expand_macros(
+    body: Vec<Item>,
+    macros: &HashMap<String, Macro>,
+    macro_counter: &mut u32,
+) -> Vec<Item> {
     let mut expanded = vec![];
     for item in body {
         match item {
@@ -128,7 +134,7 @@ fn expand_macros(body: Vec<Item>, macros: &HashMap<String, Macro>) -> Vec<Item>
                 // At this phase, we no longer need macro definitions.
             }
             Item::MacroCall(m, args) => {
-                expanded.extend(expand_macro_call(m, args, macros));
+                expanded.extend(expand_macro_call(m, args, macros, macro_counter));
             }
             item => {
                 expanded.push(item);
@@ -142,6 +148,7 @@ fn expand_macro_call(
     name: String,
     args: Vec<PushTarget>,
     macros: &HashMap<String, Macro>,
+    macro_counter: &mut u32,
 ) -> Vec<Item> {
     let _macro = macros
         .get(&name)
@@ -156,6 +163,8 @@ fn expand_macro_call(
         args.len()
     );
 
+    let get_actual_label = |macro_label| format!("@{}.{}", macro_counter, macro_label);
+
     let get_arg = |var| {
         let param_index = _macro.get_param_index(var);
         args[param_index].clone()
@@ -164,10 +173,13 @@ fn expand_macro_call(
     let expanded_item = _macro
         .items
         .iter()
-        .map(|item| {
-            if let Item::Push(PushTarget::MacroVar(var)) = item {
-                Item::Push(get_arg(var))
-            } else if let Item::MacroCall(name, args) = item {
+        .map(|item| match item {
+            Item::MacroLabelDeclaration(label) => LocalLabelDeclaration(get_actual_label(label)),
+            Item::Push(PushTarget::MacroLabel(label)) => {
+                Item::Push(PushTarget::Label(get_actual_label(label)))
+            }
+            Item::Push(PushTarget::MacroVar(var)) => Item::Push(get_arg(var)),
+            Item::MacroCall(name, args) => {
                 let expanded_args = args
                     .iter()
                     .map(|arg| {
@@ -179,14 +191,28 @@ fn expand_macro_call(
                     })
                     .collect();
                 Item::MacroCall(name.clone(), expanded_args)
-            } else {
-                item.clone()
             }
+            Item::StackManipulation(before, after) => {
+                let after = after
+                    .iter()
+                    .map(|replacement| {
+                        if let StackReplacement::MacroLabel(label) = replacement {
+                            StackReplacement::Identifier(get_actual_label(label))
+                        } else {
+                            replacement.clone()
+                        }
+                    })
+                    .collect();
+                Item::StackManipulation(before.clone(), after)
+            }
+            _ => item.clone(),
         })
         .collect();
 
+    *macro_counter += 1;
+
     // Recursively expand any macros in the expanded code.
-    expand_macros(expanded_item, macros)
+    expand_macros(expanded_item, macros, macro_counter)
 }
 
 fn expand_repeats(body: Vec<Item>) -> Vec<Item> {
@@ -247,7 +273,8 @@ fn find_labels(
             Item::MacroDef(_, _, _)
             | Item::MacroCall(_, _)
             | Item::Repeat(_, _)
-            | Item::StackManipulation(_, _) => {
+            | Item::StackManipulation(_, _)
+            | Item::MacroLabelDeclaration(_) => {
                 panic!("Item should have been expanded already: {:?}", item);
             }
             Item::GlobalLabelDeclaration(label) => {
@@ -282,7 +309,8 @@ fn assemble_file(
             Item::MacroDef(_, _, _)
             | Item::MacroCall(_, _)
             | Item::Repeat(_, _)
-            | Item::StackManipulation(_, _) => {
+            | Item::StackManipulation(_, _)
+            | Item::MacroLabelDeclaration(_) => {
                 panic!("Item should have been expanded already: {:?}", item);
             }
             Item::GlobalLabelDeclaration(_) | Item::LocalLabelDeclaration(_) => {
@@ -303,6 +331,7 @@ fn assemble_file(
                             .map(|i| offset.to_le_bytes()[i as usize])
                             .collect()
                     }
+                    PushTarget::MacroLabel(v) => panic!("Macro label not in a macro: {}", v),
                     PushTarget::MacroVar(v) => panic!("Variable not in a macro: {}", v),
                     PushTarget::Constant(c) => panic!("Constant wasn't inlined: {}", c),
                 };
@@ -325,6 +354,7 @@ fn push_target_size(target: &PushTarget) -> u8 {
     match target {
         PushTarget::Literal(n) => u256_to_trimmed_be_bytes(n).len() as u8,
         PushTarget::Label(_) => BYTES_PER_OFFSET,
+        PushTarget::MacroLabel(v) => panic!("Macro label not in a macro: {}", v),
         PushTarget::MacroVar(v) => panic!("Variable not in a macro: {}", v),
         PushTarget::Constant(c) => panic!("Constant wasn't inlined: {}", c),
     }
@@ -456,6 +486,21 @@ mod tests {
         assert_eq!(kernel.code, vec![push1, 2, push1, 3, add]);
     }
 
+    #[test]
+    fn macro_with_label() {
+        let files = &[
+            "%macro spin %%start: PUSH %%start JUMP %endmacro",
+            "%spin %spin",
+        ];
+        let kernel = parse_and_assemble_ext(files, HashMap::new(), false);
+        let push3 = get_push_opcode(BYTES_PER_OFFSET);
+        let jump = get_opcode("JUMP");
+        assert_eq!(
+            kernel.code,
+            vec![push3, 0, 0, 0, jump, push3, 0, 0, 5, jump]
+        );
+    }
+
     #[test]
     fn macro_in_macro_with_vars() {
         let kernel = parse_and_assemble(&[
diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
index a0de748a..24cf01e1 100644
--- a/evm/src/cpu/kernel/ast.rs
+++ b/evm/src/cpu/kernel/ast.rs
@@ -24,6 +24,8 @@ pub(crate) enum Item {
     GlobalLabelDeclaration(String),
     /// Declares a label that is local to the current file.
     LocalLabelDeclaration(String),
+    /// Declares a label that is local to the macro it's declared in.
+    MacroLabelDeclaration(String),
     /// A `PUSH` operation.
     Push(PushTarget),
     /// A `ProverInput` operation.
@@ -39,6 +41,7 @@ pub(crate) enum StackReplacement {
     /// Can be either a named item or a label.
     Identifier(String),
     Literal(U256),
+    MacroLabel(String),
     MacroVar(String),
     Constant(String),
 }
@@ -48,6 +51,7 @@ pub(crate) enum StackReplacement {
 pub(crate) enum PushTarget {
     Literal(U256),
     Label(String),
+    MacroLabel(String),
     MacroVar(String),
     Constant(String),
 }
diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest
index 0703798e..8ea7de4b 100644
--- a/evm/src/cpu/kernel/evm_asm.pest
+++ b/evm/src/cpu/kernel/evm_asm.pest
@@ -15,7 +15,7 @@ literal = { literal_hex | literal_decimal }
 variable = ${ "$" ~ identifier }
 constant = ${ "@" ~ identifier }
 
-item = { macro_def | macro_call | repeat | stack | global_label | local_label | bytes_item | push_instruction | prover_input_instruction | nullary_instruction }
+item = { macro_def | macro_call | repeat | stack | global_label_decl | local_label_decl | macro_label_decl | bytes_item | push_instruction | prover_input_instruction | nullary_instruction }
 macro_def = { ^"%macro" ~ identifier ~ paramlist? ~ item* ~ ^"%endmacro" }
 macro_call = ${ "%" ~ !(^"macro" | ^"endmacro" | ^"rep" | ^"endrep" | ^"stack") ~ identifier ~ macro_arglist? }
 repeat = { ^"%rep" ~ literal ~ item* ~ ^"%endrep" }
@@ -23,12 +23,14 @@ paramlist = { "(" ~ identifier ~ ("," ~ identifier)* ~ ")" }
 macro_arglist = !{ "(" ~ push_target ~ ("," ~ push_target)* ~ ")" }
 stack = { ^"%stack" ~ paramlist ~ "->" ~ stack_replacements }
 stack_replacements = { "(" ~ stack_replacement ~ ("," ~ stack_replacement)* ~ ")" }
-stack_replacement = { literal | identifier | constant }
-global_label = { ^"GLOBAL " ~ identifier ~ ":" }
-local_label = { identifier ~ ":" }
+stack_replacement = { literal | identifier | constant | macro_label | variable }
+global_label_decl = ${ ^"GLOBAL " ~ identifier ~ ":" }
+local_label_decl = ${ identifier ~ ":" }
+macro_label_decl = ${ "%%" ~ identifier ~ ":" }
+macro_label = ${ "%%" ~ identifier }
 bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* }
 push_instruction = { ^"PUSH " ~ push_target }
-push_target = { literal | identifier | variable | constant }
+push_target = { literal | identifier | macro_label | variable | constant }
 prover_input_instruction = { ^"PROVER_INPUT" ~ "(" ~ prover_input_fn ~ ")" }
 prover_input_fn = { identifier ~ ("::" ~ identifier)*}
 nullary_instruction = { identifier }
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
index 66bf0757..9ed578d4 100644
--- a/evm/src/cpu/kernel/parser.rs
+++ b/evm/src/cpu/kernel/parser.rs
@@ -28,12 +28,15 @@ fn parse_item(item: Pair<Rule>) -> Item {
         Rule::macro_call => parse_macro_call(item),
         Rule::repeat => parse_repeat(item),
         Rule::stack => parse_stack(item),
-        Rule::global_label => {
+        Rule::global_label_decl => {
             Item::GlobalLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
         }
-        Rule::local_label => {
+        Rule::local_label_decl => {
             Item::LocalLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
         }
+        Rule::macro_label_decl => {
+            Item::MacroLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
+        }
         Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal_u8).collect()),
         Rule::push_instruction => Item::Push(parse_push_target(item.into_inner().next().unwrap())),
         Rule::prover_input_instruction => Item::ProverInput(
@@ -117,6 +120,9 @@ fn parse_stack_replacement(target: Pair<Rule>) -> StackReplacement {
     match inner.as_rule() {
         Rule::identifier => StackReplacement::Identifier(inner.as_str().into()),
         Rule::literal => StackReplacement::Literal(parse_literal_u256(inner)),
+        Rule::macro_label => {
+            StackReplacement::MacroLabel(inner.into_inner().next().unwrap().as_str().into())
+        }
         Rule::variable => {
             StackReplacement::MacroVar(inner.into_inner().next().unwrap().as_str().into())
         }
@@ -133,6 +139,9 @@ fn parse_push_target(target: Pair<Rule>) -> PushTarget {
     match inner.as_rule() {
         Rule::literal => PushTarget::Literal(parse_literal_u256(inner)),
         Rule::identifier => PushTarget::Label(inner.as_str().into()),
+        Rule::macro_label => {
+            PushTarget::MacroLabel(inner.into_inner().next().unwrap().as_str().into())
+        }
         Rule::variable => PushTarget::MacroVar(inner.into_inner().next().unwrap().as_str().into()),
         Rule::constant => PushTarget::Constant(inner.into_inner().next().unwrap().as_str().into()),
         _ => panic!("Unexpected {:?}", inner.as_rule()),
diff --git a/evm/src/cpu/kernel/stack_manipulation.rs b/evm/src/cpu/kernel/stack_manipulation.rs
index 71746f16..a1f02c7e 100644
--- a/evm/src/cpu/kernel/stack_manipulation.rs
+++ b/evm/src/cpu/kernel/stack_manipulation.rs
@@ -42,7 +42,9 @@ fn expand(names: Vec<String>, replacements: Vec<StackReplacement>) -> Vec<Item>
                 }
             }
             StackReplacement::Literal(n) => StackItem::PushTarget(PushTarget::Literal(n)),
-            StackReplacement::MacroVar(_) | StackReplacement::Constant(_) => {
+            StackReplacement::MacroLabel(_)
+            | StackReplacement::MacroVar(_)
+            | StackReplacement::Constant(_) => {
                 panic!("Should have been expanded already: {:?}", item)
             }
         })
@@ -230,7 +232,9 @@ impl StackOp {
                 let bytes = match target {
                     PushTarget::Literal(n) => u256_to_trimmed_be_bytes(n).len() as u32,
                     PushTarget::Label(_) => BYTES_PER_OFFSET as u32,
-                    PushTarget::MacroVar(_) | PushTarget::Constant(_) => {
+                    PushTarget::MacroLabel(_)
+                    | PushTarget::MacroVar(_)
+                    | PushTarget::Constant(_) => {
                         panic!("Target should have been expanded already: {:?}", target)
                     }
                 };

From 3d5a9174fdcfbe47f32b4ae66fa5be4b1498c2f3 Mon Sep 17 00:00:00 2001
From: Sladuca <sladuca777@gmail.com>
Date: Sat, 6 Aug 2022 11:17:36 -0400
Subject: [PATCH 82/85] remove explicit feature include

---
 evm/Cargo.toml         | 2 +-
 starky/Cargo.toml      | 2 +-
 system_zero/Cargo.toml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/Cargo.toml b/evm/Cargo.toml
index c5ec7f0b..c10ab104 100644
--- a/evm/Cargo.toml
+++ b/evm/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2", features = ["timing"] }
+plonky2 = { path = "../plonky2" }
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 env_logger = "0.9.0"
diff --git a/starky/Cargo.toml b/starky/Cargo.toml
index 3ce62c56..4e67856d 100644
--- a/starky/Cargo.toml
+++ b/starky/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2", features = ["timing"]}
+plonky2 = { path = "../plonky2" }
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 env_logger = "0.9.0"
diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml
index 458ce27a..f1cb5729 100644
--- a/system_zero/Cargo.toml
+++ b/system_zero/Cargo.toml
@@ -5,7 +5,7 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-plonky2 = { path = "../plonky2", features = ["timing"] }
+plonky2 = { path = "../plonky2" }
 plonky2_util = { path = "../util" }
 starky = { path = "../starky" }
 anyhow = "1.0.40"

From e7216f2683559385bf3c172aecf75ec419c0dad9 Mon Sep 17 00:00:00 2001
From: Sladuca <sladuca777@gmail.com>
Date: Sat, 6 Aug 2022 11:35:32 -0400
Subject: [PATCH 83/85] feature-gate rand

---
 field/Cargo.toml                 |  6 +++++-
 field/src/extension/quadratic.rs |  4 ++--
 field/src/extension/quartic.rs   |  4 ++--
 field/src/extension/quintic.rs   |  4 ++--
 field/src/goldilocks_field.rs    |  4 ++--
 field/src/secp256k1_base.rs      |  7 ++++---
 field/src/secp256k1_scalar.rs    |  7 ++++---
 field/src/types.rs               |  7 +++++--
 plonky2/Cargo.toml               | 15 ++++++++++++---
 plonky2/src/gates/mod.rs         |  1 +
 plonky2/src/hash/hash_types.rs   |  8 +++++---
 11 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/field/Cargo.toml b/field/Cargo.toml
index 748b65ac..1a72bd6c 100644
--- a/field/Cargo.toml
+++ b/field/Cargo.toml
@@ -4,12 +4,16 @@ description = "Finite field arithmetic"
 version = "0.1.0"
 edition = "2021"
 
+[features]
+default = ["rand"]
+rand = ["dep:rand"]
+
 [dependencies]
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 itertools = "0.10.0"
 num = { version = "0.4", features = [ "rand" ] }
-rand = "0.8.4"
+rand = { optional = true, version = "0.8.4" }
 serde = { version = "1.0", features = ["derive"] }
 unroll = "0.1.5"
 static_assertions = "1.1.0"
diff --git a/field/src/extension/quadratic.rs b/field/src/extension/quadratic.rs
index 5789ecc1..d68df42e 100644
--- a/field/src/extension/quadratic.rs
+++ b/field/src/extension/quadratic.rs
@@ -4,7 +4,6 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssi
 
 use num::bigint::BigUint;
 use num::Integer;
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::extension::{Extendable, FieldExtension, Frobenius, OEF};
@@ -103,7 +102,8 @@ impl<F: Extendable<2>> Field for QuadraticExtension<F> {
         F::from_noncanonical_u128(n).into()
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         Self([F::rand_from_rng(rng), F::rand_from_rng(rng)])
     }
 }
diff --git a/field/src/extension/quartic.rs b/field/src/extension/quartic.rs
index ed8006f2..fc0cbcf8 100644
--- a/field/src/extension/quartic.rs
+++ b/field/src/extension/quartic.rs
@@ -5,7 +5,6 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssi
 use num::bigint::BigUint;
 use num::traits::Pow;
 use num::Integer;
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::extension::{Extendable, FieldExtension, Frobenius, OEF};
@@ -115,7 +114,8 @@ impl<F: Extendable<4>> Field for QuarticExtension<F> {
         F::from_noncanonical_u128(n).into()
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         Self::from_basefield_array([
             F::rand_from_rng(rng),
             F::rand_from_rng(rng),
diff --git a/field/src/extension/quintic.rs b/field/src/extension/quintic.rs
index 7a992b7d..564674c3 100644
--- a/field/src/extension/quintic.rs
+++ b/field/src/extension/quintic.rs
@@ -4,7 +4,6 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssi
 
 use num::bigint::BigUint;
 use num::traits::Pow;
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::extension::{Extendable, FieldExtension, Frobenius, OEF};
@@ -112,7 +111,8 @@ impl<F: Extendable<5>> Field for QuinticExtension<F> {
         F::from_noncanonical_u128(n).into()
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         Self::from_basefield_array([
             F::rand_from_rng(rng),
             F::rand_from_rng(rng),
diff --git a/field/src/goldilocks_field.rs b/field/src/goldilocks_field.rs
index 545d515a..c5075b5d 100644
--- a/field/src/goldilocks_field.rs
+++ b/field/src/goldilocks_field.rs
@@ -6,7 +6,6 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssi
 
 use num::{BigUint, Integer};
 use plonky2_util::{assume, branch_hint};
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::inversion::try_inverse_u64;
@@ -105,7 +104,8 @@ impl Field for GoldilocksField {
         reduce128(n)
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         Self::from_canonical_u64(rng.gen_range(0..Self::ORDER))
     }
 
diff --git a/field/src/secp256k1_base.rs b/field/src/secp256k1_base.rs
index 9dd41a5d..9e39b982 100644
--- a/field/src/secp256k1_base.rs
+++ b/field/src/secp256k1_base.rs
@@ -5,9 +5,8 @@ use std::iter::{Product, Sum};
 use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
 
 use itertools::Itertools;
-use num::bigint::{BigUint, RandBigInt};
+use num::bigint::BigUint;
 use num::{Integer, One};
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::types::{Field, PrimeField};
@@ -133,7 +132,9 @@ impl Field for Secp256K1Base {
         Self([n.0, n.1 as u64, 0, 0])
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
+        use num::bigint::RandBigInt;
         Self::from_biguint(rng.gen_biguint_below(&Self::order()))
     }
 }
diff --git a/field/src/secp256k1_scalar.rs b/field/src/secp256k1_scalar.rs
index ec1ad19e..eea67fab 100644
--- a/field/src/secp256k1_scalar.rs
+++ b/field/src/secp256k1_scalar.rs
@@ -6,9 +6,8 @@ use std::iter::{Product, Sum};
 use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
 
 use itertools::Itertools;
-use num::bigint::{BigUint, RandBigInt};
+use num::bigint::BigUint;
 use num::{Integer, One};
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 
 use crate::types::{Field, PrimeField};
@@ -142,7 +141,9 @@ impl Field for Secp256K1Scalar {
         Self([n.0, n.1 as u64, 0, 0])
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
+        use num::bigint::RandBigInt;
         Self::from_biguint(rng.gen_biguint_below(&Self::order()))
     }
 }
diff --git a/field/src/types.rs b/field/src/types.rs
index 81945e5a..b7335704 100644
--- a/field/src/types.rs
+++ b/field/src/types.rs
@@ -6,7 +6,6 @@ use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssi
 use num::bigint::BigUint;
 use num::{Integer, One, ToPrimitive, Zero};
 use plonky2_util::bits_u64;
-use rand::Rng;
 use serde::de::DeserializeOwned;
 use serde::Serialize;
 
@@ -312,7 +311,8 @@ pub trait Field:
         Self::from_noncanonical_u128(n)
     }
 
-    fn rand_from_rng<R: Rng>(rng: &mut R) -> Self;
+    #[cfg(feature = "rand")]
+    fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self;
 
     fn exp_power_of_2(&self, power_log: usize) -> Self {
         let mut res = *self;
@@ -391,14 +391,17 @@ pub trait Field:
         }
     }
 
+    #[cfg(feature = "rand")]
     fn rand() -> Self {
         Self::rand_from_rng(&mut rand::thread_rng())
     }
 
+    #[cfg(feature = "rand")]
     fn rand_arr<const N: usize>() -> [Self; N] {
         Self::rand_vec(N).try_into().unwrap()
     }
 
+    #[cfg(feature = "rand")]
     fn rand_vec(n: usize) -> Vec<Self> {
         (0..n).map(|_| Self::rand()).collect()
     }
diff --git a/plonky2/Cargo.toml b/plonky2/Cargo.toml
index 9ee89344..ae94f780 100644
--- a/plonky2/Cargo.toml
+++ b/plonky2/Cargo.toml
@@ -11,8 +11,11 @@ edition = "2021"
 default-run = "generate_constants"
 
 [features]
-default = ["parallel"]
+default = ["parallel", "rand", "rand_chacha", "gate_testing"]
 parallel = ["maybe_rayon/parallel"]
+rand = ["dep:rand", "plonky2_field/rand"]
+gate_testing = ["rand"]
+rand_chacha = ["dep:rand_chacha"]
 
 [dependencies]
 plonky2_field = { path = "../field" }
@@ -21,8 +24,8 @@ env_logger = "0.9.0"
 log = "0.4.14"
 itertools = "0.10.0"
 num = { version = "0.4", features = [ "rand" ] }
-rand = "0.8.4"
-rand_chacha = "0.3.1"
+rand = { version = "0.8.4", optional = true }
+rand_chacha = { version = "0.3.1", optional = true }
 maybe_rayon = { path = "../maybe_rayon" }
 unroll = "0.1.5"
 anyhow = "1.0.40"
@@ -32,6 +35,8 @@ keccak-hash = "0.8.0"
 static_assertions = "1.1.0"
 
 [dev-dependencies]
+rand = "0.8.4"
+rand_chacha = "0.3.1"
 criterion = "0.3.5"
 tynm = "0.1.6"
 structopt = "0.3.26"
@@ -41,6 +46,10 @@ rayon = "1.5.1"
 [target.'cfg(not(target_env = "msvc"))'.dev-dependencies]
 jemallocator = "0.3.2"
 
+[[bin]]
+name = "generate_constants"
+required-features = ["rand", "rand_chacha"]
+
 [[bench]]
 name = "field_arithmetic"
 harness = false
diff --git a/plonky2/src/gates/mod.rs b/plonky2/src/gates/mod.rs
index 786ba12c..df65b44c 100644
--- a/plonky2/src/gates/mod.rs
+++ b/plonky2/src/gates/mod.rs
@@ -24,4 +24,5 @@ pub mod util;
 
 // Can't use #[cfg(test)] here because it needs to be visible to other crates.
 // See https://github.com/rust-lang/cargo/issues/8379
+#[cfg(any(feature = "gate_testing", test))]
 pub mod gate_testing;
diff --git a/plonky2/src/hash/hash_types.rs b/plonky2/src/hash/hash_types.rs
index 281930a5..14303ad3 100644
--- a/plonky2/src/hash/hash_types.rs
+++ b/plonky2/src/hash/hash_types.rs
@@ -1,6 +1,5 @@
 use plonky2_field::goldilocks_field::GoldilocksField;
 use plonky2_field::types::{Field, PrimeField64};
-use rand::Rng;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
 use crate::hash::poseidon::Poseidon;
@@ -37,7 +36,8 @@ impl<F: Field> HashOut<F> {
         Self { elements }
     }
 
-    pub fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    pub fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         Self {
             elements: [
                 F::rand_from_rng(rng),
@@ -115,12 +115,14 @@ pub struct MerkleCapTarget(pub Vec<HashOutTarget>);
 pub struct BytesHash<const N: usize>(pub [u8; N]);
 
 impl<const N: usize> BytesHash<N> {
-    pub fn rand_from_rng<R: Rng>(rng: &mut R) -> Self {
+    #[cfg(feature = "rand")]
+    pub fn rand_from_rng<R: rand::Rng>(rng: &mut R) -> Self {
         let mut buf = [0; N];
         rng.fill_bytes(&mut buf);
         Self(buf)
     }
 
+    #[cfg(feature = "rand")]
     pub fn rand() -> Self {
         Self::rand_from_rng(&mut rand::thread_rng())
     }

From 1e5383c63db10f088206c59cbb750f6ca7973d81 Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sat, 6 Aug 2022 22:18:53 -0400
Subject: [PATCH 84/85] Stub push/pop

---
 plonky2/src/util/timing.rs | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/plonky2/src/util/timing.rs b/plonky2/src/util/timing.rs
index d16ceead..42033038 100644
--- a/plonky2/src/util/timing.rs
+++ b/plonky2/src/util/timing.rs
@@ -99,6 +99,9 @@ impl TimingTree {
         })
     }
 
+    #[cfg(not(feature = "timing"))]
+    pub fn push(&mut self, _ctx: &str, _level: log::Level) {}
+
     /// Close the deepest open scope from this tree.
     #[cfg(feature = "timing")]
     pub fn pop(&mut self) {
@@ -114,6 +117,9 @@ impl TimingTree {
         self.exit_time = Some(Instant::now());
     }
 
+    #[cfg(not(feature = "timing"))]
+    pub fn pop(&mut self) {}
+
     #[cfg(feature = "timing")]
     fn duration(&self) -> Duration {
         self.exit_time
@@ -171,26 +177,16 @@ impl TimingTree {
 #[macro_export]
 macro_rules! timed {
     ($timing_tree:expr, $level:expr, $ctx:expr, $exp:expr) => {{
-        #[cfg(feature = "timing")]
         $timing_tree.push($ctx, $level);
-
         let res = $exp;
-
-        #[cfg(feature = "timing")]
         $timing_tree.pop();
-
         res
     }};
     // If no context is specified, default to Debug.
     ($timing_tree:expr, $ctx:expr, $exp:expr) => {{
-        #[cfg(feature = "timing")]
         $timing_tree.push($ctx, log::Level::Debug);
-
         let res = $exp;
-
-        #[cfg(feature = "timing")]
         $timing_tree.pop();
-
         res
     }};
 }

From 385a990c50f6164f843e48bc2a0a4319097d078c Mon Sep 17 00:00:00 2001
From: Daniel Lubarov <daniel@lubarov.com>
Date: Sat, 6 Aug 2022 22:27:17 -0400
Subject: [PATCH 85/85] Unsuppress warnings

---
 evm/src/keccak/keccak_stark.rs | 1 -
 evm/src/memory/memory_stark.rs | 1 -
 plonky2/src/fri/oracle.rs      | 8 ++++----
 plonky2/src/fri/prover.rs      | 6 +++---
 system_zero/src/system_zero.rs | 1 -
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/evm/src/keccak/keccak_stark.rs b/evm/src/keccak/keccak_stark.rs
index de80bf44..53dd66ab 100644
--- a/evm/src/keccak/keccak_stark.rs
+++ b/evm/src/keccak/keccak_stark.rs
@@ -194,7 +194,6 @@ impl<F: RichField + Extendable<D>, const D: usize> KeccakStark<F, D> {
     }
 
     pub fn generate_trace(&self, inputs: Vec<[u64; NUM_INPUTS]>) -> Vec<PolynomialValues<F>> {
-        #[allow(unused_mut)]
         let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate the witness, except for permuted columns in the lookup argument.
diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index bc38e66d..82e10869 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -192,7 +192,6 @@ impl<F: RichField + Extendable<D>, const D: usize> MemoryStark<F, D> {
     }
 
     pub(crate) fn generate_trace(&self, memory_ops: Vec<MemoryOp>) -> Vec<PolynomialValues<F>> {
-        #[allow(unused_mut)]
         let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate most of the trace in row-major form.
diff --git a/plonky2/src/fri/oracle.rs b/plonky2/src/fri/oracle.rs
index c6e3a1ff..312b458b 100644
--- a/plonky2/src/fri/oracle.rs
+++ b/plonky2/src/fri/oracle.rs
@@ -71,7 +71,7 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
         rate_bits: usize,
         blinding: bool,
         cap_height: usize,
-        _timing: &mut TimingTree,
+        timing: &mut TimingTree,
         fft_root_table: Option<&FftRootTable<F>>,
     ) -> Self
     where
@@ -79,15 +79,15 @@ impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize>
     {
         let degree = polynomials[0].len();
         let lde_values = timed!(
-            _timing,
+            timing,
             "FFT + blinding",
             Self::lde_values(&polynomials, rate_bits, blinding, fft_root_table)
         );
 
-        let mut leaves = timed!(_timing, "transpose LDEs", transpose(&lde_values));
+        let mut leaves = timed!(timing, "transpose LDEs", transpose(&lde_values));
         reverse_index_bits_in_place(&mut leaves);
         let merkle_tree = timed!(
-            _timing,
+            timing,
             "build Merkle tree",
             MerkleTree::new(leaves, cap_height)
         );
diff --git a/plonky2/src/fri/prover.rs b/plonky2/src/fri/prover.rs
index f8467d2b..6136a9a1 100644
--- a/plonky2/src/fri/prover.rs
+++ b/plonky2/src/fri/prover.rs
@@ -23,7 +23,7 @@ pub fn fri_proof<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const
     lde_polynomial_values: PolynomialValues<F::Extension>,
     challenger: &mut Challenger<F, C::Hasher>,
     fri_params: &FriParams,
-    _timing: &mut TimingTree,
+    timing: &mut TimingTree,
 ) -> FriProof<F, C::Hasher, D>
 where
     [(); C::Hasher::HASH_SIZE]:,
@@ -33,7 +33,7 @@ where
 
     // Commit phase
     let (trees, final_coeffs) = timed!(
-        _timing,
+        timing,
         "fold codewords in the commitment phase",
         fri_committed_trees::<F, C, D>(
             lde_polynomial_coeffs,
@@ -46,7 +46,7 @@ where
     // PoW phase
     let current_hash = challenger.get_hash();
     let pow_witness = timed!(
-        _timing,
+        timing,
         "find proof-of-work witness",
         fri_proof_of_work::<F, C, D>(current_hash, &fri_params.config)
     );
diff --git a/system_zero/src/system_zero.rs b/system_zero/src/system_zero.rs
index ce44c283..19c2df8c 100644
--- a/system_zero/src/system_zero.rs
+++ b/system_zero/src/system_zero.rs
@@ -69,7 +69,6 @@ impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
     }
 
     pub fn generate_trace(&self) -> Vec<PolynomialValues<F>> {
-        #[allow(unused_mut)]
         let mut timing = TimingTree::new("generate trace", log::Level::Debug);
 
         // Generate the witness, except for permuted columns in the lookup argument.