diff --git a/evm/src/arithmetic/add.rs b/evm/src/arithmetic/add.rs
new file mode 100644
index 00000000..80f03d63
--- /dev/null
+++ b/evm/src/arithmetic/add.rs
@@ -0,0 +1,211 @@
+use itertools::izip;
+use plonky2::field::extension::Extendable;
+use plonky2::field::packed::PackedField;
+use plonky2::field::types::Field;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+
+use crate::arithmetic::columns::*;
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::range_check_error;
+
+/// Given two sequences `larger` and `smaller` of equal length (not
+/// checked), verifies that \sum_i larger[i] 2^(LIMB_BITS * i) ==
+/// \sum_i smaller[i] 2^(LIMB_BITS * i), taking care of carry propagation.
+///
+/// The sequences must have been produced by `{add,sub}::eval_packed_generic()`.
+pub(crate) fn eval_packed_generic_are_equal
(
+ yield_constr: &mut ConstraintConsumer
,
+ is_op: P,
+ larger: I,
+ smaller: J,
+) where
+ P: PackedField,
+ I: Iterator- ,
+ J: Iterator
- ,
+{
+ let overflow = P::Scalar::from_canonical_u64(1 << LIMB_BITS);
+ let overflow_inv = overflow.inverse();
+ let mut cy = P::ZEROS;
+ for (a, b) in larger.zip(smaller) {
+ // t should be either 0 or 2^LIMB_BITS
+ let t = cy + a - b;
+ yield_constr.constraint(is_op * t * (overflow - t));
+ // cy <-- 0 or 1
+ // NB: this is multiplication by a constant, so doesn't
+ // increase the degree of the constraint.
+ cy = t * overflow_inv;
+ }
+}
+
+pub(crate) fn eval_ext_circuit_are_equal(
+ builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder,
+ yield_constr: &mut RecursiveConstraintConsumer,
+ is_op: ExtensionTarget,
+ larger: I,
+ smaller: J,
+) where
+ F: RichField + Extendable,
+ I: Iterator
- >,
+ J: Iterator
- >,
+{
+ // 2^LIMB_BITS in the base field
+ let overflow_base = F::from_canonical_u64(1 << LIMB_BITS);
+ // 2^LIMB_BITS in the extension field as an ExtensionTarget
+ let overflow = builder.constant_extension(F::Extension::from(overflow_base));
+ // 2^-LIMB_BITS in the base field.
+ let overflow_inv = F::inverse_2exp(LIMB_BITS);
+
+ let mut cy = builder.zero_extension();
+ for (a, b) in larger.zip(smaller) {
+ // t0 = cy + a
+ let t0 = builder.add_extension(cy, a);
+ // t = t0 - b
+ let t = builder.sub_extension(t0, b);
+ // t1 = overflow - t
+ let t1 = builder.sub_extension(overflow, t);
+ // t2 = t * t1
+ let t2 = builder.mul_extension(t, t1);
+
+ let filtered_limb_constraint = builder.mul_extension(is_op, t2);
+ yield_constr.constraint(builder, filtered_limb_constraint);
+
+ cy = builder.mul_const_extension(overflow_inv, t);
+ }
+}
+
+pub fn generate(lv: &mut [F; NUM_ARITH_COLUMNS]) {
+ let input0_limbs = ADD_INPUT_0.map(|c| lv[c].to_canonical_u64());
+ let input1_limbs = ADD_INPUT_1.map(|c| lv[c].to_canonical_u64());
+
+ // Input and output have 16-bit limbs
+ let mut output_limbs = [0u64; N_LIMBS];
+
+ const MASK: u64 = (1u64 << LIMB_BITS) - 1u64;
+ let mut cy = 0u64;
+ for (i, a, b) in izip!(0.., input0_limbs, input1_limbs) {
+ let s = a + b + cy;
+ cy = s >> LIMB_BITS;
+ assert!(cy <= 1u64, "input limbs were larger than 16 bits");
+ output_limbs[i] = s & MASK;
+ }
+ // last carry is dropped because this is addition modulo 2^256.
+
+ for (&c, output_limb) in ADD_OUTPUT.iter().zip(output_limbs) {
+ lv[c] = F::from_canonical_u64(output_limb);
+ }
+}
+
+pub fn eval_packed_generic(
+ lv: &[P; NUM_ARITH_COLUMNS],
+ yield_constr: &mut ConstraintConsumer
,
+) {
+ range_check_error!(ADD_INPUT_0, 16);
+ range_check_error!(ADD_INPUT_1, 16);
+ range_check_error!(ADD_OUTPUT, 16);
+
+ let is_add = lv[IS_ADD];
+ let input0_limbs = ADD_INPUT_0.iter().map(|&c| lv[c]);
+ let input1_limbs = ADD_INPUT_1.iter().map(|&c| lv[c]);
+ let output_limbs = ADD_OUTPUT.iter().map(|&c| lv[c]);
+
+ // This computed output is not yet reduced; i.e. some limbs may be
+ // more than 16 bits.
+ let output_computed = input0_limbs.zip(input1_limbs).map(|(a, b)| a + b);
+
+ eval_packed_generic_are_equal(yield_constr, is_add, output_computed, output_limbs);
+}
+
+#[allow(clippy::needless_collect)]
+pub fn eval_ext_circuit, const D: usize>(
+ builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder,
+ lv: &[ExtensionTarget; NUM_ARITH_COLUMNS],
+ yield_constr: &mut RecursiveConstraintConsumer,
+) {
+ let is_add = lv[IS_ADD];
+ let input0_limbs = ADD_INPUT_0.iter().map(|&c| lv[c]);
+ let input1_limbs = ADD_INPUT_1.iter().map(|&c| lv[c]);
+ let output_limbs = ADD_OUTPUT.iter().map(|&c| lv[c]);
+
+ // Since `map` is lazy and the closure passed to it borrows
+ // `builder`, we can't then borrow builder again below in the call
+ // to `eval_ext_circuit_are_equal`. The solution is to force
+ // evaluation with `collect`.
+ let output_computed = input0_limbs
+ .zip(input1_limbs)
+ .map(|(a, b)| builder.add_extension(a, b))
+ .collect::>>();
+
+ eval_ext_circuit_are_equal(
+ builder,
+ yield_constr,
+ is_add,
+ output_computed.into_iter(),
+ output_limbs,
+ );
+}
+
+#[cfg(test)]
+mod tests {
+ use plonky2::field::goldilocks_field::GoldilocksField;
+ use plonky2::field::types::Field;
+ use rand::{Rng, SeedableRng};
+ use rand_chacha::ChaCha8Rng;
+
+ use super::*;
+ use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
+ use crate::constraint_consumer::ConstraintConsumer;
+
+ // TODO: Should be able to refactor this test to apply to all operations.
+ #[test]
+ fn generate_eval_consistency_not_add() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // if `IS_ADD == 0`, then the constraints should be met even
+ // if all values are garbage.
+ lv[IS_ADD] = F::ZERO;
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+
+ #[test]
+ fn generate_eval_consistency_add() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // set `IS_ADD == 1` and ensure all constraints are satisfied.
+ lv[IS_ADD] = F::ONE;
+ // set inputs to random values
+ for (&ai, bi) in ADD_INPUT_0.iter().zip(ADD_INPUT_1) {
+ lv[ai] = F::from_canonical_u16(rng.gen());
+ lv[bi] = F::from_canonical_u16(rng.gen());
+ }
+
+ generate(&mut lv);
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+}
diff --git a/evm/src/arithmetic/arithmetic_stark.rs b/evm/src/arithmetic/arithmetic_stark.rs
new file mode 100644
index 00000000..ce8c7528
--- /dev/null
+++ b/evm/src/arithmetic/arithmetic_stark.rs
@@ -0,0 +1,87 @@
+use std::marker::PhantomData;
+use std::ops::Add;
+
+use itertools::Itertools;
+use plonky2::field::extension::{Extendable, FieldExtension};
+use plonky2::field::packed::PackedField;
+use plonky2::hash::hash_types::RichField;
+
+use crate::arithmetic::add;
+use crate::arithmetic::columns;
+use crate::arithmetic::mul;
+use crate::arithmetic::sub;
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::stark::Stark;
+use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
+
+#[derive(Copy, Clone)]
+pub struct ArithmeticStark {
+ pub f: PhantomData,
+}
+
+impl ArithmeticStark {
+ pub fn generate(&self, local_values: &mut [F; columns::NUM_ARITH_COLUMNS]) {
+ // Check that at most one operation column is "one" and that the
+ // rest are "zero".
+ assert_eq!(
+ columns::ALL_OPERATIONS
+ .iter()
+ .map(|&c| {
+ if local_values[c] == F::ONE {
+ Ok(1u64)
+ } else if local_values[c] == F::ZERO {
+ Ok(0u64)
+ } else {
+ Err("column was not 0 nor 1")
+ }
+ })
+ .fold_ok(0u64, Add::add),
+ Ok(1)
+ );
+
+ if local_values[columns::IS_ADD].is_one() {
+ add::generate(local_values);
+ } else if local_values[columns::IS_SUB].is_one() {
+ sub::generate(local_values);
+ } else if local_values[columns::IS_MUL].is_one() {
+ mul::generate(local_values);
+ } else {
+ todo!("the requested operation has not yet been implemented");
+ }
+ }
+}
+
+impl, const D: usize> Stark for ArithmeticStark {
+ const COLUMNS: usize = columns::NUM_ARITH_COLUMNS;
+ const PUBLIC_INPUTS: usize = 0;
+
+ fn eval_packed_generic(
+ &self,
+ vars: StarkEvaluationVars,
+ yield_constr: &mut ConstraintConsumer,
+ ) where
+ FE: FieldExtension,
+ P: PackedField,
+ {
+ let lv = vars.local_values;
+ add::eval_packed_generic(lv, yield_constr);
+ sub::eval_packed_generic(lv, yield_constr);
+ mul::eval_packed_generic(lv, yield_constr);
+ }
+
+ fn eval_ext_circuit(
+ &self,
+ builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder,
+ vars: StarkEvaluationTargets,
+ yield_constr: &mut RecursiveConstraintConsumer,
+ ) {
+ let lv = vars.local_values;
+ add::eval_ext_circuit(builder, lv, yield_constr);
+ sub::eval_ext_circuit(builder, lv, yield_constr);
+ mul::eval_ext_circuit(builder, lv, yield_constr);
+ }
+
+ fn constraint_degree(&self) -> usize {
+ 3
+ }
+}
diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs
new file mode 100644
index 00000000..e51419a8
--- /dev/null
+++ b/evm/src/arithmetic/columns.rs
@@ -0,0 +1,82 @@
+//! Arithmetic unit
+
+pub const LIMB_BITS: usize = 16;
+const EVM_REGISTER_BITS: usize = 256;
+
+/// Return the number of LIMB_BITS limbs that are in an EVM
+/// register-sized number, panicking if LIMB_BITS doesn't divide in
+/// the EVM register size.
+const fn n_limbs() -> usize {
+ if EVM_REGISTER_BITS % LIMB_BITS != 0 {
+ panic!("limb size must divide EVM register size");
+ }
+ EVM_REGISTER_BITS / LIMB_BITS
+}
+
+/// Number of LIMB_BITS limbs that are in on EVM register-sized number.
+pub const N_LIMBS: usize = n_limbs();
+
+pub const IS_ADD: usize = 0;
+pub const IS_MUL: usize = IS_ADD + 1;
+pub const IS_SUB: usize = IS_MUL + 1;
+pub const IS_DIV: usize = IS_SUB + 1;
+pub const IS_SDIV: usize = IS_DIV + 1;
+pub const IS_MOD: usize = IS_SDIV + 1;
+pub const IS_SMOD: usize = IS_MOD + 1;
+pub const IS_ADDMOD: usize = IS_SMOD + 1;
+pub const IS_MULMOD: usize = IS_ADDMOD + 1;
+pub const IS_LT: usize = IS_MULMOD + 1;
+pub const IS_GT: usize = IS_LT + 1;
+pub const IS_SLT: usize = IS_GT + 1;
+pub const IS_SGT: usize = IS_SLT + 1;
+pub const IS_SHL: usize = IS_SGT + 1;
+pub const IS_SHR: usize = IS_SHL + 1;
+pub const IS_SAR: usize = IS_SHR + 1;
+
+const START_SHARED_COLS: usize = IS_SAR + 1;
+
+pub(crate) const ALL_OPERATIONS: [usize; 16] = [
+ IS_ADD, IS_MUL, IS_SUB, IS_DIV, IS_SDIV, IS_MOD, IS_SMOD, IS_ADDMOD, IS_MULMOD, IS_LT, IS_GT,
+ IS_SLT, IS_SGT, IS_SHL, IS_SHR, IS_SAR,
+];
+
+/// Within the Arithmetic Unit, there are shared columns which can be
+/// used by any arithmetic circuit, depending on which one is active
+/// this cycle. Can be increased as needed as other operations are
+/// implemented.
+const NUM_SHARED_COLS: usize = 64;
+
+const fn shared_col(i: usize) -> usize {
+ assert!(i < NUM_SHARED_COLS);
+ START_SHARED_COLS + i
+}
+
+const fn gen_input_cols(start: usize) -> [usize; N] {
+ let mut cols = [0usize; N];
+ let mut i = 0;
+ while i < N {
+ cols[i] = shared_col(start + i);
+ i += 1;
+ }
+ cols
+}
+
+const GENERAL_INPUT_0: [usize; N_LIMBS] = gen_input_cols::(0);
+const GENERAL_INPUT_1: [usize; N_LIMBS] = gen_input_cols::(N_LIMBS);
+const GENERAL_INPUT_2: [usize; N_LIMBS] = gen_input_cols::(2 * N_LIMBS);
+const AUX_INPUT_0: [usize; N_LIMBS] = gen_input_cols::(3 * N_LIMBS);
+
+pub(crate) const ADD_INPUT_0: [usize; N_LIMBS] = GENERAL_INPUT_0;
+pub(crate) const ADD_INPUT_1: [usize; N_LIMBS] = GENERAL_INPUT_1;
+pub(crate) const ADD_OUTPUT: [usize; N_LIMBS] = GENERAL_INPUT_2;
+
+pub(crate) const SUB_INPUT_0: [usize; N_LIMBS] = GENERAL_INPUT_0;
+pub(crate) const SUB_INPUT_1: [usize; N_LIMBS] = GENERAL_INPUT_1;
+pub(crate) const SUB_OUTPUT: [usize; N_LIMBS] = GENERAL_INPUT_2;
+
+pub(crate) const MUL_INPUT_0: [usize; N_LIMBS] = GENERAL_INPUT_0;
+pub(crate) const MUL_INPUT_1: [usize; N_LIMBS] = GENERAL_INPUT_1;
+pub(crate) const MUL_OUTPUT: [usize; N_LIMBS] = GENERAL_INPUT_2;
+pub(crate) const MUL_AUX_INPUT: [usize; N_LIMBS] = AUX_INPUT_0;
+
+pub const NUM_ARITH_COLUMNS: usize = START_SHARED_COLS + NUM_SHARED_COLS;
diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs
new file mode 100644
index 00000000..07c4c5a9
--- /dev/null
+++ b/evm/src/arithmetic/mod.rs
@@ -0,0 +1,7 @@
+mod add;
+mod mul;
+mod sub;
+mod utils;
+
+pub mod arithmetic_stark;
+pub(crate) mod columns;
diff --git a/evm/src/arithmetic/mul.rs b/evm/src/arithmetic/mul.rs
new file mode 100644
index 00000000..72270517
--- /dev/null
+++ b/evm/src/arithmetic/mul.rs
@@ -0,0 +1,269 @@
+//! Support for the EVM MUL instruction.
+//!
+//! This crate verifies an EVM MUL instruction, which takes two
+//! 256-bit inputs A and B, and produces a 256-bit output C satisfying
+//!
+//! C = A*B (mod 2^256).
+//!
+//! Inputs A and B, and output C, are given as arrays of 16-bit
+//! limbs. For example, if the limbs of A are a[0]...a[15], then
+//!
+//! A = \sum_{i=0}^15 a[i] β^i,
+//!
+//! where β = 2^16. To verify that A, B and C satisfy the equation we
+//! proceed as follows. Define a(x) = \sum_{i=0}^15 a[i] x^i (so A = a(β))
+//! and similarly for b(x) and c(x). Then A*B = C (mod 2^256) if and only
+//! if there exist polynomials q and m such that
+//!
+//! a(x)*b(x) - c(x) - m(x)*x^16 - (x - β)*q(x) == 0.
+//!
+//! Because A, B and C are 256-bit numbers, the degrees of a, b and c
+//! are (at most) 15. Thus deg(a*b) <= 30, so deg(m) <= 14 and deg(q)
+//! <= 29. However, the fact that we're verifying the equality modulo
+//! 2^256 means that we can ignore terms of degree >= 16, since for
+//! them evaluating at β gives a factor of β^16 = 2^256 which is 0.
+//!
+//! Hence, to verify the equality, we don't need m(x) at all, and we
+//! only need to know q(x) up to degree 14 (so that (x-β)*q(x) has
+//! degree 15). On the other hand, the coefficients of q(x) can be as
+//! large as 16*(β-2) or 20 bits.
+
+use plonky2::field::extension::Extendable;
+use plonky2::field::packed::PackedField;
+use plonky2::field::types::Field;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+
+use crate::arithmetic::columns::*;
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::range_check_error;
+
+pub fn generate(lv: &mut [F; NUM_ARITH_COLUMNS]) {
+ let input0_limbs = MUL_INPUT_0.map(|c| lv[c].to_canonical_u64());
+ let input1_limbs = MUL_INPUT_1.map(|c| lv[c].to_canonical_u64());
+
+ const MASK: u64 = (1u64 << LIMB_BITS) - 1u64;
+
+ // Input and output have 16-bit limbs
+ let mut aux_in_limbs = [0u64; N_LIMBS];
+ let mut output_limbs = [0u64; N_LIMBS];
+
+ let mut unreduced_prod = [0u64; N_LIMBS];
+
+ // Column-wise pen-and-paper long multiplication on 16-bit limbs.
+ // We have heaps of space at the top of each limb, so by
+ // calculating column-wise (instead of the usual row-wise) we
+ // avoid a bunch of carry propagation handling (at the expense of
+ // slightly worse cache coherency), and it makes it easy to
+ // calculate the coefficients of a(x)*b(x) (in unreduced_prod).
+ let mut cy = 0u64;
+ for col in 0..N_LIMBS {
+ for i in 0..=col {
+ // Invariant: i + j = col
+ let j = col - i;
+ let ai_x_bj = input0_limbs[i] * input1_limbs[j];
+ unreduced_prod[col] += ai_x_bj;
+ }
+ let t = unreduced_prod[col] + cy;
+ cy = t >> LIMB_BITS;
+ output_limbs[col] = t & MASK;
+ }
+ // In principle, the last cy could be dropped because this is
+ // multiplication modulo 2^256. However, we need it below for
+ // aux_in_limbs to handle the fact that unreduced_prod will
+ // inevitably contain a one digit's worth that is > 2^256.
+
+ for (&c, output_limb) in MUL_OUTPUT.iter().zip(output_limbs) {
+ lv[c] = F::from_canonical_u64(output_limb);
+ }
+ for deg in 0..N_LIMBS {
+ // deg'th element <- a*b - c
+ unreduced_prod[deg] -= output_limbs[deg];
+ }
+
+ // unreduced_prod is the coefficients of the polynomial a(x)*b(x) - c(x).
+ // This must be zero when evaluated at x = B = 2^LIMB_BITS, hence it's
+ // divisible by (B - x). If we write unreduced_prod as
+ //
+ // a(x)*b(x) - c(x) = \sum_{i=0}^n p_i x^i
+ // = (B - x) \sum_{i=0}^{n-1} q_i x^i
+ //
+ // then by comparing coefficients it is easy to see that
+ //
+ // q_0 = p_0 / B and q_i = (p_i + q_{i-1}) / B
+ //
+ // for 0 < i < n-1 (and the divisions are exact).
+ aux_in_limbs[0] = unreduced_prod[0] >> LIMB_BITS;
+ for deg in 1..N_LIMBS - 1 {
+ aux_in_limbs[deg] = (unreduced_prod[deg] + aux_in_limbs[deg - 1]) >> LIMB_BITS;
+ }
+ aux_in_limbs[N_LIMBS - 1] = cy;
+
+ for deg in 0..N_LIMBS {
+ let c = MUL_AUX_INPUT[deg];
+ lv[c] = F::from_canonical_u64(aux_in_limbs[deg]);
+ }
+}
+
+pub fn eval_packed_generic(
+ lv: &[P; NUM_ARITH_COLUMNS],
+ yield_constr: &mut ConstraintConsumer,
+) {
+ range_check_error!(MUL_INPUT_0, 16);
+ range_check_error!(MUL_INPUT_1, 16);
+ range_check_error!(MUL_OUTPUT, 16);
+ range_check_error!(MUL_AUX_INPUT, 20);
+
+ let is_mul = lv[IS_MUL];
+ let input0_limbs = MUL_INPUT_0.map(|c| lv[c]);
+ let input1_limbs = MUL_INPUT_1.map(|c| lv[c]);
+ let output_limbs = MUL_OUTPUT.map(|c| lv[c]);
+ let aux_limbs = MUL_AUX_INPUT.map(|c| lv[c]);
+
+ // Constraint poly holds the coefficients of the polynomial that
+ // must be identically zero for this multiplication to be
+ // verified. It is initialised to the /negative/ of the claimed
+ // output.
+ let mut constr_poly = [P::ZEROS; N_LIMBS];
+
+ assert_eq!(constr_poly.len(), N_LIMBS);
+
+ // After this loop constr_poly holds the coefficients of the
+ // polynomial A(x)B(x) - C(x), where A, B and C are the polynomials
+ //
+ // A(x) = \sum_i input0_limbs[i] * 2^LIMB_BITS
+ // B(x) = \sum_i input1_limbs[i] * 2^LIMB_BITS
+ // C(x) = \sum_i output_limbs[i] * 2^LIMB_BITS
+ //
+ // This polynomial should equal (2^LIMB_BITS - x) * Q(x) where Q is
+ //
+ // Q(x) = \sum_i aux_limbs[i] * 2^LIMB_BITS
+ //
+ for col in 0..N_LIMBS {
+ // Invariant: i + j = col
+ for i in 0..=col {
+ let j = col - i;
+ constr_poly[col] += input0_limbs[i] * input1_limbs[j];
+ }
+ constr_poly[col] -= output_limbs[col];
+ }
+
+ // This subtracts (2^LIMB_BITS - x) * Q(x) from constr_poly.
+ let base = P::Scalar::from_canonical_u64(1 << LIMB_BITS);
+ constr_poly[0] -= base * aux_limbs[0];
+ for deg in 1..N_LIMBS {
+ constr_poly[deg] -= (base * aux_limbs[deg]) - aux_limbs[deg - 1];
+ }
+
+ // At this point constr_poly holds the coefficients of the
+ // polynomial A(x)B(x) - C(x) - (x - 2^LIMB_BITS)*Q(x). The
+ // multiplication is valid if and only if all of those
+ // coefficients are zero.
+ for &c in &constr_poly {
+ yield_constr.constraint(is_mul * c);
+ }
+}
+
+pub fn eval_ext_circuit, const D: usize>(
+ builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder,
+ lv: &[ExtensionTarget; NUM_ARITH_COLUMNS],
+ yield_constr: &mut RecursiveConstraintConsumer,
+) {
+ let is_mul = lv[IS_MUL];
+ let input0_limbs = MUL_INPUT_0.map(|c| lv[c]);
+ let input1_limbs = MUL_INPUT_1.map(|c| lv[c]);
+ let output_limbs = MUL_OUTPUT.map(|c| lv[c]);
+ let aux_in_limbs = MUL_AUX_INPUT.map(|c| lv[c]);
+
+ let zero = builder.zero_extension();
+ let mut constr_poly = [zero; N_LIMBS]; // pointless init
+
+ // Invariant: i + j = deg
+ for col in 0..N_LIMBS {
+ let mut acc = zero;
+ for i in 0..=col {
+ let j = col - i;
+ acc = builder.mul_add_extension(input0_limbs[i], input1_limbs[j], acc);
+ }
+ constr_poly[col] = builder.sub_extension(acc, output_limbs[col]);
+ }
+
+ let base = F::from_canonical_u64(1 << LIMB_BITS);
+ let t = builder.mul_const_extension(base, aux_in_limbs[0]);
+ constr_poly[0] = builder.sub_extension(constr_poly[0], t);
+ for deg in 1..N_LIMBS {
+ let t0 = builder.mul_const_extension(base, aux_in_limbs[deg]);
+ let t1 = builder.sub_extension(t0, aux_in_limbs[deg - 1]);
+ constr_poly[deg] = builder.sub_extension(constr_poly[deg], t1);
+ }
+
+ for &c in &constr_poly {
+ let filter = builder.mul_extension(is_mul, c);
+ yield_constr.constraint(builder, filter);
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use plonky2::field::goldilocks_field::GoldilocksField;
+ use plonky2::field::types::Field;
+ use rand::{Rng, SeedableRng};
+ use rand_chacha::ChaCha8Rng;
+
+ use super::*;
+ use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
+ use crate::constraint_consumer::ConstraintConsumer;
+
+ // TODO: Should be able to refactor this test to apply to all operations.
+ #[test]
+ fn generate_eval_consistency_not_mul() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // if `IS_MUL == 0`, then the constraints should be met even
+ // if all values are garbage.
+ lv[IS_MUL] = F::ZERO;
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+
+ #[test]
+ fn generate_eval_consistency_mul() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // set `IS_MUL == 1` and ensure all constraints are satisfied.
+ lv[IS_MUL] = F::ONE;
+ // set inputs to random values
+ for (&ai, bi) in MUL_INPUT_0.iter().zip(MUL_INPUT_1) {
+ lv[ai] = F::from_canonical_u16(rng.gen());
+ lv[bi] = F::from_canonical_u16(rng.gen());
+ }
+
+ generate(&mut lv);
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+}
diff --git a/evm/src/arithmetic/sub.rs b/evm/src/arithmetic/sub.rs
new file mode 100644
index 00000000..ce7932e2
--- /dev/null
+++ b/evm/src/arithmetic/sub.rs
@@ -0,0 +1,148 @@
+use itertools::izip;
+use plonky2::field::extension::Extendable;
+use plonky2::field::packed::PackedField;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+
+use crate::arithmetic::add::{eval_ext_circuit_are_equal, eval_packed_generic_are_equal};
+use crate::arithmetic::columns::*;
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::range_check_error;
+
+pub fn generate(lv: &mut [F; NUM_ARITH_COLUMNS]) {
+ let input0_limbs = SUB_INPUT_0.map(|c| lv[c].to_canonical_u64());
+ let input1_limbs = SUB_INPUT_1.map(|c| lv[c].to_canonical_u64());
+
+ // Input and output have 16-bit limbs
+ let mut output_limbs = [0u64; N_LIMBS];
+
+ const LIMB_BOUNDARY: u64 = 1 << LIMB_BITS;
+ const MASK: u64 = LIMB_BOUNDARY - 1u64;
+
+ let mut br = 0u64;
+ for (i, a, b) in izip!(0.., input0_limbs, input1_limbs) {
+ let d = LIMB_BOUNDARY + a - b - br;
+ // if a < b, then d < 2^16 so br = 1
+ // if a >= b, then d >= 2^16 so br = 0
+ br = 1u64 - (d >> LIMB_BITS);
+ assert!(br <= 1u64, "input limbs were larger than 16 bits");
+ output_limbs[i] = d & MASK;
+ }
+ // last borrow is dropped because this is subtraction modulo 2^256.
+
+ for (&c, output_limb) in SUB_OUTPUT.iter().zip(output_limbs) {
+ lv[c] = F::from_canonical_u64(output_limb);
+ }
+}
+
+pub fn eval_packed_generic(
+ lv: &[P; NUM_ARITH_COLUMNS],
+ yield_constr: &mut ConstraintConsumer,
+) {
+ range_check_error!(SUB_INPUT_0, 16);
+ range_check_error!(SUB_INPUT_1, 16);
+ range_check_error!(SUB_OUTPUT, 16);
+
+ let is_sub = lv[IS_SUB];
+ let input0_limbs = SUB_INPUT_0.iter().map(|&c| lv[c]);
+ let input1_limbs = SUB_INPUT_1.iter().map(|&c| lv[c]);
+ let output_limbs = SUB_OUTPUT.iter().map(|&c| lv[c]);
+
+ let output_computed = input0_limbs.zip(input1_limbs).map(|(a, b)| a - b);
+
+ eval_packed_generic_are_equal(yield_constr, is_sub, output_limbs, output_computed);
+}
+
+#[allow(clippy::needless_collect)]
+pub fn eval_ext_circuit, const D: usize>(
+ builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder,
+ lv: &[ExtensionTarget; NUM_ARITH_COLUMNS],
+ yield_constr: &mut RecursiveConstraintConsumer,
+) {
+ let is_sub = lv[IS_SUB];
+ let input0_limbs = SUB_INPUT_0.iter().map(|&c| lv[c]);
+ let input1_limbs = SUB_INPUT_1.iter().map(|&c| lv[c]);
+ let output_limbs = SUB_OUTPUT.iter().map(|&c| lv[c]);
+
+ // Since `map` is lazy and the closure passed to it borrows
+ // `builder`, we can't then borrow builder again below in the call
+ // to `eval_ext_circuit_are_equal`. The solution is to force
+ // evaluation with `collect`.
+ let output_computed = input0_limbs
+ .zip(input1_limbs)
+ .map(|(a, b)| builder.sub_extension(a, b))
+ .collect::>>();
+
+ eval_ext_circuit_are_equal(
+ builder,
+ yield_constr,
+ is_sub,
+ output_limbs,
+ output_computed.into_iter(),
+ );
+}
+
+#[cfg(test)]
+mod tests {
+ use plonky2::field::goldilocks_field::GoldilocksField;
+ use plonky2::field::types::Field;
+ use rand::{Rng, SeedableRng};
+ use rand_chacha::ChaCha8Rng;
+
+ use super::*;
+ use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
+ use crate::constraint_consumer::ConstraintConsumer;
+
+ // TODO: Should be able to refactor this test to apply to all operations.
+ #[test]
+ fn generate_eval_consistency_not_sub() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // if `IS_SUB == 0`, then the constraints should be met even
+ // if all values are garbage.
+ lv[IS_SUB] = F::ZERO;
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+
+ #[test]
+ fn generate_eval_consistency_sub() {
+ type F = GoldilocksField;
+
+ let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+ let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+ // set `IS_SUB == 1` and ensure all constraints are satisfied.
+ lv[IS_SUB] = F::ONE;
+ // set inputs to random values
+ for (&ai, bi) in SUB_INPUT_0.iter().zip(SUB_INPUT_1) {
+ lv[ai] = F::from_canonical_u16(rng.gen());
+ lv[bi] = F::from_canonical_u16(rng.gen());
+ }
+
+ generate(&mut lv);
+
+ let mut constrant_consumer = ConstraintConsumer::new(
+ vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ GoldilocksField::ONE,
+ );
+ eval_packed_generic(&lv, &mut constrant_consumer);
+ for &acc in &constrant_consumer.constraint_accs {
+ assert_eq!(acc, GoldilocksField::ZERO);
+ }
+ }
+}
diff --git a/evm/src/arithmetic/utils.rs b/evm/src/arithmetic/utils.rs
new file mode 100644
index 00000000..dc9a0a2f
--- /dev/null
+++ b/evm/src/arithmetic/utils.rs
@@ -0,0 +1,22 @@
+use log::error;
+
+/// Emit an error message regarding unchecked range assumptions.
+/// Assumes the values in `cols` are `[cols[0], cols[0] + 1, ...,
+/// cols[0] + cols.len() - 1]`.
+pub(crate) fn _range_check_error(file: &str, line: u32, cols: &[usize]) {
+ error!(
+ "{}:{}: arithmetic unit skipped {}-bit range-checks on columns {}--{}: not yet implemented",
+ line,
+ file,
+ RC_BITS,
+ cols[0],
+ cols[0] + cols.len() - 1
+ );
+}
+
+#[macro_export]
+macro_rules! range_check_error {
+ ($cols:ident, $rc_bits:expr) => {
+ $crate::arithmetic::utils::_range_check_error::<$rc_bits>(file!(), line!(), &$cols);
+ };
+}
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 9ded1431..1d91e8e0 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -5,6 +5,7 @@
#![feature(generic_const_exprs)]
pub mod all_stark;
+pub mod arithmetic;
pub mod config;
pub mod constraint_consumer;
pub mod cpu;