From 50c9638b552ded673468652b36bd3d224ce0a00c Mon Sep 17 00:00:00 2001
From: Hamish Ivey-Law <426294+unzvfu@users.noreply.github.com>
Date: Fri, 26 Aug 2022 09:13:47 +1000
Subject: [PATCH] EVM arithmetic unit: unsigned comparisons (#688)

* Refactor u256 calculation; return cy/br from calculations.

* Implement less than and greater than operations.

* Add file documentation.
---
 evm/src/arithmetic/add.rs              |  35 ++--
 evm/src/arithmetic/arithmetic_stark.rs |   7 +
 evm/src/arithmetic/columns.rs          |   5 +
 evm/src/arithmetic/compare.rs          | 219 +++++++++++++++++++++++++
 evm/src/arithmetic/mod.rs              |   1 +
 evm/src/arithmetic/sub.rs              |  23 +--
 evm/src/arithmetic/utils.rs            |   3 +
 7 files changed, 270 insertions(+), 23 deletions(-)
 create mode 100644 evm/src/arithmetic/compare.rs
diff --git a/evm/src/arithmetic/add.rs b/evm/src/arithmetic/add.rs
index 80f03d63..e87566b6 100644
--- a/evm/src/arithmetic/add.rs
+++ b/evm/src/arithmetic/add.rs
@@ -9,6 +9,21 @@ use crate::arithmetic::columns::*;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::range_check_error;
 
+pub(crate) fn u256_add_cc(input0: [u64; N_LIMBS], input1: [u64; N_LIMBS]) -> ([u64; N_LIMBS], u64) {
+    // Input and output have 16-bit limbs
+    let mut output = [0u64; N_LIMBS];
+
+    const MASK: u64 = (1u64 << LIMB_BITS) - 1u64;
+    let mut cy = 0u64;
+    for (i, a, b) in izip!(0.., input0, input1) {
+        let s = a + b + cy;
+        cy = s >> LIMB_BITS;
+        assert!(cy <= 1u64, "input limbs were larger than 16 bits");
+        output[i] = s & MASK;
+    }
+    (output, cy)
+}
+
 /// Given two sequences `larger` and `smaller` of equal length (not
 /// checked), verifies that \sum_i larger[i] 2^(LIMB_BITS * i) ==
 /// \sum_i smaller[i] 2^(LIMB_BITS * i), taking care of carry propagation.
@@ -19,7 +34,8 @@ pub(crate) fn eval_packed_generic_are_equal<P, I, J>(
     is_op: P,
     larger: I,
     smaller: J,
-) where
+) -> P
+where
     P: PackedField,
     I: Iterator<Item = P>,
     J: Iterator<Item = P>,
@@ -36,6 +52,7 @@ pub(crate) fn eval_packed_generic_are_equal<P, I, J>(
         // increase the degree of the constraint.
         cy = t * overflow_inv;
     }
+    cy
 }
 
 pub(crate) fn eval_ext_circuit_are_equal<F, const D: usize, I, J>(
@@ -44,7 +61,8 @@ pub(crate) fn eval_ext_circuit_are_equal<F, const D: usize, I, J>(
     is_op: ExtensionTarget<D>,
     larger: I,
     smaller: J,
-) where
+) -> ExtensionTarget<D>
+where
     F: RichField + Extendable<D>,
     I: Iterator<Item = ExtensionTarget<D>>,
     J: Iterator<Item = ExtensionTarget<D>>,
@@ -72,6 +90,7 @@ pub(crate) fn eval_ext_circuit_are_equal<F, const D: usize, I, J>(
 
         cy = builder.mul_const_extension(overflow_inv, t);
     }
+    cy
 }
 
 pub fn generate<F: RichField>(lv: &mut [F; NUM_ARITH_COLUMNS]) {
@@ -79,17 +98,7 @@ pub fn generate<F: RichField>(lv: &mut [F; NUM_ARITH_COLUMNS]) {
     let input1_limbs = ADD_INPUT_1.map(|c| lv[c].to_canonical_u64());
 
     // Input and output have 16-bit limbs
-    let mut output_limbs = [0u64; N_LIMBS];
-
-    const MASK: u64 = (1u64 << LIMB_BITS) - 1u64;
-    let mut cy = 0u64;
-    for (i, a, b) in izip!(0.., input0_limbs, input1_limbs) {
-        let s = a + b + cy;
-        cy = s >> LIMB_BITS;
-        assert!(cy <= 1u64, "input limbs were larger than 16 bits");
-        output_limbs[i] = s & MASK;
-    }
-    // last carry is dropped because this is addition modulo 2^256.
+    let (output_limbs, _) = u256_add_cc(input0_limbs, input1_limbs);
 
     for (&c, output_limb) in ADD_OUTPUT.iter().zip(output_limbs) {
         lv[c] = F::from_canonical_u64(output_limb);
diff --git a/evm/src/arithmetic/arithmetic_stark.rs b/evm/src/arithmetic/arithmetic_stark.rs
index ce8c7528..d3dfa982 100644
--- a/evm/src/arithmetic/arithmetic_stark.rs
+++ b/evm/src/arithmetic/arithmetic_stark.rs
@@ -8,6 +8,7 @@ use plonky2::hash::hash_types::RichField;
 
 use crate::arithmetic::add;
 use crate::arithmetic::columns;
+use crate::arithmetic::compare;
 use crate::arithmetic::mul;
 use crate::arithmetic::sub;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
@@ -45,6 +46,10 @@ impl<F: RichField, const D: usize> ArithmeticStark<F, D> {
             sub::generate(local_values);
         } else if local_values[columns::IS_MUL].is_one() {
             mul::generate(local_values);
+        } else if local_values[columns::IS_LT].is_one() {
+            compare::generate(local_values, columns::IS_LT);
+        } else if local_values[columns::IS_GT].is_one() {
+            compare::generate(local_values, columns::IS_GT);
         } else {
             todo!("the requested operation has not yet been implemented");
         }
@@ -67,6 +72,7 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for ArithmeticSta
         add::eval_packed_generic(lv, yield_constr);
         sub::eval_packed_generic(lv, yield_constr);
         mul::eval_packed_generic(lv, yield_constr);
+        compare::eval_packed_generic(lv, yield_constr);
     }
 
     fn eval_ext_circuit(
@@ -79,6 +85,7 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for ArithmeticSta
         add::eval_ext_circuit(builder, lv, yield_constr);
         sub::eval_ext_circuit(builder, lv, yield_constr);
         mul::eval_ext_circuit(builder, lv, yield_constr);
+        compare::eval_ext_circuit(builder, lv, yield_constr);
     }
 
     fn constraint_degree(&self) -> usize {
diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs
index e51419a8..7b44adc1 100644
--- a/evm/src/arithmetic/columns.rs
+++ b/evm/src/arithmetic/columns.rs
@@ -79,4 +79,9 @@ pub(crate) const MUL_INPUT_1: [usize; N_LIMBS] = GENERAL_INPUT_1;
 pub(crate) const MUL_OUTPUT: [usize; N_LIMBS] = GENERAL_INPUT_2;
 pub(crate) const MUL_AUX_INPUT: [usize; N_LIMBS] = AUX_INPUT_0;
 
+pub(crate) const CMP_INPUT_0: [usize; N_LIMBS] = GENERAL_INPUT_0;
+pub(crate) const CMP_INPUT_1: [usize; N_LIMBS] = GENERAL_INPUT_1;
+pub(crate) const CMP_OUTPUT: usize = GENERAL_INPUT_2[0];
+pub(crate) const CMP_AUX_INPUT: [usize; N_LIMBS] = AUX_INPUT_0;
+
 pub const NUM_ARITH_COLUMNS: usize = START_SHARED_COLS + NUM_SHARED_COLS;
diff --git a/evm/src/arithmetic/compare.rs b/evm/src/arithmetic/compare.rs
new file mode 100644
index 00000000..8410cade
--- /dev/null
+++ b/evm/src/arithmetic/compare.rs
@@ -0,0 +1,219 @@
+//! Support for EVM LT and GT instructions
+//!
+//! This crate verifies EVM LT and GT instructions (i.e. for unsigned
+//! inputs). The difference between LT and GT is of course just a
+//! matter of the order of the inputs. The verification is essentially
+//! identical to the SUB instruction: For both SUB and LT we have values
+//!
+//!   - `input0`
+//!   - `input1`
+//!   - `difference` (mod 2^256)
+//!   - `borrow` (= 0 or 1)
+//!
+//! satisfying `input0 - input1 = difference + borrow * 2^256`. Where
+//! SUB verifies `difference` and ignores `borrow`, LT verifies
+//! `borrow` (and uses `difference` as an auxiliary input).
+
+use plonky2::field::extension::Extendable;
+use plonky2::field::packed::PackedField;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+
+use crate::arithmetic::add::{eval_ext_circuit_are_equal, eval_packed_generic_are_equal};
+use crate::arithmetic::columns::*;
+use crate::arithmetic::sub::u256_sub_br;
+use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+use crate::range_check_error;
+
+pub(crate) fn generate<F: RichField>(lv: &mut [F; NUM_ARITH_COLUMNS], op: usize) {
+    let input0 = CMP_INPUT_0.map(|c| lv[c].to_canonical_u64());
+    let input1 = CMP_INPUT_1.map(|c| lv[c].to_canonical_u64());
+
+    let (diff, br) = match op {
+        // input0 - input1 == diff + br*2^256
+        IS_LT => u256_sub_br(input0, input1),
+        // input1 - input0 == diff + br*2^256
+        IS_GT => u256_sub_br(input1, input0),
+        IS_SLT => todo!(),
+        IS_SGT => todo!(),
+        _ => panic!("op code not a comparison"),
+    };
+
+    for (&c, diff_limb) in CMP_AUX_INPUT.iter().zip(diff) {
+        lv[c] = F::from_canonical_u64(diff_limb);
+    }
+    lv[CMP_OUTPUT] = F::from_canonical_u64(br);
+}
+
+pub(crate) fn eval_packed_generic_lt<P: PackedField>(
+    yield_constr: &mut ConstraintConsumer<P>,
+    is_op: P,
+    input0: [P; N_LIMBS],
+    input1: [P; N_LIMBS],
+    aux: [P; N_LIMBS],
+    output: P,
+) {
+    // Verify (input0 < input1) == output by providing aux such that
+    // input0 - input1 == aux + output*2^256.
+    let lhs_limbs = input0.iter().zip(input1).map(|(&a, b)| a - b);
+    let cy = eval_packed_generic_are_equal(yield_constr, is_op, aux.into_iter(), lhs_limbs);
+    // We don't need to check that cy is 0 or 1, since output has
+    // already been checked to be 0 or 1.
+    yield_constr.constraint(is_op * (cy - output));
+}
+
+pub fn eval_packed_generic<P: PackedField>(
+    lv: &[P; NUM_ARITH_COLUMNS],
+    yield_constr: &mut ConstraintConsumer<P>,
+) {
+    range_check_error!(CMP_INPUT_0, 16);
+    range_check_error!(CMP_INPUT_1, 16);
+    range_check_error!(CMP_AUX_INPUT, 16);
+    range_check_error!([CMP_OUTPUT], 1);
+
+    let input0 = CMP_INPUT_0.map(|c| lv[c]);
+    let input1 = CMP_INPUT_1.map(|c| lv[c]);
+    let aux = CMP_AUX_INPUT.map(|c| lv[c]);
+    let output = lv[CMP_OUTPUT];
+
+    eval_packed_generic_lt(yield_constr, lv[IS_LT], input0, input1, aux, output);
+    eval_packed_generic_lt(yield_constr, lv[IS_GT], input1, input0, aux, output);
+}
+
+#[allow(clippy::needless_collect)]
+pub(crate) fn eval_ext_circuit_lt<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+    is_op: ExtensionTarget<D>,
+    input0: [ExtensionTarget<D>; N_LIMBS],
+    input1: [ExtensionTarget<D>; N_LIMBS],
+    aux: [ExtensionTarget<D>; N_LIMBS],
+    output: ExtensionTarget<D>,
+) {
+    // Since `map` is lazy and the closure passed to it borrows
+    // `builder`, we can't then borrow builder again below in the call
+    // to `eval_ext_circuit_are_equal`. The solution is to force
+    // evaluation with `collect`.
+    let lhs_limbs = input0
+        .iter()
+        .zip(input1)
+        .map(|(&a, b)| builder.sub_extension(a, b))
+        .collect::<Vec<ExtensionTarget<D>>>();
+
+    let cy = eval_ext_circuit_are_equal(
+        builder,
+        yield_constr,
+        is_op,
+        aux.into_iter(),
+        lhs_limbs.into_iter(),
+    );
+    let good_output = builder.sub_extension(cy, output);
+    let filter = builder.mul_extension(is_op, good_output);
+    yield_constr.constraint(builder, filter);
+}
+
+pub fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
+    lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+) {
+    let input0 = CMP_INPUT_0.map(|c| lv[c]);
+    let input1 = CMP_INPUT_1.map(|c| lv[c]);
+    let aux = CMP_AUX_INPUT.map(|c| lv[c]);
+    let output = lv[CMP_OUTPUT];
+
+    eval_ext_circuit_lt(
+        builder,
+        yield_constr,
+        lv[IS_LT],
+        input0,
+        input1,
+        aux,
+        output,
+    );
+    eval_ext_circuit_lt(
+        builder,
+        yield_constr,
+        lv[IS_GT],
+        input1,
+        input0,
+        aux,
+        output,
+    );
+}
+
+#[cfg(test)]
+mod tests {
+    use plonky2::field::goldilocks_field::GoldilocksField;
+    use plonky2::field::types::Field;
+    use rand::{Rng, SeedableRng};
+    use rand_chacha::ChaCha8Rng;
+
+    use super::*;
+    use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
+    use crate::constraint_consumer::ConstraintConsumer;
+
+    // TODO: Should be able to refactor this test to apply to all operations.
+    #[test]
+    fn generate_eval_consistency_not_compare() {
+        type F = GoldilocksField;
+
+        let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+        let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+        // if `IS_LT == 0`, then the constraints should be met even if
+        // all values are garbage. `eval_packed_generic` handles IS_GT
+        // at the same time, so we check both at once.
+        lv[IS_LT] = F::ZERO;
+        lv[IS_GT] = F::ZERO;
+
+        let mut constrant_consumer = ConstraintConsumer::new(
+            vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+            F::ONE,
+            F::ONE,
+            F::ONE,
+        );
+        eval_packed_generic(&lv, &mut constrant_consumer);
+        for &acc in &constrant_consumer.constraint_accs {
+            assert_eq!(acc, F::ZERO);
+        }
+    }
+
+    #[test]
+    fn generate_eval_consistency_compare() {
+        type F = GoldilocksField;
+
+        let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+        let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+        const N_ITERS: usize = 1000;
+
+        for _ in 0..N_ITERS {
+            for (op, other_op) in [(IS_LT, IS_GT), (IS_GT, IS_LT)] {
+                // set op == 1 and ensure all constraints are satisfied.
+                // we have to explicitly set the other op to zero since both
+                // are treated by the call.
+                lv[op] = F::ONE;
+                lv[other_op] = F::ZERO;
+
+                // set inputs to random values
+                for (&ai, bi) in CMP_INPUT_0.iter().zip(CMP_INPUT_1) {
+                    lv[ai] = F::from_canonical_u16(rng.gen());
+                    lv[bi] = F::from_canonical_u16(rng.gen());
+                }
+
+                generate(&mut lv, op);
+
+                let mut constrant_consumer = ConstraintConsumer::new(
+                    vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+                    F::ONE,
+                    F::ONE,
+                    F::ONE,
+                );
+                eval_packed_generic(&lv, &mut constrant_consumer);
+                for &acc in &constrant_consumer.constraint_accs {
+                    assert_eq!(acc, F::ZERO);
+                }
+            }
+        }
+    }
+}
diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs
index 07c4c5a9..69fbda09 100644
--- a/evm/src/arithmetic/mod.rs
+++ b/evm/src/arithmetic/mod.rs
@@ -1,4 +1,5 @@
 mod add;
+mod compare;
 mod mul;
 mod sub;
 mod utils;
diff --git a/evm/src/arithmetic/sub.rs b/evm/src/arithmetic/sub.rs
index ce7932e2..c632eb94 100644
--- a/evm/src/arithmetic/sub.rs
+++ b/evm/src/arithmetic/sub.rs
@@ -9,26 +9,29 @@ use crate::arithmetic::columns::*;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::range_check_error;
 
-pub fn generate<F: RichField>(lv: &mut [F; NUM_ARITH_COLUMNS]) {
-    let input0_limbs = SUB_INPUT_0.map(|c| lv[c].to_canonical_u64());
-    let input1_limbs = SUB_INPUT_1.map(|c| lv[c].to_canonical_u64());
-
-    // Input and output have 16-bit limbs
-    let mut output_limbs = [0u64; N_LIMBS];
-
+pub(crate) fn u256_sub_br(input0: [u64; N_LIMBS], input1: [u64; N_LIMBS]) -> ([u64; N_LIMBS], u64) {
     const LIMB_BOUNDARY: u64 = 1 << LIMB_BITS;
     const MASK: u64 = LIMB_BOUNDARY - 1u64;
 
+    let mut output = [0u64; N_LIMBS];
     let mut br = 0u64;
-    for (i, a, b) in izip!(0.., input0_limbs, input1_limbs) {
+    for (i, a, b) in izip!(0.., input0, input1) {
         let d = LIMB_BOUNDARY + a - b - br;
         // if a < b, then d < 2^16 so br = 1
         // if a >= b, then d >= 2^16 so br = 0
         br = 1u64 - (d >> LIMB_BITS);
         assert!(br <= 1u64, "input limbs were larger than 16 bits");
-        output_limbs[i] = d & MASK;
+        output[i] = d & MASK;
     }
-    // last borrow is dropped because this is subtraction modulo 2^256.
+
+    (output, br)
+}
+
+pub fn generate<F: RichField>(lv: &mut [F; NUM_ARITH_COLUMNS]) {
+    let input0_limbs = SUB_INPUT_0.map(|c| lv[c].to_canonical_u64());
+    let input1_limbs = SUB_INPUT_1.map(|c| lv[c].to_canonical_u64());
+
+    let (output_limbs, _) = u256_sub_br(input0_limbs, input1_limbs);
 
     for (&c, output_limb) in SUB_OUTPUT.iter().zip(output_limbs) {
         lv[c] = F::from_canonical_u64(output_limb);
diff --git a/evm/src/arithmetic/utils.rs b/evm/src/arithmetic/utils.rs
index dc9a0a2f..c50481f3 100644
--- a/evm/src/arithmetic/utils.rs
+++ b/evm/src/arithmetic/utils.rs
@@ -19,4 +19,7 @@ macro_rules! range_check_error {
     ($cols:ident, $rc_bits:expr) => {
         $crate::arithmetic::utils::_range_check_error::<$rc_bits>(file!(), line!(), &$cols);
     };
+    ([$cols:ident], $rc_bits:expr) => {
+        $crate::arithmetic::utils::_range_check_error::<$rc_bits>(file!(), line!(), &[$cols]);
+    };
 }