From 76c86c55b3f04db84fa4b3aa78b58490b2d17f13 Mon Sep 17 00:00:00 2001
From: Hamish Ivey-Law <426294+unzvfu@users.noreply.github.com>
Date: Fri, 8 Apr 2022 14:54:33 +1000
Subject: [PATCH] System Zero binary bitwise operations (#529)

* First draft of bitwise AND.
* Refactor everything; add support for other binary bitops.
* Documentation; fix integration into rest of ALU.
* Allow `cargo fmt` to make code harder to read.
* Refactor following Jakub's suggestions.
* Fix variable name.
* Minor documentation.
* Basic tests.
* Address Daniel's PR comments.
* Remove the 'BIT' prefix from the BIT{AND,IOR,XOR,ANDNOT} names.
* cargo fmt/clippy
* Simplify bit test.
---
 system_zero/src/alu/bitops.rs    | 360 +++++++++++++++++++++++++++++++
 system_zero/src/alu/mod.rs       |  21 +-
 system_zero/src/lib.rs           |   2 +
 system_zero/src/registers/alu.rs |  34 ++-
 4 files changed, 413 insertions(+), 4 deletions(-)
 create mode 100644 system_zero/src/alu/bitops.rs

diff --git a/system_zero/src/alu/bitops.rs b/system_zero/src/alu/bitops.rs
new file mode 100644
index 00000000..97c50332
--- /dev/null
+++ b/system_zero/src/alu/bitops.rs
@@ -0,0 +1,360 @@
+use plonky2::field::extension_field::Extendable;
+use plonky2::field::field_types::{Field, PrimeField64};
+use plonky2::field::packed_field::PackedField;
+use plonky2::hash::hash_types::RichField;
+use plonky2::iop::ext_target::ExtensionTarget;
+use plonky2::plonk::circuit_builder::CircuitBuilder;
+use plonky2::plonk::plonk_common::reduce_with_powers_ext_recursive;
+use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
+
+use crate::registers::alu::*;
+use crate::registers::NUM_COLUMNS;
+
+/// Interpret the 32 elements of `bits` as bits from low to high of a
+/// u32 and return \sum_i bits[i] 2^i as an element of P.
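+/// For example, bits = [1, 1, 0, 1, 0, ..., 0] is mapped to
+/// 1 + 2 + 8 = 11.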
+fn binary_to_u32<F, P>(bits: [P; 32]) -> P
+where
+    F: Field,
+    P: PackedField<Scalar = F>,
+{
+    bits.into_iter()
+        .enumerate()
+        .map(|(i, b)| b * F::from_canonical_u64(1u64 << i))
+        .sum()
+}
+
+fn generate_bitop_32<F: PrimeField64>(
+    values: &mut [F; NUM_COLUMNS],
+    bitop: usize,
+    input_a_regs: [usize; 32],
+    input_b_regs: [usize; 32],
+    output_reg: usize,
+) {
+    let a_bits = input_a_regs.map(|r| values[r]);
+    let b_bits = input_b_regs.map(|r| values[r]);
+
+    let a = binary_to_u32(a_bits).to_canonical_u64() as u32;
+    let b = binary_to_u32(b_bits).to_canonical_u64() as u32;
+
+    let out = match bitop {
+        IS_AND => a & b,
+        IS_IOR => a | b,
+        IS_XOR => a ^ b,
+        IS_ANDNOT => a & !b,
+        _ => panic!("unrecognized bitop instruction code"),
+    };
+
+    values[output_reg] = F::from_canonical_u32(out);
+}
+
+/// Use the `COL_BIT_DECOMP_INPUT_[AB]_{LO,HI}_*` registers to read
+/// bits from `values`, apply `bitop` to the reconstructed u32s (both
+/// lo and hi, for 64 bits total), and write the result to the
+/// `COL_BITOP_OUTPUT_*` registers.
+pub(crate) fn generate_bitop<F: PrimeField64>(values: &mut [F; NUM_COLUMNS], bitop: usize) {
+    // Generate lo half
+    generate_bitop_32(
+        values,
+        bitop,
+        COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS,
+        COL_BITOP_OUTPUT_0,
+    );
+    // Generate hi half
+    generate_bitop_32(
+        values,
+        bitop,
+        COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS,
+        COL_BITOP_OUTPUT_1,
+    );
+}
+
+fn eval_bitop_32<F: Field, P: PackedField<Scalar = F>>(
+    lv: &[P; NUM_COLUMNS],
+    input_a_regs: [usize; 32],
+    input_b_regs: [usize; 32],
+    output_reg: usize,
+    yield_constr: &mut ConstraintConsumer<P>,
+) {
+    // Filters
+    let is_and = lv[IS_AND];
+    let is_ior = lv[IS_IOR];
+    let is_xor = lv[IS_XOR];
+    let is_andnot = lv[IS_ANDNOT];
+
+    // Inputs
+    let a_bits = input_a_regs.map(|r| lv[r]);
+    let b_bits = input_b_regs.map(|r| lv[r]);
+
+    // Ensure that the inputs are bits
+    let inst_constr = is_and + is_ior + is_xor + is_andnot;
+    a_bits.map(|v| yield_constr.constraint(inst_constr * (v * v - v)));
+    b_bits.map(|v| yield_constr.constraint(inst_constr * (v * v - v)));
+
+    // Output
+    let output = lv[output_reg];
+
+    let a = binary_to_u32(a_bits);
+    let b = binary_to_u32(b_bits);
+    let a_and_b = binary_to_u32(a_bits.zip(b_bits).map(|(b0, b1)| b0 * b1));
+
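+    // For single bits x and y, the following identities hold:
+    //   AND:    x & y  = xy
+    //   IOR:    x | y  = x + y - xy
+    //   XOR:    x ^ y  = x + y - 2xy
+    //   ANDNOT: x & !y = x - xy
+    // Since binary_to_u32 is linear in the bits, the same combinations
+    // of a, b and a_and_b yield the corresponding 32-bit results.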
+    let constraint = is_and * (a_and_b - output)
+        + is_ior * (a + b - a_and_b - output)
+        + is_xor * (a + b - a_and_b * F::TWO - output)
+        + is_andnot * (a - a_and_b - output);
+
+    yield_constr.constraint(constraint);
+}
+
+/// Verify an AND, IOR, XOR, or ANDNOT instruction.
+pub(crate) fn eval_bitop<F: Field, P: PackedField<Scalar = F>>(
+    lv: &[P; NUM_COLUMNS],
+    yield_constr: &mut ConstraintConsumer<P>,
+) {
+    // Constraint for lo half
+    eval_bitop_32(
+        lv,
+        COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS,
+        COL_BITOP_OUTPUT_0,
+        yield_constr,
+    );
+    // Constraint for hi half
+    eval_bitop_32(
+        lv,
+        COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS,
+        COL_BITOP_OUTPUT_1,
+        yield_constr,
+    );
+}
+
+fn constrain_all_to_bits<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut CircuitBuilder<F, D>,
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+    filter: ExtensionTarget<D>,
+    vals: &[ExtensionTarget<D>],
+) {
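+    // v * v - v == 0 exactly when v is 0 or 1; the product with `filter`
+    // makes each constraint vacuous on rows where no bitop is selected.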
+    for v in vals.iter() {
+        let t0 = builder.mul_sub_extension(*v, *v, *v);
+        let t1 = builder.mul_extension(filter, t0);
+        yield_constr.constraint(builder, t1)
+    }
+}
+
+/// As for `eval_bitop_32`, but build with `builder`.
+fn eval_bitop_32_recursively<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut CircuitBuilder<F, D>,
+    lv: &[ExtensionTarget<D>; NUM_COLUMNS],
+    input_a_regs: [usize; 32],
+    input_b_regs: [usize; 32],
+    output_reg: usize,
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+) {
+    // Filters
+    let is_and = lv[IS_AND];
+    let is_ior = lv[IS_IOR];
+    let is_xor = lv[IS_XOR];
+    let is_andnot = lv[IS_ANDNOT];
+
+    // Inputs
+    let a_bits = input_a_regs.map(|r| lv[r]);
+    let b_bits = input_b_regs.map(|r| lv[r]);
+
+    // Ensure that the inputs are bits
+    let inst_constr = builder.add_many_extension(&[is_and, is_ior, is_xor, is_andnot]);
+    constrain_all_to_bits(builder, yield_constr, inst_constr, &a_bits);
+    constrain_all_to_bits(builder, yield_constr, inst_constr, &b_bits);
+
+    // Output
+    let output = lv[output_reg];
+
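+    // reduce_with_powers_ext_recursive computes \sum_i bits[i] * base^i,
+    // the in-circuit counterpart of binary_to_u32 above.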
+    let limb_base = builder.constant(F::TWO);
+    let a = reduce_with_powers_ext_recursive(builder, &a_bits, limb_base);
+    let b = reduce_with_powers_ext_recursive(builder, &b_bits, limb_base);
+    let a_and_b_bits = a_bits
+        .zip(b_bits)
+        .map(|(b0, b1)| builder.mul_extension(b0, b1));
+    let a_and_b = reduce_with_powers_ext_recursive(builder, &a_and_b_bits, limb_base);
+
+    let and_constr = {
+        let t = builder.sub_extension(a_and_b, output);
+        builder.mul_extension(t, is_and)
+    };
+
+    let ior_constr = {
+        let t0 = builder.add_extension(a, b);
+        let t1 = builder.sub_extension(t0, a_and_b);
+        let t2 = builder.sub_extension(t1, output);
+        builder.mul_extension(t2, is_ior)
+    };
+
+    let xor_constr = {
+        let t0 = builder.add_extension(a, b);
+        let t1 = builder.mul_const_extension(F::TWO, a_and_b);
+        let t2 = builder.sub_extension(t0, t1);
+        let t3 = builder.sub_extension(t2, output);
+        builder.mul_extension(t3, is_xor)
+    };
+
+    let andnot_constr = {
+        let t0 = builder.sub_extension(a, a_and_b);
+        let t1 = builder.sub_extension(t0, output);
+        builder.mul_extension(t1, is_andnot)
+    };
+
+    let constr = builder.add_many_extension(&[and_constr, ior_constr, xor_constr, andnot_constr]);
+    yield_constr.constraint(builder, constr);
+}
+
+/// As for `eval_bitop`, but with a builder.
+pub(crate) fn eval_bitop_recursively<F: RichField + Extendable<D>, const D: usize>(
+    builder: &mut CircuitBuilder<F, D>,
+    lv: &[ExtensionTarget<D>; NUM_COLUMNS],
+    yield_constr: &mut RecursiveConstraintConsumer<F, D>,
+) {
+    // Recursive constraint for lo half
+    eval_bitop_32_recursively(
+        builder,
+        lv,
+        COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS,
+        COL_BITOP_OUTPUT_0,
+        yield_constr,
+    );
+    // Recursive constraint for hi half
+    eval_bitop_32_recursively(
+        builder,
+        lv,
+        COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS,
+        COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS,
+        COL_BITOP_OUTPUT_1,
+        yield_constr,
+    );
+}
+
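+// The tests below follow the generate/eval consistency pattern used by the
+// other ALU operations: fill the trace columns with random values, run the
+// generator where appropriate, and check that the constraint evaluator
+// accepts (or, for invalid witnesses, rejects) the result.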
+#[cfg(test)]
+mod tests {
+    use plonky2::field::field_types::Field;
+    use plonky2::field::goldilocks_field::GoldilocksField;
+    use rand::{Rng, SeedableRng};
+    use rand_chacha::ChaCha8Rng;
+    use starky::constraint_consumer::ConstraintConsumer;
+
+    use super::*;
+    use crate::registers::NUM_COLUMNS;
+
+    #[test]
+    fn generate_eval_consistency_not_bitop() {
+        type F = GoldilocksField;
+
+        let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+        let mut values = [F::default(); NUM_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+        // If `IS_bitop == 0`, then the constraints should be met even
+        // if all values are garbage.
+        for bitop in [IS_AND, IS_IOR, IS_XOR, IS_ANDNOT] {
+            values[bitop] = F::ZERO;
+        }
+
+        let mut constraint_consumer = ConstraintConsumer::new(
+            vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+            GoldilocksField::ONE,
+            GoldilocksField::ONE,
+            GoldilocksField::ONE,
+        );
+        eval_bitop(&values, &mut constraint_consumer);
+        for &acc in &constraint_consumer.constraint_accs {
+            assert_eq!(acc, GoldilocksField::ZERO);
+        }
+    }
+
+    #[test]
+    fn generate_eval_consistency_bitop() {
+        type F = GoldilocksField;
+
+        let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+        let mut values = [F::default(); NUM_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+        const BITOPS: [usize; 4] = [IS_AND, IS_IOR, IS_XOR, IS_ANDNOT];
+        for bitop in BITOPS {
+            // Reset all the instruction registers
+            for op in BITOPS {
+                values[op] = F::ZERO;
+            }
+            // Set `IS_bitop == 1` and ensure all constraints are satisfied.
+            values[bitop] = F::ONE;
+
+            // Set inputs to random binary values
+            let all_bin_regs = COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS
+                .into_iter()
+                .chain(COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS)
+                .chain(COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS)
+                .chain(COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS);
+
+            for reg in all_bin_regs {
+                values[reg] = F::from_canonical_u32(rng.gen::<u32>() & 1);
+            }
+
+            generate_bitop(&mut values, bitop);
+
+            let mut constraint_consumer = ConstraintConsumer::new(
+                vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+                GoldilocksField::ONE,
+                GoldilocksField::ONE,
+                GoldilocksField::ONE,
+            );
+            eval_bitop(&values, &mut constraint_consumer);
+            for &acc in &constraint_consumer.constraint_accs {
+                assert_eq!(acc, GoldilocksField::ZERO);
+            }
+        }
+    }
+
+    #[test]
+    fn generate_eval_consistency_bit_inputs() {
+        type F = GoldilocksField;
+
+        let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
+        let mut values = [F::default(); NUM_COLUMNS].map(|_| F::rand_from_rng(&mut rng));
+
+        const BITOPS: [usize; 4] = [IS_AND, IS_IOR, IS_XOR, IS_ANDNOT];
+        for bitop in BITOPS {
+            // Reset all the instruction registers
+            for op in BITOPS {
+                values[op] = F::ZERO;
+            }
+            // Set `IS_bitop == 1`; the constraints should now fail, since
+            // one of the inputs is made non-binary below.
+            values[bitop] = F::ONE;
+
+            // Set inputs to random binary values
+            let all_bin_regs = COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS
+                .into_iter()
+                .chain(COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS)
+                .chain(COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS)
+                .chain(COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS);
+
+            for reg in all_bin_regs {
+                values[reg] = F::from_canonical_u32(rng.gen::<u32>() & 1);
+            }
+            // Make first "bit" non-binary.
+            values[COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS[0]] = F::TWO;
+
+            generate_bitop(&mut values, bitop);
+
+            let mut constraint_consumer = ConstraintConsumer::new(
+                vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
+                GoldilocksField::ONE,
+                GoldilocksField::ONE,
+                GoldilocksField::ONE,
+            );
+            eval_bitop(&values, &mut constraint_consumer);
+            for &acc in &constraint_consumer.constraint_accs {
+                assert_ne!(acc, GoldilocksField::ZERO);
+            }
+        }
+    }
+
+    // TODO: test eval_bitop_recursively.
+}
diff --git a/system_zero/src/alu/mod.rs b/system_zero/src/alu/mod.rs
index b1b05dc2..d53ad114 100644
--- a/system_zero/src/alu/mod.rs
+++ b/system_zero/src/alu/mod.rs
@@ -8,6 +8,7 @@ use starky::vars::StarkEvaluationTargets;
 use starky::vars::StarkEvaluationVars;
 
 use crate::alu::addition::{eval_addition, eval_addition_recursively, generate_addition};
+use crate::alu::bitops::{eval_bitop, eval_bitop_recursively, generate_bitop};
 use crate::alu::division::{eval_division, eval_division_recursively, generate_division};
 use crate::alu::mul_add::{eval_mul_add, eval_mul_add_recursively, generate_mul_add};
 use crate::alu::subtraction::{
@@ -18,11 +19,17 @@ use crate::registers::alu::*;
 use crate::registers::NUM_COLUMNS;
 
 mod addition;
+mod bitops;
 mod canonical;
 mod division;
 mod mul_add;
 mod subtraction;
 
+// TODO: This probably belongs in a more easily accessible location.
+const ALL_OPERATIONS: [usize; 8] = [
+    IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV, IS_AND, IS_IOR, IS_XOR, IS_ANDNOT,
+];
+
 pub(crate) fn generate_alu<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
     if values[IS_ADD].is_one() {
         generate_addition(values);
@@ -32,6 +39,14 @@ pub(crate) fn generate_alu<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
         generate_mul_add(values);
     } else if values[IS_DIV].is_one() {
         generate_division(values);
+    } else if values[IS_AND].is_one() {
+        generate_bitop(values, IS_AND);
+    } else if values[IS_IOR].is_one() {
+        generate_bitop(values, IS_IOR);
+    } else if values[IS_XOR].is_one() {
+        generate_bitop(values, IS_XOR);
+    } else if values[IS_ANDNOT].is_one() {
+        generate_bitop(values, IS_ANDNOT);
     }
 }
 
@@ -42,7 +57,7 @@ pub(crate) fn eval_alu<F: Field, P: PackedField<Scalar = F>>(
     let local_values = &vars.local_values;
 
     // Check that the operation flag values are binary.
-    for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] {
+    for col in ALL_OPERATIONS {
         let val = local_values[col];
         yield_constr.constraint(val * val - val);
     }
@@ -51,6 +66,7 @@ pub(crate) fn eval_alu<F: Field, P: PackedField<Scalar = F>>(
     eval_subtraction(local_values, yield_constr);
     eval_mul_add(local_values, yield_constr);
     eval_division(local_values, yield_constr);
+    eval_bitop(local_values, yield_constr);
 }
 
 pub(crate) fn eval_alu_recursively<F: RichField + Extendable<D>, const D: usize>(
@@ -61,7 +77,7 @@ pub(crate) fn eval_alu_recursively<F: RichField + Extendable<D>, const D: usize>(
     let local_values = &vars.local_values;
 
     // Check that the operation flag values are binary.
-    for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] {
+    for col in ALL_OPERATIONS {
         let val = local_values[col];
         let constraint = builder.mul_sub_extension(val, val, val);
         yield_constr.constraint(builder, constraint);
@@ -71,4 +87,5 @@ pub(crate) fn eval_alu_recursively<F: RichField + Extendable<D>, const D: usize>(
     eval_subtraction_recursively(builder, local_values, yield_constr);
     eval_mul_add_recursively(builder, local_values, yield_constr);
     eval_division_recursively(builder, local_values, yield_constr);
+    eval_bitop_recursively(builder, local_values, yield_constr);
 }
diff --git a/system_zero/src/lib.rs b/system_zero/src/lib.rs
index 7719afab..8681a97c 100644
--- a/system_zero/src/lib.rs
+++ b/system_zero/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(array_zip)]
+
 mod alu;
 mod core_registers;
 pub mod lookup;
diff --git a/system_zero/src/registers/alu.rs b/system_zero/src/registers/alu.rs
index 7b89b479..39cf73d5 100644
--- a/system_zero/src/registers/alu.rs
+++ b/system_zero/src/registers/alu.rs
@@ -4,13 +4,17 @@ pub(crate) const IS_ADD: usize = super::START_ALU;
 pub(crate) const IS_SUB: usize = IS_ADD + 1;
 pub(crate) const IS_MUL_ADD: usize = IS_SUB + 1;
 pub(crate) const IS_DIV: usize = IS_MUL_ADD + 1;
+pub(crate) const IS_AND: usize = IS_DIV + 1;
+pub(crate) const IS_IOR: usize = IS_AND + 1;
+pub(crate) const IS_XOR: usize = IS_IOR + 1;
+pub(crate) const IS_ANDNOT: usize = IS_XOR + 1;
 
-const START_SHARED_COLS: usize = IS_DIV + 1;
+const START_SHARED_COLS: usize = IS_ANDNOT + 1;
 
 /// Within the ALU, there are shared columns which can be used by any arithmetic/logic
 /// circuit, depending on which one is active this cycle.
 // Can be increased as needed as other operations are implemented.
-const NUM_SHARED_COLS: usize = 4;
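+// 130 shared columns = 4 * 32 bits of input (operands a and b, in lo and
+// hi halves) + 2 output limbs for the bitwise operations.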
+const NUM_SHARED_COLS: usize = 130;
 
 const fn shared_col(i: usize) -> usize {
     debug_assert!(i < NUM_SHARED_COLS);
@@ -89,4 +93,30 @@ pub(crate) const COL_DIV_RANGE_CHECKED_TMP_0: usize = super::range_check_16::col_rc_16_input(4);
 /// The second 16-bit chunk of a temporary value (divisor - remainder - 1).
 pub(crate) const COL_DIV_RANGE_CHECKED_TMP_1: usize = super::range_check_16::col_rc_16_input(5);
 
+///
+/// Bitwise logic operations
+///
+
+/// Bit decomposition of 64-bit values, as 32-bit low and high halves.
+
+const fn gen_bitop_32bit_input_regs(start: usize) -> [usize; 32] {
+    let mut regs = [0usize; 32];
+    let mut i = 0;
+    while i < 32 {
+        regs[i] = shared_col(start + i);
+        i += 1;
+    }
+    regs
+}
+
+pub(crate) const COL_BIT_DECOMP_INPUT_A_LO_BIN_REGS: [usize; 32] = gen_bitop_32bit_input_regs(0);
+pub(crate) const COL_BIT_DECOMP_INPUT_A_HI_BIN_REGS: [usize; 32] = gen_bitop_32bit_input_regs(32);
+pub(crate) const COL_BIT_DECOMP_INPUT_B_LO_BIN_REGS: [usize; 32] = gen_bitop_32bit_input_regs(64);
+pub(crate) const COL_BIT_DECOMP_INPUT_B_HI_BIN_REGS: [usize; 32] = gen_bitop_32bit_input_regs(96);
+
+/// The first 32-bit chunk of the output, based on little-endian ordering.
+pub(crate) const COL_BITOP_OUTPUT_0: usize = shared_col(128);
+/// The second 32-bit chunk of the output, based on little-endian ordering.
+pub(crate) const COL_BITOP_OUTPUT_1: usize = shared_col(129);
+
 pub(super) const END: usize = START_SHARED_COLS + NUM_SHARED_COLS;