From 3d83d63f0ba5f6ee3f12ca1d6ca5c29db5343341 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Mon, 18 Jul 2022 11:50:58 -0700 Subject: [PATCH] Shared CPU columns I was thinking we could have two sets of shared columns: - First, a set of "core" columns which would contain instruction decoding registers during an execution cycle, or some counter data during a kernel bootloading cycle. - Second, a set of "general" columns which would be more general-purpose. For now it could contain "looking" columns for most CTLs (Keccak, arithmetic and logic; NOT memory since memory can be used simultaneously with the others). It could potentially be reused for other things too, such as the registers used for `EQ` and `IS_ZERO` (but I know it's nontrivial to share those since we would need to use lower-degree constraints, so I wouldn't bother for now). This PR implements just the latter. If it looks good I'll proceed with the former afterward. --- evm/src/all_stark.rs | 38 ++++----- evm/src/cpu/bootstrap_kernel.rs | 10 ++- evm/src/cpu/{columns.rs => columns/mod.rs} | 31 ++++--- evm/src/cpu/columns/shared.rs | 95 ++++++++++++++++++++++ evm/src/cpu/cpu_stark.rs | 12 +-- evm/src/cpu/simple_logic/eq_iszero.rs | 34 ++++---- evm/src/cpu/simple_logic/not.rs | 9 +- 7 files changed, 166 insertions(+), 63 deletions(-) rename evm/src/cpu/{columns.rs => columns/mod.rs} (87%) create mode 100644 evm/src/cpu/columns/shared.rs diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index ba157fc0..58756703 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -130,7 +130,7 @@ mod tests { use anyhow::Result; use ethereum_types::U256; - use itertools::{izip, Itertools}; + use itertools::Itertools; use plonky2::field::polynomial::PolynomialValues; use plonky2::field::types::{Field, PrimeField64}; use plonky2::iop::witness::PartialWitness; @@ -246,13 +246,10 @@ mod tests { let mut row: cpu::columns::CpuColumnsView = [F::ZERO; CpuStark::::COLUMNS].into(); row.is_keccak = F::ONE; - for (j, input, output) in izip!( - 0..2 * NUM_INPUTS, - row.keccak_input_limbs.iter_mut(), - row.keccak_output_limbs.iter_mut() - ) { - *input = keccak_input_limbs[i][j]; - *output = keccak_output_limbs[i][j]; + let keccak = row.general.keccak_mut(); + for j in 0..2 * NUM_INPUTS { + keccak.input_limbs[j] = keccak_input_limbs[i][j]; + keccak.output_limbs[j] = keccak_output_limbs[i][j]; } cpu_stark.generate(row.borrow_mut()); cpu_trace_rows.push(row.into()); @@ -270,21 +267,22 @@ mod tests { .into_iter() .map(|(col, opcode)| logic_trace[col].values[i] * F::from_canonical_u64(opcode)) .sum(); - for (cols_cpu, cols_logic) in [ - (&mut row.logic_input0, logic::columns::INPUT0), - (&mut row.logic_input1, logic::columns::INPUT1), - ] { - for (col_cpu, limb_cols_logic) in cols_cpu - .iter_mut() - .zip(logic::columns::limb_bit_cols_for_input(cols_logic)) - { - *col_cpu = - limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i])); - } + let logic = row.general.logic_mut(); + + let input0_bit_cols = logic::columns::limb_bit_cols_for_input(logic::columns::INPUT0); + for (col_cpu, limb_cols_logic) in logic.input0.iter_mut().zip(input0_bit_cols) { + *col_cpu = limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i])); } - for (col_cpu, col_logic) in row.logic_output.iter_mut().zip(logic::columns::RESULT) { + + let input1_bit_cols = logic::columns::limb_bit_cols_for_input(logic::columns::INPUT1); + for (col_cpu, limb_cols_logic) in logic.input1.iter_mut().zip(input1_bit_cols) { + *col_cpu = limb_from_bits_le(limb_cols_logic.map(|col| logic_trace[col].values[i])); + } + + for (col_cpu, col_logic) in logic.output.iter_mut().zip(logic::columns::RESULT) { *col_cpu = logic_trace[col_logic].values[i]; } + cpu_stark.generate(row.borrow_mut()); cpu_trace_rows.push(row.into()); } diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs index af307a28..2c6afb51 100644 --- a/evm/src/cpu/bootstrap_kernel.rs +++ b/evm/src/cpu/bootstrap_kernel.rs @@ -56,7 +56,8 @@ pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState } sponge_state[sponge_input_pos] = packed_bytes; - state.current_cpu_row.keccak_input_limbs = sponge_state.map(F::from_canonical_u32); + let keccak = state.current_cpu_row.general.keccak_mut(); + keccak.input_limbs = sponge_state.map(F::from_canonical_u32); state.commit_cpu_row(); sponge_input_pos = (sponge_input_pos + 1) % KECCAK_RATE_LIMBS; @@ -65,7 +66,8 @@ pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState if sponge_input_pos == 0 { state.current_cpu_row.is_keccak = F::ONE; keccakf_u32s(&mut sponge_state); - state.current_cpu_row.keccak_output_limbs = sponge_state.map(F::from_canonical_u32); + let keccak = state.current_cpu_row.general.keccak_mut(); + keccak.output_limbs = sponge_state.map(F::from_canonical_u32); } } } @@ -97,7 +99,7 @@ pub(crate) fn eval_bootstrap_kernel>( for (&expected, actual) in KERNEL .code_hash .iter() - .zip(local_values.keccak_output_limbs) + .zip(local_values.general.keccak().output_limbs) { let expected = P::from(F::from_canonical_u32(expected)); let diff = expected - actual; @@ -137,7 +139,7 @@ pub(crate) fn eval_bootstrap_kernel_circuit, const for (&expected, actual) in KERNEL .code_hash .iter() - .zip(local_values.keccak_output_limbs) + .zip(local_values.general.keccak().output_limbs) { let expected = builder.constant_extension(F::Extension::from_canonical_u32(expected)); let diff = builder.sub_extension(expected, actual); diff --git a/evm/src/cpu/columns.rs b/evm/src/cpu/columns/mod.rs similarity index 87% rename from evm/src/cpu/columns.rs rename to evm/src/cpu/columns/mod.rs index f3a400c6..51acbfd3 100644 --- a/evm/src/cpu/columns.rs +++ b/evm/src/cpu/columns/mod.rs @@ -2,14 +2,18 @@ #![allow(dead_code)] use std::borrow::{Borrow, BorrowMut}; +use std::fmt::Debug; use std::mem::{size_of, transmute, transmute_copy, ManuallyDrop}; use std::ops::{Index, IndexMut}; +use crate::cpu::columns::shared::CpuSharedColumnsView; use crate::memory; +mod shared; + #[repr(C)] #[derive(Eq, PartialEq, Debug)] -pub struct CpuColumnsView { +pub struct CpuColumnsView { /// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise. pub is_bootstrap_kernel: T, @@ -136,14 +140,9 @@ pub struct CpuColumnsView { /// Filter. 1 iff a Keccak permutation is computed on this row. pub is_keccak: T, - pub keccak_input_limbs: [T; 50], - pub keccak_output_limbs: [T; 50], - // Assuming a limb size of 16 bits. This can be changed, but it must be <= 28 bits. - // TODO: These input/output columns can be shared between the logic operations and others. - pub logic_input0: [T; 16], - pub logic_input1: [T; 16], - pub logic_output: [T; 16], + pub(crate) general: CpuSharedColumnsView, + pub simple_logic_diff: T, pub simple_logic_diff_inv: T, @@ -169,43 +168,43 @@ unsafe fn transmute_no_compile_time_size_checks(value: T) -> U { transmute_copy(&value) } -impl From<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { +impl From<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { fn from(value: [T; NUM_CPU_COLUMNS]) -> Self { unsafe { transmute_no_compile_time_size_checks(value) } } } -impl From> for [T; NUM_CPU_COLUMNS] { +impl From> for [T; NUM_CPU_COLUMNS] { fn from(value: CpuColumnsView) -> Self { unsafe { transmute_no_compile_time_size_checks(value) } } } -impl Borrow> for [T; NUM_CPU_COLUMNS] { +impl Borrow> for [T; NUM_CPU_COLUMNS] { fn borrow(&self) -> &CpuColumnsView { unsafe { transmute(self) } } } -impl BorrowMut> for [T; NUM_CPU_COLUMNS] { +impl BorrowMut> for [T; NUM_CPU_COLUMNS] { fn borrow_mut(&mut self) -> &mut CpuColumnsView { unsafe { transmute(self) } } } -impl Borrow<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { +impl Borrow<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { fn borrow(&self) -> &[T; NUM_CPU_COLUMNS] { unsafe { transmute(self) } } } -impl BorrowMut<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { +impl BorrowMut<[T; NUM_CPU_COLUMNS]> for CpuColumnsView { fn borrow_mut(&mut self) -> &mut [T; NUM_CPU_COLUMNS] { unsafe { transmute(self) } } } -impl Index for CpuColumnsView +impl Index for CpuColumnsView where [T]: Index, { @@ -217,7 +216,7 @@ where } } -impl IndexMut for CpuColumnsView +impl IndexMut for CpuColumnsView where [T]: IndexMut, { diff --git a/evm/src/cpu/columns/shared.rs b/evm/src/cpu/columns/shared.rs new file mode 100644 index 00000000..34db209d --- /dev/null +++ b/evm/src/cpu/columns/shared.rs @@ -0,0 +1,95 @@ +use std::borrow::{Borrow, BorrowMut}; +use std::fmt::{Debug, Formatter}; +use std::mem::{size_of, transmute}; + +/// General purpose columns, which can have different meanings depending on what CTL or other +/// operation is occurring at this row. +pub(crate) union CpuSharedColumnsView { + keccak: CpuKeccakView, + arithmetic: CpuArithmeticView, + logic: CpuLogicView, +} + +impl CpuSharedColumnsView { + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn keccak(&self) -> &CpuKeccakView { + unsafe { &self.keccak } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn keccak_mut(&mut self) -> &mut CpuKeccakView { + unsafe { &mut self.keccak } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn arithmetic(&self) -> &CpuArithmeticView { + unsafe { &self.arithmetic } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn arithmetic_mut(&mut self) -> &mut CpuArithmeticView { + unsafe { &mut self.arithmetic } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn logic(&self) -> &CpuLogicView { + unsafe { &self.logic } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn logic_mut(&mut self) -> &mut CpuLogicView { + unsafe { &mut self.logic } + } +} + +impl PartialEq for CpuSharedColumnsView { + fn eq(&self, other: &Self) -> bool { + let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow(); + let other_arr: &[T; NUM_SHARED_COLUMNS] = other.borrow(); + self_arr == other_arr + } +} + +impl Eq for CpuSharedColumnsView {} + +impl Debug for CpuSharedColumnsView { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow(); + Debug::fmt(self_arr, f) + } +} + +impl Borrow<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView { + fn borrow(&self) -> &[T; NUM_SHARED_COLUMNS] { + unsafe { transmute(self) } + } +} + +impl BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuSharedColumnsView { + fn borrow_mut(&mut self) -> &mut [T; NUM_SHARED_COLUMNS] { + unsafe { transmute(self) } + } +} + +#[derive(Copy, Clone)] +pub(crate) struct CpuKeccakView { + pub(crate) input_limbs: [T; 50], + pub(crate) output_limbs: [T; 50], +} + +#[derive(Copy, Clone)] +pub(crate) struct CpuArithmeticView { + // TODO: Add "looking" columns for the arithmetic CTL. + tmp: T, // temporary, to suppress errors +} + +#[derive(Copy, Clone)] +pub(crate) struct CpuLogicView { + // Assuming a limb size of 16 bits. This can be changed, but it must be <= 28 bits. + pub(crate) input0: [T; 16], + pub(crate) input1: [T; 16], + pub(crate) output: [T; 16], +} + +// `u8` is guaranteed to have a `size_of` of 1. +pub const NUM_SHARED_COLUMNS: usize = size_of::>(); diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index 1e5cc887..0e4d69f2 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -16,8 +16,9 @@ use crate::stark::Stark; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; pub fn ctl_data_keccak() -> Vec> { - let mut res: Vec<_> = Column::singles(COL_MAP.keccak_input_limbs).collect(); - res.extend(Column::singles(COL_MAP.keccak_output_limbs)); + let keccak = COL_MAP.general.keccak(); + let mut res: Vec<_> = Column::singles(keccak.input_limbs).collect(); + res.extend(Column::singles(keccak.output_limbs)); res } @@ -27,9 +28,10 @@ pub fn ctl_filter_keccak() -> Column { pub fn ctl_data_logic() -> Vec> { let mut res = Column::singles([COL_MAP.is_and, COL_MAP.is_or, COL_MAP.is_xor]).collect_vec(); - res.extend(Column::singles(COL_MAP.logic_input0)); - res.extend(Column::singles(COL_MAP.logic_input1)); - res.extend(Column::singles(COL_MAP.logic_output)); + let logic = COL_MAP.general.logic(); + res.extend(Column::singles(logic.input0)); + res.extend(Column::singles(logic.input1)); + res.extend(Column::singles(logic.output)); res } diff --git a/evm/src/cpu/simple_logic/eq_iszero.rs b/evm/src/cpu/simple_logic/eq_iszero.rs index 97e000b6..75bb8bb6 100644 --- a/evm/src/cpu/simple_logic/eq_iszero.rs +++ b/evm/src/cpu/simple_logic/eq_iszero.rs @@ -9,6 +9,7 @@ use crate::cpu::columns::CpuColumnsView; const LIMB_SIZE: usize = 16; pub fn generate(lv: &mut CpuColumnsView) { + let logic = lv.general.logic_mut(); let eq_filter = lv.is_eq.to_canonical_u64(); let iszero_filter = lv.is_iszero.to_canonical_u64(); assert!(eq_filter <= 1); @@ -20,9 +21,10 @@ pub fn generate(lv: &mut CpuColumnsView) { } let diffs = if eq_filter == 1 { - lv.logic_input0 + logic + .input0 .into_iter() - .zip(lv.logic_input1) + .zip(logic.input1) .map(|(in0, in1)| { assert_eq!(in0.to_canonical_u64() >> LIMB_SIZE, 0); assert_eq!(in1.to_canonical_u64() >> LIMB_SIZE, 0); @@ -31,7 +33,7 @@ pub fn generate(lv: &mut CpuColumnsView) { }) .sum() } else if iszero_filter == 1 { - lv.logic_input0.into_iter().sum() + logic.input0.into_iter().sum() } else { panic!() }; @@ -39,8 +41,8 @@ pub fn generate(lv: &mut CpuColumnsView) { lv.simple_logic_diff = diffs; lv.simple_logic_diff_inv = diffs.try_inverse().unwrap_or(F::ZERO); - lv.logic_output[0] = F::from_bool(diffs == F::ZERO); - for out_limb_ref in lv.logic_output[1..].iter_mut() { + logic.output[0] = F::from_bool(diffs == F::ZERO); + for out_limb_ref in logic.output[1..].iter_mut() { *out_limb_ref = F::ZERO; } } @@ -49,17 +51,18 @@ pub fn eval_packed( lv: &CpuColumnsView

, yield_constr: &mut ConstraintConsumer

, ) { + let logic = lv.general.logic(); let eq_filter = lv.is_eq; let iszero_filter = lv.is_iszero; let eq_or_iszero_filter = eq_filter + iszero_filter; - let ls_bit = lv.logic_output[0]; + let ls_bit = logic.output[0]; // Handle EQ and ISZERO. Most limbs of the output are 0, but the least-significant one is // either 0 or 1. yield_constr.constraint(eq_or_iszero_filter * ls_bit * (ls_bit - P::ONES)); - for &bit in &lv.logic_output[1..] { + for &bit in &logic.output[1..] { yield_constr.constraint(eq_or_iszero_filter * bit); } @@ -67,13 +70,13 @@ pub fn eval_packed( let diffs = lv.simple_logic_diff; let diffs_inv = lv.simple_logic_diff_inv; { - let input0_sum: P = lv.logic_input0.into_iter().sum(); + let input0_sum: P = logic.input0.into_iter().sum(); yield_constr.constraint(iszero_filter * (diffs - input0_sum)); - let sum_squared_diffs: P = lv - .logic_input0 + let sum_squared_diffs: P = logic + .input0 .into_iter() - .zip(lv.logic_input1) + .zip(logic.input1) .map(|(in0, in1)| (in0 - in1).square()) .sum(); yield_constr.constraint(eq_filter * (diffs - sum_squared_diffs)); @@ -90,11 +93,12 @@ pub fn eval_ext_circuit, const D: usize>( lv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { + let logic = lv.general.logic(); let eq_filter = lv.is_eq; let iszero_filter = lv.is_iszero; let eq_or_iszero_filter = builder.add_extension(eq_filter, iszero_filter); - let ls_bit = lv.logic_output[0]; + let ls_bit = logic.output[0]; // Handle EQ and ISZERO. Most limbs of the output are 0, but the least-significant one is // either 0 or 1. @@ -104,7 +108,7 @@ pub fn eval_ext_circuit, const D: usize>( yield_constr.constraint(builder, constr); } - for &bit in &lv.logic_output[1..] { + for &bit in &logic.output[1..] { let constr = builder.mul_extension(eq_or_iszero_filter, bit); yield_constr.constraint(builder, constr); } @@ -113,14 +117,14 @@ pub fn eval_ext_circuit, const D: usize>( let diffs = lv.simple_logic_diff; let diffs_inv = lv.simple_logic_diff_inv; { - let input0_sum = builder.add_many_extension(lv.logic_input0); + let input0_sum = builder.add_many_extension(logic.input0); { let constr = builder.sub_extension(diffs, input0_sum); let constr = builder.mul_extension(iszero_filter, constr); yield_constr.constraint(builder, constr); } - let sum_squared_diffs = lv.logic_input0.into_iter().zip(lv.logic_input1).fold( + let sum_squared_diffs = logic.input0.into_iter().zip(logic.input1).fold( builder.zero_extension(), |acc, (in0, in1)| { let diff = builder.sub_extension(in0, in1); diff --git a/evm/src/cpu/simple_logic/not.rs b/evm/src/cpu/simple_logic/not.rs index d1ba4d46..efbf51a6 100644 --- a/evm/src/cpu/simple_logic/not.rs +++ b/evm/src/cpu/simple_logic/not.rs @@ -17,7 +17,8 @@ pub fn generate(lv: &mut CpuColumnsView) { } assert_eq!(is_not_filter, 1); - for (input, output_ref) in lv.logic_input0.into_iter().zip(lv.logic_output.iter_mut()) { + let logic = lv.general.logic_mut(); + for (input, output_ref) in logic.input0.into_iter().zip(logic.output.iter_mut()) { let input = input.to_canonical_u64(); assert_eq!(input >> LIMB_SIZE, 0); let output = input ^ ALL_1_LIMB; @@ -30,10 +31,11 @@ pub fn eval_packed( yield_constr: &mut ConstraintConsumer

, ) { // This is simple: just do output = 0xffff - input. + let logic = lv.general.logic(); let cycle_filter = lv.is_cpu_cycle; let is_not_filter = lv.is_not; let filter = cycle_filter * is_not_filter; - for (input, output) in lv.logic_input0.into_iter().zip(lv.logic_output) { + for (input, output) in logic.input0.into_iter().zip(logic.output) { yield_constr .constraint(filter * (output + input - P::Scalar::from_canonical_u64(ALL_1_LIMB))); } @@ -44,10 +46,11 @@ pub fn eval_ext_circuit, const D: usize>( lv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { + let logic = lv.general.logic(); let cycle_filter = lv.is_cpu_cycle; let is_not_filter = lv.is_not; let filter = builder.mul_extension(cycle_filter, is_not_filter); - for (input, output) in lv.logic_input0.into_iter().zip(lv.logic_output) { + for (input, output) in logic.input0.into_iter().zip(logic.output) { let constr = builder.add_extension(output, input); let constr = builder.arithmetic_extension( F::ONE,