diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs
index 60775deb..cf368a6d 100644
--- a/evm/src/all_stark.rs
+++ b/evm/src/all_stark.rs
@@ -59,7 +59,7 @@ impl<F: RichField + Extendable<D>, const D: usize> AllStark<F, D> {
     }
 }
 
-#[derive(Copy, Clone)]
+#[derive(Debug, Copy, Clone)]
 pub enum Table {
     Cpu = 0,
     Keccak = 1,
@@ -132,7 +132,7 @@ mod tests {
     use ethereum_types::U256;
     use itertools::{izip, Itertools};
     use plonky2::field::polynomial::PolynomialValues;
-    use plonky2::field::types::Field;
+    use plonky2::field::types::{Field, PrimeField64};
     use plonky2::iop::witness::PartialWitness;
     use plonky2::plonk::circuit_builder::CircuitBuilder;
     use plonky2::plonk::circuit_data::CircuitConfig;
@@ -143,6 +143,7 @@ mod tests {
     use crate::all_stark::AllStark;
     use crate::config::StarkConfig;
     use crate::cpu::cpu_stark::CpuStark;
+    use crate::cross_table_lookup::testutils::check_ctls;
     use crate::keccak::keccak_stark::{KeccakStark, NUM_INPUTS, NUM_ROUNDS};
     use crate::logic::{self, LogicStark, Operation};
     use crate::memory::memory_stark::{generate_random_memory_ops, MemoryStark};
@@ -197,9 +198,11 @@ mod tests {
         num_memory_ops: usize,
         memory_stark: &MemoryStark<F, D>,
         rng: &mut R,
-    ) -> Vec<PolynomialValues<F>> {
+    ) -> (Vec<PolynomialValues<F>>, usize) {
         let memory_ops = generate_random_memory_ops(num_memory_ops, rng);
-        memory_stark.generate_trace(memory_ops)
+        let trace = memory_stark.generate_trace(memory_ops);
+        let num_ops = trace[0].values.len();
+        (trace, num_ops)
     }
 
     fn make_cpu_trace(
@@ -288,32 +291,34 @@ mod tests {
             cpu_stark.generate(row.borrow_mut());
             cpu_trace_rows.push(row.into());
         }
-
-        let mut current_cpu_index = 0;
-        let mut last_timestamp = memory_trace[memory::columns::TIMESTAMP].values[0];
         for i in 0..num_memory_ops {
-            let mem_timestamp = memory_trace[memory::columns::TIMESTAMP].values[i];
-            let clock = mem_timestamp;
-            let op = (0..NUM_CHANNELS)
-                .filter(|&o| memory_trace[memory::columns::is_channel(o)].values[i] == F::ONE)
-                .collect_vec()[0];
+            let mem_timestamp: usize = memory_trace[memory::columns::TIMESTAMP].values[i]
+                .to_canonical_u64()
+                .try_into()
+                .unwrap();
+            let clock = mem_timestamp / NUM_CHANNELS;
+            let channel = mem_timestamp % NUM_CHANNELS;
 
-            if mem_timestamp != last_timestamp {
-                current_cpu_index += 1;
-                last_timestamp = mem_timestamp;
-            }
+            let is_padding_row = (0..NUM_CHANNELS)
+                .map(|c| memory_trace[memory::columns::is_channel(c)].values[i])
+                .all(|x| x == F::ZERO);
 
-            let row: &mut cpu::columns::CpuColumnsView<F> =
-                cpu_trace_rows[current_cpu_index].borrow_mut();
+            if !is_padding_row {
+                let row: &mut cpu::columns::CpuColumnsView<F> = cpu_trace_rows[clock].borrow_mut();
 
-            row.mem_channel_used[op] = F::ONE;
-            row.clock = clock;
-            row.mem_is_read[op] = memory_trace[memory::columns::IS_READ].values[i];
-            row.mem_addr_context[op] = memory_trace[memory::columns::ADDR_CONTEXT].values[i];
-            row.mem_addr_segment[op] = memory_trace[memory::columns::ADDR_SEGMENT].values[i];
-            row.mem_addr_virtual[op] = memory_trace[memory::columns::ADDR_VIRTUAL].values[i];
-            for j in 0..8 {
-                row.mem_value[op][j] = memory_trace[memory::columns::value_limb(j)].values[i];
+                row.mem_channel_used[channel] = F::ONE;
+                row.clock = F::from_canonical_usize(clock);
+                row.mem_is_read[channel] = memory_trace[memory::columns::IS_READ].values[i];
+                row.mem_addr_context[channel] =
+                    memory_trace[memory::columns::ADDR_CONTEXT].values[i];
+                row.mem_addr_segment[channel] =
+                    memory_trace[memory::columns::ADDR_SEGMENT].values[i];
+                row.mem_addr_virtual[channel] =
+                    memory_trace[memory::columns::ADDR_VIRTUAL].values[i];
+                for j in 0..8 {
+                    row.mem_value[channel][j] =
+                        memory_trace[memory::columns::value_limb(j)].values[i];
+                }
             }
         }
@@ -336,6 +341,8 @@ mod tests {
         let keccak_trace = make_keccak_trace(num_keccak_perms, &all_stark.keccak_stark, &mut rng);
         let logic_trace = make_logic_trace(num_logic_rows, &all_stark.logic_stark, &mut rng);
-        let mut memory_trace = make_memory_trace(num_memory_ops, &all_stark.memory_stark, &mut rng);
+        let mem_trace = make_memory_trace(num_memory_ops, &all_stark.memory_stark, &mut rng);
+        let mut memory_trace = mem_trace.0;
+        let num_memory_ops = mem_trace.1;
         let cpu_trace = make_cpu_trace(
             num_keccak_perms,
             num_logic_rows,
@@ -346,10 +353,13 @@
             &mut memory_trace,
         );
 
+        let traces = vec![cpu_trace, keccak_trace, logic_trace, memory_trace];
+        check_ctls(&traces, &all_stark.cross_table_lookups);
+
         let proof = prove::<F, C, D>(
             &all_stark,
             config,
-            vec![cpu_trace, keccak_trace, logic_trace, memory_trace],
+            traces,
             vec![vec![]; 4],
             &mut TimingTree::default(),
         )?;
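
The test above decodes each memory row's `TIMESTAMP` back into a `(clock, channel)` pair, inverting the packing `timestamp = clock * NUM_CHANNELS + channel` introduced in `memory_stark.rs` below. A minimal sketch of the round-trip, assuming only a `NUM_CHANNELS` constant (the helper names are illustrative, not part of the patch):

const NUM_CHANNELS: usize = 4; // stand-in value for the sketch

fn pack_timestamp(clock: usize, channel: usize) -> usize {
    clock * NUM_CHANNELS + channel
}

fn unpack_timestamp(timestamp: usize) -> (usize, usize) {
    // Integer division/remainder invert the packing exactly because
    // `channel < NUM_CHANNELS`.
    (timestamp / NUM_CHANNELS, timestamp % NUM_CHANNELS)
}

Since the packing is a bijection between `(clock, channel)` pairs and timestamps, a single CTL column can carry both values at once.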
diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs
index e642985f..1e5cc887 100644
--- a/evm/src/cpu/cpu_stark.rs
+++ b/evm/src/cpu/cpu_stark.rs
@@ -40,7 +40,6 @@ pub fn ctl_filter_logic<F: Field>() -> Column<F> {
 
 pub fn ctl_data_memory<F: Field>(channel: usize) -> Vec<Column<F>> {
     debug_assert!(channel < NUM_CHANNELS);
     let mut cols: Vec<Column<F>> = Column::singles([
-        COL_MAP.clock,
         COL_MAP.mem_is_read[channel],
         COL_MAP.mem_addr_context[channel],
         COL_MAP.mem_addr_segment[channel],
@@ -48,6 +47,14 @@ pub fn ctl_data_memory<F: Field>(channel: usize) -> Vec<Column<F>> {
     ])
     .collect_vec();
     cols.extend(Column::singles(COL_MAP.mem_value[channel]));
+
+    let scalar = F::from_canonical_usize(NUM_CHANNELS);
+    let addend = F::from_canonical_usize(channel);
+    cols.push(Column::linear_combination_with_constant(
+        vec![(COL_MAP.clock, scalar)],
+        addend,
+    ));
+
     cols
 }
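
On each CPU row, the column pushed above evaluates to `clock * NUM_CHANNELS + channel`, which must equal the memory table's `TIMESTAMP` column (moved to the matching last position in `memory_stark.rs` below). As an illustration of how such a linear-combination column evaluates on a trace row — a simplified stand-in, not plonky2's actual `Column` internals:

use core::ops::{Add, Mul};

// Illustrative model of a CTL column: a sum of (trace column, coefficient)
// terms plus a constant.
struct LinearColumn<F> {
    linear_combination: Vec<(usize, F)>,
    constant: F,
}

impl<F: Copy + Add<Output = F> + Mul<Output = F>> LinearColumn<F> {
    // Evaluate on one trace row: constant + sum(coeff * row[col]).
    fn eval_row(&self, row: &[F]) -> F {
        self.linear_combination
            .iter()
            .fold(self.constant, |acc, &(col, coeff)| acc + coeff * row[col])
    }
}

fn main() {
    // clock lives in trace column 0; NUM_CHANNELS = 4, channel = 2.
    let col = LinearColumn {
        linear_combination: vec![(0, 4u64)],
        constant: 2,
    };
    assert_eq!(col.eval_row(&[10]), 42); // clock 10 -> timestamp 10 * 4 + 2
}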
diff --git a/evm/src/cross_table_lookup.rs b/evm/src/cross_table_lookup.rs
index 60ce25d7..4097df7b 100644
--- a/evm/src/cross_table_lookup.rs
+++ b/evm/src/cross_table_lookup.rs
@@ -649,3 +649,137 @@ pub(crate) fn verify_cross_table_lookups_circuit<
     }
     debug_assert!(ctl_zs_openings.iter_mut().all(|iter| iter.next().is_none()));
 }
+
+#[cfg(test)]
+pub(crate) mod testutils {
+    use std::collections::HashMap;
+
+    use plonky2::field::polynomial::PolynomialValues;
+    use plonky2::field::types::Field;
+
+    use crate::all_stark::Table;
+    use crate::cross_table_lookup::{CrossTableLookup, TableWithColumns};
+
+    type MultiSet<F> = HashMap<Vec<F>, Vec<(Table, usize)>>;
+
+    /// Check that the provided traces and cross-table lookups are consistent.
+    pub(crate) fn check_ctls<F: Field>(
+        trace_poly_values: &[Vec<PolynomialValues<F>>],
+        cross_table_lookups: &[CrossTableLookup<F>],
+    ) {
+        for (i, ctl) in cross_table_lookups.iter().enumerate() {
+            check_ctl(trace_poly_values, ctl, i);
+        }
+    }
+
+    fn check_ctl<F: Field>(
+        trace_poly_values: &[Vec<PolynomialValues<F>>],
+        ctl: &CrossTableLookup<F>,
+        ctl_index: usize,
+    ) {
+        let CrossTableLookup {
+            looking_tables,
+            looked_table,
+            default,
+        } = ctl;
+
+        // Build multisets `m` such that `(table, i)` is in `m[row]` iff the `i`-th row of `table`
+        // is equal to `row` and the filter is 1. Without default values, the CTL check holds iff
+        // `looking_multiset == looked_multiset`.
+        let mut looking_multiset = MultiSet::<F>::new();
+        let mut looked_multiset = MultiSet::<F>::new();
+
+        for table in looking_tables {
+            process_table(trace_poly_values, table, &mut looking_multiset);
+        }
+        process_table(trace_poly_values, looked_table, &mut looked_multiset);
+
+        let empty = &vec![];
+        // Check that every row in the looking tables appears in the looked table the same
+        // number of times, with some special logic for the default row.
+        for (row, looking_locations) in &looking_multiset {
+            let looked_locations = looked_multiset.get(row).unwrap_or(empty);
+            if let Some(default) = default {
+                if row == default {
+                    continue;
+                }
+            }
+            check_locations(looking_locations, looked_locations, ctl_index, row);
+        }
+        let extra_default_count = default.as_ref().map(|d| {
+            let looking_default_locations = looking_multiset.get(d).unwrap_or(empty);
+            let looked_default_locations = looked_multiset.get(d).unwrap_or(empty);
+            looking_default_locations
+                .len()
+                .checked_sub(looked_default_locations.len())
+                .unwrap_or_else(|| {
+                    // On underflow, panic: there should be at least as many default rows on the
+                    // looking side. `check_locations` reports the mismatch.
+                    check_locations(
+                        looking_default_locations,
+                        looked_default_locations,
+                        ctl_index,
+                        d,
+                    );
+                    unreachable!()
+                })
+        });
+        // Check that the number of extra default rows is correct.
+        if let Some(count) = extra_default_count {
+            assert_eq!(
+                count,
+                looking_tables
+                    .iter()
+                    .map(|table| trace_poly_values[table.table as usize][0].len())
+                    .sum::<usize>()
+                    - trace_poly_values[looked_table.table as usize][0].len()
+            );
+        }
+        // Check that every row in the looked table appears in the looking tables the same
+        // number of times.
+        for (row, looked_locations) in &looked_multiset {
+            let looking_locations = looking_multiset.get(row).unwrap_or(empty);
+            check_locations(looking_locations, looked_locations, ctl_index, row);
+        }
+    }
+
+    fn process_table<F: Field>(
+        trace_poly_values: &[Vec<PolynomialValues<F>>],
+        table: &TableWithColumns<F>,
+        multiset: &mut MultiSet<F>,
+    ) {
+        let trace = &trace_poly_values[table.table as usize];
+        for i in 0..trace[0].len() {
+            let filter = if let Some(column) = &table.filter_column {
+                column.eval_table(trace, i)
+            } else {
+                F::ONE
+            };
+            if filter.is_one() {
+                let row = table
+                    .columns
+                    .iter()
+                    .map(|c| c.eval_table(trace, i))
+                    .collect::<Vec<_>>();
+                multiset.entry(row).or_default().push((table.table, i));
+            } else {
+                assert_eq!(filter, F::ZERO, "Non-binary filter?")
+            }
+        }
+    }
+
+    fn check_locations<F: Field>(
+        looking_locations: &[(Table, usize)],
+        looked_locations: &[(Table, usize)],
+        ctl_index: usize,
+        row: &[F],
+    ) {
+        if looking_locations.len() != looked_locations.len() {
+            panic!(
+                "CTL #{ctl_index}:\n\
+                 Row {row:?} is present {l0} times in the looking tables, but {l1} times in the looked table.\n\
+                 Looking locations (Table, Row index): {looking_locations:?}.\n\
+                 Looked locations (Table, Row index): {looked_locations:?}.",
+                l0 = looking_locations.len(),
+                l1 = looked_locations.len(),
+            );
+        }
+    }
+}
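
`check_ctl` is a filtered multiset equality check: every row vector whose filter is 1 must occur the same number of times on the looking side and on the looked side, up to the default-row bookkeeping. A self-contained toy model of the same idea over plain `u64` values (independent of the types above):

use std::collections::HashMap;

// Table rows are (filter, value); the CTL condition is that the filtered
// value multisets of the two sides coincide.
fn multiset(rows: &[(u64, u64)]) -> HashMap<u64, usize> {
    let mut m = HashMap::new();
    for &(filter, value) in rows {
        assert!(filter == 0 || filter == 1, "Non-binary filter?");
        if filter == 1 {
            *m.entry(value).or_insert(0) += 1;
        }
    }
    m
}

fn main() {
    let looking = [(1, 7), (0, 99), (1, 7), (1, 3)];
    let looked = [(1, 3), (1, 7), (1, 7), (0, 42)];
    assert_eq!(multiset(&looking), multiset(&looked)); // CTL satisfied
}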
diff --git a/evm/src/memory/columns.rs b/evm/src/memory/columns.rs
index d9fa927f..214a7e4b 100644
--- a/evm/src/memory/columns.rs
+++ b/evm/src/memory/columns.rs
@@ -1,5 +1,7 @@
 //! Memory registers.
 
+use std::ops::Range;
+
 use crate::memory::{NUM_CHANNELS, VALUE_LIMBS};
 
 pub(crate) const TIMESTAMP: usize = 0;
@@ -36,20 +38,22 @@ pub(crate) const CONTEXT_FIRST_CHANGE: usize = SORTED_VALUE_START + VALUE_LIMBS;
 pub(crate) const SEGMENT_FIRST_CHANGE: usize = CONTEXT_FIRST_CHANGE + 1;
 pub(crate) const VIRTUAL_FIRST_CHANGE: usize = SEGMENT_FIRST_CHANGE + 1;
 
+// Flags to indicate if this operation came from the `i`th channel of the memory bus.
+const IS_CHANNEL_START: usize = VIRTUAL_FIRST_CHANGE + 1;
+pub(crate) const fn is_channel(channel: usize) -> usize {
+    debug_assert!(channel < NUM_CHANNELS);
+    IS_CHANNEL_START + channel
+}
+
 // We use a range check to ensure sorting.
-pub(crate) const RANGE_CHECK: usize = VIRTUAL_FIRST_CHANGE + 1;
+pub(crate) const RANGE_CHECK: usize = IS_CHANNEL_START + NUM_CHANNELS;
 // The counter column (used for the range check) starts from 0 and increments.
 pub(crate) const COUNTER: usize = RANGE_CHECK + 1;
 // Helper columns for the permutation argument used to enforce the range check.
 pub(crate) const RANGE_CHECK_PERMUTED: usize = COUNTER + 1;
 pub(crate) const COUNTER_PERMUTED: usize = RANGE_CHECK_PERMUTED + 1;
 
-// Flags to indicate if this operation came from the `i`th channel of the memory bus.
-const IS_CHANNEL_START: usize = COUNTER_PERMUTED + 1;
-#[allow(dead_code)]
-pub(crate) const fn is_channel(channel: usize) -> usize {
-    debug_assert!(channel < NUM_CHANNELS);
-    IS_CHANNEL_START + channel
-}
+// Columns to be padded at the top with zeroes, before the permutation argument takes place.
+pub(crate) const COLUMNS_TO_PAD: Range<usize> = TIMESTAMP..RANGE_CHECK + 1;
 
-pub(crate) const NUM_COLUMNS: usize = IS_CHANNEL_START + NUM_CHANNELS;
+pub(crate) const NUM_COLUMNS: usize = COUNTER_PERMUTED + 1;
diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs
index 11fa687a..17da621b 100644
--- a/evm/src/memory/memory_stark.rs
+++ b/evm/src/memory/memory_stark.rs
@@ -11,17 +11,17 @@ use plonky2::timed;
 use plonky2::util::timing::TimingTree;
 use rand::Rng;
 
-use super::columns::is_channel;
 use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
 use crate::cross_table_lookup::Column;
 use crate::lookup::{eval_lookups, eval_lookups_circuit, permuted_cols};
 use crate::memory::columns::{
-    sorted_value_limb, value_limb, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL, CONTEXT_FIRST_CHANGE,
-    COUNTER, COUNTER_PERMUTED, IS_READ, NUM_COLUMNS, RANGE_CHECK, RANGE_CHECK_PERMUTED,
-    SEGMENT_FIRST_CHANGE, SORTED_ADDR_CONTEXT, SORTED_ADDR_SEGMENT, SORTED_ADDR_VIRTUAL,
-    SORTED_IS_READ, SORTED_TIMESTAMP, TIMESTAMP, VIRTUAL_FIRST_CHANGE,
+    is_channel, sorted_value_limb, value_limb, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL,
+    COLUMNS_TO_PAD, CONTEXT_FIRST_CHANGE, COUNTER, COUNTER_PERMUTED, IS_READ, NUM_COLUMNS,
+    RANGE_CHECK, RANGE_CHECK_PERMUTED, SEGMENT_FIRST_CHANGE, SORTED_ADDR_CONTEXT,
+    SORTED_ADDR_SEGMENT, SORTED_ADDR_VIRTUAL, SORTED_IS_READ, SORTED_TIMESTAMP, TIMESTAMP,
+    VIRTUAL_FIRST_CHANGE,
 };
-use crate::memory::NUM_CHANNELS;
+use crate::memory::{NUM_CHANNELS, VALUE_LIMBS};
 use crate::permutation::PermutationPair;
 use crate::stark::Stark;
 use crate::util::trace_rows_to_poly_values;
@@ -30,9 +30,10 @@ use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
 
 pub(crate) const NUM_PUBLIC_INPUTS: usize = 0;
 
 pub fn ctl_data<F: Field>() -> Vec<Column<F>> {
-    let mut res = Column::singles([TIMESTAMP, IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL])
-        .collect_vec();
+    let mut res =
+        Column::singles([IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL]).collect_vec();
     res.extend(Column::singles((0..8).map(value_limb)));
+    res.push(Column::single(TIMESTAMP));
     res
 }
@@ -64,7 +65,7 @@ pub fn generate_random_memory_ops<F: Field, R: Rng>(
     let mut current_memory_values: HashMap<(usize, usize, usize), [F; 8]> = HashMap::new();
     let num_cycles = num_ops / 2;
 
-    for i in 0..num_cycles {
+    for clock in 0..num_cycles {
         let mut used_indices = HashSet::new();
         let mut new_writes_this_cycle = HashMap::new();
         let mut has_read = false;
@@ -75,7 +76,7 @@
         }
         used_indices.insert(channel_index);
 
-        let is_read = if i == 0 {
+        let is_read = if clock == 0 {
             false
         } else {
             !has_read && rng.gen()
@@ -110,9 +111,10 @@ pub fn generate_random_memory_ops<F: Field, R: Rng>(
             (context, segment, virt, vals)
         };
 
+        let timestamp = clock * NUM_CHANNELS + channel_index;
         memory_ops.push(MemoryOp {
             channel_index,
-            timestamp: i,
+            timestamp,
             is_read,
             context,
             segment,
@@ -199,7 +201,7 @@ pub fn generate_range_check_value<F: PrimeField64>(
     context_first_change: &[F],
     segment_first_change: &[F],
     virtual_first_change: &[F],
-) -> Vec<F> {
+) -> (Vec<F>, usize) {
     let num_ops = context.len();
     let mut range_check = Vec::new();
 
@@ -208,7 +210,6 @@
             - context_first_change[idx]
             - segment_first_change[idx]
             - virtual_first_change[idx];
-
         range_check.push(
             context_first_change[idx] * (context[idx + 1] - context[idx] - F::ONE)
                 + segment_first_change[idx] * (segment[idx + 1] - segment[idx] - F::ONE)
@@ -216,10 +217,11 @@
                 + this_address_unchanged * (timestamp[idx + 1] - timestamp[idx] - F::ONE),
         );
     }
-
     range_check.push(F::ZERO);
-    range_check
+
+    let max_diff = range_check.iter().map(F::to_canonical_u64).max().unwrap() as usize;
+
+    (range_check, max_diff)
 }
@@ -253,6 +255,9 @@ impl<F: RichField + Extendable<D>, const D: usize> MemoryStark<F, D> {
         self.generate_memory(&mut trace_cols);
 
+        // The number of rows may have changed if the range check required padding.
+        let num_ops = trace_cols[0].len();
+
         let mut trace_rows = vec![[F::ZERO; NUM_COLUMNS]; num_ops];
         for (i, col) in trace_cols.iter().enumerate() {
             for (j, &val) in col.iter().enumerate() {
@@ -294,7 +299,7 @@
         let (context_first_change, segment_first_change, virtual_first_change) =
             generate_first_change_flags(&sorted_context, &sorted_segment, &sorted_virtual);
 
-        let range_check_value = generate_range_check_value(
+        let (range_check_value, max_diff) = generate_range_check_value(
            &sorted_context,
            &sorted_segment,
            &sorted_virtual,
@@ -303,6 +308,8 @@
             &segment_first_change,
             &virtual_first_change,
         );
+        let to_pad_to = (max_diff + 1).max(num_trace_rows).next_power_of_two();
+        let to_pad = to_pad_to - num_trace_rows;
 
         trace_cols[SORTED_TIMESTAMP] = sorted_timestamp;
         trace_cols[SORTED_IS_READ] = sorted_is_read;
@@ -310,7 +317,7 @@
         trace_cols[SORTED_ADDR_SEGMENT] = sorted_segment;
         trace_cols[SORTED_ADDR_VIRTUAL] = sorted_virtual;
         for i in 0..num_trace_rows {
-            for j in 0..8 {
+            for j in 0..VALUE_LIMBS {
                 trace_cols[sorted_value_limb(j)][i] = sorted_values[i][j];
             }
         }
@@ -320,9 +327,12 @@
         trace_cols[VIRTUAL_FIRST_CHANGE] = virtual_first_change;
 
         trace_cols[RANGE_CHECK] = range_check_value;
-        trace_cols[COUNTER] = (0..num_trace_rows)
-            .map(|i| F::from_canonical_usize(i))
-            .collect();
+
+        for col in COLUMNS_TO_PAD {
+            trace_cols[col].splice(0..0, vec![F::ZERO; to_pad]);
+        }
+
+        trace_cols[COUNTER] = (0..to_pad_to).map(|i| F::from_canonical_usize(i)).collect();
 
         let (permuted_inputs, permuted_table) =
             permuted_cols(&trace_cols[RANGE_CHECK], &trace_cols[COUNTER]);
@@ -382,6 +392,12 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for MemoryStark<F, D>
     fn permutation_pairs(&self) -> Vec<PermutationPair> {
+        let mut unsorted_cols = vec![TIMESTAMP, IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL];
+        unsorted_cols.extend((0..VALUE_LIMBS).map(value_limb));
+        let mut sorted_cols = vec![
+            SORTED_TIMESTAMP,
+            SORTED_IS_READ,
+            SORTED_ADDR_CONTEXT,
+            SORTED_ADDR_SEGMENT,
+            SORTED_ADDR_VIRTUAL,
+        ];
+        sorted_cols.extend((0..VALUE_LIMBS).map(sorted_value_limb));
+        let column_pairs: Vec<_> = unsorted_cols
+            .into_iter()
+            .zip(sorted_cols.iter().cloned())
+            .collect();
         vec![
+            PermutationPair { column_pairs },
             PermutationPair::singletons(RANGE_CHECK, RANGE_CHECK_PERMUTED),
             PermutationPair::singletons(COUNTER, COUNTER_PERMUTED),
         ]
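
The padding rule above has two jobs: the counter column runs over `0..to_pad_to`, so the trace needs at least `max_diff + 1` rows for every range-checked difference to appear among the counter values, and the final height must be a power of two. A small sketch with made-up numbers:

fn to_pad_to(max_diff: usize, num_trace_rows: usize) -> usize {
    (max_diff + 1).max(num_trace_rows).next_power_of_two()
}

fn main() {
    // 100 memory ops, but some range-check value is 300: 128 rows would not
    // let the counter reach 300, so the trace is padded to 512 rows instead.
    assert_eq!(to_pad_to(300, 100), 512);

    // The zero padding is spliced in at the top of each column, so it is in
    // place before `permuted_cols` builds the permutation argument.
    let mut col = vec![1u64, 2, 3];
    let to_pad = 5 - col.len();
    col.splice(0..0, std::iter::repeat(0).take(to_pad));
    assert_eq!(col, [0, 0, 1, 2, 3]);
}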
diff --git a/u32/src/gates/arithmetic_u32.rs b/u32/src/gates/arithmetic_u32.rs
index c46c9e47..c05ed86c 100644
--- a/u32/src/gates/arithmetic_u32.rs
+++ b/u32/src/gates/arithmetic_u32.rs
@@ -36,31 +36,36 @@ impl<F: RichField + Extendable<D>, const D: usize> U32ArithmeticGate<F, D> {
     }
 
     pub(crate) fn num_ops(config: &CircuitConfig) -> usize {
-        let wires_per_op = 5 + Self::num_limbs();
-        let routed_wires_per_op = 5;
-        (config.num_wires / wires_per_op).min(config.num_routed_wires / routed_wires_per_op)
+        let wires_per_op = Self::routed_wires_per_op() + Self::num_limbs();
+        (config.num_wires / wires_per_op).min(config.num_routed_wires / Self::routed_wires_per_op())
     }
 
     pub fn wire_ith_multiplicand_0(&self, i: usize) -> usize {
         debug_assert!(i < self.num_ops);
-        5 * i
+        Self::routed_wires_per_op() * i
     }
     pub fn wire_ith_multiplicand_1(&self, i: usize) -> usize {
         debug_assert!(i < self.num_ops);
-        5 * i + 1
+        Self::routed_wires_per_op() * i + 1
     }
     pub fn wire_ith_addend(&self, i: usize) -> usize {
         debug_assert!(i < self.num_ops);
-        5 * i + 2
+        Self::routed_wires_per_op() * i + 2
     }
 
     pub fn wire_ith_output_low_half(&self, i: usize) -> usize {
         debug_assert!(i < self.num_ops);
-        5 * i + 3
+        Self::routed_wires_per_op() * i + 3
     }
+
     pub fn wire_ith_output_high_half(&self, i: usize) -> usize {
         debug_assert!(i < self.num_ops);
-        5 * i + 4
+        Self::routed_wires_per_op() * i + 4
+    }
+
+    pub fn wire_ith_inverse(&self, i: usize) -> usize {
+        debug_assert!(i < self.num_ops);
+        Self::routed_wires_per_op() * i + 5
     }
 
     pub fn limb_bits() -> usize {
@@ -69,11 +74,13 @@ impl<F: RichField + Extendable<D>, const D: usize> U32ArithmeticGate<F, D> {
     pub fn num_limbs() -> usize {
         64 / Self::limb_bits()
     }
-
+    pub fn routed_wires_per_op() -> usize {
+        6
+    }
     pub fn wire_ith_output_jth_limb(&self, i: usize, j: usize) -> usize {
         debug_assert!(i < self.num_ops);
         debug_assert!(j < Self::num_limbs());
-        5 * self.num_ops + Self::num_limbs() * i + j
+        Self::routed_wires_per_op() * self.num_ops + Self::num_limbs() * i + j
     }
 }
 
@@ -93,9 +100,28 @@ impl<F: RichField + Extendable<D>, const D: usize> Gate<F, D> for U32ArithmeticGate<F, D> {
             let output_low = vars.local_wires[self.wire_ith_output_low_half(i)];
             let output_high = vars.local_wires[self.wire_ith_output_high_half(i)];
+            let inverse = vars.local_wires[self.wire_ith_inverse(i)];
 
-            let base = F::Extension::from_canonical_u64(1 << 32u64);
-            let combined_output = output_high * base + output_low;
+            // Check canonicity of combined_output = output_high * 2^32 + output_low
+            let combined_output = {
+                let base = F::Extension::from_canonical_u64(1 << 32u64);
+                let one = F::Extension::ONE;
+                let u32_max = F::Extension::from_canonical_u32(u32::MAX);
+
+                // This is zero if and only if the high limb is `u32::MAX`.
+                // u32::MAX - output_high
+                let diff = u32_max - output_high;
+                // If this is zero, the diff is invertible, so the high limb is not `u32::MAX`.
+                // inverse * diff - 1
+                let hi_not_max = inverse * diff - one;
+                // If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero.
+                // hi_not_max * limb_0_u32
+                let hi_not_max_or_lo_zero = hi_not_max * output_low;
+
+                constraints.push(hi_not_max_or_lo_zero);
+
+                output_high * base + output_low
+            };
 
             constraints.push(combined_output - computed_output);
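
Why the extra constraint is needed: Goldilocks has order p = 2^64 - 2^32 + 1, so a claimed decomposition with `output_high = u32::MAX` and `output_low >= 1` denotes an integer >= p that wraps around to a small field element, and two different `(high, low)` pairs would encode the same `combined_output`. A plain-`u64` demonstration of the aliasing:

// Goldilocks order.
const P: u64 = 0xFFFF_FFFF_0000_0001;

fn main() {
    let hi = u32::MAX as u64;
    let lo = 1u64;
    // hi * 2^32 + lo = 2^64 - 2^32 + 1 = p, which reduces to 0 mod p.
    let value = (hi << 32) + lo;
    assert_eq!(value % P, 0);
    // So (u32::MAX, 1) and (0, 0) would both pass the old constraint for a
    // computed output of 0; the inverse check rules out the first pair.
}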
@@ -152,10 +178,27 @@
             let output_low = vars.local_wires[self.wire_ith_output_low_half(i)];
             let output_high = vars.local_wires[self.wire_ith_output_high_half(i)];
+            let inverse = vars.local_wires[self.wire_ith_inverse(i)];
 
-            let base: F::Extension = F::from_canonical_u64(1 << 32u64).into();
-            let base_target = builder.constant_extension(base);
-            let combined_output = builder.mul_add_extension(output_high, base_target, output_low);
+            // Check canonicity of combined_output = output_high * 2^32 + output_low
+            let combined_output = {
+                let base: F::Extension = F::from_canonical_u64(1 << 32u64).into();
+                let base_target = builder.constant_extension(base);
+                let one = builder.one_extension();
+                let u32_max =
+                    builder.constant_extension(F::Extension::from_canonical_u32(u32::MAX));
+
+                // This is zero if and only if the high limb is `u32::MAX`.
+                let diff = builder.sub_extension(u32_max, output_high);
+                // If this is zero, the diff is invertible, so the high limb is not `u32::MAX`.
+                let hi_not_max = builder.mul_sub_extension(inverse, diff, one);
+                // If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero.
+                let hi_not_max_or_lo_zero = builder.mul_extension(hi_not_max, output_low);
+
+                constraints.push(hi_not_max_or_lo_zero);
+
+                builder.mul_add_extension(output_high, base_target, output_low)
+            };
 
             constraints.push(builder.sub_extension(combined_output, computed_output));
@@ -211,7 +254,7 @@
     }
 
     fn num_wires(&self) -> usize {
-        self.num_ops * (5 + Self::num_limbs())
+        self.num_ops * (Self::routed_wires_per_op() + Self::num_limbs())
     }
 
     fn num_constants(&self) -> usize {
@@ -223,7 +266,7 @@
     }
 
     fn num_constraints(&self) -> usize {
-        self.num_ops * (3 + Self::num_limbs())
+        self.num_ops * (4 + Self::num_limbs())
     }
 }
 
@@ -244,9 +287,27 @@ impl<F: RichField + Extendable<D>, const D: usize> PackedEvaluableBase<F, D>
             let output_low = vars.local_wires[self.wire_ith_output_low_half(i)];
             let output_high = vars.local_wires[self.wire_ith_output_high_half(i)];
+            let inverse = vars.local_wires[self.wire_ith_inverse(i)];
 
-            let base = F::from_canonical_u64(1 << 32u64);
-            let combined_output = output_high * base + output_low;
+            let combined_output = {
+                let base = P::from(F::from_canonical_u64(1 << 32u64));
+                let one = P::ONES;
+                let u32_max = P::from(F::from_canonical_u32(u32::MAX));
+
+                // This is zero if and only if the high limb is `u32::MAX`.
+                // u32::MAX - output_high
+                let diff = u32_max - output_high;
+                // If this is zero, the diff is invertible, so the high limb is not `u32::MAX`.
+                // inverse * diff - 1
+                let hi_not_max = inverse * diff - one;
+                // If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero.
+                // hi_not_max * limb_0_u32
+                let hi_not_max_or_lo_zero = hi_not_max * output_low;
+
+                yield_constr.one(hi_not_max_or_lo_zero);
+
+                output_high * base + output_low
+            };
 
             yield_constr.one(combined_output - computed_output);
 
@@ -322,6 +383,15 @@ impl<F: RichField + Extendable<D>, const D: usize> SimpleGenerator<F>
         out_buffer.set_wire(output_high_wire, output_high);
         out_buffer.set_wire(output_low_wire, output_low);
 
+        let diff = u32::MAX as u64 - output_high_u64;
+        let inverse = if diff == 0 {
+            F::ZERO
+        } else {
+            F::from_canonical_u64(diff).inverse()
+        };
+        let inverse_wire = local_wire(self.gate.wire_ith_inverse(self.i));
+        out_buffer.set_wire(inverse_wire, inverse);
+
         let num_limbs = U32ArithmeticGate::<F, D>::num_limbs();
         let limb_base = 1 << U32ArithmeticGate::<F, D>::limb_bits();
         let output_limbs_u64 = unfold((), move |_| {
@@ -347,8 +417,10 @@ mod tests {
     use plonky2::gates::gate::Gate;
     use plonky2::gates::gate_testing::{test_eval_fns, test_low_degree};
     use plonky2::hash::hash_types::HashOut;
+    use plonky2::hash::hash_types::RichField;
     use plonky2::plonk::config::{GenericConfig, PoseidonGoldilocksConfig};
     use plonky2::plonk::vars::EvaluationVars;
+    use plonky2_field::extension::Extendable;
     use plonky2_field::goldilocks_field::GoldilocksField;
     use plonky2_field::types::Field;
     use rand::Rng;
@@ -374,6 +446,59 @@ mod tests {
         })
     }
 
+    fn get_wires<
+        F: RichField + Extendable<D>,
+        FF: From<F>,
+        const D: usize,
+        const NUM_U32_ARITHMETIC_OPS: usize,
+    >(
+        multiplicands_0: Vec<u64>,
+        multiplicands_1: Vec<u64>,
+        addends: Vec<u64>,
+    ) -> Vec<FF> {
+        let mut v0 = Vec::new();
+        let mut v1 = Vec::new();
+
+        let limb_bits = U32ArithmeticGate::<F, D>::limb_bits();
+        let num_limbs = U32ArithmeticGate::<F, D>::num_limbs();
+        let limb_base = 1 << limb_bits;
+        for c in 0..NUM_U32_ARITHMETIC_OPS {
+            let m0 = multiplicands_0[c];
+            let m1 = multiplicands_1[c];
+            let a = addends[c];
+
+            let mut output = m0 * m1 + a;
+            let output_low = output & ((1 << 32) - 1);
+            let output_high = output >> 32;
+            let diff = u32::MAX as u64 - output_high;
+            let inverse = if diff == 0 {
+                F::ZERO
+            } else {
+                F::from_canonical_u64(diff).inverse()
+            };
+
+            let mut output_limbs = Vec::with_capacity(num_limbs);
+            for _i in 0..num_limbs {
+                output_limbs.push(output % limb_base);
+                output /= limb_base;
+            }
+            let mut output_limbs_f: Vec<_> = output_limbs
+                .into_iter()
+                .map(F::from_canonical_u64)
+                .collect();
+
+            v0.push(F::from_canonical_u64(m0));
+            v0.push(F::from_canonical_u64(m1));
+            v0.push(F::from_noncanonical_u64(a));
+            v0.push(F::from_canonical_u64(output_low));
+            v0.push(F::from_canonical_u64(output_high));
+            v0.push(inverse);
+            v1.append(&mut output_limbs_f);
+        }
+
+        v0.iter().chain(v1.iter()).map(|&x| x.into()).collect()
+    }
+
     #[test]
     fn test_gate_constraint() {
         const D: usize = 2;
@@ -382,47 +507,6 @@ mod tests {
         type C = PoseidonGoldilocksConfig;
         type F = <C as GenericConfig<D>>::F;
         type FF = <C as GenericConfig<D>>::FE;
         const NUM_U32_ARITHMETIC_OPS: usize = 3;
 
-        fn get_wires(
-            multiplicands_0: Vec<u64>,
-            multiplicands_1: Vec<u64>,
-            addends: Vec<u64>,
-        ) -> Vec<FF> {
-            let mut v0 = Vec::new();
-            let mut v1 = Vec::new();
-
-            let limb_bits = U32ArithmeticGate::<F, D>::limb_bits();
-            let num_limbs = U32ArithmeticGate::<F, D>::num_limbs();
-            let limb_base = 1 << limb_bits;
-            for c in 0..NUM_U32_ARITHMETIC_OPS {
-                let m0 = multiplicands_0[c];
-                let m1 = multiplicands_1[c];
-                let a = addends[c];
-
-                let mut output = m0 * m1 + a;
-                let output_low = output & ((1 << 32) - 1);
-                let output_high = output >> 32;
-
-                let mut output_limbs = Vec::with_capacity(num_limbs);
-                for _i in 0..num_limbs {
-                    output_limbs.push(output % limb_base);
-                    output /= limb_base;
-                }
-                let mut output_limbs_f: Vec<_> = output_limbs
-                    .into_iter()
-                    .map(F::from_canonical_u64)
-                    .collect();
-
-                v0.push(F::from_canonical_u64(m0));
-                v0.push(F::from_canonical_u64(m1));
-                v0.push(F::from_canonical_u64(a));
-                v0.push(F::from_canonical_u64(output_low));
-                v0.push(F::from_canonical_u64(output_high));
-                v1.append(&mut output_limbs_f);
-            }
-
-            v0.iter().chain(v1.iter()).map(|&x| x.into()).collect()
-        }
-
         let mut rng = rand::thread_rng();
         let multiplicands_0: Vec<_> = (0..NUM_U32_ARITHMETIC_OPS)
             .map(|_| rng.gen::<u32>() as u64)
             .collect();
@@ -441,7 +525,11 @@
         let vars = EvaluationVars {
             local_constants: &[],
-            local_wires: &get_wires(multiplicands_0, multiplicands_1, addends),
+            local_wires: &get_wires::<F, FF, D, NUM_U32_ARITHMETIC_OPS>(
+                multiplicands_0,
+                multiplicands_1,
+                addends,
+            ),
             public_inputs_hash: &HashOut::rand(),
         };
@@ -450,4 +538,39 @@
         assert!(
             gate.eval_unfiltered(vars).iter().all(|x| x.is_zero()),
             "Gate constraints are not satisfied."
         );
     }
+
+    #[test]
+    fn test_canonicity() {
+        const D: usize = 2;
+        type C = PoseidonGoldilocksConfig;
+        type F = <C as GenericConfig<D>>::F;
+        type FF = <C as GenericConfig<D>>::FE;
+        const NUM_U32_ARITHMETIC_OPS: usize = 3;
+
+        let multiplicands_0 = vec![0; NUM_U32_ARITHMETIC_OPS];
+        let multiplicands_1 = vec![0; NUM_U32_ARITHMETIC_OPS];
+        // A non-canonical addend will produce a non-canonical output using
+        // get_wires.
+        let addends = vec![0xFFFFFFFF00000001; NUM_U32_ARITHMETIC_OPS];
+
+        let gate = U32ArithmeticGate::<F, D> {
+            num_ops: NUM_U32_ARITHMETIC_OPS,
+            _phantom: PhantomData,
+        };
+
+        let vars = EvaluationVars {
+            local_constants: &[],
+            local_wires: &get_wires::<F, FF, D, NUM_U32_ARITHMETIC_OPS>(
+                multiplicands_0,
+                multiplicands_1,
+                addends,
+            ),
+            public_inputs_hash: &HashOut::rand(),
+        };
+
+        assert!(
+            !gate.eval_unfiltered(vars).iter().all(|x| x.is_zero()),
+            "Non-canonical output should not pass constraints."
+        );
+    }
 }
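
The gate's canonicity argument is the standard "prove a value nonzero by exhibiting its inverse" trick: the prover supplies a witness `inverse`, and `inverse * (u32::MAX - output_high) - 1` can only vanish when `u32::MAX - output_high` is itself nonzero; multiplying by `output_low` then forces `output_low = 0` whenever `output_high = u32::MAX`. A standalone sketch over `u64` modular arithmetic (the modular helpers are ad hoc stand-ins, not the gate's field code):

const P: u64 = 0xFFFF_FFFF_0000_0001; // Goldilocks order

fn mul(a: u64, b: u64) -> u64 {
    ((a as u128 * b as u128) % P as u128) as u64
}
fn sub(a: u64, b: u64) -> u64 {
    ((a as u128 + P as u128 - b as u128) % P as u128) as u64
}
fn inv(a: u64) -> u64 {
    // Fermat inverse a^(p-2); requires a != 0.
    let (mut base, mut e, mut acc) = (a, P - 2, 1u64);
    while e > 0 {
        if e & 1 == 1 {
            acc = mul(acc, base);
        }
        base = mul(base, base);
        e >>= 1;
    }
    acc
}

// The gate's constraint: zero iff output_high != u32::MAX or output_low == 0.
fn canonicity_constraint(hi: u64, lo: u64, inverse: u64) -> u64 {
    let diff = sub(u32::MAX as u64, hi);
    let hi_not_max = sub(mul(inverse, diff), 1);
    mul(hi_not_max, lo)
}

fn main() {
    // Canonical case: hi != u32::MAX; the honest witness satisfies the constraint.
    let (hi, lo) = (5, 123);
    let witness = inv(sub(u32::MAX as u64, hi));
    assert_eq!(canonicity_constraint(hi, lo, witness), 0);

    // Non-canonical case: hi == u32::MAX and lo != 0. Then diff = 0, so
    // inverse * diff - 1 = -1 for every possible witness, and the constraint
    // evaluates to -lo, which is nonzero.
    assert_ne!(canonicity_constraint(u32::MAX as u64, 1, 0), 0);
}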