From 0802d6c0211cb9873bbf33bde06bb077a28c02b4 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Thu, 14 Jul 2022 11:31:47 -0700 Subject: [PATCH] Continue work on bootstrapping The kernel is hashed using a Keccak-based sponge for now. We could switch to Poseidon later if our kernel grows too large. Note that we use simple zero-padding (pad0*) instead of the standard pad10*1 rule. It's simpler, and we don't care that the prover can add extra 0s at the end of the code. The program counter can never reach those bytes, and even if it could, they'd be 0 anyway given the EVM's zero-initialization rule. In one CPU row, we can do a whole Keccak hash (via the CTL), absorbing 136 bytes. But we can't actually bootstrap that many bytes of kernel code in one row, because we're also limited by memory bandwidth. Currently we can write 4 bytes of the kernel to memory in one row. So we treat the `keccak_input_limbs` columns as a buffer. We gradually fill up this buffer, 4 bytes (one `u32` word) at a time. Every `136 / 4 = 34` rows, the buffer will be full, so at that point we activate the Keccak CTL to absorb the buffer.
--- evm/Cargo.toml | 1 + evm/src/cpu/bootstrap_kernel.rs | 110 ++++++++++++++++++++++++------ evm/src/cpu/columns.rs | 1 + evm/src/cpu/kernel/aggregator.rs | 3 + evm/src/cpu/kernel/assembler.rs | 27 +++++--- evm/src/cpu/kernel/keccak_util.rs | 14 ++++ evm/src/cpu/kernel/mod.rs | 1 + evm/src/generation/mod.rs | 2 +- evm/src/generation/state.rs | 29 ++++---- 9 files changed, 144 insertions(+), 44 deletions(-) create mode 100644 evm/src/cpu/kernel/keccak_util.rs diff --git a/evm/Cargo.toml b/evm/Cargo.toml index facf300b..e8dcfdfe 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -13,6 +13,7 @@ ethereum-types = "0.13.1" hex = { version = "0.4.3", optional = true } itertools = "0.10.3" log = "0.4.14" +once_cell = "1.13.0" pest = "2.1.3" pest_derive = "2.1.0" rayon = "1.5.1" diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs index ba10b70e..3d5d4f1c 100644 --- a/evm/src/cpu/bootstrap_kernel.rs +++ b/evm/src/cpu/bootstrap_kernel.rs @@ -1,33 +1,74 @@ //! The initial phase of execution, where the kernel code is hashed while being written to memory. //! The hash is then checked against a precomputed kernel hash. 
+use std::borrow::Borrow; + use itertools::Itertools; use plonky2::field::extension::Extendable; use plonky2::field::packed::PackedField; use plonky2::field::types::Field; use plonky2::hash::hash_types::RichField; use plonky2::plonk::circuit_builder::CircuitBuilder; +use plonky2_util::ceil_div_usize; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::cpu::columns::{COL_MAP, NUM_CPU_COLUMNS}; +use crate::cpu::columns::{CpuColumnsView, NUM_CPU_COLUMNS}; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::keccak_util::keccakf_u32s; use crate::cpu::public_inputs::NUM_PUBLIC_INPUTS; use crate::generation::state::GenerationState; use crate::memory; -use crate::memory::segments; +use crate::memory::{segments, NUM_CHANNELS}; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; +/// The Keccak rate (1088 bits), measured in bytes. +const KECCAK_RATE_BYTES: usize = 1088 / 8; + +/// The Keccak rate (1088 bits), measured in u32 limbs. +const KECCAK_RATE_LIMBS: usize = 1088 / 32; + +/// We can't process more than `NUM_CHANNELS` bytes per row, since that's all the memory bandwidth +/// we have. We also can't process more than 4 bytes (or the number of bytes in a `u32`), since we +/// want them to fit in a single limb of Keccak input. +const BYTES_PER_ROW: usize = 4; + pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState) { - for chunk in &state.kernel.code.clone().into_iter().enumerate().chunks(4) { + let mut code = KERNEL.code.clone(); + + // Zero-pad the code such that its size is a multiple of the Keccak rate. + let padded_size = ceil_div_usize(code.len(), KECCAK_RATE_BYTES) * KECCAK_RATE_BYTES; + code.resize(padded_size, 0); + + let mut sponge_state = [0u32; 50]; + let mut sponge_input_pos: usize = 0; + + // Iterate through chunks of the code, such that we can write one chunk to memory per row. 
+ for chunk in &code.into_iter().enumerate().chunks(BYTES_PER_ROW) { + state.current_cpu_row.is_bootstrap_kernel = F::ONE; + + // Write this chunk to memory, while simultaneously packing its bytes into a u32 word. + let mut packed_bytes: u32 = 0; for (addr, byte) in chunk { let mut value = [F::ZERO; memory::VALUE_LIMBS]; value[0] = F::from_canonical_u8(byte); - let channel = addr % memory::NUM_CHANNELS; + let channel = addr % NUM_CHANNELS; state.set_mem_current(channel, segments::CODE, addr, value); - // TODO: Set other registers. + packed_bytes = (packed_bytes << 8) | byte as u32; + } - state.commit_cpu_row(); + sponge_state[sponge_input_pos] = packed_bytes; + state.current_cpu_row.keccak_input_limbs = sponge_state.map(F::from_canonical_u32); + state.commit_cpu_row(); + + sponge_input_pos = (sponge_input_pos + 1) % KECCAK_RATE_LIMBS; + // If we just crossed a multiple of KECCAK_RATE_LIMBS, then we've filled the Keccak input + // buffer, so it's time to absorb. + if sponge_input_pos == 0 { + state.current_cpu_row.is_keccak = F::ONE; + keccakf_u32s(&mut sponge_state); + state.current_cpu_row.keccak_output_limbs = sponge_state.map(F::from_canonical_u32); } } } @@ -36,18 +77,34 @@ pub(crate) fn eval_bootstrap_kernel>( vars: StarkEvaluationVars, yield_constr: &mut ConstraintConsumer

, ) { + let local_values: &CpuColumnsView<_> = vars.local_values.borrow(); + let next_values: &CpuColumnsView<_> = vars.next_values.borrow(); + // IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}. - let local_is_bootstrap = vars.local_values[COL_MAP.is_bootstrap_kernel]; - let next_is_bootstrap = vars.next_values[COL_MAP.is_bootstrap_kernel]; + let local_is_bootstrap = local_values.is_bootstrap_kernel; + let next_is_bootstrap = next_values.is_bootstrap_kernel; yield_constr.constraint_first_row(local_is_bootstrap - P::ONES); yield_constr.constraint_last_row(local_is_bootstrap); let delta_is_bootstrap = next_is_bootstrap - local_is_bootstrap; yield_constr.constraint_transition(delta_is_bootstrap * (delta_is_bootstrap + P::ONES)); - // If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that the current kernel hash matches a - // precomputed one. - let hash_diff = F::ZERO; // TODO - yield_constr.constraint_transition(delta_is_bootstrap * hash_diff) + // TODO: Constraints to enforce that, if IS_BOOTSTRAP_KERNEL, + // - If CLOCK is a multiple of KECCAK_RATE_LIMBS, activate the Keccak CTL, and ensure the output + // is copied to the next row (besides the first limb which will immediately be overwritten). + // - Otherwise, ensure that the Keccak input is copied to the next row (besides the next limb). 
+ + // If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that + // - the clock is a multiple of KECCAK_RATE_LIMBS (TODO) + // - the current kernel hash matches a precomputed one + for (&expected, actual) in KERNEL + .code_hash + .iter() + .zip(local_values.keccak_output_limbs) + { + let expected = P::from(F::from_canonical_u32(expected)); + let diff = expected - actual; + yield_constr.constraint_transition(delta_is_bootstrap * diff); + } } pub(crate) fn eval_bootstrap_kernel_circuit, const D: usize>( @@ -55,11 +112,13 @@ pub(crate) fn eval_bootstrap_kernel_circuit, const vars: StarkEvaluationTargets, yield_constr: &mut RecursiveConstraintConsumer, ) { + let local_values: &CpuColumnsView<_> = vars.local_values.borrow(); + let next_values: &CpuColumnsView<_> = vars.next_values.borrow(); let one = builder.one_extension(); // IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}. - let local_is_bootstrap = vars.local_values[COL_MAP.is_bootstrap_kernel]; - let next_is_bootstrap = vars.next_values[COL_MAP.is_bootstrap_kernel]; + let local_is_bootstrap = local_values.is_bootstrap_kernel; + let next_is_bootstrap = next_values.is_bootstrap_kernel; let constraint = builder.sub_extension(local_is_bootstrap, one); yield_constr.constraint_first_row(builder, constraint); yield_constr.constraint_last_row(builder, local_is_bootstrap); @@ -68,9 +127,22 @@ pub(crate) fn eval_bootstrap_kernel_circuit, const builder.mul_add_extension(delta_is_bootstrap, delta_is_bootstrap, delta_is_bootstrap); yield_constr.constraint_transition(builder, constraint); - // If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that the current kernel hash matches a - // precomputed one. 
- let hash_diff = builder.zero_extension(); // TODO - let constraint = builder.mul_extension(delta_is_bootstrap, hash_diff); - yield_constr.constraint_transition(builder, constraint) + // TODO: Constraints to enforce that, if IS_BOOTSTRAP_KERNEL, + // - If CLOCK is a multiple of KECCAK_RATE_LIMBS, activate the Keccak CTL, and ensure the output + // is copied to the next row (besides the first limb which will immediately be overwritten). + // - Otherwise, ensure that the Keccak input is copied to the next row (besides the next limb). + + // If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that + // - the clock is a multiple of KECCAK_RATE_LIMBS (TODO) + // - the current kernel hash matches a precomputed one + for (&expected, actual) in KERNEL + .code_hash + .iter() + .zip(local_values.keccak_output_limbs) + { + let expected = builder.constant_extension(F::Extension::from_canonical_u32(expected)); + let diff = builder.sub_extension(expected, actual); + let constraint = builder.mul_extension(delta_is_bootstrap, diff); + yield_constr.constraint_transition(builder, constraint); + } } diff --git a/evm/src/cpu/columns.rs b/evm/src/cpu/columns.rs index 1fce6cb7..f3a400c6 100644 --- a/evm/src/cpu/columns.rs +++ b/evm/src/cpu/columns.rs @@ -8,6 +8,7 @@ use std::ops::{Index, IndexMut}; use crate::memory; #[repr(C)] +#[derive(Eq, PartialEq, Debug)] pub struct CpuColumnsView { /// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise. 
pub is_bootstrap_kernel: T, diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 04d57e75..6ca88ba1 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -4,10 +4,13 @@ use std::collections::HashMap; use ethereum_types::U256; use itertools::Itertools; +use once_cell::sync::Lazy; use super::assembler::{assemble, Kernel}; use crate::cpu::kernel::parser::parse; +pub static KERNEL: Lazy = Lazy::new(combined_kernel); + pub fn evm_constants() -> HashMap { let mut c = HashMap::new(); c.insert("SEGMENT_ID_TXN_DATA".into(), 0.into()); // TODO: Replace with actual segment ID. diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs index bdc8ded4..c3ccea7a 100644 --- a/evm/src/cpu/kernel/assembler.rs +++ b/evm/src/cpu/kernel/assembler.rs @@ -6,6 +6,7 @@ use log::debug; use super::ast::PushTarget; use crate::cpu::kernel::ast::Literal; +use crate::cpu::kernel::keccak_util::hash_kernel; use crate::cpu::kernel::{ ast::{File, Item}, opcodes::{get_opcode, get_push_opcode}, @@ -19,9 +20,25 @@ const BYTES_PER_OFFSET: u8 = 3; #[derive(PartialEq, Eq, Debug)] pub struct Kernel { pub(crate) code: Vec, + + /// Computed using `hash_kernel`. It is encoded as `u32` limbs for convenience, since we deal + /// with `u32` limbs in our Keccak table. 
+ pub(crate) code_hash: [u32; 8], + pub(crate) global_labels: HashMap, } +impl Kernel { + fn new(code: Vec, global_labels: HashMap) -> Self { + let code_hash = hash_kernel(&code); + Self { + code, + code_hash, + global_labels, + } + } +} + struct Macro { params: Vec, items: Vec, @@ -56,10 +73,7 @@ pub(crate) fn assemble(files: Vec, constants: HashMap) -> Ke debug!("Assembled file size: {} bytes", file_len); } assert_eq!(code.len(), offset, "Code length doesn't match offset."); - Kernel { - code, - global_labels, - } + Kernel::new(code, global_labels) } fn find_macros(files: &[File]) -> HashMap { @@ -286,10 +300,7 @@ mod tests { expected_global_labels.insert("function_1".to_string(), 0); expected_global_labels.insert("function_2".to_string(), 3); - let expected_kernel = Kernel { - code: expected_code, - global_labels: expected_global_labels, - }; + let expected_kernel = Kernel::new(expected_code, expected_global_labels); let program = vec![file_1, file_2]; assert_eq!(assemble(program, HashMap::new()), expected_kernel); diff --git a/evm/src/cpu/kernel/keccak_util.rs b/evm/src/cpu/kernel/keccak_util.rs new file mode 100644 index 00000000..1498ba08 --- /dev/null +++ b/evm/src/cpu/kernel/keccak_util.rs @@ -0,0 +1,14 @@ +/// A Keccak-f based hash. +/// +/// This hash does not use standard Keccak padding, since we don't care about extra zeros at the +/// end of the code. +pub(crate) fn hash_kernel(_code: &[u8]) -> [u32; 8] { + let state = [0u32; 50]; + // TODO: absorb code + state[0..8].try_into().unwrap() +} + +/// Like tiny-keccak's `keccakf`, but deals with `u32` limbs instead of `u64` limbs. 
+pub(crate) fn keccakf_u32s(_state: &mut [u32; 50]) { + // TODO: Implement +} diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs index ae765b99..bc7608dd 100644 --- a/evm/src/cpu/kernel/mod.rs +++ b/evm/src/cpu/kernel/mod.rs @@ -1,6 +1,7 @@ pub mod aggregator; pub mod assembler; mod ast; +pub(crate) mod keccak_util; mod opcodes; mod parser; diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index 4eb3cbbf..edca27dd 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -46,7 +46,7 @@ pub fn generate_traces, const D: usize>( logic_ops: logic_inputs, .. } = state; - assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS]); + assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS].into()); let cpu_trace = trace_rows_to_poly_values(cpu_rows); let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs); diff --git a/evm/src/generation/state.rs b/evm/src/generation/state.rs index 1b26a3c4..7a95f7e4 100644 --- a/evm/src/generation/state.rs +++ b/evm/src/generation/state.rs @@ -1,20 +1,17 @@ +use std::mem; + use ethereum_types::U256; use plonky2::field::types::Field; -use crate::cpu::columns::NUM_CPU_COLUMNS; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::assembler::Kernel; +use crate::cpu::columns::{CpuColumnsView, NUM_CPU_COLUMNS}; use crate::generation::memory::MemoryState; -use crate::logic::{Op, Operation}; use crate::memory::memory_stark::MemoryOp; use crate::{keccak, logic}; #[derive(Debug)] pub(crate) struct GenerationState { - pub(crate) kernel: Kernel, - pub(crate) cpu_rows: Vec<[F; NUM_CPU_COLUMNS]>, - pub(crate) current_cpu_row: [F; NUM_CPU_COLUMNS], + pub(crate) current_cpu_row: CpuColumnsView, pub(crate) current_context: usize, pub(crate) memory: MemoryState, @@ -27,24 +24,24 @@ impl GenerationState { /// Compute logical AND, and record the operation to be added in the logic table later. #[allow(unused)] // TODO: Should be used soon. 
pub(crate) fn and(&mut self, input0: U256, input1: U256) -> U256 { - self.logic_op(Op::And, input0, input1) + self.logic_op(logic::Op::And, input0, input1) } /// Compute logical OR, and record the operation to be added in the logic table later. #[allow(unused)] // TODO: Should be used soon. pub(crate) fn or(&mut self, input0: U256, input1: U256) -> U256 { - self.logic_op(Op::Or, input0, input1) + self.logic_op(logic::Op::Or, input0, input1) } /// Compute logical XOR, and record the operation to be added in the logic table later. #[allow(unused)] // TODO: Should be used soon. pub(crate) fn xor(&mut self, input0: U256, input1: U256) -> U256 { - self.logic_op(Op::Xor, input0, input1) + self.logic_op(logic::Op::Xor, input0, input1) } /// Compute logical AND, and record the operation to be added in the logic table later. - pub(crate) fn logic_op(&mut self, op: Op, input0: U256, input1: U256) -> U256 { - let operation = Operation::new(op, input0, input1); + pub(crate) fn logic_op(&mut self, op: logic::Op, input0: U256, input1: U256) -> U256 { + let operation = logic::Operation::new(op, input0, input1); let result = operation.result; self.logic_ops.push(operation); result @@ -96,8 +93,9 @@ impl GenerationState { } pub(crate) fn commit_cpu_row(&mut self) { - self.cpu_rows.push(self.current_cpu_row); - self.current_cpu_row = [F::ZERO; NUM_CPU_COLUMNS]; + let mut swapped_row = [F::ZERO; NUM_CPU_COLUMNS].into(); + mem::swap(&mut self.current_cpu_row, &mut swapped_row); + self.cpu_rows.push(swapped_row.into()); } } @@ -106,9 +104,8 @@ impl GenerationState { impl Default for GenerationState { fn default() -> Self { Self { - kernel: combined_kernel(), cpu_rows: vec![], - current_cpu_row: [F::ZERO; NUM_CPU_COLUMNS], + current_cpu_row: [F::ZERO; NUM_CPU_COLUMNS].into(), current_context: 0, memory: MemoryState::default(), keccak_inputs: vec![],