Merge pull request #612 from mir-protocol/bootstrapping_continued

Continue work on bootstrapping
This commit is contained in:
Daniel Lubarov 2022-07-15 13:03:16 -07:00 committed by GitHub
commit 83643aa584
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 146 additions and 44 deletions

View File

@ -13,6 +13,7 @@ ethereum-types = "0.13.1"
hex = { version = "0.4.3", optional = true }
itertools = "0.10.3"
log = "0.4.14"
once_cell = "1.13.0"
pest = "2.1.3"
pest_derive = "2.1.0"
rayon = "1.5.1"

View File

@ -1,33 +1,74 @@
//! The initial phase of execution, where the kernel code is hashed while being written to memory.
//! The hash is then checked against a precomputed kernel hash.
use std::borrow::Borrow;
use itertools::Itertools;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2_util::ceil_div_usize;
use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::{COL_MAP, NUM_CPU_COLUMNS};
use crate::cpu::columns::{CpuColumnsView, NUM_CPU_COLUMNS};
use crate::cpu::kernel::aggregator::KERNEL;
use crate::cpu::kernel::keccak_util::keccakf_u32s;
use crate::cpu::public_inputs::NUM_PUBLIC_INPUTS;
use crate::generation::state::GenerationState;
use crate::memory;
use crate::memory::segments;
use crate::memory::{segments, NUM_CHANNELS};
use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
/// The Keccak rate (1088 bits), measured in bytes.
const KECCAK_RATE_BYTES: usize = 1088 / 8;
/// The Keccak rate (1088 bits), measured in u32 limbs.
const KECCAK_RATE_LIMBS: usize = 1088 / 32;
/// We can't process more than `NUM_CHANNELS` bytes per row, since that's all the memory bandwidth
/// we have. We also can't process more than 4 bytes (or the number of bytes in a `u32`), since we
/// want them to fit in a single limb of Keccak input.
const BYTES_PER_ROW: usize = 4;
/// Generates the CPU rows for the bootstrap phase.
///
/// The kernel code is zero-padded to a multiple of the Keccak rate and then written to memory
/// `BYTES_PER_ROW` bytes per row. Each row's bytes are also packed into one `u32` limb of the
/// Keccak sponge input; once `KECCAK_RATE_LIMBS` limbs have been filled, the sponge state is
/// permuted with `keccakf_u32s`.
pub(crate) fn generate_bootstrap_kernel<F: Field>(state: &mut GenerationState<F>) {
for chunk in &state.kernel.code.clone().into_iter().enumerate().chunks(4) {
let mut code = KERNEL.code.clone();
// Zero-pad the code such that its size is a multiple of the Keccak rate.
let padded_size = ceil_div_usize(code.len(), KECCAK_RATE_BYTES) * KECCAK_RATE_BYTES;
code.resize(padded_size, 0);
// The sponge state as `u32` limbs; inputs are written into its first `KECCAK_RATE_LIMBS` limbs.
let mut sponge_state = [0u32; 50];
// Index of the next input limb to write; wraps back to 0 after each absorb.
let mut sponge_input_pos: usize = 0;
// Iterate through chunks of the code, such that we can write one chunk to memory per row.
for chunk in &code.into_iter().enumerate().chunks(BYTES_PER_ROW) {
state.current_cpu_row.is_bootstrap_kernel = F::ONE;
// Write this chunk to memory, while simultaneously packing its bytes into a u32 word.
let mut packed_bytes: u32 = 0;
for (addr, byte) in chunk {
// Only the first value limb carries the byte; the remaining limbs stay zero.
let mut value = [F::ZERO; memory::VALUE_LIMBS];
value[0] = F::from_canonical_u8(byte);
let channel = addr % memory::NUM_CHANNELS;
let channel = addr % NUM_CHANNELS;
state.set_mem_current(channel, segments::CODE, addr, value);
// TODO: Set other registers.
// Bytes are packed most-significant first: earlier bytes end up in the higher bits.
packed_bytes = (packed_bytes << 8) | byte as u32;
}
state.commit_cpu_row();
// The new limb overwrites whatever was previously stored at this sponge position.
sponge_state[sponge_input_pos] = packed_bytes;
state.current_cpu_row.keccak_input_limbs = sponge_state.map(F::from_canonical_u32);
state.commit_cpu_row();
sponge_input_pos = (sponge_input_pos + 1) % KECCAK_RATE_LIMBS;
// If we just crossed a multiple of KECCAK_RATE_LIMBS, then we've filled the Keccak input
// buffer, so it's time to absorb.
if sponge_input_pos == 0 {
// NOTE(review): `commit_cpu_row` has already reset `current_cpu_row` at this point, so the
// fields below land on the row *after* the one holding the last input limb. For the final
// absorb that row is not committed by this function — confirm this is intended.
state.current_cpu_row.is_keccak = F::ONE;
keccakf_u32s(&mut sponge_state);
state.current_cpu_row.keccak_output_limbs = sponge_state.map(F::from_canonical_u32);
}
}
}
@ -36,18 +77,35 @@ pub(crate) fn eval_bootstrap_kernel<F: Field, P: PackedField<Scalar = F>>(
vars: StarkEvaluationVars<F, P, NUM_CPU_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let local_values: &CpuColumnsView<_> = vars.local_values.borrow();
let next_values: &CpuColumnsView<_> = vars.next_values.borrow();
// IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}.
let local_is_bootstrap = vars.local_values[COL_MAP.is_bootstrap_kernel];
let next_is_bootstrap = vars.next_values[COL_MAP.is_bootstrap_kernel];
let local_is_bootstrap = local_values.is_bootstrap_kernel;
let next_is_bootstrap = next_values.is_bootstrap_kernel;
yield_constr.constraint_first_row(local_is_bootstrap - P::ONES);
yield_constr.constraint_last_row(local_is_bootstrap);
let delta_is_bootstrap = next_is_bootstrap - local_is_bootstrap;
yield_constr.constraint_transition(delta_is_bootstrap * (delta_is_bootstrap + P::ONES));
// If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that the current kernel hash matches a
// precomputed one.
let hash_diff = F::ZERO; // TODO
yield_constr.constraint_transition(delta_is_bootstrap * hash_diff)
// TODO: Constraints to enforce that, if IS_BOOTSTRAP_KERNEL,
// - If CLOCK is a multiple of KECCAK_RATE_LIMBS, activate the Keccak CTL, and ensure the output
// is copied to the next row (besides the first limb which will immediately be overwritten).
// - Otherwise, ensure that the Keccak input is copied to the next row (besides the next limb).
// - The next limb we add to the buffer is also written to memory.
// If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that
// - the clock is a multiple of KECCAK_RATE_LIMBS (TODO)
// - the current kernel hash matches a precomputed one
for (&expected, actual) in KERNEL
.code_hash
.iter()
.zip(local_values.keccak_output_limbs)
{
let expected = P::from(F::from_canonical_u32(expected));
let diff = expected - actual;
yield_constr.constraint_transition(delta_is_bootstrap * diff);
}
}
pub(crate) fn eval_bootstrap_kernel_circuit<F: RichField + Extendable<D>, const D: usize>(
@ -55,11 +113,13 @@ pub(crate) fn eval_bootstrap_kernel_circuit<F: RichField + Extendable<D>, const
vars: StarkEvaluationTargets<D, NUM_CPU_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let local_values: &CpuColumnsView<_> = vars.local_values.borrow();
let next_values: &CpuColumnsView<_> = vars.next_values.borrow();
let one = builder.one_extension();
// IS_BOOTSTRAP_KERNEL must have an init value of 1, a final value of 0, and a delta in {0, -1}.
let local_is_bootstrap = vars.local_values[COL_MAP.is_bootstrap_kernel];
let next_is_bootstrap = vars.next_values[COL_MAP.is_bootstrap_kernel];
let local_is_bootstrap = local_values.is_bootstrap_kernel;
let next_is_bootstrap = next_values.is_bootstrap_kernel;
let constraint = builder.sub_extension(local_is_bootstrap, one);
yield_constr.constraint_first_row(builder, constraint);
yield_constr.constraint_last_row(builder, local_is_bootstrap);
@ -68,9 +128,23 @@ pub(crate) fn eval_bootstrap_kernel_circuit<F: RichField + Extendable<D>, const
builder.mul_add_extension(delta_is_bootstrap, delta_is_bootstrap, delta_is_bootstrap);
yield_constr.constraint_transition(builder, constraint);
// If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that the current kernel hash matches a
// precomputed one.
let hash_diff = builder.zero_extension(); // TODO
let constraint = builder.mul_extension(delta_is_bootstrap, hash_diff);
yield_constr.constraint_transition(builder, constraint)
// TODO: Constraints to enforce that, if IS_BOOTSTRAP_KERNEL,
// - If CLOCK is a multiple of KECCAK_RATE_LIMBS, activate the Keccak CTL, and ensure the output
// is copied to the next row (besides the first limb which will immediately be overwritten).
// - Otherwise, ensure that the Keccak input is copied to the next row (besides the next limb).
// - The next limb we add to the buffer is also written to memory.
// If IS_BOOTSTRAP_KERNEL changed (from 1 to 0), check that
// - the clock is a multiple of KECCAK_RATE_LIMBS (TODO)
// - the current kernel hash matches a precomputed one
for (&expected, actual) in KERNEL
.code_hash
.iter()
.zip(local_values.keccak_output_limbs)
{
let expected = builder.constant_extension(F::Extension::from_canonical_u32(expected));
let diff = builder.sub_extension(expected, actual);
let constraint = builder.mul_extension(delta_is_bootstrap, diff);
yield_constr.constraint_transition(builder, constraint);
}
}

View File

@ -8,6 +8,7 @@ use std::ops::{Index, IndexMut};
use crate::memory;
#[repr(C)]
#[derive(Eq, PartialEq, Debug)]
pub struct CpuColumnsView<T> {
/// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise.
pub is_bootstrap_kernel: T,

View File

@ -4,10 +4,13 @@ use std::collections::HashMap;
use ethereum_types::U256;
use itertools::Itertools;
use once_cell::sync::Lazy;
use super::assembler::{assemble, Kernel};
use crate::cpu::kernel::parser::parse;
/// The process-wide kernel instance, built by `combined_kernel` the first time it is accessed and
/// reused afterwards.
pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
pub fn evm_constants() -> HashMap<String, U256> {
let mut c = HashMap::new();
c.insert("SEGMENT_ID_TXN_DATA".into(), 0.into()); // TODO: Replace with actual segment ID.

View File

@ -6,6 +6,7 @@ use log::debug;
use super::ast::PushTarget;
use crate::cpu::kernel::ast::Literal;
use crate::cpu::kernel::keccak_util::hash_kernel;
use crate::cpu::kernel::{
ast::{File, Item},
opcodes::{get_opcode, get_push_opcode},
@ -19,9 +20,25 @@ const BYTES_PER_OFFSET: u8 = 3;
/// The assembler's output: the kernel's code bytes together with their hash and global labels.
#[derive(PartialEq, Eq, Debug)]
pub struct Kernel {
/// The kernel's code bytes, exactly as handed to `Kernel::new`.
pub(crate) code: Vec<u8>,
/// Computed using `hash_kernel`. It is encoded as `u32` limbs for convenience, since we deal
/// with `u32` limbs in our Keccak table.
pub(crate) code_hash: [u32; 8],
/// Global labels, keyed by name. Presumably each value is the label's byte offset into `code`
/// — TODO confirm against the assembler.
pub(crate) global_labels: HashMap<String, usize>,
}
impl Kernel {
    /// Builds a `Kernel` from its code bytes and global labels, deriving `code_hash` from the
    /// code via `hash_kernel` so the two can never disagree.
    fn new(code: Vec<u8>, global_labels: HashMap<String, usize>) -> Self {
        Self {
            // Evaluated before `code` is moved into the struct on the next line.
            code_hash: hash_kernel(&code),
            code,
            global_labels,
        }
    }
}
struct Macro {
params: Vec<String>,
items: Vec<Item>,
@ -57,10 +74,7 @@ pub(crate) fn assemble(files: Vec<File>, constants: HashMap<String, U256>) -> Ke
debug!("Assembled file size: {} bytes", file_len);
}
assert_eq!(code.len(), offset, "Code length doesn't match offset.");
Kernel {
code,
global_labels,
}
Kernel::new(code, global_labels)
}
fn find_macros(files: &[File]) -> HashMap<String, Macro> {
@ -302,10 +316,7 @@ mod tests {
expected_global_labels.insert("function_1".to_string(), 0);
expected_global_labels.insert("function_2".to_string(), 3);
let expected_kernel = Kernel {
code: expected_code,
global_labels: expected_global_labels,
};
let expected_kernel = Kernel::new(expected_code, expected_global_labels);
let program = vec![file_1, file_2];
assert_eq!(assemble(program, HashMap::new()), expected_kernel);

View File

@ -0,0 +1,14 @@
/// A Keccak-f based hash of the kernel code, returned as eight `u32` limbs.
///
/// This hash does not use standard Keccak padding, since we don't care about extra zeros at the
/// end of the code.
///
/// Absorption is not implemented yet, so the result is currently the first eight limbs of an
/// all-zero sponge state regardless of `_code`.
pub(crate) fn hash_kernel(_code: &[u8]) -> [u32; 8] {
    let sponge = [0u32; 50];
    // TODO: absorb code
    let mut digest = [0u32; 8];
    digest.copy_from_slice(&sponge[..8]);
    digest
}
/// Like tiny-keccak's `keccakf`, but deals with `u32` limbs instead of `u64` limbs.
///
/// Applies the 24-round Keccak-f[1600] permutation to `state` in place. Each 64-bit lane `i` is
/// stored as two consecutive limbs: `state[2 * i]` holds the low 32 bits and `state[2 * i + 1]`
/// holds the high 32 bits.
pub(crate) fn keccakf_u32s(state: &mut [u32; 50]) {
    // Reassemble the 25 lanes from their (low, high) limb pairs.
    let mut lanes = [0u64; 25];
    for (lane, limbs) in lanes.iter_mut().zip(state.chunks_exact(2)) {
        *lane = u64::from(limbs[0]) | (u64::from(limbs[1]) << 32);
    }

    keccakf_u64s(&mut lanes);

    // Split the permuted lanes back into limb pairs; the truncating casts are intentional.
    for (limbs, lane) in state.chunks_exact_mut(2).zip(lanes.iter()) {
        limbs[0] = *lane as u32;
        limbs[1] = (*lane >> 32) as u32;
    }
}

/// The Keccak-f[1600] permutation over 25 lanes, indexed as `lanes[x + 5 * y]`.
fn keccakf_u64s(lanes: &mut [u64; 25]) {
    /// Per-round constants XORed into lane 0 by the iota step.
    const ROUND_CONSTANTS: [u64; 24] = [
        0x0000_0000_0000_0001,
        0x0000_0000_0000_8082,
        0x8000_0000_0000_808a,
        0x8000_0000_8000_8000,
        0x0000_0000_0000_808b,
        0x0000_0000_8000_0001,
        0x8000_0000_8000_8081,
        0x8000_0000_0000_8009,
        0x0000_0000_0000_008a,
        0x0000_0000_0000_0088,
        0x0000_0000_8000_8009,
        0x0000_0000_8000_000a,
        0x0000_0000_8000_808b,
        0x8000_0000_0000_008b,
        0x8000_0000_0000_8089,
        0x8000_0000_0000_8003,
        0x8000_0000_0000_8002,
        0x8000_0000_0000_0080,
        0x0000_0000_0000_800a,
        0x8000_0000_8000_000a,
        0x8000_0000_8000_8081,
        0x8000_0000_0000_8080,
        0x0000_0000_8000_0001,
        0x8000_0000_8000_8008,
    ];
    /// Rotation amounts for the rho step, in the order lanes are visited by `PI`.
    const RHO: [u32; 24] = [
        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
    ];
    /// Destination lane indices for the pi step, starting from lane 1.
    const PI: [usize; 24] = [
        10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
    ];

    for &round_constant in ROUND_CONSTANTS.iter() {
        // Theta: XOR each lane with the parities of two neighbouring columns.
        let mut parity = [0u64; 5];
        for x in 0..5 {
            parity[x] = lanes[x] ^ lanes[x + 5] ^ lanes[x + 10] ^ lanes[x + 15] ^ lanes[x + 20];
        }
        for x in 0..5 {
            let mix = parity[(x + 4) % 5] ^ parity[(x + 1) % 5].rotate_left(1);
            for y in 0..5 {
                lanes[x + 5 * y] ^= mix;
            }
        }

        // Rho and pi: rotate each lane and move it to its new position, following one cycle
        // through the 24 lanes other than lane 0.
        let mut carried = lanes[1];
        for (&dest, &rotation) in PI.iter().zip(RHO.iter()) {
            let displaced = lanes[dest];
            lanes[dest] = carried.rotate_left(rotation);
            carried = displaced;
        }

        // Chi: non-linear mixing within each row of five lanes.
        for row in lanes.chunks_exact_mut(5) {
            let before = [row[0], row[1], row[2], row[3], row[4]];
            for x in 0..5 {
                row[x] = before[x] ^ (!before[(x + 1) % 5] & before[(x + 2) % 5]);
            }
        }

        // Iota: break the symmetry between rounds.
        lanes[0] ^= round_constant;
    }
}

View File

@ -1,6 +1,7 @@
pub mod aggregator;
pub mod assembler;
mod ast;
pub(crate) mod keccak_util;
mod opcodes;
mod parser;

View File

@ -46,7 +46,7 @@ pub fn generate_traces<F: RichField + Extendable<D>, const D: usize>(
logic_ops: logic_inputs,
..
} = state;
assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS]);
assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS].into());
let cpu_trace = trace_rows_to_poly_values(cpu_rows);
let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs);

View File

@ -1,20 +1,17 @@
use std::mem;
use ethereum_types::U256;
use plonky2::field::types::Field;
use crate::cpu::columns::NUM_CPU_COLUMNS;
use crate::cpu::kernel::aggregator::combined_kernel;
use crate::cpu::kernel::assembler::Kernel;
use crate::cpu::columns::{CpuColumnsView, NUM_CPU_COLUMNS};
use crate::generation::memory::MemoryState;
use crate::logic::{Op, Operation};
use crate::memory::memory_stark::MemoryOp;
use crate::{keccak, logic};
#[derive(Debug)]
pub(crate) struct GenerationState<F: Field> {
pub(crate) kernel: Kernel,
pub(crate) cpu_rows: Vec<[F; NUM_CPU_COLUMNS]>,
pub(crate) current_cpu_row: [F; NUM_CPU_COLUMNS],
pub(crate) current_cpu_row: CpuColumnsView<F>,
pub(crate) current_context: usize,
pub(crate) memory: MemoryState<F>,
@ -27,24 +24,24 @@ impl<F: Field> GenerationState<F> {
/// Compute logical AND, and record the operation to be added in the logic table later.
#[allow(unused)] // TODO: Should be used soon.
pub(crate) fn and(&mut self, input0: U256, input1: U256) -> U256 {
self.logic_op(Op::And, input0, input1)
self.logic_op(logic::Op::And, input0, input1)
}
/// Compute logical OR, and record the operation to be added in the logic table later.
#[allow(unused)] // TODO: Should be used soon.
pub(crate) fn or(&mut self, input0: U256, input1: U256) -> U256 {
self.logic_op(Op::Or, input0, input1)
self.logic_op(logic::Op::Or, input0, input1)
}
/// Compute logical XOR, and record the operation to be added in the logic table later.
#[allow(unused)] // TODO: Should be used soon.
pub(crate) fn xor(&mut self, input0: U256, input1: U256) -> U256 {
self.logic_op(Op::Xor, input0, input1)
self.logic_op(logic::Op::Xor, input0, input1)
}
/// Compute the given logical operation, and record it to be added in the logic table later.
pub(crate) fn logic_op(&mut self, op: Op, input0: U256, input1: U256) -> U256 {
let operation = Operation::new(op, input0, input1);
pub(crate) fn logic_op(&mut self, op: logic::Op, input0: U256, input1: U256) -> U256 {
let operation = logic::Operation::new(op, input0, input1);
let result = operation.result;
self.logic_ops.push(operation);
result
@ -96,8 +93,9 @@ impl<F: Field> GenerationState<F> {
}
/// Appends the current CPU row to `cpu_rows` and leaves `current_cpu_row` all-zero, ready for the
/// next row to be filled in.
pub(crate) fn commit_cpu_row(&mut self) {
self.cpu_rows.push(self.current_cpu_row);
self.current_cpu_row = [F::ZERO; NUM_CPU_COLUMNS];
// Swap in a zeroed row so the filled row can be moved out of `self`.
let mut swapped_row = [F::ZERO; NUM_CPU_COLUMNS].into();
mem::swap(&mut self.current_cpu_row, &mut swapped_row);
self.cpu_rows.push(swapped_row.into());
}
}
@ -106,9 +104,8 @@ impl<F: Field> GenerationState<F> {
impl<F: Field> Default for GenerationState<F> {
fn default() -> Self {
Self {
kernel: combined_kernel(),
cpu_rows: vec![],
current_cpu_row: [F::ZERO; NUM_CPU_COLUMNS],
current_cpu_row: [F::ZERO; NUM_CPU_COLUMNS].into(),
current_context: 0,
memory: MemoryState::default(),
keccak_inputs: vec![],