From 6d751b13c1356ecd280bb57b446b530e0c4ad743 Mon Sep 17 00:00:00 2001 From: Hamy Ratoanina Date: Mon, 13 Nov 2023 11:03:50 -0500 Subject: [PATCH] Remove values of last memory channel (#1291) * Remove values of last memory channel Co-authored-by: Linda Guiga * Fix merge * Apply comments * Fix ASM * Top stack documentation (#7) * Add doc file * Apply comments * Apply comments * Fix visibility * Fix visibility --------- Co-authored-by: Linda Guiga --- evm/src/all_stark.rs | 6 + evm/src/cpu/bootstrap_kernel.rs | 20 +- evm/src/cpu/byte_unpacking.rs | 49 +++ evm/src/cpu/columns/mod.rs | 17 +- evm/src/cpu/contextops.rs | 11 - evm/src/cpu/cpu_stark.rs | 44 ++- evm/src/cpu/decode.rs | 10 +- evm/src/cpu/docs/memory-handling.md | 32 ++ evm/src/cpu/kernel/asm/account_code.asm | 10 +- evm/src/cpu/kernel/asm/core/call.asm | 24 +- .../cpu/kernel/asm/core/create_receipt.asm | 15 +- evm/src/cpu/kernel/asm/core/exception.asm | 9 +- .../cpu/kernel/asm/core/jumpdest_analysis.asm | 2 +- evm/src/cpu/kernel/asm/core/process_txn.asm | 8 +- evm/src/cpu/kernel/asm/core/syscall.asm | 11 +- evm/src/cpu/kernel/asm/core/terminate.asm | 4 +- evm/src/cpu/kernel/asm/curve/wnaf.asm | 2 +- evm/src/cpu/kernel/asm/memory/core.asm | 4 + evm/src/cpu/kernel/asm/memory/memcpy.asm | 55 ++-- evm/src/cpu/kernel/asm/memory/memset.asm | 26 +- evm/src/cpu/kernel/asm/memory/metadata.asm | 4 +- evm/src/cpu/kernel/asm/memory/packing.asm | 289 ++++++++++++++++-- evm/src/cpu/kernel/asm/memory/syscalls.asm | 7 +- evm/src/cpu/kernel/asm/shift.asm | 17 +- .../cpu/kernel/asm/transactions/type_1.asm | 2 +- .../cpu/kernel/asm/transactions/type_2.asm | 2 +- evm/src/cpu/kernel/interpreter.rs | 76 +++-- evm/src/cpu/kernel/opcodes.rs | 33 +- evm/src/cpu/membus.rs | 16 +- evm/src/cpu/memio.rs | 42 ++- evm/src/cpu/mod.rs | 1 + evm/src/cpu/shift.rs | 2 +- evm/src/cpu/stack.rs | 42 ++- evm/src/cpu/syscalls_exceptions.rs | 6 +- evm/src/generation/mod.rs | 1 + evm/src/generation/prover_input.rs | 6 +- evm/src/witness/gas.rs | 2 +- 
evm/src/witness/memory.rs | 4 +- evm/src/witness/operation.rs | 23 +- evm/src/witness/transition.rs | 8 +- evm/src/witness/util.rs | 30 +- evm/tests/empty_txn_list.rs | 2 +- 42 files changed, 733 insertions(+), 241 deletions(-) create mode 100644 evm/src/cpu/byte_unpacking.rs create mode 100644 evm/src/cpu/docs/memory-handling.md diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index 4b95e883..c6a7b447 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -223,6 +223,11 @@ fn ctl_memory() -> CrossTableLookup { Some(cpu_stark::ctl_filter_gp_memory(channel)), ) }); + let cpu_push_write_ops = TableWithColumns::new( + Table::Cpu, + cpu_stark::ctl_data_partial_memory::(), + Some(cpu_stark::ctl_filter_partial_memory()), + ); let keccak_sponge_reads = (0..KECCAK_RATE_BYTES).map(|i| { TableWithColumns::new( Table::KeccakSponge, @@ -239,6 +244,7 @@ fn ctl_memory() -> CrossTableLookup { }); let all_lookers = iter::once(cpu_memory_code_read) .chain(cpu_memory_gp_ops) + .chain(iter::once(cpu_push_write_ops)) .chain(keccak_sponge_reads) .chain(byte_packing_ops) .collect(); diff --git a/evm/src/cpu/bootstrap_kernel.rs b/evm/src/cpu/bootstrap_kernel.rs index b04ff379..2d03b3ef 100644 --- a/evm/src/cpu/bootstrap_kernel.rs +++ b/evm/src/cpu/bootstrap_kernel.rs @@ -47,8 +47,9 @@ pub(crate) fn generate_bootstrap_kernel(state: &mut GenerationState final_cpu_row.mem_channels[1].value[0] = F::from_canonical_usize(Segment::Code as usize); // segment final_cpu_row.mem_channels[2].value[0] = F::ZERO; // virt final_cpu_row.mem_channels[3].value[0] = F::from_canonical_usize(KERNEL.code.len()); // len - final_cpu_row.mem_channels[4].value = KERNEL.code_hash.map(F::from_canonical_u32); - final_cpu_row.mem_channels[4].value.reverse(); + + // The resulting hash will be written later in mem_channel[0] of the first CPU row, and will be checked + // with the CTL. 
keccak_sponge_log( state, MemoryAddress::new(0, Segment::Code, 0), @@ -90,6 +91,7 @@ pub(crate) fn eval_bootstrap_kernel_packed> + F::from_canonical_usize(i); yield_constr.constraint(filter * (channel.addr_virtual - expected_virt)); } + yield_constr.constraint(local_is_bootstrap * local_values.partial_channel.used); // If this is the final bootstrap row (i.e. delta_is_bootstrap = 1), check that // - all memory channels are disabled @@ -97,11 +99,12 @@ pub(crate) fn eval_bootstrap_kernel_packed> for channel in local_values.mem_channels.iter() { yield_constr.constraint_transition(delta_is_bootstrap * channel.used); } + yield_constr.constraint(delta_is_bootstrap * local_values.partial_channel.used); for (&expected, actual) in KERNEL .code_hash .iter() .rev() - .zip(local_values.mem_channels.last().unwrap().value) + .zip(next_values.mem_channels[0].value) { let expected = P::from(F::from_canonical_u32(expected)); let diff = expected - actual; @@ -151,6 +154,10 @@ pub(crate) fn eval_bootstrap_kernel_ext_circuit, co let constraint = builder.mul_extension(filter, virt_diff); yield_constr.constraint(builder, constraint); } + { + let constr = builder.mul_extension(local_is_bootstrap, local_values.partial_channel.used); + yield_constr.constraint(builder, constr); + } // If this is the final bootstrap row (i.e. 
delta_is_bootstrap = 1), check that // - all memory channels are disabled @@ -159,11 +166,16 @@ pub(crate) fn eval_bootstrap_kernel_ext_circuit, co let constraint = builder.mul_extension(delta_is_bootstrap, channel.used); yield_constr.constraint_transition(builder, constraint); } + { + let constr = builder.mul_extension(delta_is_bootstrap, local_values.partial_channel.used); + yield_constr.constraint(builder, constr); + } + for (&expected, actual) in KERNEL .code_hash .iter() .rev() - .zip(local_values.mem_channels.last().unwrap().value) + .zip(next_values.mem_channels[0].value) { let expected = builder.constant_extension(F::Extension::from_canonical_u32(expected)); let diff = builder.sub_extension(expected, actual); diff --git a/evm/src/cpu/byte_unpacking.rs b/evm/src/cpu/byte_unpacking.rs new file mode 100644 index 00000000..be3ed59d --- /dev/null +++ b/evm/src/cpu/byte_unpacking.rs @@ -0,0 +1,49 @@ +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::field::types::Field; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; +use plonky2::plonk::circuit_builder::CircuitBuilder; + +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::CpuColumnsView; + +pub(crate) fn eval_packed( + lv: &CpuColumnsView

, + nv: &CpuColumnsView

, + yield_constr: &mut ConstraintConsumer

, +) { + let filter = lv.op.mstore_32bytes; + let new_offset = nv.mem_channels[0].value[0]; + let virt = lv.mem_channels[2].value[0]; + // Read len from opcode bits and constrain the pushed new offset. + let len_bits: P = lv.opcode_bits[..5] + .iter() + .enumerate() + .map(|(i, &bit)| bit * P::Scalar::from_canonical_u64(1 << i)) + .sum(); + let len = len_bits + P::ONES; + yield_constr.constraint(filter * (new_offset - virt - len)); +} + +pub(crate) fn eval_ext_circuit, const D: usize>( + builder: &mut CircuitBuilder, + lv: &CpuColumnsView>, + nv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + let filter = lv.op.mstore_32bytes; + let new_offset = nv.mem_channels[0].value[0]; + let virt = lv.mem_channels[2].value[0]; + // Read len from opcode bits and constrain the pushed new offset. + let len_bits = lv.opcode_bits[..5].iter().enumerate().fold( + builder.zero_extension(), + |cumul, (i, &bit)| { + builder.mul_const_add_extension(F::from_canonical_u64(1 << i), bit, cumul) + }, + ); + let diff = builder.sub_extension(new_offset, virt); + let diff = builder.sub_extension(diff, len_bits); + let constr = builder.mul_sub_extension(filter, diff, filter); + yield_constr.constraint(builder, constr); +} diff --git a/evm/src/cpu/columns/mod.rs b/evm/src/cpu/columns/mod.rs index 72571e5e..94b07dd2 100644 --- a/evm/src/cpu/columns/mod.rs +++ b/evm/src/cpu/columns/mod.rs @@ -38,6 +38,18 @@ pub(crate) struct MemoryChannelView { } /// View of all the columns in `CpuStark`. +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +// A more lightweight channel, sharing values with the 0-th memory channel +// (which contains the top of the stack). +pub(crate) struct PartialMemoryChannelView { + pub used: T, + pub is_read: T, + pub addr_context: T, + pub addr_segment: T, + pub addr_virtual: T, +} + #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq, Debug)] pub(crate) struct CpuColumnsView { @@ -82,8 +94,11 @@ pub(crate) struct CpuColumnsView { /// CPU clock. 
pub(crate) clock: T, - /// Memory bus channels in the CPU. Each channel is comprised of 13 columns. + /// Memory bus channels in the CPU. + /// Full channels are comprised of 13 columns. pub mem_channels: [MemoryChannelView; NUM_GP_CHANNELS], + /// Partial channel is only comprised of 5 columns. + pub(crate) partial_channel: PartialMemoryChannelView, } /// Total number of columns in `CpuStark`. diff --git a/evm/src/cpu/contextops.rs b/evm/src/cpu/contextops.rs index 1fe6434e..bdcd77fc 100644 --- a/evm/src/cpu/contextops.rs +++ b/evm/src/cpu/contextops.rs @@ -200,11 +200,6 @@ fn eval_packed_set( yield_constr.constraint(lv.op.context_op * lv.general.stack().stack_inv_aux_2 * limb); } - // Unused channels. - for i in 4..NUM_GP_CHANNELS { - let channel = lv.mem_channels[i]; - yield_constr.constraint(filter * channel.used); - } yield_constr.constraint(filter * new_top_channel.used); } @@ -324,12 +319,6 @@ fn eval_ext_circuit_set, const D: usize>( yield_constr.constraint(builder, constr); } - // Unused channels. 
- for i in 4..NUM_GP_CHANNELS { - let channel = lv.mem_channels[i]; - let constr = builder.mul_extension(filter, channel.used); - yield_constr.constraint(builder, constr); - } { let constr = builder.mul_extension(filter, new_top_channel.used); yield_constr.constraint(builder, constr); diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index f63e90e1..ef6e74c2 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -11,12 +11,14 @@ use plonky2::iop::ext_target::ExtensionTarget; use super::columns::CpuColumnsView; use super::halt; +use super::membus::NUM_GP_CHANNELS; use crate::all_stark::Table; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{COL_MAP, NUM_CPU_COLUMNS}; use crate::cpu::{ - bootstrap_kernel, clock, contextops, control_flow, decode, dup_swap, gas, jumps, membus, memio, - modfp254, pc, push0, shift, simple_logic, stack, stack_bounds, syscalls_exceptions, + bootstrap_kernel, byte_unpacking, clock, contextops, control_flow, decode, dup_swap, gas, + jumps, membus, memio, modfp254, pc, push0, shift, simple_logic, stack, stack_bounds, + syscalls_exceptions, }; use crate::cross_table_lookup::{Column, TableWithColumns}; use crate::evaluation_frame::{StarkEvaluationFrame, StarkFrame}; @@ -32,7 +34,7 @@ pub(crate) fn ctl_data_keccak_sponge() -> Vec> { // GP channel 1: stack[-2] = segment // GP channel 2: stack[-3] = virt // GP channel 3: stack[-4] = len - // GP channel 4: pushed = outputs + // Next GP channel 0: pushed = outputs let context = Column::single(COL_MAP.mem_channels[0].value[0]); let segment = Column::single(COL_MAP.mem_channels[1].value[0]); let virt = Column::single(COL_MAP.mem_channels[2].value[0]); @@ -128,12 +130,18 @@ pub(crate) fn ctl_data_byte_unpacking() -> Vec> { // GP channel 1: stack[-2] = segment // GP channel 2: stack[-3] = virt // GP channel 3: stack[-4] = val - // GP channel 4: stack[-5] = len + // Next GP channel 0: pushed = new_offset (virt + 
len) let context = Column::single(COL_MAP.mem_channels[0].value[0]); let segment = Column::single(COL_MAP.mem_channels[1].value[0]); let virt = Column::single(COL_MAP.mem_channels[2].value[0]); let val = Column::singles(COL_MAP.mem_channels[3].value); - let len = Column::single(COL_MAP.mem_channels[4].value[0]); + + // len can be reconstructed as new_offset - virt. + let len = Column::linear_combination_and_next_row_with_constant( + [(COL_MAP.mem_channels[2].value[0], -F::ONE)], + [(COL_MAP.mem_channels[0].value[0], F::ONE)], + F::ZERO, + ); let num_channels = F::from_canonical_usize(NUM_CHANNELS); let timestamp = Column::linear_combination([(COL_MAP.clock, num_channels)]); @@ -199,6 +207,26 @@ pub(crate) fn ctl_data_gp_memory(channel: usize) -> Vec> { cols } +pub(crate) fn ctl_data_partial_memory() -> Vec> { + let channel_map = COL_MAP.partial_channel; + let values = COL_MAP.mem_channels[0].value; + let mut cols: Vec<_> = Column::singles([ + channel_map.is_read, + channel_map.addr_context, + channel_map.addr_segment, + channel_map.addr_virtual, + ]) + .collect(); + + cols.extend(Column::singles(values)); + + cols.push(mem_time_and_channel( + MEM_GP_CHANNELS_IDX_START + NUM_GP_CHANNELS, + )); + + cols +} + /// CTL filter for code read and write operations. pub(crate) fn ctl_filter_code_memory() -> Column { Column::sum(COL_MAP.op.iter()) @@ -209,6 +237,10 @@ pub(crate) fn ctl_filter_gp_memory(channel: usize) -> Column { Column::single(COL_MAP.mem_channels[channel].used) } +pub(crate) fn ctl_filter_partial_memory() -> Column { + Column::single(COL_MAP.partial_channel.used) +} + /// Structure representing the CPU Stark. 
#[derive(Copy, Clone, Default)] pub(crate) struct CpuStark { @@ -238,6 +270,7 @@ impl, const D: usize> Stark for CpuStark = next_values.borrow(); bootstrap_kernel::eval_bootstrap_kernel_packed(local_values, next_values, yield_constr); + byte_unpacking::eval_packed(local_values, next_values, yield_constr); clock::eval_packed(local_values, next_values, yield_constr); contextops::eval_packed(local_values, next_values, yield_constr); control_flow::eval_packed_generic(local_values, next_values, yield_constr); @@ -279,6 +312,7 @@ impl, const D: usize> Stark for CpuStark 0`), the current top of the stack is stored with a memory read in `current_row.partial_channel`, which shares its values with `current_row.mem_channels[0]` (which holds the current top of the stack). If the stack was empty, `current_row.partial_channel` is disabled. + +- **The instruction pops, but doesn't push:** After use, the current top of the stack is discarded and doesn't need to be written in memory. If the stack isn't now empty (`current_row.stack_len > num_pops`), the new top of the stack is set in `next_row.mem_channels[0]` with a memory read from the stack segment. If the stack is now empty, `next_row.mem_channels[0]` is disabled. + +In the last two cases, there is an edge case if `current_row.stack_len` is equal to a `special_len`. For a strictly pushing instruction, this happens if the stack is empty, and `special_len = 0`. For a strictly popping instruction, this happens if the next stack is empty, i.e. that all remaining elements are popped, and `special_len = num_pops`. Note that we do not need to check for values below `num_pops`, since this would be a stack underflow exception which is handled separately. +The edge case is detected with the compound flag `1 - not_special_len * stack_inv_aux`, where `not_special_len = current_row.stack_len - special_len` and `stack_inv_aux` is constrained to be the modular inverse of `not_special_len` if it's non-zero, or `0` otherwise. 
The flag is `1` if `stack_len` is equal to `special_len`, and `0` otherwise. + +This logic can be found in code in the `eval_packed_one` function of `stack.rs`, which multiplies all of the constraints with a degree 1 filter passed as argument. + +## Operation flag merging +To reduce the total number of columns, many operation flags are merged together (e.g. DUP and SWAP) and are distinguished with the binary decomposition of their opcodes. The filter for a merged operation is now of degree 2: for example, `is_swap = current_row.op.dup_swap * current_row.opcode_bits[4]` since the 4th bit is set to 1 for a SWAP and 0 for a DUP. If the two instructions have different stack behaviors, this can be a problem: `eval_packed_one`'s constraints are already of degree 3 and it can't support degree 2 filters. + +When this happens, stack constraints are defined manually in the operation's dedicated file (e.g. `dup_swap.rs`). Implementation details vary case-by-case and can be found in the files. diff --git a/evm/src/cpu/kernel/asm/account_code.asm b/evm/src/cpu/kernel/asm/account_code.asm index 4dddc6df..35f3deba 100644 --- a/evm/src/cpu/kernel/asm/account_code.asm +++ b/evm/src/cpu/kernel/asm/account_code.asm @@ -101,12 +101,12 @@ load_code_loop: DUP2 DUP2 EQ // stack: i == code_size, i, code_size, codehash, ctx, segment, retdest %jumpi(load_code_check) + DUP1 + // stack: i, i, code_size, codehash, ctx, segment, retdest + DUP6 // segment + DUP6 // context PROVER_INPUT(account_code::get) - // stack: opcode, i, code_size, codehash, ctx, segment, retdest - DUP2 - // stack: i, opcode, i, code_size, codehash, ctx, segment, retdest - DUP7 // segment - DUP7 // context + // stack: opcode, context, segment, i, i, code_size, codehash, ctx, segment, retdest MSTORE_GENERAL // stack: i, code_size, codehash, ctx, segment, retdest %increment diff --git a/evm/src/cpu/kernel/asm/core/call.asm b/evm/src/cpu/kernel/asm/core/call.asm index 25e331cb..ba6c775e 100644 --- 
a/evm/src/cpu/kernel/asm/core/call.asm +++ b/evm/src/cpu/kernel/asm/core/call.asm @@ -271,7 +271,7 @@ call_too_deep: // because it will already be 0 by default. %macro set_static_true // stack: new_ctx - %stack (new_ctx) -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_STATIC, 1, new_ctx) + %stack (new_ctx) -> (1, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_STATIC, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -280,7 +280,7 @@ call_too_deep: %macro set_static // stack: new_ctx %mload_context_metadata(@CTX_METADATA_STATIC) - %stack (is_static, new_ctx) -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_STATIC, is_static, new_ctx) + %stack (is_static, new_ctx) -> (is_static, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_STATIC, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -288,7 +288,7 @@ call_too_deep: %macro set_new_ctx_addr // stack: called_addr, new_ctx %stack (called_addr, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_ADDRESS, called_addr, new_ctx) + -> (called_addr, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_ADDRESS, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -296,7 +296,7 @@ call_too_deep: %macro set_new_ctx_caller // stack: sender, new_ctx %stack (sender, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLER, sender, new_ctx) + -> (sender, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLER, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -304,7 +304,7 @@ call_too_deep: %macro set_new_ctx_value // stack: value, new_ctx %stack (value, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALL_VALUE, value, new_ctx) + -> (value, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALL_VALUE, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -312,7 +312,7 @@ call_too_deep: %macro set_new_ctx_code_size // stack: code_size, new_ctx %stack (code_size, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CODE_SIZE, code_size, new_ctx) + 
-> (code_size, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CODE_SIZE, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -320,7 +320,7 @@ call_too_deep: %macro set_new_ctx_calldata_size // stack: calldata_size, new_ctx %stack (calldata_size, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLDATA_SIZE, calldata_size, new_ctx) + -> (calldata_size, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLDATA_SIZE, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -328,17 +328,17 @@ call_too_deep: %macro set_new_ctx_gas_limit // stack: gas_limit, new_ctx %stack (gas_limit, new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_GAS_LIMIT, gas_limit, new_ctx) + -> (gas_limit, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_GAS_LIMIT, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro %macro set_new_ctx_parent_ctx // stack: new_ctx - GET_CONTEXT PUSH @CTX_METADATA_PARENT_CONTEXT PUSH @SEGMENT_CONTEXT_METADATA - DUP4 // new_ctx + DUP3 // new_ctx + GET_CONTEXT MSTORE_GENERAL // stack: new_ctx %endmacro @@ -346,7 +346,7 @@ call_too_deep: %macro set_new_ctx_parent_pc(label) // stack: new_ctx %stack (new_ctx) - -> (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_PARENT_PC, $label, new_ctx) + -> ($label, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_PARENT_PC, new_ctx) MSTORE_GENERAL // stack: new_ctx %endmacro @@ -391,7 +391,7 @@ call_too_deep: %jump(memcpy_bytes) %%after: %stack (new_ctx, args_size) -> - (new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLDATA_SIZE, args_size) + (args_size, new_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_CALLDATA_SIZE) MSTORE_GENERAL // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/asm/core/create_receipt.asm b/evm/src/cpu/kernel/asm/core/create_receipt.asm index 9478b190..16a4fcaa 100644 --- a/evm/src/cpu/kernel/asm/core/create_receipt.asm +++ b/evm/src/cpu/kernel/asm/core/create_receipt.asm @@ -204,22 +204,19 @@ process_receipt_after_write: %mpt_insert_receipt_trie // stack: 
new_cum_gas, txn_nb, num_nibbles, retdest // Now, we set the Bloom filter back to 0. We proceed by chunks of 32 bytes. - PUSH 32 PUSH 0 %rep 8 - // stack: counter, 32, new_cum_gas, txn_nb, num_nibbles, retdest - DUP2 + // stack: counter, new_cum_gas, txn_nb, num_nibbles, retdest PUSH 0 // we will fill the memory segment with zeroes DUP2 PUSH @SEGMENT_TXN_BLOOM DUP3 // kernel context is 0 - // stack: ctx, segment, counter, 0, 32, counter, 32, new_cum_gas, txn_nb, num_nibbles, retdest - MSTORE_32BYTES - // stack: counter, 32, new_cum_gas, txn_nb, num_nibbles, retdest - DUP2 - ADD + // stack: ctx, segment, counter, 0, counter, new_cum_gas, txn_nb, num_nibbles, retdest + MSTORE_32BYTES_32 + // stack: new_counter, counter, new_cum_gas, txn_nb, num_nibbles, retdest + SWAP1 POP %endrep - %pop2 + POP // stack: new_cum_gas, txn_nb, num_nibbles, retdest %stack (new_cum_gas, txn_nb, num_nibbles, retdest) -> (retdest, new_cum_gas) JUMP diff --git a/evm/src/cpu/kernel/asm/core/exception.asm b/evm/src/cpu/kernel/asm/core/exception.asm index 64477c05..0f86b9ae 100644 --- a/evm/src/cpu/kernel/asm/core/exception.asm +++ b/evm/src/cpu/kernel/asm/core/exception.asm @@ -274,11 +274,16 @@ min_stack_len_for_opcode: BYTES 4 // 0xa2, LOG2 BYTES 5 // 0xa3, LOG3 BYTES 6 // 0xa4, LOG4 - %rep 11 // 0xa5-0xaf, invalid + + %rep 27 // 0xa5-0xbf, invalid BYTES 0 %endrep - %rep 64 // 0xb0-0xef, invalid + %rep 32 // 0xc0-0xdf, MSTORE_32BYTES + BYTES 4 + %endrep + + %rep 16 // 0xe0-0xef, invalid BYTES 0 %endrep diff --git a/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm b/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm index a9d8adf2..a7a52d03 100644 --- a/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm +++ b/evm/src/cpu/kernel/asm/core/jumpdest_analysis.asm @@ -32,7 +32,7 @@ encountered_jumpdest: // stack: opcode, i, ctx, code_len, retdest POP // stack: i, ctx, code_len, retdest - %stack (i, ctx) -> (ctx, @SEGMENT_JUMPDEST_BITS, i, 1, i, ctx) + %stack (i, ctx) -> (1, ctx, 
@SEGMENT_JUMPDEST_BITS, i, i, ctx) MSTORE_GENERAL continue: diff --git a/evm/src/cpu/kernel/asm/core/process_txn.asm b/evm/src/cpu/kernel/asm/core/process_txn.asm index 3e7e35db..bb794b65 100644 --- a/evm/src/cpu/kernel/asm/core/process_txn.asm +++ b/evm/src/cpu/kernel/asm/core/process_txn.asm @@ -145,12 +145,12 @@ global process_contract_creation_txn: // stack: new_ctx, address, retdest // Store constructor code length - %mload_txn_field(@TXN_FIELD_DATA_LEN) - // stack: data_len, new_ctx, address, retdest PUSH @CTX_METADATA_CODE_SIZE PUSH @SEGMENT_CONTEXT_METADATA - // stack: segment, offset, data_len, new_ctx, address, retdest - DUP4 // new_ctx + // stack: segment, offset, new_ctx, address, retdest + DUP3 // new_ctx + %mload_txn_field(@TXN_FIELD_DATA_LEN) + // stack: data_len, new_ctx, segment, offset, new_ctx, address, retdest MSTORE_GENERAL // stack: new_ctx, address, retdest diff --git a/evm/src/cpu/kernel/asm/core/syscall.asm b/evm/src/cpu/kernel/asm/core/syscall.asm index d3e7683e..5d1a6c95 100644 --- a/evm/src/cpu/kernel/asm/core/syscall.asm +++ b/evm/src/cpu/kernel/asm/core/syscall.asm @@ -126,14 +126,9 @@ global syscall_jumptable: JUMPTABLE panic // 0xb0-0xbf are invalid opcodes %endrep - // 0xc0-0xcf - %rep 16 - JUMPTABLE panic // 0xc0-0xcf are invalid opcodes - %endrep - - // 0xd0-0xdf - %rep 16 - JUMPTABLE panic // 0xd0-0xdf are invalid opcodes + // 0xc0-0xdf + %rep 32 + JUMPTABLE panic // mstore_32bytes_1-32 are implemented natively %endrep // 0xe0-0xef diff --git a/evm/src/cpu/kernel/asm/core/terminate.asm b/evm/src/cpu/kernel/asm/core/terminate.asm index bdbd3e58..0c5d5a8f 100644 --- a/evm/src/cpu/kernel/asm/core/terminate.asm +++ b/evm/src/cpu/kernel/asm/core/terminate.asm @@ -33,7 +33,7 @@ return_after_gas: // Store the return data size in the parent context's metadata. 
%stack (parent_ctx, kexit_info, offset, size) -> - (parent_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_RETURNDATA_SIZE, size, offset, size, parent_ctx, kexit_info) + (size, parent_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_RETURNDATA_SIZE, offset, size, parent_ctx, kexit_info) MSTORE_GENERAL // stack: offset, size, parent_ctx, kexit_info @@ -133,7 +133,7 @@ revert_after_gas: // Store the return data size in the parent context's metadata. %stack (parent_ctx, kexit_info, offset, size) -> - (parent_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_RETURNDATA_SIZE, size, offset, size, parent_ctx, kexit_info) + (size, parent_ctx, @SEGMENT_CONTEXT_METADATA, @CTX_METADATA_RETURNDATA_SIZE, offset, size, parent_ctx, kexit_info) MSTORE_GENERAL // stack: offset, size, parent_ctx, kexit_info diff --git a/evm/src/cpu/kernel/asm/curve/wnaf.asm b/evm/src/cpu/kernel/asm/curve/wnaf.asm index 555c9c84..674f8479 100644 --- a/evm/src/cpu/kernel/asm/curve/wnaf.asm +++ b/evm/src/cpu/kernel/asm/curve/wnaf.asm @@ -34,7 +34,7 @@ wnaf_loop_contd: DUP2 SWAP1 SUB %stack (n, m, segment, o, retdest) -> (129, o, m, o, segment, n, retdest) SUB - %stack (i, m, o, segment, n, retdest) -> (0, segment, i, m, o, segment, n, retdest) + %stack (i, m, o, segment, n, retdest) -> (m, 0, segment, i, o, segment, n, retdest) MSTORE_GENERAL // stack: o, segment, n, retdest DUP3 ISZERO %jumpi(wnaf_end) diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 41d4927b..dcfc12bd 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -102,6 +102,7 @@ // stack: segment, offset, value GET_CONTEXT // stack: context, segment, offset, value + %stack(context, segment, offset, value) -> (value, context, segment, offset) MSTORE_GENERAL // stack: (empty) %endmacro @@ -171,6 +172,7 @@ // stack: segment, offset, value GET_CONTEXT // stack: context, segment, offset, value + %stack(context, segment, offset, value) -> (value, context, 
segment, offset) MSTORE_GENERAL // stack: (empty) %endmacro @@ -222,6 +224,7 @@ // stack: segment, offset, value PUSH 0 // kernel has context 0 // stack: context, segment, offset, value + %stack(context, segment, offset, value) -> (value, context, segment, offset) MSTORE_GENERAL // stack: (empty) %endmacro @@ -235,6 +238,7 @@ // stack: segment, offset, value PUSH 0 // kernel has context 0 // stack: context, segment, offset, value + %stack(context, segment, offset, value) -> (value, context, segment, offset) MSTORE_GENERAL // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/asm/memory/memcpy.asm b/evm/src/cpu/kernel/asm/memory/memcpy.asm index 192b758a..9dd2305d 100644 --- a/evm/src/cpu/kernel/asm/memory/memcpy.asm +++ b/evm/src/cpu/kernel/asm/memory/memcpy.asm @@ -11,18 +11,18 @@ global memcpy: // stack: count == 0, DST, SRC, count, retdest %jumpi(memcpy_finish) // stack: DST, SRC, count, retdest + DUP3 + DUP3 + DUP3 - // Copy the next value. - DUP6 - DUP6 - DUP6 - // stack: SRC, DST, SRC, count, retdest + // Copy the next value + // stack: DST, DST, SRC, count, retdest + DUP9 + DUP9 + DUP9 + // stack: SRC, DST, DST, SRC, count, retdest MLOAD_GENERAL - // stack: value, DST, SRC, count, retdest - DUP4 - DUP4 - DUP4 - // stack: DST, value, DST, SRC, count, retdest + // stack: value, DST, DST, SRC, count, retdest MSTORE_GENERAL // stack: DST, SRC, count, retdest @@ -62,24 +62,22 @@ global memcpy_bytes: // We will pack 32 bytes into a U256 from the source, and then unpack it at the destination. // Copy the next chunk of bytes. 
PUSH 32 - DUP1 - DUP8 - DUP8 - DUP8 - // stack: SRC, 32, 32, DST, SRC, count, retdest + DUP7 + DUP7 + DUP7 + // stack: SRC, 32, DST, SRC, count, retdest MLOAD_32BYTES - // stack: value, 32, DST, SRC, count, retdest - DUP5 - DUP5 - DUP5 - // stack: DST, value, 32, DST, SRC, count, retdest - MSTORE_32BYTES - // stack: DST, SRC, count, retdest - + // stack: value, DST, SRC, count, retdest + DUP4 + DUP4 + DUP4 + // stack: DST, value, DST, SRC, count, retdest + MSTORE_32BYTES_32 + // stack: new_offset, DST, SRC, count, retdest // Increment dst_addr by 32. - SWAP2 - %add_const(0x20) - SWAP2 + SWAP3 + POP + // stack: DST, SRC, count, retdest // Increment src_addr by 32. SWAP5 %add_const(0x20) @@ -117,8 +115,9 @@ memcpy_bytes_finish: DUP5 DUP5 // stack: DST, value, count, DST, SRC, count, retdest - MSTORE_32BYTES - // stack: DST, SRC, count, retdest + %mstore_unpacking + // stack: new_offset, DST, SRC, count, retdest + POP memcpy_finish: // stack: DST, SRC, count, retdest diff --git a/evm/src/cpu/kernel/asm/memory/memset.asm b/evm/src/cpu/kernel/asm/memory/memset.asm index 1061a625..97e5dae8 100644 --- a/evm/src/cpu/kernel/asm/memory/memset.asm +++ b/evm/src/cpu/kernel/asm/memory/memset.asm @@ -12,19 +12,17 @@ global memset: %jumpi(memset_finish) // stack: DST, count, retdest - PUSH 32 PUSH 0 - DUP5 - DUP5 - DUP5 - // stack: DST, 0, 32, DST, count, retdest - MSTORE_32BYTES - // stack: DST, count, retdest + DUP4 + DUP4 + DUP4 + // stack: DST, 0, DST, count, retdest + MSTORE_32BYTES_32 + // stack: new_offset, DST, count, retdest - // Increment dst_addr. - SWAP2 - %add_const(0x20) - SWAP2 + // Update dst_addr. + SWAP3 + POP // Decrement count. 
SWAP3 %sub_const(0x20) @@ -50,9 +48,9 @@ memset_finish: DUP5 DUP5 // stack: DST, 0, final_count, DST, final_count, retdest - MSTORE_32BYTES - // stack: DST, final_count, retdest - %pop4 + %mstore_unpacking + // stack: new_offset, DST, final_count, retdest + %pop5 // stack: retdest JUMP diff --git a/evm/src/cpu/kernel/asm/memory/metadata.asm b/evm/src/cpu/kernel/asm/memory/metadata.asm index d5b4033d..625b57f1 100644 --- a/evm/src/cpu/kernel/asm/memory/metadata.asm +++ b/evm/src/cpu/kernel/asm/memory/metadata.asm @@ -47,7 +47,7 @@ // stack: value %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) %stack (parent_ctx, value) -> - (parent_ctx, @SEGMENT_CONTEXT_METADATA, $field, value) + (value, parent_ctx, @SEGMENT_CONTEXT_METADATA, $field) MSTORE_GENERAL // stack: (empty) %endmacro @@ -56,7 +56,7 @@ // stack: (empty) %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) %stack (parent_ctx) -> - (parent_ctx, @SEGMENT_CONTEXT_METADATA, $field, $value) + ($value, parent_ctx, @SEGMENT_CONTEXT_METADATA, $field) MSTORE_GENERAL // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm index 1dbbf393..1feeeaf8 100644 --- a/evm/src/cpu/kernel/asm/memory/packing.asm +++ b/evm/src/cpu/kernel/asm/memory/packing.asm @@ -42,11 +42,251 @@ global mload_packing_u64_LE: // Post stack: offset' global mstore_unpacking: // stack: context, segment, offset, value, len, retdest - %stack(context, segment, offset, value, len, retdest) -> (context, segment, offset, value, len, offset, len, retdest) - // stack: context, segment, offset, value, len, offset, len, retdest - MSTORE_32BYTES - // stack: offset, len, retdest - ADD SWAP1 + DUP5 ISZERO + // stack: len == 0, context, segment, offset, value, len, retdest + %jumpi(mstore_unpacking_empty) + %stack(context, segment, offset, value, len, retdest) -> (len, context, segment, offset, value, retdest) + PUSH 3 + // stack: BYTES_PER_JUMP, len, context, segment, offset, value, 
retdest + MUL + // stack: jump_offset, context, segment, offset, value, retdest + PUSH mstore_unpacking_0 + // stack: mstore_unpacking_0, jump_offset, context, segment, offset, value, retdest + ADD + // stack: address_unpacking, context, segment, offset, value, retdest + JUMP + +mstore_unpacking_empty: + %stack(context, segment, offset, value, len, retdest) -> (retdest, offset) + JUMP + +// This case can never be reached. It's only here to offset the table correctly. +mstore_unpacking_0: + %rep 3 + PANIC + %endrep +mstore_unpacking_1: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_1 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_2: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_2 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_3: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_3 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_4: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_4 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_5: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_5 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_6: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_6 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_7: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_7 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_8: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_8 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_9: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_9 + // stack: offset', retdest + SWAP1 + // stack: retdest, 
offset' + JUMP +mstore_unpacking_10: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_10 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_11: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_11 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_12: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_12 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_13: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_13 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_14: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_14 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_15: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_15 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_16: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_16 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_17: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_17 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_18: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_18 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_19: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_19 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_20: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_20 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_21: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_21 + // stack: offset', retdest + SWAP1 + // 
stack: retdest, offset' + JUMP +mstore_unpacking_22: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_22 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_23: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_23 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_24: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_24 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_25: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_25 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_26: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_26 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_27: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_27 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_28: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_28 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_29: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_29 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_30: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_30 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_31: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_31 + // stack: offset', retdest + SWAP1 + // stack: retdest, offset' + JUMP +mstore_unpacking_32: + // stack: context, segment, offset, value, retdest + MSTORE_32BYTES_32 + // stack: offset', retdest + SWAP1 // stack: retdest, offset' JUMP @@ -59,30 +299,37 @@ global mstore_unpacking: // Pre stack: context, segment, offset, value, retdest // Post stack: offset' 
global mstore_unpacking_u64_LE: - %stack (context, segment, offset, value) -> (0xff, value, context, segment, offset, value) + %stack (context, segment, offset, value) -> (0xff, value, context, segment, offset, context, segment, offset, value) AND - DUP4 DUP4 DUP4 MSTORE_GENERAL // First byte - %stack (context, segment, offset, value) -> (0xff00, value, context, segment, offset, value) + MSTORE_GENERAL // First byte + DUP3 %add_const(1) + %stack (new_offset, context, segment, offset, value) -> (0xff00, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(8) - DUP4 %add_const(1) DUP4 DUP4 MSTORE_GENERAL // Second byte - %stack (context, segment, offset, value) -> (0xff0000, value, context, segment, offset, value) + MSTORE_GENERAL // Second byte + DUP3 %add_const(2) + %stack (new_offset, context, segment, offset, value) -> (0xff0000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(16) - DUP4 %add_const(2) DUP4 DUP4 MSTORE_GENERAL // Third byte - %stack (context, segment, offset, value) -> (0xff000000, value, context, segment, offset, value) + MSTORE_GENERAL // Third byte + DUP3 %add_const(3) + %stack (new_offset, context, segment, offset, value) -> (0xff000000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(24) - DUP4 %add_const(3) DUP4 DUP4 MSTORE_GENERAL // Fourth byte - %stack (context, segment, offset, value) -> (0xff00000000, value, context, segment, offset, value) + MSTORE_GENERAL // Fourth byte + DUP3 %add_const(4) + %stack (new_offset, context, segment, offset, value) -> (0xff00000000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(32) - DUP4 %add_const(4) DUP4 DUP4 MSTORE_GENERAL // Fifth byte - %stack (context, segment, offset, value) -> (0xff0000000000, value, context, segment, offset, value) + MSTORE_GENERAL // Fifth byte + DUP3 %add_const(5) + %stack (new_offset, context, segment, offset, value) -> 
(0xff0000000000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(40) - DUP4 %add_const(5) DUP4 DUP4 MSTORE_GENERAL // Sixth byte - %stack (context, segment, offset, value) -> (0xff000000000000, value, context, segment, offset, value) + MSTORE_GENERAL // Sixth byte + DUP3 %add_const(6) + %stack (new_offset, context, segment, offset, value) -> (0xff000000000000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(48) - DUP4 %add_const(6) DUP4 DUP4 MSTORE_GENERAL // Seventh byte - %stack (context, segment, offset, value) -> (0xff00000000000000, value, context, segment, offset, value) + MSTORE_GENERAL // Seventh byte + DUP3 %add_const(7) + %stack (new_offset, context, segment, offset, value) -> (0xff00000000000000, value, context, segment, new_offset, context, segment, offset, value) AND %shr_const(56) - DUP4 %add_const(7) DUP4 DUP4 MSTORE_GENERAL // Eighth byte + MSTORE_GENERAL // Eighth byte %pop4 JUMP %macro mstore_unpacking_u64_LE diff --git a/evm/src/cpu/kernel/asm/memory/syscalls.asm b/evm/src/cpu/kernel/asm/memory/syscalls.asm index 3a8c1618..638cd1e3 100644 --- a/evm/src/cpu/kernel/asm/memory/syscalls.asm +++ b/evm/src/cpu/kernel/asm/memory/syscalls.asm @@ -26,11 +26,12 @@ global sys_mstore: // stack: expanded_num_bytes, kexit_info, offset, value %update_mem_bytes // stack: kexit_info, offset, value - %stack(kexit_info, offset, value) -> (offset, value, 32, kexit_info) + %stack(kexit_info, offset, value) -> (offset, value, kexit_info) PUSH @SEGMENT_MAIN_MEMORY GET_CONTEXT - // stack: addr: 3, value, len, kexit_info - MSTORE_32BYTES + // stack: addr: 3, value, kexit_info + MSTORE_32BYTES_32 + POP // stack: kexit_info EXIT_KERNEL diff --git a/evm/src/cpu/kernel/asm/shift.asm b/evm/src/cpu/kernel/asm/shift.asm index ce481ea2..9040f195 100644 --- a/evm/src/cpu/kernel/asm/shift.asm +++ b/evm/src/cpu/kernel/asm/shift.asm @@ -2,22 +2,21 @@ /// /// Specifically, set SHIFT_TABLE_SEGMENT[i] = 2^i for i 
= 0..255. %macro shift_table_init - push 1 // 2^0 push 0 // initial offset is zero push @SEGMENT_SHIFT_TABLE // segment dup2 // kernel context is 0 + push 1 // 2^0 %rep 255 - // stack: context, segment, ost_i, 2^i + // stack: 2^i, context, segment, ost_i + dup4 + %increment + dup4 + dup4 + // stack: context, segment, ost_(i+1), 2^i, context, segment, ost_i dup4 dup1 add - // stack: 2^(i+1), context, segment, ost_i, 2^i - dup4 - %increment - // stack: ost_(i+1), 2^(i+1), context, segment, ost_i, 2^i - dup4 - dup4 - // stack: context, segment, ost_(i+1), 2^(i+1), context, segment, ost_i, 2^i + // stack: 2^(i+1), context, segment, ost_(i+1), 2^i, context, segment, ost_i %endrep %rep 256 mstore_general diff --git a/evm/src/cpu/kernel/asm/transactions/type_1.asm b/evm/src/cpu/kernel/asm/transactions/type_1.asm index 68d998ae..c9298b66 100644 --- a/evm/src/cpu/kernel/asm/transactions/type_1.asm +++ b/evm/src/cpu/kernel/asm/transactions/type_1.asm @@ -104,7 +104,7 @@ after_serializing_access_list: // Store a `1` in front of the RLP %decrement - %stack (pos) -> (0, @SEGMENT_RLP_RAW, pos, 1, pos) + %stack (pos) -> (1, 0, @SEGMENT_RLP_RAW, pos, pos) MSTORE_GENERAL // stack: pos, rlp_len, retdest diff --git a/evm/src/cpu/kernel/asm/transactions/type_2.asm b/evm/src/cpu/kernel/asm/transactions/type_2.asm index c5762161..b7f6e6c7 100644 --- a/evm/src/cpu/kernel/asm/transactions/type_2.asm +++ b/evm/src/cpu/kernel/asm/transactions/type_2.asm @@ -111,7 +111,7 @@ after_serializing_access_list: // Store a `2` in front of the RLP %decrement - %stack (pos) -> (0, @SEGMENT_RLP_RAW, pos, 2, pos) + %stack (pos) -> (2, 0, @SEGMENT_RLP_RAW, pos, pos) MSTORE_GENERAL // stack: pos, rlp_len, retdest diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index d37b6129..81ecce50 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -426,23 +426,23 @@ impl<'a> Interpreter<'a> { self.stack(), self.get_kernel_general_memory() ), // 
"PANIC", - 0xee => self.run_mstore_32bytes(), // "MSTORE_32BYTES", - 0xf0 => self.run_syscall(opcode, 3, false)?, // "CREATE", - 0xf1 => self.run_syscall(opcode, 7, false)?, // "CALL", - 0xf2 => self.run_syscall(opcode, 7, false)?, // "CALLCODE", - 0xf3 => self.run_syscall(opcode, 2, false)?, // "RETURN", - 0xf4 => self.run_syscall(opcode, 6, false)?, // "DELEGATECALL", - 0xf5 => self.run_syscall(opcode, 4, false)?, // "CREATE2", - 0xf6 => self.run_get_context(), // "GET_CONTEXT", - 0xf7 => self.run_set_context(), // "SET_CONTEXT", - 0xf8 => self.run_mload_32bytes(), // "MLOAD_32BYTES", - 0xf9 => self.run_exit_kernel(), // "EXIT_KERNEL", - 0xfa => self.run_syscall(opcode, 6, false)?, // "STATICCALL", - 0xfb => self.run_mload_general(), // "MLOAD_GENERAL", - 0xfc => self.run_mstore_general(), // "MSTORE_GENERAL", - 0xfd => self.run_syscall(opcode, 2, false)?, // "REVERT", - 0xfe => bail!("Executed INVALID"), // "INVALID", - 0xff => self.run_syscall(opcode, 1, false)?, // "SELFDESTRUCT", + x if (0xc0..0xe0).contains(&x) => self.run_mstore_32bytes(x - 0xc0 + 1), // "MSTORE_32BYTES", + 0xf0 => self.run_syscall(opcode, 3, false)?, // "CREATE", + 0xf1 => self.run_syscall(opcode, 7, false)?, // "CALL", + 0xf2 => self.run_syscall(opcode, 7, false)?, // "CALLCODE", + 0xf3 => self.run_syscall(opcode, 2, false)?, // "RETURN", + 0xf4 => self.run_syscall(opcode, 6, false)?, // "DELEGATECALL", + 0xf5 => self.run_syscall(opcode, 4, false)?, // "CREATE2", + 0xf6 => self.run_get_context(), // "GET_CONTEXT", + 0xf7 => self.run_set_context(), // "SET_CONTEXT", + 0xf8 => self.run_mload_32bytes(), // "MLOAD_32BYTES", + 0xf9 => self.run_exit_kernel(), // "EXIT_KERNEL", + 0xfa => self.run_syscall(opcode, 6, false)?, // "STATICCALL", + 0xfb => self.run_mload_general(), // "MLOAD_GENERAL", + 0xfc => self.run_mstore_general(), // "MSTORE_GENERAL", + 0xfd => self.run_syscall(opcode, 2, false)?, // "REVERT", + 0xfe => bail!("Executed INVALID"), // "INVALID", + 0xff => self.run_syscall(opcode, 
1, false)?, // "SELFDESTRUCT", _ => bail!("Unrecognized opcode {}.", opcode), }; @@ -1177,25 +1177,24 @@ impl<'a> Interpreter<'a> { } fn run_mstore_general(&mut self) { + let value = self.pop(); let context = self.pop().as_usize(); let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); - let value = self.pop(); self.generation_state .memory .mstore_general(context, segment, offset, value); } - fn run_mstore_32bytes(&mut self) { + fn run_mstore_32bytes(&mut self, n: u8) { let context = self.pop().as_usize(); let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.pop(); - let len = self.pop().as_usize(); let mut bytes = vec![0; 32]; value.to_little_endian(&mut bytes); - bytes.resize(len, 0); + bytes.resize(n as usize, 0); bytes.reverse(); for (i, &byte) in bytes.iter().enumerate() { @@ -1203,6 +1202,8 @@ impl<'a> Interpreter<'a> { .memory .mstore_general(context, segment, offset + i, byte.into()); } + + self.push(U256::from(offset + n as usize)); } fn run_exit_kernel(&mut self) { @@ -1455,7 +1456,38 @@ fn get_mnemonic(opcode: u8) -> &'static str { 0xa3 => "LOG3", 0xa4 => "LOG4", 0xa5 => "PANIC", - 0xee => "MSTORE_32BYTES", + 0xc0 => "MSTORE_32_BYTES_1", + 0xc1 => "MSTORE_32_BYTES_2", + 0xc2 => "MSTORE_32_BYTES_3", + 0xc3 => "MSTORE_32_BYTES_4", + 0xc4 => "MSTORE_32_BYTES_5", + 0xc5 => "MSTORE_32_BYTES_6", + 0xc6 => "MSTORE_32_BYTES_7", + 0xc7 => "MSTORE_32_BYTES_8", + 0xc8 => "MSTORE_32_BYTES_9", + 0xc9 => "MSTORE_32_BYTES_10", + 0xca => "MSTORE_32_BYTES_11", + 0xcb => "MSTORE_32_BYTES_12", + 0xcc => "MSTORE_32_BYTES_13", + 0xcd => "MSTORE_32_BYTES_14", + 0xce => "MSTORE_32_BYTES_15", + 0xcf => "MSTORE_32_BYTES_16", + 0xd0 => "MSTORE_32_BYTES_17", + 0xd1 => "MSTORE_32_BYTES_18", + 0xd2 => "MSTORE_32_BYTES_19", + 0xd3 => "MSTORE_32_BYTES_20", + 0xd4 => "MSTORE_32_BYTES_21", + 0xd5 => "MSTORE_32_BYTES_22", + 0xd6 => "MSTORE_32_BYTES_23", + 0xd7 => "MSTORE_32_BYTES_24", + 0xd8 => 
"MSTORE_32_BYTES_25", + 0xd9 => "MSTORE_32_BYTES_26", + 0xda => "MSTORE_32_BYTES_27", + 0xdb => "MSTORE_32_BYTES_28", + 0xdc => "MSTORE_32_BYTES_29", + 0xdd => "MSTORE_32_BYTES_30", + 0xde => "MSTORE_32_BYTES_31", + 0xdf => "MSTORE_32_BYTES_32", 0xf0 => "CREATE", 0xf1 => "CALL", 0xf2 => "CALLCODE", diff --git a/evm/src/cpu/kernel/opcodes.rs b/evm/src/cpu/kernel/opcodes.rs index b8732079..538fe0a1 100644 --- a/evm/src/cpu/kernel/opcodes.rs +++ b/evm/src/cpu/kernel/opcodes.rs @@ -114,7 +114,38 @@ pub fn get_opcode(mnemonic: &str) -> u8 { "LOG3" => 0xa3, "LOG4" => 0xa4, "PANIC" => 0xa5, - "MSTORE_32BYTES" => 0xee, + "MSTORE_32BYTES_1" => 0xc0, + "MSTORE_32BYTES_2" => 0xc1, + "MSTORE_32BYTES_3" => 0xc2, + "MSTORE_32BYTES_4" => 0xc3, + "MSTORE_32BYTES_5" => 0xc4, + "MSTORE_32BYTES_6" => 0xc5, + "MSTORE_32BYTES_7" => 0xc6, + "MSTORE_32BYTES_8" => 0xc7, + "MSTORE_32BYTES_9" => 0xc8, + "MSTORE_32BYTES_10" => 0xc9, + "MSTORE_32BYTES_11" => 0xca, + "MSTORE_32BYTES_12" => 0xcb, + "MSTORE_32BYTES_13" => 0xcc, + "MSTORE_32BYTES_14" => 0xcd, + "MSTORE_32BYTES_15" => 0xce, + "MSTORE_32BYTES_16" => 0xcf, + "MSTORE_32BYTES_17" => 0xd0, + "MSTORE_32BYTES_18" => 0xd1, + "MSTORE_32BYTES_19" => 0xd2, + "MSTORE_32BYTES_20" => 0xd3, + "MSTORE_32BYTES_21" => 0xd4, + "MSTORE_32BYTES_22" => 0xd5, + "MSTORE_32BYTES_23" => 0xd6, + "MSTORE_32BYTES_24" => 0xd7, + "MSTORE_32BYTES_25" => 0xd8, + "MSTORE_32BYTES_26" => 0xd9, + "MSTORE_32BYTES_27" => 0xda, + "MSTORE_32BYTES_28" => 0xdb, + "MSTORE_32BYTES_29" => 0xdc, + "MSTORE_32BYTES_30" => 0xdd, + "MSTORE_32BYTES_31" => 0xde, + "MSTORE_32BYTES_32" => 0xdf, "CREATE" => 0xf0, "CALL" => 0xf1, "CALLCODE" => 0xf2, diff --git a/evm/src/cpu/membus.rs b/evm/src/cpu/membus.rs index 9fba0065..5b11ca25 100644 --- a/evm/src/cpu/membus.rs +++ b/evm/src/cpu/membus.rs @@ -7,7 +7,7 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer use crate::cpu::columns::CpuColumnsView; /// General-purpose memory channels; they can read and 
write to all contexts/segments/addresses. -pub const NUM_GP_CHANNELS: usize = 5; +pub const NUM_GP_CHANNELS: usize = 4; /// Indices for code and general purpose memory channels. pub mod channel_indices { @@ -41,12 +41,16 @@ pub(crate) fn eval_packed( // It should be 0 if in kernel mode and `lv.context` if in user mode. // Note: This doesn't need to be filtered to CPU cycles, as this should also be satisfied // during Kernel bootstrapping. + yield_constr.constraint(lv.code_context - (P::ONES - lv.is_kernel_mode) * lv.context); // Validate `channel.used`. It should be binary. for channel in lv.mem_channels { yield_constr.constraint(channel.used * (channel.used - P::ONES)); } + + // Validate `partial_channel.used`. It should be binary. + yield_constr.constraint(lv.partial_channel.used * (lv.partial_channel.used - P::ONES)); } /// Circuit version of `eval_packed`. @@ -69,4 +73,14 @@ pub(crate) fn eval_ext_circuit, const D: usize>( let constr = builder.mul_sub_extension(channel.used, channel.used, channel.used); yield_constr.constraint(builder, constr); } + + // Validate `partial_channel.used`. It should be binary. 
+ { + let constr = builder.mul_sub_extension( + lv.partial_channel.used, + lv.partial_channel.used, + lv.partial_channel.used, + ); + yield_constr.constraint(builder, constr); + } } diff --git a/evm/src/cpu/memio.rs b/evm/src/cpu/memio.rs index 065c0561..afb0c19f 100644 --- a/evm/src/cpu/memio.rs +++ b/evm/src/cpu/memio.rs @@ -11,12 +11,18 @@ use crate::cpu::membus::NUM_GP_CHANNELS; use crate::cpu::stack; use crate::memory::segments::Segment; -fn get_addr(lv: &CpuColumnsView) -> (T, T, T) { +fn get_addr_load(lv: &CpuColumnsView) -> (T, T, T) { let addr_context = lv.mem_channels[0].value[0]; let addr_segment = lv.mem_channels[1].value[0]; let addr_virtual = lv.mem_channels[2].value[0]; (addr_context, addr_segment, addr_virtual) } +fn get_addr_store(lv: &CpuColumnsView) -> (T, T, T) { + let addr_context = lv.mem_channels[1].value[0]; + let addr_segment = lv.mem_channels[2].value[0]; + let addr_virtual = lv.mem_channels[3].value[0]; + (addr_context, addr_segment, addr_virtual) +} /// Evaluates constraints for MLOAD_GENERAL. fn eval_packed_load( @@ -27,7 +33,7 @@ fn eval_packed_load( // The opcode for MLOAD_GENERAL is 0xfb. If the operation is MLOAD_GENERAL, lv.opcode_bits[0] = 1. let filter = lv.op.m_op_general * lv.opcode_bits[0]; - let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + let (addr_context, addr_segment, addr_virtual) = get_addr_load(lv); // Check that we are loading the correct value from the correct address. let load_channel = lv.mem_channels[3]; @@ -64,7 +70,7 @@ fn eval_ext_circuit_load, const D: usize>( let mut filter = lv.op.m_op_general; filter = builder.mul_extension(filter, lv.opcode_bits[0]); - let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + let (addr_context, addr_segment, addr_virtual) = get_addr_load(lv); // Check that we are loading the correct value from the correct channel. 
let load_channel = lv.mem_channels[3]; @@ -90,7 +96,7 @@ fn eval_ext_circuit_load, const D: usize>( } // Disable remaining memory channels, if any. - for &channel in &lv.mem_channels[4..NUM_GP_CHANNELS] { + for &channel in &lv.mem_channels[4..] { let constr = builder.mul_extension(filter, channel.used); yield_constr.constraint(builder, constr); } @@ -114,22 +120,19 @@ fn eval_packed_store( ) { let filter = lv.op.m_op_general * (lv.opcode_bits[0] - P::ONES); - let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + let (addr_context, addr_segment, addr_virtual) = get_addr_store(lv); + + // The value will be checked with the CTL. + let store_channel = lv.partial_channel; - // Check that we are storing the correct value at the correct address. - let value_channel = lv.mem_channels[3]; - let store_channel = lv.mem_channels[4]; yield_constr.constraint(filter * (store_channel.used - P::ONES)); yield_constr.constraint(filter * store_channel.is_read); yield_constr.constraint(filter * (store_channel.addr_context - addr_context)); yield_constr.constraint(filter * (store_channel.addr_segment - addr_segment)); yield_constr.constraint(filter * (store_channel.addr_virtual - addr_virtual)); - for (value_limb, store_limb) in izip!(value_channel.value, store_channel.value) { - yield_constr.constraint(filter * (value_limb - store_limb)); - } // Disable remaining memory channels, if any. - for &channel in &lv.mem_channels[5..] { + for &channel in &lv.mem_channels[4..] { yield_constr.constraint(filter * channel.used); } @@ -172,6 +175,7 @@ fn eval_packed_store( ); let addr_virtual = nv.stack_len - P::ONES; yield_constr.constraint_transition(new_filter * (top_read_channel.addr_virtual - addr_virtual)); + // If stack_len == 4 or MLOAD, disable the channel. 
yield_constr.constraint( lv.op.m_op_general * (lv.general.stack().stack_inv_aux - P::ONES) * top_read_channel.used, @@ -190,11 +194,10 @@ fn eval_ext_circuit_store, const D: usize>( let filter = builder.mul_sub_extension(lv.op.m_op_general, lv.opcode_bits[0], lv.op.m_op_general); - let (addr_context, addr_segment, addr_virtual) = get_addr(lv); + let (addr_context, addr_segment, addr_virtual) = get_addr_store(lv); - // Check that we are storing the correct value at the correct address. - let value_channel = lv.mem_channels[3]; - let store_channel = lv.mem_channels[4]; + // The value will be checked with the CTL. + let store_channel = lv.partial_channel; { let constr = builder.mul_sub_extension(filter, store_channel.used, filter); yield_constr.constraint(builder, constr); @@ -215,14 +218,9 @@ fn eval_ext_circuit_store, const D: usize>( let constr = builder.mul_extension(filter, diff); yield_constr.constraint(builder, constr); } - for (value_limb, store_limb) in izip!(value_channel.value, store_channel.value) { - let diff = builder.sub_extension(value_limb, store_limb); - let constr = builder.mul_extension(filter, diff); - yield_constr.constraint(builder, constr); - } // Disable remaining memory channels, if any. - for &channel in &lv.mem_channels[5..] { + for &channel in &lv.mem_channels[4..] { let constr = builder.mul_extension(filter, channel.used); yield_constr.constraint(builder, constr); } diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs index 2ee53504..829862b9 100644 --- a/evm/src/cpu/mod.rs +++ b/evm/src/cpu/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod bootstrap_kernel; +mod byte_unpacking; mod clock; pub(crate) mod columns; mod contextops; diff --git a/evm/src/cpu/shift.rs b/evm/src/cpu/shift.rs index e77762fc..3d97c2f1 100644 --- a/evm/src/cpu/shift.rs +++ b/evm/src/cpu/shift.rs @@ -58,7 +58,7 @@ pub(crate) fn eval_packed( // // 1 -> 0 (value to be shifted is the same) // 2 -> 1 (two_exp becomes the multiplicand (resp. 
divisor)) - // last -> last (output is the same) + // next_0 -> next_0 (output is the same) } /// Circuit version. diff --git a/evm/src/cpu/stack.rs b/evm/src/cpu/stack.rs index 60595184..8710cba3 100644 --- a/evm/src/cpu/stack.rs +++ b/evm/src/cpu/stack.rs @@ -111,8 +111,8 @@ pub(crate) const STACK_BEHAVIORS: OpsColumnsView> = OpsCol disable_other_channels: false, }), mstore_32bytes: Some(StackBehavior { - num_pops: 5, - pushes: false, + num_pops: 4, + pushes: true, disable_other_channels: false, }), exit_kernel: Some(StackBehavior { @@ -172,6 +172,9 @@ pub(crate) fn eval_packed_one( yield_constr.constraint(filter * (channel.addr_virtual - addr_virtual)); } + // You can't have a write of the top of the stack, so you disable the corresponding flag. + yield_constr.constraint(filter * lv.partial_channel.used); + // If you also push, you don't need to read the new top of the stack. // If you don't: // - if the stack isn't empty after the pops, you read the new top from an extra pop. @@ -206,8 +209,9 @@ pub(crate) fn eval_packed_one( else if stack_behavior.pushes { // If len > 0... let new_filter = lv.stack_len * filter; - // You write the previous top of the stack in memory, in the last channel. - let channel = lv.mem_channels[NUM_GP_CHANNELS - 1]; + // You write the previous top of the stack in memory, in the partial channel. + // The value will be checked with the CTL. + let channel = lv.partial_channel; yield_constr.constraint(new_filter * (channel.used - P::ONES)); yield_constr.constraint(new_filter * channel.is_read); yield_constr.constraint(new_filter * (channel.addr_context - lv.context)); @@ -217,9 +221,6 @@ pub(crate) fn eval_packed_one( ); let addr_virtual = lv.stack_len - P::ONES; yield_constr.constraint(new_filter * (channel.addr_virtual - addr_virtual)); - for (limb_ch, limb_top) in channel.value.iter().zip(lv.mem_channels[0].value.iter()) { - yield_constr.constraint(new_filter * (*limb_ch - *limb_top)); - } // Else you disable the channel. 
yield_constr.constraint( filter @@ -238,11 +239,14 @@ pub(crate) fn eval_packed_one( { yield_constr.constraint(filter * (*limb_old - *limb_new)); } + + // You can't have a write of the top of the stack, so you disable the corresponding flag. + yield_constr.constraint(filter * lv.partial_channel.used); } // Unused channels if stack_behavior.disable_other_channels { - // The first channel contains (or not) the top od the stack and is constrained elsewhere. + // The first channel contains (or not) the top of the stack and is constrained elsewhere. for i in max(1, stack_behavior.num_pops)..NUM_GP_CHANNELS - (stack_behavior.pushes as usize) { let channel = lv.mem_channels[i]; @@ -379,6 +383,12 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> } } + // You can't have a write of the top of the stack, so you disable the corresponding flag. + { + let constr = builder.mul_extension(filter, lv.partial_channel.used); + yield_constr.constraint(builder, constr); + } + // If you also push, you don't need to read the new top of the stack. // If you don't: // - if the stack isn't empty after the pops, you read the new top from an extra pop. @@ -443,7 +453,8 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> // If len > 0... let new_filter = builder.mul_extension(lv.stack_len, filter); // You write the previous top of the stack in memory, in the last channel. 
- let channel = lv.mem_channels[NUM_GP_CHANNELS - 1]; + // The value will be checked with the CTL + let channel = lv.partial_channel; { let constr = builder.mul_sub_extension(new_filter, channel.used, new_filter); yield_constr.constraint(builder, constr); @@ -473,11 +484,6 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> let constr = builder.arithmetic_extension(F::ONE, F::ONE, new_filter, diff, new_filter); yield_constr.constraint(builder, constr); } - for (limb_ch, limb_top) in channel.value.iter().zip(lv.mem_channels[0].value.iter()) { - let diff = builder.sub_extension(*limb_ch, *limb_top); - let constr = builder.mul_extension(new_filter, diff); - yield_constr.constraint(builder, constr); - } // Else you disable the channel. { let diff = builder.mul_extension(lv.stack_len, lv.general.stack().stack_inv); @@ -509,11 +515,17 @@ pub(crate) fn eval_ext_circuit_one, const D: usize> yield_constr.constraint(builder, constr); } } + + // You can't have a write of the top of the stack, so you disable the corresponding flag. + { + let constr = builder.mul_extension(filter, lv.partial_channel.used); + yield_constr.constraint(builder, constr); + } } // Unused channels if stack_behavior.disable_other_channels { - // The first channel contains (or not) the top od the stack and is constrained elsewhere. + // The first channel contains (or not) the top of the stack and is constrained elsewhere. 
for i in max(1, stack_behavior.num_pops)..NUM_GP_CHANNELS - (stack_behavior.pushes as usize) { let channel = lv.mem_channels[i]; diff --git a/evm/src/cpu/syscalls_exceptions.rs b/evm/src/cpu/syscalls_exceptions.rs index 49e28f7d..4fc71258 100644 --- a/evm/src/cpu/syscalls_exceptions.rs +++ b/evm/src/cpu/syscalls_exceptions.rs @@ -82,8 +82,8 @@ pub(crate) fn eval_packed( yield_constr.constraint(filter_exception * (channel.addr_virtual - limb_address_exception)); } - // Disable unused channels (the last channel is used to push to the stack) - for channel in &lv.mem_channels[BYTES_PER_OFFSET + 1..NUM_GP_CHANNELS - 1] { + // Disable unused channels + for channel in &lv.mem_channels[BYTES_PER_OFFSET + 1..NUM_GP_CHANNELS] { yield_constr.constraint(total_filter * channel.used); } @@ -239,7 +239,7 @@ pub(crate) fn eval_ext_circuit, const D: usize>( } // Disable unused channels (the last channel is used to push to the stack) - for channel in &lv.mem_channels[BYTES_PER_OFFSET + 1..NUM_GP_CHANNELS - 1] { + for channel in &lv.mem_channels[BYTES_PER_OFFSET + 1..NUM_GP_CHANNELS] { let constr = builder.mul_extension(total_filter, channel.used); yield_constr.constraint(builder, constr); } diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index 0deba3a2..ac477931 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -320,6 +320,7 @@ fn simulate_cpu, const D: usize>( break; } } + log::info!("CPU trace padded to {} cycles", state.traces.clock()); return Ok(()); diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs index 6699a12d..59db3f97 100644 --- a/evm/src/generation/prover_input.rs +++ b/evm/src/generation/prover_input.rs @@ -143,9 +143,9 @@ impl GenerationState { } "get" => { // Return `code[i]`. - // stack: i, code_length, codehash, ... - let i = stack_peek(self, 0).map(u256_to_usize)??; - let codehash = stack_peek(self, 2)?; + // stack: context, segment, i, i, code_size, codehash, ... 
+ let i = stack_peek(self, 2).map(u256_to_usize)??; + let codehash = stack_peek(self, 5)?; Ok(self .inputs .contract_code diff --git a/evm/src/witness/gas.rs b/evm/src/witness/gas.rs index daefafc7..67339e35 100644 --- a/evm/src/witness/gas.rs +++ b/evm/src/witness/gas.rs @@ -48,7 +48,7 @@ pub(crate) fn gas_to_charge(op: Operation) -> u64 { GetContext => KERNEL_ONLY_INSTR, SetContext => KERNEL_ONLY_INSTR, Mload32Bytes => KERNEL_ONLY_INSTR, - Mstore32Bytes => KERNEL_ONLY_INSTR, + Mstore32Bytes(_) => KERNEL_ONLY_INSTR, ExitKernel => KERNEL_ONLY_INSTR, MloadGeneral => KERNEL_ONLY_INSTR, MstoreGeneral => KERNEL_ONLY_INSTR, diff --git a/evm/src/witness/memory.rs b/evm/src/witness/memory.rs index bec3a133..b7885a23 100644 --- a/evm/src/witness/memory.rs +++ b/evm/src/witness/memory.rs @@ -6,9 +6,10 @@ use crate::cpu::membus::{NUM_CHANNELS, NUM_GP_CHANNELS}; pub enum MemoryChannel { Code, GeneralPurpose(usize), + PartialChannel, } -use MemoryChannel::{Code, GeneralPurpose}; +use MemoryChannel::{Code, GeneralPurpose, PartialChannel}; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::memory::segments::Segment; @@ -24,6 +25,7 @@ impl MemoryChannel { assert!(n < NUM_GP_CHANNELS); n + 1 } + PartialChannel => NUM_GP_CHANNELS + 1, } } } diff --git a/evm/src/witness/operation.rs b/evm/src/witness/operation.rs index 64ff8566..0c30c6d6 100644 --- a/evm/src/witness/operation.rs +++ b/evm/src/witness/operation.rs @@ -3,7 +3,10 @@ use itertools::Itertools; use keccak_hash::keccak; use plonky2::field::types::Field; -use super::util::{byte_packing_log, byte_unpacking_log, push_no_write, push_with_write}; +use super::util::{ + byte_packing_log, byte_unpacking_log, mem_write_partial_log_and_fill, push_no_write, + push_with_write, +}; use crate::arithmetic::BinaryOperator; use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::aggregator::KERNEL; @@ -49,7 +52,7 @@ pub(crate) enum Operation { GetContext, SetContext, Mload32Bytes, - Mstore32Bytes, + 
Mstore32Bytes(u8), ExitKernel, MloadGeneral, MstoreGeneral, @@ -874,7 +877,7 @@ pub(crate) fn generate_mstore_general( state: &mut GenerationState, mut row: CpuColumnsView, ) -> Result<(), ProgramError> { - let [(context, _), (segment, log_in1), (virt, log_in2), (val, log_in3)] = + let [(val, _), (context, log_in1), (segment, log_in2), (virt, log_in3)] = stack_pop_with_log_and_fill::<4, _>(state, &mut row)?; let address = MemoryAddress { @@ -888,7 +891,7 @@ pub(crate) fn generate_mstore_general( .try_into() .map_err(|_| MemoryError(VirtTooLarge { virt }))?, }; - let log_write = mem_write_gp_log_and_fill(4, address, state, &mut row, val); + let log_write = mem_write_partial_log_and_fill(address, state, &mut row, val); let diff = row.stack_len - F::from_canonical_usize(4); if let Some(inv) = diff.try_inverse() { @@ -912,21 +915,23 @@ pub(crate) fn generate_mstore_general( } pub(crate) fn generate_mstore_32bytes( + n: u8, state: &mut GenerationState, mut row: CpuColumnsView, ) -> Result<(), ProgramError> { - let [(context, _), (segment, log_in1), (base_virt, log_in2), (val, log_in3), (len, log_in4)] = - stack_pop_with_log_and_fill::<5, _>(state, &mut row)?; - let len = u256_to_usize(len)?; + let [(context, _), (segment, log_in1), (base_virt, log_in2), (val, log_in3)] = + stack_pop_with_log_and_fill::<4, _>(state, &mut row)?; let base_address = MemoryAddress::new_u256s(context, segment, base_virt)?; - byte_unpacking_log(state, base_address, val, len); + byte_unpacking_log(state, base_address, val, n as usize); + + let new_offset = base_virt + n; + push_no_write(state, new_offset); state.traces.push_memory(log_in1); state.traces.push_memory(log_in2); state.traces.push_memory(log_in3); - state.traces.push_memory(log_in4); state.traces.push_cpu(row); Ok(()) } diff --git a/evm/src/witness/transition.rs b/evm/src/witness/transition.rs index 19d55ecd..189e9e42 100644 --- a/evm/src/witness/transition.rs +++ b/evm/src/witness/transition.rs @@ -134,7 +134,7 @@ pub(crate) fn 
decode(registers: RegistersState, opcode: u8) -> Result Ok(Operation::Mstore32Bytes), + (0xc0..=0xdf, true) => Ok(Operation::Mstore32Bytes(opcode - 0xc0 + 1)), (0xf0, _) => Ok(Operation::Syscall(opcode, 3, false)), // CREATE (0xf1, _) => Ok(Operation::Syscall(opcode, 7, false)), // CALL (0xf2, _) => Ok(Operation::Syscall(opcode, 7, false)), // CALLCODE @@ -179,7 +179,7 @@ fn fill_op_flag(op: Operation, row: &mut CpuColumnsView) { Operation::Pc | Operation::Push(0) => &mut flags.pc_push0, Operation::GetContext | Operation::SetContext => &mut flags.context_op, Operation::Mload32Bytes => &mut flags.mload_32bytes, - Operation::Mstore32Bytes => &mut flags.mstore_32bytes, + Operation::Mstore32Bytes(_) => &mut flags.mstore_32bytes, Operation::ExitKernel => &mut flags.exit_kernel, Operation::MloadGeneral | Operation::MstoreGeneral => &mut flags.m_op_general, } = F::ONE; @@ -211,7 +211,7 @@ fn get_op_special_length(op: Operation) -> Option { Operation::Jumpi => JUMPI_OP, Operation::GetContext | Operation::SetContext => None, Operation::Mload32Bytes => STACK_BEHAVIORS.mload_32bytes, - Operation::Mstore32Bytes => STACK_BEHAVIORS.mstore_32bytes, + Operation::Mstore32Bytes(_) => STACK_BEHAVIORS.mstore_32bytes, Operation::ExitKernel => STACK_BEHAVIORS.exit_kernel, Operation::MloadGeneral | Operation::MstoreGeneral => STACK_BEHAVIORS.m_op_general, }; @@ -258,7 +258,7 @@ fn perform_op( Operation::GetContext => generate_get_context(state, row)?, Operation::SetContext => generate_set_context(state, row)?, Operation::Mload32Bytes => generate_mload_32bytes(state, row)?, - Operation::Mstore32Bytes => generate_mstore_32bytes(state, row)?, + Operation::Mstore32Bytes(n) => generate_mstore_32bytes(n, state, row)?, Operation::ExitKernel => generate_exit_kernel(state, row)?, Operation::MloadGeneral => generate_mload_general(state, row)?, Operation::MstoreGeneral => generate_mstore_general(state, row)?, diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs index a87ad50b..0e91590c 
100644 --- a/evm/src/witness/util.rs +++ b/evm/src/witness/util.rs @@ -5,7 +5,7 @@ use super::memory::DUMMY_MEMOP; use crate::byte_packing::byte_packing_stark::BytePackingOp; use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::keccak_util::keccakf_u8s; -use crate::cpu::membus::{NUM_CHANNELS, NUM_GP_CHANNELS}; +use crate::cpu::membus::NUM_CHANNELS; use crate::cpu::stack_bounds::MAX_USER_STACK_SIZE; use crate::generation::state::GenerationState; use crate::keccak_sponge::columns::{KECCAK_RATE_BYTES, KECCAK_WIDTH_BYTES}; @@ -91,18 +91,13 @@ pub(crate) fn push_with_write( Segment::Stack, state.registers.stack_len - 1, ); - let res = mem_write_gp_log_and_fill( - NUM_GP_CHANNELS - 1, - address, - state, - row, - state.registers.stack_top, - ); + let res = mem_write_partial_log_and_fill(address, state, row, state.registers.stack_top); Some(res) }; push_no_write(state, val); if let Some(log) = write { state.traces.push_memory(log); + row.partial_channel.used = F::ONE; } Ok(()) } @@ -200,6 +195,25 @@ pub(crate) fn mem_write_gp_log_and_fill( op } +pub(crate) fn mem_write_partial_log_and_fill( + address: MemoryAddress, + state: &GenerationState, + row: &mut CpuColumnsView, + val: U256, +) -> MemoryOp { + let op = mem_write_log(MemoryChannel::PartialChannel, address, state, val); + + let channel = &mut row.partial_channel; + assert!(channel.used.is_zero()); + channel.used = F::ONE; + channel.is_read = F::ZERO; + channel.addr_context = F::from_canonical_usize(address.context); + channel.addr_segment = F::from_canonical_usize(address.segment); + channel.addr_virtual = F::from_canonical_usize(address.virt); + + op +} + // Channel 0 already contains the top of the stack. You only need to read // from the second popped element. // If the resulting stack isn't empty, update `stack_top`. 
diff --git a/evm/tests/empty_txn_list.rs b/evm/tests/empty_txn_list.rs index 3072b48b..d2e8ca33 100644 --- a/evm/tests/empty_txn_list.rs +++ b/evm/tests/empty_txn_list.rs @@ -74,7 +74,7 @@ fn test_empty_txn_list() -> anyhow::Result<()> { let all_circuits = AllRecursiveCircuits::::new( &all_stark, - &[16..17, 10..11, 15..16, 14..15, 9..10, 12..13, 18..19], // Minimal ranges to prove an empty list + &[16..17, 10..11, 15..16, 14..15, 10..11, 12..13, 18..19], // Minimal ranges to prove an empty list &config, );