From 14c2a6dd1ff0f7c08c934373ed47f1910930db7a Mon Sep 17 00:00:00 2001 From: "Brandon H. Gomes" Date: Tue, 15 Nov 2022 01:51:29 -0500 Subject: [PATCH] Fork Update (#3) * Use static `KERNEL` in tests * Print opcode count * Update criterion * Combine all syscalls into one flag (#802) * Combine all syscalls into one flag * Minor: typo * Daniel PR comments * Check that `le_sum` won't overflow * security notes * Test reverse_index_bits Thanks to Least Authority for this * clippy * EVM shift left/right operations (#801) * First parts of shift implementation. * Disable range check errors. * Tidy up ASM. * Update comments; fix some .sum() expressions. * First full draft of shift left/right. * Missed a +1. * Clippy. * Address Jacqui's comments. * Add comment. * Fix missing filter. * Address second round of comments from Jacqui. * Remove signed operation placeholders from arithmetic table. (#812) Co-authored-by: wborgeaud Co-authored-by: Daniel Lubarov Co-authored-by: Jacqueline Nabaglo Co-authored-by: Hamish Ivey-Law <426294+unzvfu@users.noreply.github.com> --- README.md | 6 +- evm/Cargo.toml | 3 +- evm/src/arithmetic/columns.rs | 19 +- evm/src/arithmetic/compare.rs | 2 - evm/src/arithmetic/utils.rs | 30 +-- evm/src/cpu/columns/general.rs | 18 ++ evm/src/cpu/columns/ops.rs | 67 +----- evm/src/cpu/control_flow.rs | 31 ++- evm/src/cpu/cpu_stark.rs | 6 +- evm/src/cpu/decode.rs | 87 +------- evm/src/cpu/kernel/aggregator.rs | 3 + evm/src/cpu/kernel/asm/core/syscall.asm | 160 ++++++++++++++ evm/src/cpu/kernel/asm/core/syscall_stubs.asm | 53 +++++ evm/src/cpu/kernel/asm/main.asm | 4 +- evm/src/cpu/kernel/asm/shift.asm | 25 +++ evm/src/cpu/kernel/assembler.rs | 37 ++-- evm/src/cpu/kernel/ast.rs | 2 + evm/src/cpu/kernel/evm_asm.pest | 3 +- evm/src/cpu/kernel/interpreter.rs | 184 +++++++++++++++- evm/src/cpu/kernel/parser.rs | 3 + evm/src/cpu/kernel/tests/curve_ops.rs | 133 ++++-------- evm/src/cpu/kernel/tests/ecrecover.rs | 37 +--- evm/src/cpu/kernel/tests/exp.rs | 25 +-- evm/src/cpu/kernel/tests/fields.rs | 9 +- evm/src/cpu/kernel/tests/hash.rs | 11 +- evm/src/cpu/kernel/tests/ripemd.rs | 9 +- evm/src/cpu/mod.rs | 1 + evm/src/cpu/shift.rs | 108 ++++++++++ evm/src/cpu/stack.rs | 62 +----- evm/src/cpu/syscalls.rs | 204 ++++++++++++------ evm/src/memory/segments.rs | 8 +- plonky2/Cargo.toml | 2 +- plonky2/src/gadgets/split_base.rs | 6 + system_zero/Cargo.toml | 2 +- util/Cargo.toml | 1 + util/src/lib.rs | 55 +++++ 36 files changed, 942 insertions(+), 474 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/core/syscall.asm create mode 100644 evm/src/cpu/kernel/asm/core/syscall_stubs.asm create mode 100644 evm/src/cpu/kernel/asm/shift.asm create mode 100644 evm/src/cpu/shift.rs diff --git a/README.md b/README.md index 7b924698..59fc4d09 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,11 @@ Jemalloc is known to cause crashes when a binary compiled for x86 is run on an A As this is a monorepo, see the individual crates within for license information. -## Disclaimer +## Security This code has not yet been audited, and should not be used in any production systems. +While Plonky2 is configurable, its defaults generally target 100 bits of security. The default FRI configuration targets 100 bits of *conjectured* security based on the conjecture in [ethSTARK](https://eprint.iacr.org/2021/582). + +Plonky2's default hash function is Poseidon, configured with 8 full rounds, 22 partial rounds, a width of 12 field elements (each ~64 bits), and an S-box of `x^7`. 
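For intuition only, here is a small illustrative sketch of that S-box and of the number of S-box evaluations one permutation implies, assuming the ~64-bit field is plonky2's Goldilocks field (p = 2^64 - 2^32 + 1); this is a toy calculation, not the library's Poseidon implementation:

```rust
// Toy sketch (assumptions noted above): the x^7 S-box over the Goldilocks
// prime, computed with u128 intermediates, plus the implied S-box count.
const P: u128 = 0xFFFF_FFFF_0000_0001; // 2^64 - 2^32 + 1 (assumed field)

fn sbox(x: u128) -> u128 {
    let x = x % P;
    let x2 = x * x % P;
    let x4 = x2 * x2 % P;
    x4 * x2 % P * x % P // x^7 = x^4 * x^2 * x
}

fn main() {
    // 8 full rounds apply the S-box to all 12 limbs; 22 partial rounds to one limb each.
    let sboxes_per_permutation = 12 * 8 + 22;
    println!("S-box evaluations per permutation: {sboxes_per_permutation}"); // 118
    println!("sbox(3) = {}", sbox(3)); // 3^7 = 2187
}
```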
[BBLP22](https://tosc.iacr.org/index.php/ToSC/article/view/9850) suggests that this configuration may have around 95 bits of security, falling a bit short of our 100 bit target. + diff --git a/evm/Cargo.toml b/evm/Cargo.toml index cdb8b5d6..5b48f524 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -28,10 +28,11 @@ rlp = "0.5.1" rlp-derive = "0.1.0" serde = { version = "1.0.144", features = ["derive"] } sha2 = "0.10.2" +static_assertions = "1.1.0" tiny-keccak = "2.0.2" [dev-dependencies] -criterion = "0.3.5" +criterion = "0.4.0" hex = "0.4.3" [features] diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs index 10bf72d9..923fbc73 100644 --- a/evm/src/arithmetic/columns.rs +++ b/evm/src/arithmetic/columns.rs @@ -22,25 +22,20 @@ pub const IS_ADD: usize = 0; pub const IS_MUL: usize = IS_ADD + 1; pub const IS_SUB: usize = IS_MUL + 1; pub const IS_DIV: usize = IS_SUB + 1; -pub const IS_SDIV: usize = IS_DIV + 1; -pub const IS_MOD: usize = IS_SDIV + 1; -pub const IS_SMOD: usize = IS_MOD + 1; -pub const IS_ADDMOD: usize = IS_SMOD + 1; +pub const IS_MOD: usize = IS_DIV + 1; +pub const IS_ADDMOD: usize = IS_MOD + 1; pub const IS_SUBMOD: usize = IS_ADDMOD + 1; pub const IS_MULMOD: usize = IS_SUBMOD + 1; pub const IS_LT: usize = IS_MULMOD + 1; pub const IS_GT: usize = IS_LT + 1; -pub const IS_SLT: usize = IS_GT + 1; -pub const IS_SGT: usize = IS_SLT + 1; -pub const IS_SHL: usize = IS_SGT + 1; +pub const IS_SHL: usize = IS_GT + 1; pub const IS_SHR: usize = IS_SHL + 1; -pub const IS_SAR: usize = IS_SHR + 1; -const START_SHARED_COLS: usize = IS_SAR + 1; +const START_SHARED_COLS: usize = IS_SHR + 1; -pub(crate) const ALL_OPERATIONS: [usize; 17] = [ - IS_ADD, IS_MUL, IS_SUB, IS_DIV, IS_SDIV, IS_MOD, IS_SMOD, IS_ADDMOD, IS_SUBMOD, IS_MULMOD, - IS_LT, IS_GT, IS_SLT, IS_SGT, IS_SHL, IS_SHR, IS_SAR, +pub(crate) const ALL_OPERATIONS: [usize; 12] = [ + IS_ADD, IS_MUL, IS_SUB, IS_DIV, IS_MOD, IS_ADDMOD, IS_SUBMOD, IS_MULMOD, IS_LT, IS_GT, IS_SHL, + IS_SHR, ]; /// Within the Arithmetic Unit, there are shared columns which can be diff --git a/evm/src/arithmetic/compare.rs b/evm/src/arithmetic/compare.rs index 55dc5764..a9dd785d 100644 --- a/evm/src/arithmetic/compare.rs +++ b/evm/src/arithmetic/compare.rs @@ -35,8 +35,6 @@ pub(crate) fn generate(lv: &mut [F; NUM_ARITH_COLUMNS], op: usize) IS_LT => u256_sub_br(input0, input1), // input1 - input0 == diff + br*2^256 IS_GT => u256_sub_br(input1, input0), - IS_SLT => todo!(), - IS_SGT => todo!(), _ => panic!("op code not a comparison"), }; diff --git a/evm/src/arithmetic/utils.rs b/evm/src/arithmetic/utils.rs index 74999ab4..ec989c94 100644 --- a/evm/src/arithmetic/utils.rs +++ b/evm/src/arithmetic/utils.rs @@ -1,6 +1,5 @@ use std::ops::{Add, AddAssign, Mul, Neg, Range, Shr, Sub, SubAssign}; -use log::error; use plonky2::field::extension::Extendable; use plonky2::hash::hash_types::RichField; use plonky2::iop::ext_target::ExtensionTarget; @@ -11,21 +10,24 @@ use crate::arithmetic::columns::{NUM_ARITH_COLUMNS, N_LIMBS}; /// Emit an error message regarding unchecked range assumptions. /// Assumes the values in `cols` are `[cols[0], cols[0] + 1, ..., /// cols[0] + cols.len() - 1]`. +/// +/// TODO: Hamish to delete this when he has implemented and integrated +/// range checks. 
pub(crate) fn _range_check_error( - file: &str, - line: u32, - cols: Range, - signedness: &str, + _file: &str, + _line: u32, + _cols: Range, + _signedness: &str, ) { - error!( - "{}:{}: arithmetic unit skipped {}-bit {} range-checks on columns {}--{}: not yet implemented", - line, - file, - RC_BITS, - signedness, - cols.start, - cols.end - 1, - ); + // error!( + // "{}:{}: arithmetic unit skipped {}-bit {} range-checks on columns {}--{}: not yet implemented", + // line, + // file, + // RC_BITS, + // signedness, + // cols.start, + // cols.end - 1, + // ); } #[macro_export] diff --git a/evm/src/cpu/columns/general.rs b/evm/src/cpu/columns/general.rs index 134788dc..5a2c9426 100644 --- a/evm/src/cpu/columns/general.rs +++ b/evm/src/cpu/columns/general.rs @@ -9,6 +9,7 @@ pub(crate) union CpuGeneralColumnsView { arithmetic: CpuArithmeticView, logic: CpuLogicView, jumps: CpuJumpsView, + shift: CpuShiftView, } impl CpuGeneralColumnsView { @@ -51,6 +52,16 @@ impl CpuGeneralColumnsView { pub(crate) fn jumps_mut(&mut self) -> &mut CpuJumpsView { unsafe { &mut self.jumps } } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn shift(&self) -> &CpuShiftView { + unsafe { &self.shift } + } + + // SAFETY: Each view is a valid interpretation of the underlying array. + pub(crate) fn shift_mut(&mut self) -> &mut CpuShiftView { + unsafe { &mut self.shift } + } } impl PartialEq for CpuGeneralColumnsView { @@ -144,5 +155,12 @@ pub(crate) struct CpuJumpsView { pub(crate) should_trap: T, } +#[derive(Copy, Clone)] +pub(crate) struct CpuShiftView { + // For a shift amount of displacement: [T], this is the inverse of + // sum(displacement[1..]) or zero if the sum is zero. + pub(crate) high_limb_sum_inv: T, +} + // `u8` is guaranteed to have a `size_of` of 1. pub const NUM_SHARED_COLUMNS: usize = size_of::>(); diff --git a/evm/src/cpu/columns/ops.rs b/evm/src/cpu/columns/ops.rs index 04d4d0f2..c265be44 100644 --- a/evm/src/cpu/columns/ops.rs +++ b/evm/src/cpu/columns/ops.rs @@ -7,106 +7,59 @@ use crate::util::{indices_arr, transmute_no_compile_time_size_checks}; #[repr(C)] #[derive(Eq, PartialEq, Debug)] pub struct OpsColumnsView { - pub stop: T, + // TODO: combine ADD, MUL, SUB, DIV, MOD, ADDFP254, MULFP254, SUBFP254, LT, and GT into one flag pub add: T, pub mul: T, pub sub: T, pub div: T, - pub sdiv: T, pub mod_: T, - pub smod: T, + // TODO: combine ADDMOD, MULMOD into one flag pub addmod: T, pub mulmod: T, - pub exp: T, - pub signextend: T, pub addfp254: T, pub mulfp254: T, pub subfp254: T, pub lt: T, pub gt: T, - pub slt: T, - pub sgt: T, pub eq: T, // Note: This column must be 0 when is_cpu_cycle = 0. pub iszero: T, // Note: This column must be 0 when is_cpu_cycle = 0. 
+ // TODO: combine AND, OR, and XOR into one flag pub and: T, pub or: T, pub xor: T, pub not: T, pub byte: T, + // TODO: combine SHL and SHR into one flag pub shl: T, pub shr: T, - pub sar: T, - pub keccak256: T, pub keccak_general: T, - pub address: T, - pub balance: T, - pub origin: T, - pub caller: T, - pub callvalue: T, - pub calldataload: T, - pub calldatasize: T, - pub calldatacopy: T, - pub codesize: T, - pub codecopy: T, - pub gasprice: T, - pub extcodesize: T, - pub extcodecopy: T, - pub returndatasize: T, - pub returndatacopy: T, - pub extcodehash: T, - pub blockhash: T, - pub coinbase: T, - pub timestamp: T, - pub number: T, - pub difficulty: T, - pub gaslimit: T, - pub chainid: T, - pub selfbalance: T, - pub basefee: T, pub prover_input: T, pub pop: T, - pub mload: T, - pub mstore: T, - pub mstore8: T, - pub sload: T, - pub sstore: T, + // TODO: combine JUMP and JUMPI into one flag pub jump: T, // Note: This column must be 0 when is_cpu_cycle = 0. pub jumpi: T, // Note: This column must be 0 when is_cpu_cycle = 0. pub pc: T, - pub msize: T, pub gas: T, pub jumpdest: T, + // TODO: combine GET_STATE_ROOT and SET_STATE_ROOT into one flag pub get_state_root: T, pub set_state_root: T, + // TODO: combine GET_RECEIPT_ROOT and SET_RECEIPT_ROOT into one flag pub get_receipt_root: T, pub set_receipt_root: T, pub push: T, pub dup: T, pub swap: T, - pub log0: T, - pub log1: T, - pub log2: T, - pub log3: T, - pub log4: T, - // PANIC does not get a flag; it fails at the decode stage. - pub create: T, - pub call: T, - pub callcode: T, - pub return_: T, - pub delegatecall: T, - pub create2: T, + // TODO: combine GET_CONTEXT and SET_CONTEXT into one flag pub get_context: T, pub set_context: T, pub consume_gas: T, pub exit_kernel: T, - pub staticcall: T, + // TODO: combine MLOAD_GENERAL and MSTORE_GENERAL into one flag pub mload_general: T, pub mstore_general: T, - pub revert: T, - pub selfdestruct: T, - // TODO: this doesn't actually need its own flag. We can just do `1 - sum(all other flags)`. - pub invalid: T, + pub syscall: T, } // `u8` is guaranteed to have a `size_of` of 1. diff --git a/evm/src/cpu/control_flow.rs b/evm/src/cpu/control_flow.rs index c7b7c6bb..ba0bbd3b 100644 --- a/evm/src/cpu/control_flow.rs +++ b/evm/src/cpu/control_flow.rs @@ -8,36 +8,49 @@ use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer use crate::cpu::columns::{CpuColumnsView, COL_MAP}; use crate::cpu::kernel::aggregator::KERNEL; -// TODO: This list is incomplete. 
-const NATIVE_INSTRUCTIONS: [usize; 28] = [ +const NATIVE_INSTRUCTIONS: [usize; 37] = [ COL_MAP.op.add, COL_MAP.op.mul, COL_MAP.op.sub, COL_MAP.op.div, - COL_MAP.op.sdiv, COL_MAP.op.mod_, - COL_MAP.op.smod, COL_MAP.op.addmod, COL_MAP.op.mulmod, - COL_MAP.op.signextend, COL_MAP.op.addfp254, COL_MAP.op.mulfp254, COL_MAP.op.subfp254, COL_MAP.op.lt, COL_MAP.op.gt, - COL_MAP.op.slt, - COL_MAP.op.sgt, COL_MAP.op.eq, COL_MAP.op.iszero, COL_MAP.op.and, COL_MAP.op.or, COL_MAP.op.xor, COL_MAP.op.not, - COL_MAP.op.byte, COL_MAP.op.shl, COL_MAP.op.shr, - COL_MAP.op.sar, + COL_MAP.op.keccak_general, + COL_MAP.op.prover_input, COL_MAP.op.pop, + // not JUMP (need to jump) + // not JUMPI (possible need to jump) + COL_MAP.op.pc, + COL_MAP.op.gas, + COL_MAP.op.jumpdest, + COL_MAP.op.get_state_root, + COL_MAP.op.set_state_root, + COL_MAP.op.get_receipt_root, + COL_MAP.op.set_receipt_root, + // not PUSH (need to increment by more than 1) + COL_MAP.op.dup, + COL_MAP.op.swap, + COL_MAP.op.get_context, + COL_MAP.op.set_context, + COL_MAP.op.consume_gas, + // not EXIT_KERNEL (performs a jump) + COL_MAP.op.mload_general, + COL_MAP.op.mstore_general, + // not SYSCALL (performs a jump) ]; fn get_halt_pcs() -> (F, F) { diff --git a/evm/src/cpu/cpu_stark.rs b/evm/src/cpu/cpu_stark.rs index 7b34cc4f..4cc38823 100644 --- a/evm/src/cpu/cpu_stark.rs +++ b/evm/src/cpu/cpu_stark.rs @@ -11,8 +11,8 @@ use plonky2::hash::hash_types::RichField; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cpu::columns::{CpuColumnsView, COL_MAP, NUM_CPU_COLUMNS}; use crate::cpu::{ - bootstrap_kernel, control_flow, decode, dup_swap, jumps, membus, modfp254, simple_logic, stack, - stack_bounds, syscalls, + bootstrap_kernel, control_flow, decode, dup_swap, jumps, membus, modfp254, shift, simple_logic, + stack, stack_bounds, syscalls, }; use crate::cross_table_lookup::Column; use crate::memory::segments::Segment; @@ -151,6 +151,7 @@ impl, const D: usize> Stark for CpuStark, const D: usize> Stark for CpuStark(lv: &mut CpuColumnsView) { assert!(kernel <= 1); let kernel = kernel != 0; - let mut any_flag_set = false; for (oc, block_length, kernel_only, col) in OPCODES { let available = !kernel_only || kernel; let opcode_match = top_bits[8 - block_length] == oc; let flag = available && opcode_match; lv[col] = F::from_bool(flag); - if flag && any_flag_set { - panic!("opcode matched multiple flags"); - } - any_flag_set = any_flag_set || flag; } - // is_invalid is a catch-all for opcodes we can't decode. - lv.op.invalid = F::from_bool(!any_flag_set); } /// Break up an opcode (which is 8 bits long) into its eight bits. @@ -238,14 +176,12 @@ pub fn eval_packed_generic( let flag = lv[flag_col]; yield_constr.constraint(cycle_filter * flag * (flag - P::ONES)); } - yield_constr.constraint(cycle_filter * lv.op.invalid * (lv.op.invalid - P::ONES)); - // Now check that exactly one is 1. + // Now check that they sum to 0 or 1. let flag_sum: P = OPCODES .into_iter() .map(|(_, _, _, flag_col)| lv[flag_col]) - .sum::
<P>
() - + lv.op.invalid; - yield_constr.constraint(cycle_filter * (P::ONES - flag_sum)); + .sum::
<P>
(); + yield_constr.constraint(cycle_filter * flag_sum * (flag_sum - P::ONES)); // Finally, classify all opcodes, together with the kernel flag, into blocks for (oc, block_length, kernel_only, col) in OPCODES { @@ -308,20 +244,15 @@ pub fn eval_ext_circuit, const D: usize>( let constr = builder.mul_extension(cycle_filter, constr); yield_constr.constraint(builder, constr); } + // Now check that they sum to 0 or 1. { - let constr = builder.mul_sub_extension(lv.op.invalid, lv.op.invalid, lv.op.invalid); - let constr = builder.mul_extension(cycle_filter, constr); - yield_constr.constraint(builder, constr); - } - // Now check that exactly one is 1. - { - let mut constr = builder.one_extension(); + let mut flag_sum = builder.zero_extension(); for (_, _, _, flag_col) in OPCODES { let flag = lv[flag_col]; - constr = builder.sub_extension(constr, flag); + flag_sum = builder.add_extension(flag_sum, flag); } - constr = builder.sub_extension(constr, lv.op.invalid); - constr = builder.mul_extension(cycle_filter, constr); + let constr = builder.mul_sub_extension(flag_sum, flag_sum, flag_sum); + let constr = builder.mul_extension(cycle_filter, constr); yield_constr.constraint(builder, constr); } diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 49398288..9ccfcd15 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -18,6 +18,8 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/core/invalid.asm"), include_str!("asm/core/nonce.asm"), include_str!("asm/core/process_txn.asm"), + include_str!("asm/core/syscall.asm"), + include_str!("asm/core/syscall_stubs.asm"), include_str!("asm/core/terminate.asm"), include_str!("asm/core/transfer.asm"), include_str!("asm/core/util.asm"), @@ -75,6 +77,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/sha2/store_pad.asm"), include_str!("asm/sha2/temp_words.asm"), include_str!("asm/sha2/write_length.asm"), + include_str!("asm/shift.asm"), include_str!("asm/transactions/router.asm"), include_str!("asm/transactions/type_0.asm"), include_str!("asm/transactions/type_1.asm"), diff --git a/evm/src/cpu/kernel/asm/core/syscall.asm b/evm/src/cpu/kernel/asm/core/syscall.asm new file mode 100644 index 00000000..2de50993 --- /dev/null +++ b/evm/src/cpu/kernel/asm/core/syscall.asm @@ -0,0 +1,160 @@ +global syscall_jumptable: + // 0x00-0x0f + JUMPTABLE sys_stop + JUMPTABLE panic // add is implemented natively + JUMPTABLE panic // mul is implemented natively + JUMPTABLE panic // sub is implemented natively + JUMPTABLE panic // div is implemented natively + JUMPTABLE sys_sdiv + JUMPTABLE panic // mod is implemented natively + JUMPTABLE sys_smod + JUMPTABLE panic // addmod is implemented natively + JUMPTABLE panic // mulmod is implemented natively + JUMPTABLE sys_exp + JUMPTABLE sys_signextend + JUMPTABLE panic // 0x0c is an invalid opcode + JUMPTABLE panic // 0x0d is an invalid opcode + JUMPTABLE panic // 0x0e is an invalid opcode + JUMPTABLE panic // 0x0f is an invalid opcode + + // 0x10-0x1f + JUMPTABLE panic // lt is implemented natively + JUMPTABLE panic // gt is implemented natively + JUMPTABLE sys_slt + JUMPTABLE sys_sgt + JUMPTABLE panic // eq is implemented natively + JUMPTABLE panic // iszero is implemented natively + JUMPTABLE panic // and is implemented natively + JUMPTABLE panic // or is implemented natively + JUMPTABLE panic // xor is implemented natively + JUMPTABLE panic // not is implemented natively + JUMPTABLE panic // byte is implemented natively + JUMPTABLE panic // shl 
is implemented natively + JUMPTABLE panic // shr is implemented natively + JUMPTABLE sys_sar + JUMPTABLE panic // 0x1e is an invalid opcode + JUMPTABLE panic // 0x1f is an invalid opcode + + // 0x20-0x2f + JUMPTABLE sys_keccak256 + %rep 15 + JUMPTABLE panic // 0x21-0x2f are invalid opcodes + %endrep + + // 0x30-0x3f + JUMPTABLE sys_address + JUMPTABLE sys_balance + JUMPTABLE sys_origin + JUMPTABLE sys_caller + JUMPTABLE sys_callvalue + JUMPTABLE sys_calldataload + JUMPTABLE sys_calldatasize + JUMPTABLE sys_calldatacopy + JUMPTABLE sys_codesize + JUMPTABLE sys_codecopy + JUMPTABLE sys_gasprice + JUMPTABLE sys_extcodesize + JUMPTABLE sys_extcodecopy + JUMPTABLE sys_returndatasize + JUMPTABLE sys_returndatacopy + JUMPTABLE sys_extcodehash + + // 0x40-0x4f + JUMPTABLE sys_blockhash + JUMPTABLE sys_coinbase + JUMPTABLE sys_timestamp + JUMPTABLE sys_number + JUMPTABLE sys_prevrandao + JUMPTABLE sys_gaslimit + JUMPTABLE sys_chainid + JUMPTABLE sys_selfbalance + JUMPTABLE sys_basefee + %rep 7 + JUMPTABLE panic // 0x49-0x4f are invalid opcodes + %endrep + + // 0x50-0x5f + JUMPTABLE panic // pop is implemented natively + JUMPTABLE sys_mload + JUMPTABLE sys_mstore + JUMPTABLE sys_mstore8 + JUMPTABLE sys_sload + JUMPTABLE sys_sstore + JUMPTABLE panic // jump is implemented natively + JUMPTABLE panic // jumpi is implemented natively + JUMPTABLE panic // pc is implemented natively + JUMPTABLE sys_msize + JUMPTABLE panic // gas is implemented natively + JUMPTABLE panic // jumpdest is implemented natively + JUMPTABLE panic // 0x5c is an invalid opcode + JUMPTABLE panic // 0x5d is an invalid opcode + JUMPTABLE panic // 0x5e is an invalid opcode + JUMPTABLE panic // 0x5f is an invalid opcode + + // 0x60-0x6f + %rep 16 + JUMPTABLE panic // push1-push16 are implemented natively + %endrep + + // 0x70-0x7f + %rep 16 + JUMPTABLE panic // push17-push32 are implemented natively + %endrep + + // 0x80-0x8f + %rep 16 + JUMPTABLE panic // dup1-dup16 are implemented natively + %endrep + + // 0x90-0x9f + %rep 16 + JUMPTABLE panic // swap1-swap16 are implemented natively + %endrep + + // 0xa0-0xaf + JUMPTABLE sys_log0 + JUMPTABLE sys_log1 + JUMPTABLE sys_log2 + JUMPTABLE sys_log3 + JUMPTABLE sys_log4 + %rep 11 + JUMPTABLE panic // 0xa5-0xaf are invalid opcodes + %endrep + + // 0xb0-0xbf + %rep 16 + JUMPTABLE panic // 0xb0-0xbf are invalid opcodes + %endrep + + // 0xc0-0xcf + %rep 16 + JUMPTABLE panic // 0xc0-0xcf are invalid opcodes + %endrep + + // 0xd0-0xdf + %rep 16 + JUMPTABLE panic // 0xd0-0xdf are invalid opcodes + %endrep + + // 0xe0-0xef + %rep 16 + JUMPTABLE panic // 0xe0-0xef are invalid opcodes + %endrep + + // 0xf0-0xff + JUMPTABLE sys_create + JUMPTABLE sys_call + JUMPTABLE sys_callcode + JUMPTABLE sys_return + JUMPTABLE sys_delegatecall + JUMPTABLE sys_create2 + JUMPTABLE panic // 0xf6 is an invalid opcode + JUMPTABLE panic // 0xf7 is an invalid opcode + JUMPTABLE panic // 0xf8 is an invalid opcode + JUMPTABLE panic // 0xf9 is an invalid opcode + JUMPTABLE sys_staticcall + JUMPTABLE panic // 0xfb is an invalid opcode + JUMPTABLE panic // 0xfc is an invalid opcode + JUMPTABLE sys_revert + JUMPTABLE panic // 0xfe is an invalid opcode + JUMPTABLE sys_selfdestruct diff --git a/evm/src/cpu/kernel/asm/core/syscall_stubs.asm b/evm/src/cpu/kernel/asm/core/syscall_stubs.asm new file mode 100644 index 00000000..d39d8145 --- /dev/null +++ b/evm/src/cpu/kernel/asm/core/syscall_stubs.asm @@ -0,0 +1,53 @@ +// Labels for unimplemented syscalls to make the kernel assemble. 
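+// They are declared back to back with no code in between, so every one of them
+// currently resolves to the single PANIC at the bottom of this file.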
+// Each label should be removed from this file once it is implemented. + +global sys_sdiv: +global sys_smod: +global sys_signextend: +global sys_slt: +global sys_sgt: +global sys_sar: +global sys_keccak256: +global sys_address: +global sys_balance: +global sys_origin: +global sys_caller: +global sys_callvalue: +global sys_calldataload: +global sys_calldatasize: +global sys_calldatacopy: +global sys_codesize: +global sys_codecopy: +global sys_gasprice: +global sys_extcodesize: +global sys_extcodecopy: +global sys_returndatasize: +global sys_returndatacopy: +global sys_extcodehash: +global sys_blockhash: +global sys_coinbase: +global sys_timestamp: +global sys_number: +global sys_prevrandao: +global sys_gaslimit: +global sys_chainid: +global sys_selfbalance: +global sys_basefee: +global sys_mload: +global sys_mstore: +global sys_mstore8: +global sys_sload: +global sys_sstore: +global sys_msize: +global sys_log0: +global sys_log1: +global sys_log2: +global sys_log3: +global sys_log4: +global sys_create: +global sys_call: +global sys_callcode: +global sys_delegatecall: +global sys_create2: +global sys_staticcall: + PANIC diff --git a/evm/src/cpu/kernel/asm/main.asm b/evm/src/cpu/kernel/asm/main.asm index e8c8e3e4..41cb8079 100644 --- a/evm/src/cpu/kernel/asm/main.asm +++ b/evm/src/cpu/kernel/asm/main.asm @@ -1,5 +1,7 @@ global main: - // First, load all MPT data from the prover. + // First, initialise the shift table + %shift_table_init + // Second, load all MPT data from the prover. PUSH txn_loop %jump(load_all_mpts) diff --git a/evm/src/cpu/kernel/asm/shift.asm b/evm/src/cpu/kernel/asm/shift.asm new file mode 100644 index 00000000..ce481ea2 --- /dev/null +++ b/evm/src/cpu/kernel/asm/shift.asm @@ -0,0 +1,25 @@ +/// Initialise the lookup table of binary powers for doing left/right shifts +/// +/// Specifically, set SHIFT_TABLE_SEGMENT[i] = 2^i for i = 0..255. +%macro shift_table_init + push 1 // 2^0 + push 0 // initial offset is zero + push @SEGMENT_SHIFT_TABLE // segment + dup2 // kernel context is 0 + %rep 255 + // stack: context, segment, ost_i, 2^i + dup4 + dup1 + add + // stack: 2^(i+1), context, segment, ost_i, 2^i + dup4 + %increment + // stack: ost_(i+1), 2^(i+1), context, segment, ost_i, 2^i + dup4 + dup4 + // stack: context, segment, ost_(i+1), 2^(i+1), context, segment, ost_i, 2^i + %endrep + %rep 256 + mstore_general + %endrep +%endmacro diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs index aad2dd53..5f9584ba 100644 --- a/evm/src/cpu/kernel/assembler.rs +++ b/evm/src/cpu/kernel/assembler.rs @@ -300,11 +300,29 @@ fn find_labels( } Item::StandardOp(_) => *offset += 1, Item::Bytes(bytes) => *offset += bytes.len(), + Item::Jumptable(labels) => *offset += labels.len() * (BYTES_PER_OFFSET as usize), } } local_labels } +fn look_up_label( + label: &String, + local_labels: &HashMap, + global_labels: &HashMap, +) -> Vec { + let offset = local_labels + .get(label) + .or_else(|| global_labels.get(label)) + .unwrap_or_else(|| panic!("No such label: {label}")); + // We want the BYTES_PER_OFFSET least significant bytes in BE order. + // It's easiest to rev the first BYTES_PER_OFFSET bytes of the LE encoding. 
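+ // For example, if BYTES_PER_OFFSET were 3 (its value is not shown in this hunk),
+ // an offset of 0x012345 would be emitted as [0x01, 0x23, 0x45].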
+ (0..BYTES_PER_OFFSET) + .rev() + .map(|i| offset.to_le_bytes()[i as usize]) + .collect() +} + fn assemble_file( body: Vec, code: &mut Vec, @@ -327,18 +345,7 @@ fn assemble_file( Item::Push(target) => { let target_bytes: Vec = match target { PushTarget::Literal(n) => u256_to_trimmed_be_bytes(&n), - PushTarget::Label(label) => { - let offset = local_labels - .get(&label) - .or_else(|| global_labels.get(&label)) - .unwrap_or_else(|| panic!("No such label: {label}")); - // We want the BYTES_PER_OFFSET least significant bytes in BE order. - // It's easiest to rev the first BYTES_PER_OFFSET bytes of the LE encoding. - (0..BYTES_PER_OFFSET) - .rev() - .map(|i| offset.to_le_bytes()[i as usize]) - .collect() - } + PushTarget::Label(label) => look_up_label(&label, &local_labels, global_labels), PushTarget::MacroLabel(v) => panic!("Macro label not in a macro: {v}"), PushTarget::MacroVar(v) => panic!("Variable not in a macro: {v}"), PushTarget::Constant(c) => panic!("Constant wasn't inlined: {c}"), @@ -353,6 +360,12 @@ fn assemble_file( code.push(get_opcode(&opcode)); } Item::Bytes(bytes) => code.extend(bytes), + Item::Jumptable(labels) => { + for label in labels { + let bytes = look_up_label(&label, &local_labels, global_labels); + code.extend(bytes); + } + } } } } diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs index 6180b1c8..ed4f6dbb 100644 --- a/evm/src/cpu/kernel/ast.rs +++ b/evm/src/cpu/kernel/ast.rs @@ -34,6 +34,8 @@ pub(crate) enum Item { StandardOp(String), /// Literal hex data; should contain an even number of hex chars. Bytes(Vec), + /// Creates a table of addresses from a list of labels. + Jumptable(Vec), } /// The left hand side of a %stack stack-manipulation macro. diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest index 9b8721f4..e7337430 100644 --- a/evm/src/cpu/kernel/evm_asm.pest +++ b/evm/src/cpu/kernel/evm_asm.pest @@ -15,7 +15,7 @@ literal = { literal_hex | literal_decimal } variable = ${ "$" ~ identifier } constant = ${ "@" ~ identifier } -item = { macro_def | macro_call | repeat | stack | global_label_decl | local_label_decl | macro_label_decl | bytes_item | push_instruction | prover_input_instruction | nullary_instruction } +item = { macro_def | macro_call | repeat | stack | global_label_decl | local_label_decl | macro_label_decl | bytes_item | jumptable_item | push_instruction | prover_input_instruction | nullary_instruction } macro_def = { ^"%macro" ~ identifier ~ paramlist? ~ item* ~ ^"%endmacro" } macro_call = ${ "%" ~ !((^"macro" | ^"endmacro" | ^"rep" | ^"endrep" | ^"stack") ~ !identifier_char) ~ identifier ~ macro_arglist? 
} repeat = { ^"%rep" ~ literal ~ item* ~ ^"%endrep" } @@ -35,6 +35,7 @@ macro_label_decl = ${ "%%" ~ identifier ~ ":" } macro_label = ${ "%%" ~ identifier } bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* } +jumptable_item = { ^"JUMPTABLE " ~ identifier ~ ("," ~ identifier)* } push_instruction = { ^"PUSH " ~ push_target } push_target = { literal | identifier | macro_label | variable | constant } prover_input_instruction = { ^"PROVER_INPUT" ~ "(" ~ prover_input_fn ~ ")" } diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 82bd382b..6e3f9e5d 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -8,7 +8,6 @@ use keccak_hash::keccak; use plonky2::field::goldilocks_field::GoldilocksField; use crate::cpu::kernel::aggregator::KERNEL; -use crate::cpu::kernel::assembler::Kernel; use crate::cpu::kernel::constants::context_metadata::ContextMetadata; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::cpu::kernel::constants::txn_fields::NormalizedTxnField; @@ -78,19 +77,18 @@ pub struct Interpreter<'a> { pub(crate) halt_offsets: Vec, pub(crate) debug_offsets: Vec, running: bool, + opcode_count: [usize; 0x100], } -pub fn run_with_kernel( - // TODO: Remove param and just use KERNEL. - kernel: &Kernel, +pub fn run_interpreter( initial_offset: usize, initial_stack: Vec, -) -> anyhow::Result { +) -> anyhow::Result> { run( - &kernel.code, + &KERNEL.code, initial_offset, initial_stack, - &kernel.prover_inputs, + &KERNEL.prover_inputs, ) } @@ -132,6 +130,7 @@ impl<'a> Interpreter<'a> { halt_offsets: vec![DEFAULT_HALT_OFFSET], debug_offsets: vec![], running: false, + opcode_count: [0; 0x100], } } @@ -140,6 +139,12 @@ impl<'a> Interpreter<'a> { while self.running { self.run_opcode()?; } + println!("Opcode count:"); + for i in 0..0x100 { + if self.opcode_count[i] > 0 { + println!("{}: {}", get_mnemonic(i as u8), self.opcode_count[i]) + } + } Ok(()) } @@ -223,6 +228,7 @@ impl<'a> Interpreter<'a> { fn run_opcode(&mut self) -> anyhow::Result<()> { let opcode = self.code().get(self.offset).byte(0); + self.opcode_count[opcode as usize] += 1; self.incr(1); match opcode { 0x00 => self.run_stop(), // "STOP", @@ -690,6 +696,170 @@ fn find_jumpdests(code: &[u8]) -> Vec { res } +fn get_mnemonic(opcode: u8) -> &'static str { + match opcode { + 0x00 => "STOP", + 0x01 => "ADD", + 0x02 => "MUL", + 0x03 => "SUB", + 0x04 => "DIV", + 0x05 => "SDIV", + 0x06 => "MOD", + 0x07 => "SMOD", + 0x08 => "ADDMOD", + 0x09 => "MULMOD", + 0x0a => "EXP", + 0x0b => "SIGNEXTEND", + 0x0c => "ADDFP254", + 0x0d => "MULFP254", + 0x0e => "SUBFP254", + 0x10 => "LT", + 0x11 => "GT", + 0x12 => "SLT", + 0x13 => "SGT", + 0x14 => "EQ", + 0x15 => "ISZERO", + 0x16 => "AND", + 0x17 => "OR", + 0x18 => "XOR", + 0x19 => "NOT", + 0x1a => "BYTE", + 0x1b => "SHL", + 0x1c => "SHR", + 0x1d => "SAR", + 0x20 => "KECCAK256", + 0x21 => "KECCAK_GENERAL", + 0x30 => "ADDRESS", + 0x31 => "BALANCE", + 0x32 => "ORIGIN", + 0x33 => "CALLER", + 0x34 => "CALLVALUE", + 0x35 => "CALLDATALOAD", + 0x36 => "CALLDATASIZE", + 0x37 => "CALLDATACOPY", + 0x38 => "CODESIZE", + 0x39 => "CODECOPY", + 0x3a => "GASPRICE", + 0x3b => "EXTCODESIZE", + 0x3c => "EXTCODECOPY", + 0x3d => "RETURNDATASIZE", + 0x3e => "RETURNDATACOPY", + 0x3f => "EXTCODEHASH", + 0x40 => "BLOCKHASH", + 0x41 => "COINBASE", + 0x42 => "TIMESTAMP", + 0x43 => "NUMBER", + 0x44 => "DIFFICULTY", + 0x45 => "GASLIMIT", + 0x46 => "CHAINID", + 0x48 => "BASEFEE", + 0x49 => "PROVER_INPUT", + 0x50 => "POP", + 0x51 => "MLOAD", + 0x52 
=> "MSTORE", + 0x53 => "MSTORE8", + 0x54 => "SLOAD", + 0x55 => "SSTORE", + 0x56 => "JUMP", + 0x57 => "JUMPI", + 0x58 => "GETPC", + 0x59 => "MSIZE", + 0x5a => "GAS", + 0x5b => "JUMPDEST", + 0x5c => "GET_STATE_ROOT", + 0x5d => "SET_STATE_ROOT", + 0x5e => "GET_RECEIPT_ROOT", + 0x5f => "SET_RECEIPT_ROOT", + 0x60 => "PUSH1", + 0x61 => "PUSH2", + 0x62 => "PUSH3", + 0x63 => "PUSH4", + 0x64 => "PUSH5", + 0x65 => "PUSH6", + 0x66 => "PUSH7", + 0x67 => "PUSH8", + 0x68 => "PUSH9", + 0x69 => "PUSH10", + 0x6a => "PUSH11", + 0x6b => "PUSH12", + 0x6c => "PUSH13", + 0x6d => "PUSH14", + 0x6e => "PUSH15", + 0x6f => "PUSH16", + 0x70 => "PUSH17", + 0x71 => "PUSH18", + 0x72 => "PUSH19", + 0x73 => "PUSH20", + 0x74 => "PUSH21", + 0x75 => "PUSH22", + 0x76 => "PUSH23", + 0x77 => "PUSH24", + 0x78 => "PUSH25", + 0x79 => "PUSH26", + 0x7a => "PUSH27", + 0x7b => "PUSH28", + 0x7c => "PUSH29", + 0x7d => "PUSH30", + 0x7e => "PUSH31", + 0x7f => "PUSH32", + 0x80 => "DUP1", + 0x81 => "DUP2", + 0x82 => "DUP3", + 0x83 => "DUP4", + 0x84 => "DUP5", + 0x85 => "DUP6", + 0x86 => "DUP7", + 0x87 => "DUP8", + 0x88 => "DUP9", + 0x89 => "DUP10", + 0x8a => "DUP11", + 0x8b => "DUP12", + 0x8c => "DUP13", + 0x8d => "DUP14", + 0x8e => "DUP15", + 0x8f => "DUP16", + 0x90 => "SWAP1", + 0x91 => "SWAP2", + 0x92 => "SWAP3", + 0x93 => "SWAP4", + 0x94 => "SWAP5", + 0x95 => "SWAP6", + 0x96 => "SWAP7", + 0x97 => "SWAP8", + 0x98 => "SWAP9", + 0x99 => "SWAP10", + 0x9a => "SWAP11", + 0x9b => "SWAP12", + 0x9c => "SWAP13", + 0x9d => "SWAP14", + 0x9e => "SWAP15", + 0x9f => "SWAP16", + 0xa0 => "LOG0", + 0xa1 => "LOG1", + 0xa2 => "LOG2", + 0xa3 => "LOG3", + 0xa4 => "LOG4", + 0xa5 => "PANIC", + 0xf0 => "CREATE", + 0xf1 => "CALL", + 0xf2 => "CALLCODE", + 0xf3 => "RETURN", + 0xf4 => "DELEGATECALL", + 0xf5 => "CREATE2", + 0xf6 => "GET_CONTEXT", + 0xf7 => "SET_CONTEXT", + 0xf8 => "CONSUME_GAS", + 0xf9 => "EXIT_KERNEL", + 0xfa => "STATICCALL", + 0xfb => "MLOAD_GENERAL", + 0xfc => "MSTORE_GENERAL", + 0xfd => "REVERT", + 0xfe => "INVALID", + 0xff => "SELFDESTRUCT", + _ => panic!("Unrecognized opcode {opcode}"), + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs index b7a8124b..49181b71 100644 --- a/evm/src/cpu/kernel/parser.rs +++ b/evm/src/cpu/kernel/parser.rs @@ -39,6 +39,9 @@ fn parse_item(item: Pair) -> Item { Item::MacroLabelDeclaration(item.into_inner().next().unwrap().as_str().into()) } Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal_u8).collect()), + Rule::jumptable_item => { + Item::Jumptable(item.into_inner().map(|i| i.as_str().into()).collect()) + } Rule::push_instruction => Item::Push(parse_push_target(item.into_inner().next().unwrap())), Rule::prover_input_instruction => Item::ProverInput( item.into_inner() diff --git a/evm/src/cpu/kernel/tests/curve_ops.rs b/evm/src/cpu/kernel/tests/curve_ops.rs index 9ba24185..3410cee8 100644 --- a/evm/src/cpu/kernel/tests/curve_ops.rs +++ b/evm/src/cpu/kernel/tests/curve_ops.rs @@ -3,17 +3,16 @@ mod bn { use anyhow::Result; use ethereum_types::U256; - use crate::cpu::kernel::aggregator::combined_kernel; - use crate::cpu::kernel::interpreter::run_with_kernel; + use crate::cpu::kernel::aggregator::KERNEL; + use crate::cpu::kernel::interpreter::run_interpreter; use crate::cpu::kernel::tests::u256ify; #[test] fn test_ec_ops() -> Result<()> { // Make sure we can parse and assemble the entire kernel. 
- let kernel = combined_kernel(); - let ec_add = kernel.global_labels["ec_add"]; - let ec_double = kernel.global_labels["ec_double"]; - let ec_mul = kernel.global_labels["ec_mul"]; + let ec_add = KERNEL.global_labels["ec_add"]; + let ec_double = KERNEL.global_labels["ec_double"]; + let ec_mul = KERNEL.global_labels["ec_mul"]; let identity = ("0x0", "0x0"); let invalid = ("0x0", "0x3"); // Not on curve let point0 = ( @@ -43,110 +42,76 @@ mod bn { // Standard addition #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard addition #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard doubling #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #2 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_double, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_double, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #3 let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Addition with identity #1 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #3 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Addition with invalid point(s) #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, invalid.1, invalid.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #2 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? 
- .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #3 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Addition with invalid point(s) #4 let initial_stack = u256ify(["0xdeadbeef", invalid.1, invalid.0, invalid.1, invalid.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Scalar multiplication #1 let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point4.1, point4.0])?); // Scalar multiplication #2 let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #3 let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point0.1, point0.0])?); // Scalar multiplication #4 let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #5 let initial_stack = u256ify(["0xdeadbeef", s, invalid.1, invalid.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX, U256::MAX]); // Multiple calls @@ -160,9 +125,7 @@ mod bn { point0.1, point0.0, ])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point4.1, point4.0])?); Ok(()) @@ -174,7 +137,7 @@ mod secp { use anyhow::Result; use crate::cpu::kernel::aggregator::combined_kernel; - use crate::cpu::kernel::interpreter::{run, run_with_kernel}; + use crate::cpu::kernel::interpreter::{run, run_interpreter}; use crate::cpu::kernel::tests::u256ify; #[test] @@ -212,9 +175,7 @@ mod secp { // Standard addition #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point1.1, point1.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? 
- .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point2.1, point2.0])?); // Standard addition #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, point0.1, point0.0])?; @@ -225,66 +186,46 @@ mod secp { // Standard doubling #1 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #2 let initial_stack = u256ify(["0xdeadbeef", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_double, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_double, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Standard doubling #3 let initial_stack = u256ify(["0xdeadbeef", "0x2", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point3.1, point3.0])?); // Addition with identity #1 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, point1.1, point1.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #2 let initial_stack = u256ify(["0xdeadbeef", point1.1, point1.0, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point1.1, point1.0])?); // Addition with identity #3 let initial_stack = u256ify(["0xdeadbeef", identity.1, identity.0, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #1 let initial_stack = u256ify(["0xdeadbeef", s, point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point4.1, point4.0])?); // Scalar multiplication #2 let initial_stack = u256ify(["0xdeadbeef", "0x0", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Scalar multiplication #3 let initial_stack = u256ify(["0xdeadbeef", "0x1", point0.1, point0.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point0.1, point0.0])?); // Scalar multiplication #4 let initial_stack = u256ify(["0xdeadbeef", s, identity.1, identity.0])?; - let stack = run_with_kernel(&kernel, ec_mul, initial_stack)? 
- .stack() - .to_vec(); + let stack = run_interpreter(ec_mul, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([identity.1, identity.0])?); // Multiple calls @@ -298,9 +239,7 @@ mod secp { point0.1, point0.0, ])?; - let stack = run_with_kernel(&kernel, ec_add, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ec_add, initial_stack)?.stack().to_vec(); assert_eq!(stack, u256ify([point4.1, point4.0])?); Ok(()) diff --git a/evm/src/cpu/kernel/tests/ecrecover.rs b/evm/src/cpu/kernel/tests/ecrecover.rs index c01adc53..944c1a71 100644 --- a/evm/src/cpu/kernel/tests/ecrecover.rs +++ b/evm/src/cpu/kernel/tests/ecrecover.rs @@ -3,35 +3,23 @@ use std::str::FromStr; use anyhow::Result; use ethereum_types::U256; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::assembler::Kernel; -use crate::cpu::kernel::interpreter::run_with_kernel; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::interpreter::run_interpreter; use crate::cpu::kernel::tests::u256ify; -fn test_valid_ecrecover( - hash: &str, - v: &str, - r: &str, - s: &str, - expected: &str, - kernel: &Kernel, -) -> Result<()> { - let ecrecover = kernel.global_labels["ecrecover"]; +fn test_valid_ecrecover(hash: &str, v: &str, r: &str, s: &str, expected: &str) -> Result<()> { + let ecrecover = KERNEL.global_labels["ecrecover"]; let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?; - let stack = run_with_kernel(kernel, ecrecover, initial_stack)? - .stack() - .to_vec(); + let stack = run_interpreter(ecrecover, initial_stack)?.stack().to_vec(); assert_eq!(stack[0], U256::from_str(expected).unwrap()); Ok(()) } -fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel) -> Result<()> { - let ecrecover = kernel.global_labels["ecrecover"]; +fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str) -> Result<()> { + let ecrecover = KERNEL.global_labels["ecrecover"]; let initial_stack = u256ify(["0xdeadbeef", s, r, v, hash])?; - let stack = run_with_kernel(kernel, ecrecover, initial_stack)? 
- .stack() - .to_vec(); + let stack = run_interpreter(ecrecover, initial_stack)?.stack().to_vec(); assert_eq!(stack, vec![U256::MAX]); Ok(()) @@ -39,15 +27,12 @@ fn test_invalid_ecrecover(hash: &str, v: &str, r: &str, s: &str, kernel: &Kernel #[test] fn test_ecrecover() -> Result<()> { - let kernel = combined_kernel(); - test_valid_ecrecover( "0x55f77e8909b1f1c9531c4a309bb2d40388e9ed4b87830c8f90363c6b36255fb9", "0x1b", "0xd667c5a20fa899b253924099e10ae92998626718585b8171eb98de468bbebc", "0x58351f48ce34bf134ee611fb5bf255a5733f0029561d345a7d46bfa344b60ac0", "0x67f3c0Da351384838d7F7641AB0fCAcF853E1844", - &kernel, )?; test_valid_ecrecover( "0x55f77e8909b1f1c9531c4a309bb2d40388e9ed4b87830c8f90363c6b36255fb9", @@ -55,7 +40,6 @@ fn test_ecrecover() -> Result<()> { "0xd667c5a20fa899b253924099e10ae92998626718585b8171eb98de468bbebc", "0x58351f48ce34bf134ee611fb5bf255a5733f0029561d345a7d46bfa344b60ac0", "0xaA58436DeABb64982a386B2De1A8015AA28fCCc0", - &kernel, )?; test_valid_ecrecover( "0x0", @@ -63,7 +47,6 @@ fn test_ecrecover() -> Result<()> { "0x1", "0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364140", "0x3344c6f6eeCA588be132142DB0a32C71ABFAAe7B", - &kernel, )?; test_invalid_ecrecover( @@ -71,28 +54,24 @@ fn test_ecrecover() -> Result<()> { "0x42", // v not in {27,28} "0x1", "0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364140", - &kernel, )?; test_invalid_ecrecover( "0x0", "0x42", "0xd667c5a20fa899b253924099e10ae92998626718585b8171eb98de468bbebc", "0x0", // s=0 - &kernel, )?; test_invalid_ecrecover( "0x0", "0x42", "0x0", // r=0 "0xd667c5a20fa899b253924099e10ae92998626718585b8171eb98de468bbebc", - &kernel, )?; test_invalid_ecrecover( "0x0", "0x1c", "0x3a18b21408d275dde53c0ea86f9c1982eca60193db0ce15008fa408d43024847", // r^3 + 7 isn't a square "0x5db9745f44089305b2f2c980276e7025a594828d878e6e36dd2abd34ca6b9e3d", - &kernel, )?; Ok(()) diff --git a/evm/src/cpu/kernel/tests/exp.rs b/evm/src/cpu/kernel/tests/exp.rs index 25bc5ad3..1655064e 100644 --- a/evm/src/cpu/kernel/tests/exp.rs +++ b/evm/src/cpu/kernel/tests/exp.rs @@ -2,50 +2,43 @@ use anyhow::Result; use ethereum_types::U256; use rand::{thread_rng, Rng}; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::interpreter::{run, run_with_kernel}; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::interpreter::{run, run_interpreter}; #[test] fn test_exp() -> Result<()> { // Make sure we can parse and assemble the entire kernel. - let kernel = combined_kernel(); - let exp = kernel.global_labels["exp"]; + let exp = KERNEL.global_labels["exp"]; let mut rng = thread_rng(); let a = U256([0; 4].map(|_| rng.gen())); let b = U256([0; 4].map(|_| rng.gen())); // Random input let initial_stack = vec![0xDEADBEEFu32.into(), b, a]; - let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)? - .stack() - .to_vec(); + let stack_with_kernel = run_interpreter(exp, initial_stack)?.stack().to_vec(); let initial_stack = vec![b, a]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)? + let stack_with_opcode = run(&code, 0, initial_stack, &KERNEL.prover_inputs)? .stack() .to_vec(); assert_eq!(stack_with_kernel, stack_with_opcode); // 0 base let initial_stack = vec![0xDEADBEEFu32.into(), b, U256::zero()]; - let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)? 
- .stack() - .to_vec(); + let stack_with_kernel = run_interpreter(exp, initial_stack)?.stack().to_vec(); let initial_stack = vec![b, U256::zero()]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)? + let stack_with_opcode = run(&code, 0, initial_stack, &KERNEL.prover_inputs)? .stack() .to_vec(); assert_eq!(stack_with_kernel, stack_with_opcode); // 0 exponent let initial_stack = vec![0xDEADBEEFu32.into(), U256::zero(), a]; - let stack_with_kernel = run_with_kernel(&kernel, exp, initial_stack)? - .stack() - .to_vec(); + let stack_with_kernel = run_interpreter(exp, initial_stack)?.stack().to_vec(); let initial_stack = vec![U256::zero(), a]; let code = [0xa, 0x63, 0xde, 0xad, 0xbe, 0xef, 0x56]; // EXP, PUSH4 deadbeef, JUMP - let stack_with_opcode = run(&code, 0, initial_stack, &kernel.prover_inputs)? + let stack_with_opcode = run(&code, 0, initial_stack, &KERNEL.prover_inputs)? .stack() .to_vec(); assert_eq!(stack_with_kernel, stack_with_opcode); diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs index b8a38887..289a8598 100644 --- a/evm/src/cpu/kernel/tests/fields.rs +++ b/evm/src/cpu/kernel/tests/fields.rs @@ -2,8 +2,8 @@ use anyhow::Result; use ethereum_types::U256; use rand::{thread_rng, Rng}; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::interpreter::run_with_kernel; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::interpreter::run_interpreter; // TODO: 107 is hardcoded as a dummy prime for testing // should be changed to the proper implementation prime @@ -137,10 +137,9 @@ fn test_fp6() -> Result<()> { let mut input: Vec = [c, d].into_iter().flatten().flatten().collect(); input.push(0xdeadbeef); - let kernel = combined_kernel(); - let initial_offset = kernel.global_labels["mul_fp6"]; + let initial_offset = KERNEL.global_labels["mul_fp6"]; let initial_stack: Vec = as_stack(input); - let final_stack: Vec = run_with_kernel(&kernel, initial_offset, initial_stack)? + let final_stack: Vec = run_interpreter(initial_offset, initial_stack)? .stack() .to_vec(); diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index d6c0f74f..b7b2e5c2 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -6,8 +6,8 @@ use rand::{thread_rng, Rng}; use ripemd::{Digest, Ripemd160}; use sha2::Sha256; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::interpreter::run_with_kernel; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::interpreter::run_interpreter; /// Standard Sha2 implementation. fn sha2(input: Vec) -> U256 { @@ -62,12 +62,11 @@ fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U let initial_stack_custom = make_input_stack(message_custom); // Make the kernel. - let kernel = combined_kernel(); - let kernel_function = kernel.global_labels[hash_fn_label]; + let kernel_function = KERNEL.global_labels[hash_fn_label]; // Run the kernel code. - let result_random = run_with_kernel(&kernel, kernel_function, initial_stack_random)?; - let result_custom = run_with_kernel(&kernel, kernel_function, initial_stack_custom)?; + let result_random = run_interpreter(kernel_function, initial_stack_random)?; + let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; // Extract the final output. 
let actual_random = result_random.stack()[0]; diff --git a/evm/src/cpu/kernel/tests/ripemd.rs b/evm/src/cpu/kernel/tests/ripemd.rs index 305548ec..78b05cb8 100644 --- a/evm/src/cpu/kernel/tests/ripemd.rs +++ b/evm/src/cpu/kernel/tests/ripemd.rs @@ -2,8 +2,8 @@ use anyhow::Result; use ethereum_types::U256; use itertools::Itertools; -use crate::cpu::kernel::aggregator::combined_kernel; -use crate::cpu::kernel::interpreter::run_with_kernel; +use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::kernel::interpreter::run_interpreter; fn make_input(word: &str) -> Vec { let mut input: Vec = vec![word.len().try_into().unwrap()]; @@ -44,10 +44,9 @@ fn test_ripemd_reference() -> Result<()> { let input: Vec = make_input(x); let expected = U256::from(y); - let kernel = combined_kernel(); - let initial_offset = kernel.global_labels["ripemd_stack"]; + let initial_offset = KERNEL.global_labels["ripemd_stack"]; let initial_stack: Vec = input.iter().map(|&x| U256::from(x)).rev().collect(); - let final_stack: Vec = run_with_kernel(&kernel, initial_offset, initial_stack)? + let final_stack: Vec = run_interpreter(initial_offset, initial_stack)? .stack() .to_vec(); let actual = final_stack[0]; diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs index fda5db80..ece07c1c 100644 --- a/evm/src/cpu/mod.rs +++ b/evm/src/cpu/mod.rs @@ -8,6 +8,7 @@ mod jumps; pub mod kernel; pub(crate) mod membus; mod modfp254; +mod shift; mod simple_logic; mod stack; mod stack_bounds; diff --git a/evm/src/cpu/shift.rs b/evm/src/cpu/shift.rs new file mode 100644 index 00000000..d383b6b2 --- /dev/null +++ b/evm/src/cpu/shift.rs @@ -0,0 +1,108 @@ +use plonky2::field::extension::Extendable; +use plonky2::field::packed::PackedField; +use plonky2::field::types::Field; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; + +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::cpu::columns::CpuColumnsView; +use crate::cpu::membus::NUM_GP_CHANNELS; +use crate::memory::segments::Segment; + +pub(crate) fn eval_packed( + lv: &CpuColumnsView
<P>
, + yield_constr: &mut ConstraintConsumer
<P>
, +) { + let is_shift = lv.op.shl + lv.op.shr; + let displacement = lv.mem_channels[1]; // holds the shift displacement d + let two_exp = lv.mem_channels[2]; // holds 2^d + + // Not needed here; val is the input and we're verifying that output is + // val * 2^d (mod 2^256) + //let val = lv.mem_channels[0]; + //let output = lv.mem_channels[NUM_GP_CHANNELS - 1]; + + let shift_table_segment = P::Scalar::from_canonical_u64(Segment::ShiftTable as u64); + + // Only lookup the shifting factor when displacement is < 2^32. + // two_exp.used is true (1) if the high limbs of the displacement are + // zero and false (0) otherwise. + let high_limbs_are_zero = two_exp.used; + yield_constr.constraint(is_shift * (two_exp.is_read - P::ONES)); + + let high_limbs_sum: P = displacement.value[1..].iter().copied().sum(); + let high_limbs_sum_inv = lv.general.shift().high_limb_sum_inv; + // Verify that high_limbs_are_zero = 0 implies high_limbs_sum != 0 and + // high_limbs_are_zero = 1 implies high_limbs_sum = 0. + let t = high_limbs_sum * high_limbs_sum_inv - (P::ONES - high_limbs_are_zero); + yield_constr.constraint(is_shift * t); + yield_constr.constraint(is_shift * high_limbs_sum * high_limbs_are_zero); + + // When the shift displacement is < 2^32, constrain the two_exp + // mem_channel to be the entry corresponding to `displacement` in + // the shift table lookup (will be zero if displacement >= 256). + yield_constr.constraint(is_shift * two_exp.addr_context); // read from kernel memory + yield_constr.constraint(is_shift * (two_exp.addr_segment - shift_table_segment)); + yield_constr.constraint(is_shift * (two_exp.addr_virtual - displacement.value[0])); + + // Other channels must be unused + for chan in &lv.mem_channels[3..NUM_GP_CHANNELS - 1] { + yield_constr.constraint(is_shift * chan.used); // channel is not used + } + + // Cross-table lookup must connect the memory channels here to MUL + // (in the case of left shift) or DIV (in the case of right shift) + // in the arithmetic table. Specifically, the mapping is + // + // 0 -> 0 (value to be shifted is the same) + // 2 -> 1 (two_exp becomes the multiplicand (resp. 
divisor)) + // last -> last (output is the same) +} + +pub(crate) fn eval_ext_circuit, const D: usize>( + builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder, + lv: &CpuColumnsView>, + yield_constr: &mut RecursiveConstraintConsumer, +) { + let is_shift = builder.add_extension(lv.op.shl, lv.op.shr); + let displacement = lv.mem_channels[1]; + let two_exp = lv.mem_channels[2]; + + let shift_table_segment = F::from_canonical_u64(Segment::ShiftTable as u64); + + let high_limbs_are_zero = two_exp.used; + let one = builder.one_extension(); + let t = builder.sub_extension(two_exp.is_read, one); + let t = builder.mul_extension(is_shift, t); + yield_constr.constraint(builder, t); + + let high_limbs_sum = builder.add_many_extension(&displacement.value[1..]); + let high_limbs_sum_inv = lv.general.shift().high_limb_sum_inv; + let t = builder.one_extension(); + let t = builder.sub_extension(t, high_limbs_are_zero); + let t = builder.mul_sub_extension(high_limbs_sum, high_limbs_sum_inv, t); + let t = builder.mul_extension(is_shift, t); + yield_constr.constraint(builder, t); + + let t = builder.mul_many_extension([is_shift, high_limbs_sum, high_limbs_are_zero]); + yield_constr.constraint(builder, t); + + let t = builder.mul_extension(is_shift, two_exp.addr_context); + yield_constr.constraint(builder, t); + let t = builder.arithmetic_extension( + F::ONE, + -shift_table_segment, + is_shift, + two_exp.addr_segment, + is_shift, + ); + yield_constr.constraint(builder, t); + let t = builder.sub_extension(two_exp.addr_virtual, displacement.value[0]); + let t = builder.mul_extension(is_shift, t); + yield_constr.constraint(builder, t); + + for chan in &lv.mem_channels[3..NUM_GP_CHANNELS - 1] { + let t = builder.mul_extension(is_shift, chan.used); + yield_constr.constraint(builder, t); + } +} diff --git a/evm/src/cpu/stack.rs b/evm/src/cpu/stack.rs index c72688ed..ea235578 100644 --- a/evm/src/cpu/stack.rs +++ b/evm/src/cpu/stack.rs @@ -40,73 +40,33 @@ const BASIC_TERNARY_OP: Option = Some(StackBehavior { // except the first `num_pops` and the last `pushes as usize` channels have their read flag and // address constrained automatically in this file. 
const STACK_BEHAVIORS: OpsColumnsView> = OpsColumnsView { - stop: None, // TODO add: BASIC_BINARY_OP, mul: BASIC_BINARY_OP, sub: BASIC_BINARY_OP, div: BASIC_BINARY_OP, - sdiv: BASIC_BINARY_OP, mod_: BASIC_BINARY_OP, - smod: BASIC_BINARY_OP, addmod: BASIC_TERNARY_OP, mulmod: BASIC_TERNARY_OP, - exp: None, // TODO - signextend: BASIC_BINARY_OP, addfp254: BASIC_BINARY_OP, mulfp254: BASIC_BINARY_OP, subfp254: BASIC_BINARY_OP, lt: BASIC_BINARY_OP, gt: BASIC_BINARY_OP, - slt: BASIC_BINARY_OP, - sgt: BASIC_BINARY_OP, eq: BASIC_BINARY_OP, iszero: BASIC_UNARY_OP, and: BASIC_BINARY_OP, or: BASIC_BINARY_OP, xor: BASIC_BINARY_OP, - not: BASIC_TERNARY_OP, + not: BASIC_UNARY_OP, byte: BASIC_BINARY_OP, shl: BASIC_BINARY_OP, shr: BASIC_BINARY_OP, - sar: BASIC_BINARY_OP, - keccak256: None, // TODO keccak_general: None, // TODO - address: None, // TODO - balance: None, // TODO - origin: None, // TODO - caller: None, // TODO - callvalue: None, // TODO - calldataload: None, // TODO - calldatasize: None, // TODO - calldatacopy: None, // TODO - codesize: None, // TODO - codecopy: None, // TODO - gasprice: None, // TODO - extcodesize: None, // TODO - extcodecopy: None, // TODO - returndatasize: None, // TODO - returndatacopy: None, // TODO - extcodehash: None, // TODO - blockhash: None, // TODO - coinbase: None, // TODO - timestamp: None, // TODO - number: None, // TODO - difficulty: None, // TODO - gaslimit: None, // TODO - chainid: None, // TODO - selfbalance: None, // TODO - basefee: None, // TODO prover_input: None, // TODO pop: None, // TODO - mload: None, // TODO - mstore: None, // TODO - mstore8: None, // TODO - sload: None, // TODO - sstore: None, // TODO jump: None, // TODO jumpi: None, // TODO pc: None, // TODO - msize: None, // TODO gas: None, // TODO jumpdest: None, // TODO get_state_root: None, // TODO @@ -116,27 +76,17 @@ const STACK_BEHAVIORS: OpsColumnsView> = OpsColumnsView { push: None, // TODO dup: None, swap: None, - log0: None, // TODO - log1: None, // TODO - log2: None, // TODO - log3: None, // TODO - log4: None, // TODO - create: None, // TODO - call: None, // TODO - callcode: None, // TODO - return_: None, // TODO - delegatecall: None, // TODO - create2: None, // TODO get_context: None, // TODO set_context: None, // TODO consume_gas: None, // TODO exit_kernel: None, // TODO - staticcall: None, // TODO mload_general: None, // TODO mstore_general: None, // TODO - revert: None, // TODO - selfdestruct: None, // TODO - invalid: None, // TODO + syscall: Some(StackBehavior { + num_pops: 0, + pushes: true, + disable_other_channels: false, + }), }; fn eval_packed_one( diff --git a/evm/src/cpu/syscalls.rs b/evm/src/cpu/syscalls.rs index 0ac31ef6..4033620e 100644 --- a/evm/src/cpu/syscalls.rs +++ b/evm/src/cpu/syscalls.rs @@ -2,62 +2,81 @@ //! //! These are usually the ones that are too complicated to implement in one CPU table row. 
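The rewritten constraints below replace the hard-coded trap list with a jumptable lookup keyed by the instruction's opcode: each opcode owns a `BYTES_PER_OFFSET`-byte, big-endian slot starting at the kernel's `syscall_jumptable` label in the code segment; the looked-up address becomes the next program counter, kernel mode is switched on, and the old PC and kernel flag are pushed to the stack. A rough, non-normative sketch of that dispatch in plain Rust (the helpers `read_code_byte` and `push_word`, and the slot width of 3 bytes, are illustrative assumptions, not part of the patch):

// Illustrative model only: the actual behavior is enforced by the STARK
// constraints in this file, not executed as Rust.
const BYTES_PER_OFFSET: usize = 3; // assumed width of one jumptable entry

fn dispatch_syscall(
    opcode: u8,
    syscall_jumptable: usize,             // address of the jumptable label
    pc: usize,
    is_kernel_mode: bool,
    read_code_byte: impl Fn(usize) -> u8, // read one byte of kernel code
    mut push_word: impl FnMut([u32; 8]),  // push one 256-bit stack word
) -> usize {
    // Each opcode owns one BYTES_PER_OFFSET-byte slot in the jumptable.
    let slot = syscall_jumptable + (opcode as usize) * BYTES_PER_OFFSET;
    // The handler address is stored big-endian, one byte per memory channel.
    let handler = (0..BYTES_PER_OFFSET)
        .map(|i| read_code_byte(slot + i) as usize)
        .fold(0, |acc, byte| acc * 256 + byte);
    // Save the interrupted state: limb 0 holds the old PC, limb 1 the old
    // kernel flag, and the remaining limbs are zero.
    let mut saved = [0u32; 8];
    saved[0] = pc as u32;
    saved[1] = is_kernel_mode as u32;
    push_word(saved);
    // The handler address becomes the next PC; kernel mode is set and the
    // current context is preserved.
    handler
}
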
-use once_cell::sync::Lazy; use plonky2::field::extension::Extendable; use plonky2::field::packed::PackedField; use plonky2::field::types::Field; use plonky2::hash::hash_types::RichField; use plonky2::iop::ext_target::ExtensionTarget; +use static_assertions::const_assert; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::cpu::columns::{CpuColumnsView, COL_MAP}; +use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::aggregator::KERNEL; +use crate::cpu::membus::NUM_GP_CHANNELS; +use crate::memory::segments::Segment; -const NUM_SYSCALLS: usize = 3; - -fn make_syscall_list() -> [(usize, usize); NUM_SYSCALLS] { - let kernel = Lazy::force(&KERNEL); - [ - (COL_MAP.op.stop, "sys_stop"), - (COL_MAP.op.exp, "sys_exp"), - (COL_MAP.op.invalid, "handle_invalid"), - ] - .map(|(col_index, handler_name)| (col_index, kernel.global_labels[handler_name])) -} - -static TRAP_LIST: Lazy<[(usize, usize); NUM_SYSCALLS]> = Lazy::new(make_syscall_list); +// Copy the constant but make it `usize`. +const BYTES_PER_OFFSET: usize = crate::cpu::kernel::assembler::BYTES_PER_OFFSET as usize; +const_assert!(BYTES_PER_OFFSET < NUM_GP_CHANNELS); // Reserve one channel for stack push pub fn eval_packed( lv: &CpuColumnsView
<P>, nv: &CpuColumnsView<P>, yield_constr: &mut ConstraintConsumer<P>
, ) { - let syscall_list = Lazy::force(&TRAP_LIST); - // 1 if _any_ syscall, else 0. - let should_syscall: P = syscall_list - .iter() - .map(|&(col_index, _)| lv[col_index]) - .sum(); - let filter = lv.is_cpu_cycle * should_syscall; + let filter = lv.is_cpu_cycle * lv.op.syscall; - // If syscall: set program counter to the handler address - // Note that at most one of the `lv[col_index]`s will be 1 and all others will be 0. - let syscall_dst: P = syscall_list - .iter() - .map(|&(col_index, handler_addr)| { - lv[col_index] * P::Scalar::from_canonical_usize(handler_addr) - }) + // Look up the handler in memory + let code_segment = P::Scalar::from_canonical_usize(Segment::Code as usize); + let syscall_jumptable_start = + P::Scalar::from_canonical_usize(KERNEL.global_labels["syscall_jumptable"]); + let opcode: P = lv + .opcode_bits + .into_iter() + .enumerate() + .map(|(i, bit)| bit * P::Scalar::from_canonical_u64(1 << i)) .sum(); - yield_constr.constraint_transition(filter * (nv.program_counter - syscall_dst)); - // If syscall: set kernel mode + let opcode_handler_addr_start = + syscall_jumptable_start + opcode * P::Scalar::from_canonical_usize(BYTES_PER_OFFSET); + for (i, channel) in lv.mem_channels[0..BYTES_PER_OFFSET].iter().enumerate() { + yield_constr.constraint(filter * (channel.used - P::ONES)); + yield_constr.constraint(filter * (channel.is_read - P::ONES)); + + // Set kernel context and code segment + yield_constr.constraint(filter * channel.addr_context); + yield_constr.constraint(filter * (channel.addr_segment - code_segment)); + + // Set address, using a separate channel for each of the `BYTES_PER_OFFSET` limbs. + let limb_address = opcode_handler_addr_start + P::Scalar::from_canonical_usize(i); + yield_constr.constraint(filter * (channel.addr_virtual - limb_address)); + } + + // Disable unused channels (the last channel is used to push to the stack) + for channel in &lv.mem_channels[BYTES_PER_OFFSET..NUM_GP_CHANNELS - 1] { + yield_constr.constraint(filter * channel.used); + } + + // Set program counter to the handler address + // The addresses are big-endian in memory + let target = lv.mem_channels[0..BYTES_PER_OFFSET] + .iter() + .map(|channel| channel.value[0]) + .fold(P::ZEROS, |cumul, limb| { + cumul * P::Scalar::from_canonical_u64(256) + limb + }); + yield_constr.constraint_transition(filter * (nv.program_counter - target)); + // Set kernel mode yield_constr.constraint_transition(filter * (nv.is_kernel_mode - P::ONES)); + // Maintain current context + yield_constr.constraint_transition(filter * (nv.context - lv.context)); - let output = lv.mem_channels[0].value; - // If syscall: push current PC to stack + // This memory channel is constrained in `stack.rs`. + let output = lv.mem_channels[NUM_GP_CHANNELS - 1].value; + // Push current PC to stack yield_constr.constraint(filter * (output[0] - lv.program_counter)); - // If syscall: push current kernel flag to stack (share register with PC) + // Push current kernel flag to stack (share register with PC) yield_constr.constraint(filter * (output[1] - lv.is_kernel_mode)); - // If syscall: zero the rest of that register + // Zero the rest of that register for &limb in &output[2..] { yield_constr.constraint(filter * limb); } @@ -69,46 +88,111 @@ pub fn eval_ext_circuit, const D: usize>( nv: &CpuColumnsView>, yield_constr: &mut RecursiveConstraintConsumer, ) { - let syscall_list = Lazy::force(&TRAP_LIST); - // 1 if _any_ syscall, else 0. 
- let should_syscall = - builder.add_many_extension(syscall_list.iter().map(|&(col_index, _)| lv[col_index])); - let filter = builder.mul_extension(lv.is_cpu_cycle, should_syscall); + let filter = builder.mul_extension(lv.is_cpu_cycle, lv.op.syscall); - // If syscall: set program counter to the handler address + // Look up the handler in memory + let code_segment = F::from_canonical_usize(Segment::Code as usize); + let syscall_jumptable_start = builder.constant_extension( + F::from_canonical_usize(KERNEL.global_labels["syscall_jumptable"]).into(), + ); + let opcode = lv + .opcode_bits + .into_iter() + .rev() + .fold(builder.zero_extension(), |cumul, bit| { + builder.mul_const_add_extension(F::TWO, cumul, bit) + }); + let opcode_handler_addr_start = builder.mul_const_add_extension( + F::from_canonical_usize(BYTES_PER_OFFSET), + opcode, + syscall_jumptable_start, + ); + for (i, channel) in lv.mem_channels[0..BYTES_PER_OFFSET].iter().enumerate() { + { + let constr = builder.mul_sub_extension(filter, channel.used, filter); + yield_constr.constraint(builder, constr); + } + { + let constr = builder.mul_sub_extension(filter, channel.is_read, filter); + yield_constr.constraint(builder, constr); + } + + // Set kernel context and code segment + { + let constr = builder.mul_extension(filter, channel.addr_context); + yield_constr.constraint(builder, constr); + } + { + let constr = builder.arithmetic_extension( + F::ONE, + -code_segment, + filter, + channel.addr_segment, + filter, + ); + yield_constr.constraint(builder, constr); + } + + // Set address, using a separate channel for each of the `BYTES_PER_OFFSET` limbs. + { + let diff = builder.sub_extension(channel.addr_virtual, opcode_handler_addr_start); + let constr = builder.arithmetic_extension( + F::ONE, + -F::from_canonical_usize(i), + filter, + diff, + filter, + ); + yield_constr.constraint(builder, constr); + } + } + + // Disable unused channels (the last channel is used to push to the stack) + for channel in &lv.mem_channels[BYTES_PER_OFFSET..NUM_GP_CHANNELS - 1] { + let constr = builder.mul_extension(filter, channel.used); + yield_constr.constraint(builder, constr); + } + + // Set program counter to the handler address + // The addresses are big-endian in memory { - // Note that at most one of the `lv[col_index]`s will be 1 and all others will be 0. 
- let syscall_dst = syscall_list.iter().fold( - builder.zero_extension(), - |cumul, &(col_index, handler_addr)| { - let handler_addr = F::from_canonical_usize(handler_addr); - builder.mul_const_add_extension(handler_addr, lv[col_index], cumul) - }, - ); - let constr = builder.sub_extension(nv.program_counter, syscall_dst); - let constr = builder.mul_extension(filter, constr); + let target = lv.mem_channels[0..BYTES_PER_OFFSET] + .iter() + .map(|channel| channel.value[0]) + .fold(builder.zero_extension(), |cumul, limb| { + builder.mul_const_add_extension(F::from_canonical_u64(256), cumul, limb) + }); + let diff = builder.sub_extension(nv.program_counter, target); + let constr = builder.mul_extension(filter, diff); yield_constr.constraint_transition(builder, constr); } - // If syscall: set kernel mode + // Set kernel mode { let constr = builder.mul_sub_extension(filter, nv.is_kernel_mode, filter); yield_constr.constraint_transition(builder, constr); } + // Maintain current context + { + let diff = builder.sub_extension(nv.context, lv.context); + let constr = builder.mul_extension(filter, diff); + yield_constr.constraint_transition(builder, constr); + } - let output = lv.mem_channels[0].value; - // If syscall: push current PC to stack + // This memory channel is constrained in `stack.rs`. + let output = lv.mem_channels[NUM_GP_CHANNELS - 1].value; + // Push current PC to stack { - let constr = builder.sub_extension(output[0], lv.program_counter); - let constr = builder.mul_extension(filter, constr); + let diff = builder.sub_extension(output[0], lv.program_counter); + let constr = builder.mul_extension(filter, diff); yield_constr.constraint(builder, constr); } - // If syscall: push current kernel flag to stack (share register with PC) + // Push current kernel flag to stack (share register with PC) { - let constr = builder.sub_extension(output[1], lv.is_kernel_mode); - let constr = builder.mul_extension(filter, constr); + let diff = builder.sub_extension(output[1], lv.is_kernel_mode); + let constr = builder.mul_extension(filter, diff); yield_constr.constraint(builder, constr); } - // If syscall: zero the rest of that register + // Zero the rest of that register for &limb in &output[2..] { let constr = builder.mul_extension(filter, limb); yield_constr.constraint(builder, constr); diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs index f8d536e9..7a28cb96 100644 --- a/evm/src/memory/segments.rs +++ b/evm/src/memory/segments.rs @@ -35,10 +35,13 @@ pub(crate) enum Segment { TrieEncodedChild = 14, /// A buffer used to store the lengths of the encodings of a branch node's children. TrieEncodedChildLen = 15, + /// A table of values 2^i for i=0..255 for use with shift + /// instructions; initialised by `kernel/asm/shift.asm::init_shift_table()`. 
+ ShiftTable = 16, } impl Segment { - pub(crate) const COUNT: usize = 16; + pub(crate) const COUNT: usize = 17; pub(crate) fn all() -> [Self; Self::COUNT] { [ @@ -58,6 +61,7 @@ impl Segment { Self::TrieData, Self::TrieEncodedChild, Self::TrieEncodedChildLen, + Self::ShiftTable, ] } @@ -80,6 +84,7 @@ impl Segment { Segment::TrieData => "SEGMENT_TRIE_DATA", Segment::TrieEncodedChild => "SEGMENT_TRIE_ENCODED_CHILD", Segment::TrieEncodedChildLen => "SEGMENT_TRIE_ENCODED_CHILD_LEN", + Segment::ShiftTable => "SEGMENT_SHIFT_TABLE", } } @@ -102,6 +107,7 @@ impl Segment { Segment::TrieData => 256, Segment::TrieEncodedChild => 256, Segment::TrieEncodedChildLen => 6, + Segment::ShiftTable => 256, } } } diff --git a/plonky2/Cargo.toml b/plonky2/Cargo.toml index b3fa5113..b63ef5a5 100644 --- a/plonky2/Cargo.toml +++ b/plonky2/Cargo.toml @@ -37,7 +37,7 @@ static_assertions = "1.1.0" [dev-dependencies] rand = "0.8.4" rand_chacha = "0.3.1" -criterion = "0.3.5" +criterion = "0.4.0" env_logger = "0.9.0" tynm = "0.1.6" structopt = "0.3.26" diff --git a/plonky2/src/gadgets/split_base.rs b/plonky2/src/gadgets/split_base.rs index c539d784..9c85863f 100644 --- a/plonky2/src/gadgets/split_base.rs +++ b/plonky2/src/gadgets/split_base.rs @@ -3,6 +3,7 @@ use std::borrow::Borrow; use itertools::Itertools; use plonky2_field::extension::Extendable; use plonky2_field::types::Field; +use plonky2_util::log_floor; use crate::gates::base_sum::BaseSumGate; use crate::hash::hash_types::RichField; @@ -33,6 +34,11 @@ impl, const D: usize> CircuitBuilder { pub(crate) fn le_sum(&mut self, bits: impl Iterator>) -> Target { let bits = bits.map(|b| *b.borrow()).collect_vec(); let num_bits = bits.len(); + assert!( + num_bits <= log_floor(F::ORDER, 2), + "{} bits may overflow the field", + num_bits + ); if num_bits == 0 { return self.zero(); } diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml index f1cb5729..826d69d8 100644 --- a/system_zero/Cargo.toml +++ b/system_zero/Cargo.toml @@ -16,7 +16,7 @@ rand = "0.8.4" rand_chacha = "0.3.1" [dev-dependencies] -criterion = "0.3.5" +criterion = "0.4.0" [[bench]] name = "lookup_permuted_cols" diff --git a/util/Cargo.toml b/util/Cargo.toml index a1ab402a..4e0b4b15 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -5,3 +5,4 @@ version = "0.1.0" edition = "2021" [dependencies] +rand = { version = "0.8.5", default-features = false, features = ["getrandom"] } diff --git a/util/src/lib.rs b/util/src/lib.rs index b22a4236..eb297bd8 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -274,8 +274,50 @@ pub fn branch_hint() { #[cfg(test)] mod tests { + use rand::rngs::OsRng; + use rand::Rng; + use crate::{log2_ceil, log2_strict}; + #[test] + fn test_reverse_index_bits() { + let lengths = [32, 128, 1 << 16]; + let mut rng = OsRng; + for _ in 0..32 { + for length in lengths { + let mut rand_list: Vec = Vec::with_capacity(length); + rand_list.resize_with(length, || rng.gen()); + + let out = super::reverse_index_bits(&rand_list); + let expect = reverse_index_bits_naive(&rand_list); + + for (out, expect) in out.iter().zip(&expect) { + assert_eq!(out, expect); + } + } + } + } + + #[test] + fn test_reverse_index_bits_in_place() { + let lengths = [32, 128, 1 << 16]; + let mut rng = OsRng; + for _ in 0..32 { + for length in lengths { + let mut rand_list: Vec = Vec::with_capacity(length); + rand_list.resize_with(length, || rng.gen()); + + let expect = reverse_index_bits_naive(&rand_list); + + super::reverse_index_bits_in_place(&mut rand_list); + + for (got, expect) in 
rand_list.iter().zip(&expect) { + assert_eq!(got, expect); + } + } + } + } + #[test] fn test_log2_strict() { assert_eq!(log2_strict(1), 0); @@ -326,4 +368,17 @@ mod tests { assert_eq!(log2_ceil(usize::MAX - 1), usize::BITS as usize); assert_eq!(log2_ceil(usize::MAX), usize::BITS as usize); } + + fn reverse_index_bits_naive(arr: &[T]) -> Vec { + let n = arr.len(); + let n_power = log2_strict(n); + + let mut out = vec![None; n]; + for (i, v) in arr.iter().enumerate() { + let dst = i.reverse_bits() >> (64 - n_power); + out[dst] = Some(*v); + } + + out.into_iter().map(|x| x.unwrap()).collect() + } }
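
For intuition, `reverse_index_bits_naive` maps each index to the index obtained by reversing its low `log2(n)` bits; because that mapping is an involution, scattering (`out[rev(i)] = arr[i]`) and gathering (`out[i] = arr[rev(i)]`) produce the same array, so it serves as a reference for both `reverse_index_bits` and `reverse_index_bits_in_place`. A minimal, self-contained sketch of the permutation on a length-8 input:

// Standalone illustration of the bit-reversal permutation tested above.
// For n = 8 (3 bits), index 3 = 0b011 maps to 0b110 = 6, and vice versa.
fn main() {
    let arr: Vec<u32> = (0..8).collect();
    let n_power = arr.len().trailing_zeros(); // log2_strict(8) = 3
    let permuted: Vec<u32> = (0..arr.len())
        .map(|i| arr[i.reverse_bits() >> (usize::BITS - n_power)])
        .collect();
    assert_eq!(permuted, vec![0, 4, 2, 6, 1, 5, 3, 7]);
    // Applying the permutation twice restores the original order.
    let restored: Vec<u32> = (0..permuted.len())
        .map(|i| permuted[i.reverse_bits() >> (usize::BITS - n_power)])
        .collect();
    assert_eq!(restored, arr);
}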