diff --git a/evm/Cargo.toml b/evm/Cargo.toml
index e282583e..7412d7f0 100644
--- a/evm/Cargo.toml
+++ b/evm/Cargo.toml
@@ -9,8 +9,11 @@ plonky2 = { path = "../plonky2" }
 plonky2_util = { path = "../util" }
 anyhow = "1.0.40"
 env_logger = "0.9.0"
+ethereum-types = "0.13.1"
 itertools = "0.10.0"
 log = "0.4.14"
+pest = "2.1.3"
+pest_derive = "2.1.0"
 rayon = "1.5.1"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
diff --git a/evm/src/cpu/columns.rs b/evm/src/cpu/columns.rs
index 881d79b5..5e5f3f55 100644
--- a/evm/src/cpu/columns.rs
+++ b/evm/src/cpu/columns.rs
@@ -1,15 +1,21 @@
 use std::ops::Range;
 
-// Filter. 1 if the row corresponds to a cycle of execution and 0 otherwise.
-// Lets us re-use decode columns in non-cycle rows.
-pub const IS_CPU_CYCLE: usize = 0;
+/// Filter. 1 if the row is part of bootstrapping the kernel code, 0 otherwise.
+pub const IS_BOOTSTRAP_KERNEL: usize = 0;
 
-// If CPU cycle: The opcode being decoded, in {0, ..., 255}.
+/// Filter. 1 if the row is part of bootstrapping a contract's code, 0 otherwise.
+pub const IS_BOOTSTRAP_CONTRACT: usize = IS_BOOTSTRAP_KERNEL + 1;
+
+/// Filter. 1 if the row corresponds to a cycle of execution and 0 otherwise.
+/// Lets us re-use decode columns in non-cycle rows.
+pub const IS_CPU_CYCLE: usize = IS_BOOTSTRAP_CONTRACT + 1;
+
+/// If CPU cycle: The opcode being decoded, in {0, ..., 255}.
 pub const OPCODE: usize = IS_CPU_CYCLE + 1;
 
-// If CPU cycle: flags for EVM instructions. PUSHn, DUPn, and SWAPn only get one flag each. Invalid
-// opcodes are split between a number of flags for practical reasons. Exactly one of these flags
-// must be 1.
+/// If CPU cycle: flags for EVM instructions. PUSHn, DUPn, and SWAPn only get one flag each. Invalid
+/// opcodes are split between a number of flags for practical reasons. Exactly one of these flags
+/// must be 1.
 pub const IS_STOP: usize = OPCODE + 1;
 pub const IS_ADD: usize = IS_STOP + 1;
 pub const IS_MUL: usize = IS_ADD + 1;
@@ -121,8 +127,8 @@ pub const IS_INVALID_20: usize = IS_INVALID_19 + 1;
 pub const START_INSTRUCTION_FLAGS: usize = IS_STOP;
 pub const END_INSTRUCTION_FLAGS: usize = IS_INVALID_20 + 1;
 
-// If CPU cycle: the opcode, broken up into bits.
-// **big-endian** order
+/// If CPU cycle: the opcode, broken up into bits.
+/// **Big-endian** order.
 pub const OPCODE_BITS: [usize; 8] = [
     END_INSTRUCTION_FLAGS,
     END_INSTRUCTION_FLAGS + 1,
diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
new file mode 100644
index 00000000..514712e4
--- /dev/null
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -0,0 +1,28 @@
+//! Loads each kernel assembly file and concatenates them.
+
+use itertools::Itertools;
+
+use super::assembler::{assemble, Kernel};
+use crate::cpu::kernel::parser::parse;
+
+#[allow(dead_code)] // TODO: Should be used once witness generation is done.
+pub(crate) fn combined_kernel() -> Kernel {
+    let files = vec![
+        include_str!("asm/storage_read.asm"),
+        include_str!("asm/storage_write.asm"),
+    ];
+
+    let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
+    assemble(parsed_files)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::cpu::kernel::aggregator::combined_kernel;
+
+    #[test]
+    fn make_kernel() {
+        // Make sure we can parse and assemble the entire kernel.
+        combined_kernel();
+    }
+}
diff --git a/evm/src/cpu/kernel/asm/storage_read.asm b/evm/src/cpu/kernel/asm/storage_read.asm
new file mode 100644
index 00000000..6a704c61
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/storage_read.asm
@@ -0,0 +1,10 @@
+// TODO: Dummy code for now.
+global storage_read:
+    JUMPDEST
+    PUSH 1234
+    POP
+    // An infinite loop:
+mylabel:
+    JUMPDEST
+    PUSH mylabel
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/storage_write.asm b/evm/src/cpu/kernel/asm/storage_write.asm
new file mode 100644
index 00000000..15c41b7c
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/storage_write.asm
@@ -0,0 +1,6 @@
+// TODO: Dummy code for now.
+global storage_write:
+    JUMPDEST
+    PUSH 123 // Whatever.
+    POP
+    BYTES 0x1, 0x02, 3
diff --git a/evm/src/cpu/kernel/assembler.rs b/evm/src/cpu/kernel/assembler.rs
new file mode 100644
index 00000000..8b1cb5f3
--- /dev/null
+++ b/evm/src/cpu/kernel/assembler.rs
@@ -0,0 +1,237 @@
+use std::collections::HashMap;
+
+use super::ast::PushTarget;
+use crate::cpu::kernel::{
+    ast::{File, Item},
+    opcodes::{get_opcode, get_push_opcode},
+};
+
+/// The number of bytes to push when pushing an offset within the code (i.e. when assembling jumps).
+/// Ideally we would automatically use the minimal number of bytes required, but that would be
+/// nontrivial given the circular dependency between an offset and its size.
+const BYTES_PER_OFFSET: u8 = 3;
+
+#[derive(PartialEq, Eq, Debug)]
+pub struct Kernel {
+    code: Vec<u8>,
+    global_labels: HashMap<String, usize>,
+}
+
+/// Assembles the given files, in order, into a single kernel.
+pub(crate) fn assemble(files: Vec<File>) -> Kernel {
+    let mut code = vec![];
+    let mut global_labels = HashMap::new();
+    for file in files {
+        assemble_file(file.body, &mut code, &mut global_labels);
+    }
+    Kernel {
+        code,
+        global_labels,
+    }
+}
+
+/// Assembles one file, appending its bytes to `code` and registering its global labels.
+fn assemble_file(body: Vec<Item>, code: &mut Vec<u8>, global_labels: &mut HashMap<String, usize>) {
+    // First discover the offset of each label in this file.
+    let mut local_labels = HashMap::<String, usize>::new();
+    let mut offset = code.len();
+    for item in &body {
+        match item {
+            Item::GlobalLabelDeclaration(label) => {
+                let old = global_labels.insert(label.clone(), offset);
+                assert!(old.is_none(), "Duplicate global label: {}", label);
+            }
+            Item::LocalLabelDeclaration(label) => {
+                let old = local_labels.insert(label.clone(), offset);
+                assert!(old.is_none(), "Duplicate local label: {}", label);
+            }
+            Item::Push(target) => offset += 1 + push_target_size(target) as usize,
+            Item::StandardOp(_) => offset += 1,
+            Item::Bytes(bytes) => offset += bytes.len(),
+        }
+    }
+
+    // Now that we have label offsets, we can assemble the file.
+    for item in body {
+        match item {
+            Item::GlobalLabelDeclaration(_) | Item::LocalLabelDeclaration(_) => {
+                // Nothing to do; we processed labels in the prior phase.
+            }
+            Item::Push(target) => {
+                let target_bytes: Vec<u8> = match target {
+                    PushTarget::Literal(literal) => literal.to_trimmed_be_bytes(),
+                    PushTarget::Label(label) => {
+                        // Local labels take precedence. Otherwise use a global label, which
+                        // must be declared in this file or in an earlier one.
+                        let offset = *local_labels
+                            .get(&label)
+                            .or_else(|| global_labels.get(&label))
+                            .unwrap_or_else(|| panic!("No such label: {}", label));
+                        // We want the BYTES_PER_OFFSET least significant bytes in BE order.
+                        // It's easiest to rev the first BYTES_PER_OFFSET bytes of the LE encoding.
+                        (0..BYTES_PER_OFFSET)
+                            .rev()
+                            .map(|i| offset.to_le_bytes()[i as usize])
+                            .collect()
+                    }
+                };
+                code.push(get_push_opcode(target_bytes.len() as u8));
+                code.extend(target_bytes);
+            }
+            Item::StandardOp(opcode) => {
+                code.push(get_opcode(&opcode));
+            }
+            Item::Bytes(bytes) => code.extend(bytes.iter().map(|b| b.to_u8())),
+        }
+    }
+
+    assert_eq!(
+        code.len(),
+        offset,
+        "The two phases gave different code lengths"
+    );
+}
+
+/// The size of a `PushTarget`, in bytes.
+fn push_target_size(target: &PushTarget) -> u8 {
+    match target {
+        PushTarget::Literal(lit) => lit.to_trimmed_be_bytes().len() as u8,
+        PushTarget::Label(_) => BYTES_PER_OFFSET,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use crate::cpu::kernel::{assembler::*, ast::*};
+
+    #[test]
+    fn two_files() {
+        // We will test two simple files, with a label and a jump, to ensure that jump offsets
+        // are correctly shifted based on the offset of the containing file.
+
+        let file_1 = File {
+            body: vec![
+                Item::GlobalLabelDeclaration("function_1".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+                Item::StandardOp("ADD".to_string()),
+                Item::StandardOp("MUL".to_string()),
+            ],
+        };
+
+        let file_2 = File {
+            body: vec![
+                Item::GlobalLabelDeclaration("function_2".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+                Item::StandardOp("DIV".to_string()),
+                Item::LocalLabelDeclaration("mylabel".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+                Item::StandardOp("MOD".to_string()),
+                Item::Push(PushTarget::Label("mylabel".to_string())),
+                Item::StandardOp("JUMP".to_string()),
+            ],
+        };
+
+        let expected_code = vec![
+            get_opcode("JUMPDEST"),
+            get_opcode("ADD"),
+            get_opcode("MUL"),
+            get_opcode("JUMPDEST"),
+            get_opcode("DIV"),
+            get_opcode("JUMPDEST"),
+            get_opcode("MOD"),
+            get_push_opcode(BYTES_PER_OFFSET),
+            // The label offset, 5, in 3-byte BE form.
+            0,
+            0,
+            5,
+            get_opcode("JUMP"),
+        ];
+
+        let mut expected_global_labels = HashMap::new();
+        expected_global_labels.insert("function_1".to_string(), 0);
+        expected_global_labels.insert("function_2".to_string(), 3);
+
+        let expected_kernel = Kernel {
+            code: expected_code,
+            global_labels: expected_global_labels,
+        };
+
+        let program = vec![file_1, file_2];
+        assert_eq!(assemble(program), expected_kernel);
+    }
+
+    #[test]
+    #[should_panic]
+    fn global_label_collision() {
+        let file_1 = File {
+            body: vec![
+                Item::GlobalLabelDeclaration("foo".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+            ],
+        };
+        let file_2 = File {
+            body: vec![
+                Item::GlobalLabelDeclaration("foo".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+            ],
+        };
+        assemble(vec![file_1, file_2]);
+    }
+
+    #[test]
+    #[should_panic]
+    fn local_label_collision() {
+        let file = File {
+            body: vec![
+                Item::LocalLabelDeclaration("foo".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+                Item::LocalLabelDeclaration("foo".to_string()),
+                Item::StandardOp("ADD".to_string()),
+            ],
+        };
+        assemble(vec![file]);
+    }
+
+    #[test]
+    fn literal_bytes() {
+        let file = File {
+            body: vec![
+                Item::Bytes(vec![
+                    Literal::Hex("12".to_string()),
+                    Literal::Decimal("42".to_string()),
+                ]),
+                Item::Bytes(vec![
+                    Literal::Hex("fe".to_string()),
+                    Literal::Decimal("255".to_string()),
+                ]),
+            ],
+        };
+        let code = assemble(vec![file]).code;
+        assert_eq!(code, vec![0x12, 42, 0xfe, 255])
+    }
+
+    #[test]
+    fn push_global_label() {
+        let file = File {
+            body: vec![
+                Item::GlobalLabelDeclaration("foo".to_string()),
+                Item::StandardOp("JUMPDEST".to_string()),
+                Item::Push(PushTarget::Label("foo".to_string())),
+                Item::StandardOp("JUMP".to_string()),
+            ],
+        };
+        let code = assemble(vec![file]).code;
+        let expected_code = vec![
+            get_opcode("JUMPDEST"),
+            get_push_opcode(BYTES_PER_OFFSET),
+            // The offset of `foo`, 0, in 3-byte BE form.
+            0,
+            0,
+            0,
+            get_opcode("JUMP"),
+        ];
+        assert_eq!(code, expected_code);
+    }
+}
diff --git a/evm/src/cpu/kernel/ast.rs b/evm/src/cpu/kernel/ast.rs
new file mode 100644
index 00000000..717adfea
--- /dev/null
+++ b/evm/src/cpu/kernel/ast.rs
@@ -0,0 +1,91 @@
+use ethereum_types::U256;
+use plonky2_util::ceil_div_usize;
+
+#[derive(Debug)]
+pub(crate) struct File {
+    pub(crate) body: Vec<Item>,
+}
+
+#[derive(Debug)]
+pub(crate) enum Item {
+    /// Declares a global label.
+    GlobalLabelDeclaration(String),
+    /// Declares a label that is local to the current file.
+    LocalLabelDeclaration(String),
+    /// A `PUSH` operation.
+    Push(PushTarget),
+    /// Any opcode besides a PUSH opcode.
+    StandardOp(String),
+    /// Raw data bytes; each literal must fit in a single byte.
+    Bytes(Vec<Literal>),
+}
+
+/// The target of a `PUSH` operation.
+#[derive(Debug)]
+pub(crate) enum PushTarget {
+    Literal(Literal),
+    Label(String),
+}
+
+#[derive(Debug)]
+pub(crate) enum Literal {
+    Decimal(String),
+    Hex(String),
+}
+
+impl Literal {
+    /// Big-endian bytes of this literal with leading zero bytes removed. Zero is encoded as a
+    /// single `0x00` byte, since `get_push_opcode` rejects a zero-length immediate.
+    pub(crate) fn to_trimmed_be_bytes(&self) -> Vec<u8> {
+        let u256 = self.to_u256();
+        let num_bytes = ceil_div_usize(u256.bits(), 8).max(1);
+        // `byte` is little-endian, so we manually reverse it.
+        (0..num_bytes).rev().map(|i| u256.byte(i)).collect()
+    }
+
+    pub(crate) fn to_u256(&self) -> U256 {
+        let (src, radix) = match self {
+            Literal::Decimal(s) => (s, 10),
+            Literal::Hex(s) => (s, 16),
+        };
+        U256::from_str_radix(src, radix)
+            .unwrap_or_else(|_| panic!("Not a valid u256 literal: {:?}", self))
+    }
+
+    pub(crate) fn to_u8(&self) -> u8 {
+        let (src, radix) = match self {
+            Literal::Decimal(s) => (s, 10),
+            Literal::Hex(s) => (s, 16),
+        };
+        u8::from_str_radix(src, radix)
+            .unwrap_or_else(|_| panic!("Not a valid u8 literal: {:?}", self))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::cpu::kernel::ast::*;
+
+    #[test]
+    fn literal_to_be_bytes() {
+        assert_eq!(
+            Literal::Decimal("768".into()).to_trimmed_be_bytes(),
+            vec![0x03, 0x00]
+        );
+
+        assert_eq!(
+            Literal::Hex("a1b2".into()).to_trimmed_be_bytes(),
+            vec![0xa1, 0xb2]
+        );
+
+        assert_eq!(
+            Literal::Hex("1b2".into()).to_trimmed_be_bytes(),
+            vec![0x1, 0xb2]
+        );
+
+        assert_eq!(
+            Literal::Decimal("0".into()).to_trimmed_be_bytes(),
+            vec![0x00]
+        );
+    }
+}
diff --git a/evm/src/cpu/kernel/evm_asm.pest b/evm/src/cpu/kernel/evm_asm.pest
new file mode 100644
index 00000000..af28ef12
--- /dev/null
+++ b/evm/src/cpu/kernel/evm_asm.pest
@@ -0,0 +1,23 @@
+// Grammar for our EVM assembly code.
+// Loosely based on https://gist.github.com/axic/17ddbbce4738ccf4040d30cbb5de484e
+
+WHITESPACE = _{ " " | "\t" | NEWLINE }
+COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" | "//" ~ (!NEWLINE ~ ANY)* }
+
+identifier_first_char = _{ ASCII_ALPHA | "_" }
+identifier_char = _{ ASCII_ALPHANUMERIC | "_" }
+identifier = @{ identifier_first_char ~ identifier_char* }
+
+literal_decimal = @{ ASCII_DIGIT+ }
+literal_hex = @{ ^"0x" ~ ASCII_HEX_DIGIT+ }
+literal = { literal_hex | literal_decimal }
+
+item = { global_label | local_label | bytes_item | push_instruction | nullary_instruction }
+global_label = { ^"GLOBAL " ~ identifier ~ ":" }
+local_label = { identifier ~ ":" }
+bytes_item = { ^"BYTES " ~ literal ~ ("," ~ literal)* }
+push_instruction = { ^"PUSH " ~ (literal | identifier) }
+nullary_instruction = { identifier }
+
+file = { SOI ~ item* ~ silent_eoi }
+silent_eoi = _{ !ANY }
diff --git a/evm/src/cpu/kernel/mod.rs b/evm/src/cpu/kernel/mod.rs
new file mode 100644
index 00000000..d2511063
--- /dev/null
+++ b/evm/src/cpu/kernel/mod.rs
@@ -0,0 +1,5 @@
+pub mod aggregator;
+mod assembler;
+mod ast;
+mod opcodes;
+mod parser;
diff --git a/evm/src/cpu/kernel/opcodes.rs b/evm/src/cpu/kernel/opcodes.rs
new file mode 100644
index 00000000..b8633178
--- /dev/null
+++ b/evm/src/cpu/kernel/opcodes.rs
@@ -0,0 +1,124 @@
+/// The opcode of the `PUSH[n]` instruction, given a byte count `n`.
+pub(crate) fn get_push_opcode(n: u8) -> u8 {
+    assert!(n > 0);
+    assert!(n <= 32);
+    0x60 + (n - 1)
+}
+
+/// The opcode of a standard instruction (not a `PUSH`).
+pub(crate) fn get_opcode(mnemonic: &str) -> u8 {
+    match mnemonic.to_uppercase().as_str() {
+        "STOP" => 0x00,
+        "ADD" => 0x01,
+        "MUL" => 0x02,
+        "SUB" => 0x03,
+        "DIV" => 0x04,
+        "SDIV" => 0x05,
+        "MOD" => 0x06,
+        "SMOD" => 0x07,
+        "ADDMOD" => 0x08,
+        "MULMOD" => 0x09,
+        "EXP" => 0x0a,
+        "SIGNEXTEND" => 0x0b,
+        "LT" => 0x10,
+        "GT" => 0x11,
+        "SLT" => 0x12,
+        "SGT" => 0x13,
+        "EQ" => 0x14,
+        "ISZERO" => 0x15,
+        "AND" => 0x16,
+        "OR" => 0x17,
+        "XOR" => 0x18,
+        "NOT" => 0x19,
+        "BYTE" => 0x1a,
+        "SHL" => 0x1b,
+        "SHR" => 0x1c,
+        "SAR" => 0x1d,
+        "KECCAK256" => 0x20,
+        "ADDRESS" => 0x30,
+        "BALANCE" => 0x31,
+        "ORIGIN" => 0x32,
+        "CALLER" => 0x33,
+        "CALLVALUE" => 0x34,
+        "CALLDATALOAD" => 0x35,
+        "CALLDATASIZE" => 0x36,
+        "CALLDATACOPY" => 0x37,
+        "CODESIZE" => 0x38,
+        "CODECOPY" => 0x39,
+        "GASPRICE" => 0x3a,
+        "EXTCODESIZE" => 0x3b,
+        "EXTCODECOPY" => 0x3c,
+        "RETURNDATASIZE" => 0x3d,
+        "RETURNDATACOPY" => 0x3e,
+        "EXTCODEHASH" => 0x3f,
+        "BLOCKHASH" => 0x40,
+        "COINBASE" => 0x41,
+        "TIMESTAMP" => 0x42,
+        "NUMBER" => 0x43,
+        "DIFFICULTY" => 0x44,
+        "GASLIMIT" => 0x45,
+        "CHAINID" => 0x46,
+        "SELFBALANCE" => 0x47,
+        "BASEFEE" => 0x48,
+        "POP" => 0x50,
+        "MLOAD" => 0x51,
+        "MSTORE" => 0x52,
+        "MSTORE8" => 0x53,
+        "SLOAD" => 0x54,
+        "SSTORE" => 0x55,
+        "JUMP" => 0x56,
+        "JUMPI" => 0x57,
+        "GETPC" => 0x58,
+        "MSIZE" => 0x59,
+        "GAS" => 0x5a,
+        "JUMPDEST" => 0x5b,
+        "DUP1" => 0x80,
+        "DUP2" => 0x81,
+        "DUP3" => 0x82,
+        "DUP4" => 0x83,
+        "DUP5" => 0x84,
+        "DUP6" => 0x85,
+        "DUP7" => 0x86,
+        "DUP8" => 0x87,
+        "DUP9" => 0x88,
+        "DUP10" => 0x89,
+        "DUP11" => 0x8a,
+        "DUP12" => 0x8b,
+        "DUP13" => 0x8c,
+        "DUP14" => 0x8d,
+        "DUP15" => 0x8e,
+        "DUP16" => 0x8f,
+        "SWAP1" => 0x90,
+        "SWAP2" => 0x91,
+        "SWAP3" => 0x92,
+        "SWAP4" => 0x93,
+        "SWAP5" => 0x94,
+        "SWAP6" => 0x95,
+        "SWAP7" => 0x96,
+        "SWAP8" => 0x97,
+        "SWAP9" => 0x98,
+        "SWAP10" => 0x99,
+        "SWAP11" => 0x9a,
+        "SWAP12" => 0x9b,
+        "SWAP13" => 0x9c,
+        "SWAP14" => 0x9d,
+        "SWAP15" => 0x9e,
+        "SWAP16" => 0x9f,
+        "LOG0" => 0xa0,
+        "LOG1" => 0xa1,
+        "LOG2" => 0xa2,
+        "LOG3" => 0xa3,
+        "LOG4" => 0xa4,
+        "CREATE" => 0xf0,
+        "CALL" => 0xf1,
+        "CALLCODE" => 0xf2,
+        "RETURN" => 0xf3,
+        "DELEGATECALL" => 0xf4,
+        "CREATE2" => 0xf5,
+        "STATICCALL" => 0xfa,
+        "REVERT" => 0xfd,
+        "INVALID" => 0xfe,
+        "SELFDESTRUCT" => 0xff,
+        _ => panic!("Unrecognized mnemonic {}", mnemonic),
+    }
+}
diff --git a/evm/src/cpu/kernel/parser.rs b/evm/src/cpu/kernel/parser.rs
new file mode 100644
index 00000000..35db707b
--- /dev/null
+++ b/evm/src/cpu/kernel/parser.rs
@@ -0,0 +1,58 @@
+use pest::iterators::Pair;
+use pest::Parser;
+
+use crate::cpu::kernel::ast::{File, Item, Literal, PushTarget};
+
+/// Parses EVM assembly code.
+#[derive(pest_derive::Parser)]
+#[grammar = "cpu/kernel/evm_asm.pest"]
+pub struct AsmParser;
+
+/// Parses a single assembly file, panicking if it does not match the grammar.
+pub(crate) fn parse(s: &str) -> File {
+    let file = AsmParser::parse(Rule::file, s)
+        .expect("Parsing failed")
+        .next()
+        .unwrap();
+    let body = file.into_inner().map(parse_item).collect();
+    File { body }
+}
+
+fn parse_item(item: Pair<Rule>) -> Item {
+    let item = item.into_inner().next().unwrap();
+    match item.as_rule() {
+        Rule::global_label => {
+            Item::GlobalLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
+        }
+        Rule::local_label => {
+            Item::LocalLabelDeclaration(item.into_inner().next().unwrap().as_str().into())
+        }
+        Rule::bytes_item => Item::Bytes(item.into_inner().map(parse_literal).collect()),
+        Rule::push_instruction => Item::Push(parse_push_target(item.into_inner().next().unwrap())),
+        Rule::nullary_instruction => Item::StandardOp(item.as_str().into()),
+        _ => panic!("Unexpected {:?}", item.as_rule()),
+    }
+}
+
+fn parse_push_target(target: Pair<Rule>) -> PushTarget {
+    match target.as_rule() {
+        Rule::identifier => PushTarget::Label(target.as_str().into()),
+        Rule::literal => PushTarget::Literal(parse_literal(target)),
+        _ => panic!("Unexpected {:?}", target.as_rule()),
+    }
+}
+
+fn parse_literal(literal: Pair<Rule>) -> Literal {
+    let literal = literal.into_inner().next().unwrap();
+    match literal.as_rule() {
+        Rule::literal_decimal => Literal::Decimal(literal.as_str().into()),
+        Rule::literal_hex => Literal::Hex(parse_hex(literal)),
+        _ => panic!("Unexpected {:?}", literal.as_rule()),
+    }
+}
+
+fn parse_hex(hex: Pair<Rule>) -> String {
+    let prefix = &hex.as_str()[..2];
+    debug_assert!(prefix == "0x" || prefix == "0X");
+    hex.as_str()[2..].to_string()
+}
diff --git a/evm/src/cpu/mod.rs b/evm/src/cpu/mod.rs
index 2db21ea9..14a11b1c 100644
--- a/evm/src/cpu/mod.rs
+++ b/evm/src/cpu/mod.rs
@@ -1,4 +1,5 @@
 pub(crate) mod columns;
 pub mod cpu_stark;
 pub(crate) mod decode;
+pub mod kernel;
 mod simple_logic;