Transaction (RLP) parsing

Will add tests once we have the interpreter support for other segments.
This commit is contained in:
Daniel Lubarov 2022-07-06 19:47:58 -07:00
parent 60227b9828
commit 544c84b420
13 changed files with 553 additions and 6 deletions

View File

@ -9,6 +9,7 @@ use once_cell::sync::Lazy;
use super::assembler::{assemble, Kernel};
use crate::cpu::kernel::parser::parse;
use crate::cpu::kernel::txn_fields::NormalizedTxnField;
use crate::memory::segments::Segment;
pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
@ -24,6 +25,9 @@ pub fn evm_constants() -> HashMap<String, U256> {
for segment in Segment::all() {
c.insert(segment.var_name().into(), (segment as u32).into());
}
for txn_field in NormalizedTxnField::all() {
c.insert(txn_field.var_name().into(), (txn_field as u32).into());
}
c
}
@ -43,8 +47,16 @@ pub(crate) fn combined_kernel() -> Kernel {
include_str!("asm/secp256k1/lift_x.asm"),
include_str!("asm/secp256k1/inverse_scalar.asm"),
include_str!("asm/ecrecover.asm"),
include_str!("asm/rlp/encode.asm"),
include_str!("asm/rlp/decode.asm"),
include_str!("asm/rlp/read_to_memory.asm"),
include_str!("asm/storage_read.asm"),
include_str!("asm/storage_write.asm"),
include_str!("asm/transactions/process_normalized.asm"),
include_str!("asm/transactions/router.asm"),
include_str!("asm/transactions/type_0.asm"),
include_str!("asm/transactions/type_1.asm"),
include_str!("asm/transactions/type_2.asm"),
];
let parsed_files = files.iter().map(|f| parse(f)).collect_vec();

View File

@ -78,10 +78,22 @@
// stack: c, input, ...
SWAP1
// stack: input, c, ...
// DIV divides the top item by the one beneath it, so this leaves input / c.
// (No other arithmetic may run between the swap and the DIV.)
DIV
// stack: input / c, ...
%endmacro
// Replaces the top stack item `input` with `input % c`, where `c` is the
// macro argument.
// Slightly inefficient as we need to swap the inputs.
// Consider avoiding this in performance-critical code.
%macro mod_const(c)
// stack: input, ...
PUSH $c
// stack: c, input, ...
// Bring input back to the top so that it is the value being reduced.
SWAP1
// stack: input, c, ...
MOD
// stack: input % c, ...
%endmacro
%macro shl_const(c)
// stack: input, ...
PUSH $c

View File

@ -0,0 +1,145 @@
// Note: currently, these methods do not check that RLP input is in canonical
// form; for example a single byte could be encoded with the length-of-length
// form. Technically an EVM must perform these checks, but we aren't really
// concerned with it in our setting. An attacker who corrupted consensus could
// prove a non-canonical state, but this would just temporarily stall the bridge
// until a fix was deployed. We are more concerned with preventing any theft of
// assets.
// Parse the length of a bytestring from RLP memory. The next len bytes after
// pos' will contain the string.
//
// The first byte selects one of three encodings:
//   - [0x00, 0x7f]: the byte is the string itself, so pos' = pos and len = 1.
//   - [0x80, 0xb7]: a string of 0-55 bytes; len = first_byte - 0x80 and
//     pos' = pos + 1.
//   - [0xb8, 0xbf]: a string of more than 55 bytes; first_byte - 0xb7 is the
//     number of bytes following the prefix that hold len as a big-endian
//     integer, which is decoded by decode_int_given_len.
// First bytes >= 0xc0 (list prefixes) are not rejected here.
//
// Pre stack: pos, retdest
// Post stack: pos', len
global decode_rlp_string_len:
    JUMPDEST
    // stack: pos, retdest
    DUP1
    %mload_current(@SEGMENT_RLP_RAW)
    // stack: first_byte, pos, retdest
    DUP1
    // 0xb7 = 0x80 + 55 is still a short (55 byte) string, so the long form
    // starts at 0xb8. GT is native while GE is not, so compare to 0xb7.
    %gt_const(0xb7)
    // stack: first_byte >= 0xb8, first_byte, pos, retdest
    %jumpi(decode_rlp_string_len_large)
    // stack: first_byte, pos, retdest
    DUP1
    %gt_const(0x7f)
    // stack: first_byte >= 0x80, first_byte, pos, retdest
    %jumpi(decode_rlp_string_len_medium)

decode_rlp_string_len_small:
    // String is a single byte in the range [0x00, 0x7f].
    // pos is left unchanged, since the byte at pos is the string itself.
    %stack (first_byte, pos, retdest) -> (retdest, pos, 1)
    JUMP

decode_rlp_string_len_medium:
    JUMPDEST
    // String is 0-55 bytes long. First byte contains the len.
    // stack: first_byte, pos, retdest
    %sub_const(0x80)
    // stack: len, pos, retdest
    SWAP1
    %add_const(1)
    // stack: pos', len, retdest
    // Return to the caller; without this we would fall through into the
    // long-string case below and misinterpret (pos', len) as its inputs.
    %stack (pos, len, retdest) -> (retdest, pos, len)
    JUMP

decode_rlp_string_len_large:
    JUMPDEST
    // String is >55 bytes long. First byte contains the len of the len.
    // stack: first_byte, pos, retdest
    %sub_const(0xb7)
    // stack: len_of_len, pos, retdest
    SWAP1
    %add_const(1)
    // stack: pos', len_of_len, retdest
    // decode_int_given_len reads the length bytes and returns
    // (pos'', len) to retdest, which is exactly our post stack.
    %jump(decode_int_given_len)
// Parse a scalar from RLP memory.
// Pre stack: pos, retdest
// Post stack: pos', scalar
//
// Scalars are variable-length, but this method assumes a max length of 32
// bytes, so that the result can be returned as a single word on the stack.
// As per the spec, scalars must not have leading zeros.
// NOTE(review): neither the 32 byte limit nor the absence of leading zeros
// is checked by this routine or by decode_int_given_len.
//
// Implemented as two chained routines: decode_rlp_string_len finds the
// payload, then decode_int_given_len folds its bytes into a single word.
global decode_rlp_scalar:
JUMPDEST
// stack: pos, retdest
PUSH decode_int_given_len
// stack: decode_int_given_len, pos, retdest
SWAP1
// stack: pos, decode_int_given_len, retdest
// decode_rlp_string_len will return to decode_int_given_len, at which point
// the stack will contain (pos', len, retdest), which are the proper args
// to decode_int_given_len.
%jump(decode_rlp_string_len)
// Parse the length of an RLP list from memory.
//
// The first byte selects one of two encodings:
//   - [0xc0, 0xf7]: a list whose payload is 0-55 bytes long;
//     len = first_byte - 0xc0.
//   - [0xf8, 0xff]: a list whose payload is more than 55 bytes long;
//     first_byte - 0xf7 is the number of bytes following the prefix that hold
//     len as a big-endian integer.
// In both cases pos' points at the first byte of the list payload.
// First bytes < 0xc0 (i.e. non-list prefixes) are not rejected here.
//
// Pre stack: pos, retdest
// Post stack: pos', len
global decode_rlp_list_len:
    JUMPDEST
    // stack: pos, retdest
    DUP1
    %mload_current(@SEGMENT_RLP_RAW)
    // stack: first_byte, pos, retdest
    SWAP1
    %add_const(1) // increment pos
    SWAP1
    // stack: first_byte, pos', retdest
    // If first_byte is >= 0xf8, it's a > 55 byte list, and
    // first_byte - 0xf7 is the length of the length. Note that 0xf7 itself
    // (0xc0 + 55) is still a short list with a 55 byte payload.
    DUP1
    %gt_const(0xf7) // GT is native while GE is not, so compare to 0xf7 instead
    // stack: first_byte >= 0xf8, first_byte, pos', retdest
    %jumpi(decode_rlp_list_len_big)

decode_rlp_list_len_small:
    // The list length is first_byte - 0xc0.
    // stack: first_byte, pos', retdest
    %sub_const(0xc0)
    // stack: len, pos', retdest
    %stack (len, pos, retdest) -> (retdest, pos, len)
    JUMP

decode_rlp_list_len_big:
    JUMPDEST
    // The length of the length is first_byte - 0xf7.
    // stack: first_byte, pos', retdest
    %sub_const(0xf7)
    // stack: len_of_len, pos', retdest
    SWAP1
    // stack: pos', len_of_len, retdest
    // decode_int_given_len returns (pos'', len) to retdest for us.
    %jump(decode_int_given_len)
// Parse an integer of the given length. It is assumed that the integer will
// fit in a single (256-bit) word on the stack.
//
// The len bytes starting at pos are read from @SEGMENT_RLP_RAW and combined
// big-endian: for each byte, acc = (acc << 8) + byte. A len of zero yields 0.
//
// Pre stack: pos, len, retdest
// Post stack: pos', int
decode_int_given_len:
    JUMPDEST
    %stack (pos, len, retdest) -> (pos, len, pos, retdest)
    ADD
    // stack: end_pos, pos, retdest
    SWAP1
    // stack: pos, end_pos, retdest
    PUSH 0 // initial accumulator state
    // stack: acc, pos, end_pos, retdest

decode_int_given_len_loop:
    JUMPDEST
    // stack: acc, pos, end_pos, retdest
    DUP3
    DUP3
    // stack: pos, end_pos, acc, pos, end_pos, retdest
    // EQ consumes both copies, leaving just the loop-exit flag on top.
    EQ
    // stack: pos == end_pos, acc, pos, end_pos, retdest
    %jumpi(decode_int_given_len_finish)
    // stack: acc, pos, end_pos, retdest
    %shl_const(8)
    // stack: acc << 8, pos, end_pos, retdest
    DUP2
    // stack: pos, acc << 8, pos, end_pos, retdest
    %mload_current(@SEGMENT_RLP_RAW)
    // stack: byte, acc << 8, pos, end_pos, retdest
    ADD
    // stack: acc', pos, end_pos, retdest
    // Increment pos.
    SWAP1
    %add_const(1)
    SWAP1
    // stack: acc', pos', end_pos, retdest
    %jump(decode_int_given_len_loop)

decode_int_given_len_finish:
    JUMPDEST
    // Here pos == end_pos, so either can be returned as pos'.
    %stack (acc, pos, end_pos, retdest) -> (retdest, pos, acc)
    JUMP

View File

@ -0,0 +1,17 @@
// RLP-encode a scalar, i.e. a variable-length integer.
// Pre stack: pos, scalar
// Post stack: (empty)
// Not implemented yet: the body is a single PANIC placeholder.
// NOTE(review): unlike the decode routines, no retdest is listed in the pre
// stack -- confirm the intended calling convention when implementing.
global encode_rlp_scalar:
PANIC // TODO: implement
// RLP-encode a fixed-length 160-bit string. Assumes string < 2^160.
// Pre stack: pos, string
// Post stack: (empty)
// Not implemented yet: the body is a single PANIC placeholder.
// NOTE(review): unlike the decode routines, no retdest is listed in the pre
// stack -- confirm the intended calling convention when implementing.
global encode_rlp_160:
PANIC // TODO: implement
// RLP-encode a fixed-length 256-bit string.
// Pre stack: pos, string
// Post stack: (empty)
// Not implemented yet: the body is a single PANIC placeholder.
// NOTE(review): unlike the decode routines, no retdest is listed in the pre
// stack -- confirm the intended calling convention when implementing.
global encode_rlp_256:
PANIC // TODO: implement

View File

@ -0,0 +1,39 @@
// Read RLP data from the prover's tape, and save it to the SEGMENT_RLP_RAW
// segment of memory.
//
// The first PROVER_INPUT supplies the blob length; each subsequent
// PROVER_INPUT supplies one byte, which is stored at offsets 0, 1, ..., len - 1.
//
// Pre stack: retdest
// Post stack: (empty)
global read_rlp_to_memory:
    JUMPDEST
    // stack: retdest
    PROVER_INPUT // Read the RLP blob length from the prover tape.
    // stack: len, retdest
    PUSH 0 // initial position
    // stack: pos, len, retdest

read_rlp_to_memory_loop:
    JUMPDEST
    // stack: pos, len, retdest
    DUP2
    DUP2
    EQ
    // stack: pos == len, pos, len, retdest
    %jumpi(read_rlp_to_memory_finish)
    // stack: pos, len, retdest
    PROVER_INPUT
    // stack: byte, pos, len, retdest
    // Copy pos (the second item), not byte, so that the store below sees
    // (offset, value) = (pos, byte).
    DUP2
    // stack: pos, byte, pos, len, retdest
    %mstore_current(@SEGMENT_RLP_RAW)
    // stack: pos, len, retdest
    %add_const(1)
    // stack: pos', len, retdest
    %jump(read_rlp_to_memory_loop)

read_rlp_to_memory_finish:
    JUMPDEST
    // stack: pos, len, retdest
    %pop2
    // stack: retdest
    JUMP

View File

@ -0,0 +1,5 @@
// After the transaction data has been parsed into a normalized set of fields
// (see NormalizedTxnField), this routine processes the transaction.
// Not implemented yet: the label currently has no body.
global process_normalized_txn:
// TODO

View File

@ -0,0 +1,38 @@
// This is the entry point of transaction processing. We load the transaction
// RLP data into memory, check the transaction type, then based on the type we
// jump to the appropriate transaction parsing method.
//
// Pre stack: (empty)
// Control never returns here: we leave via a jump to one of
// process_type_0_txn, process_type_1_txn or process_type_2_txn.
global route_txn:
    JUMPDEST
    // stack: (empty)
    // First load transaction data into memory, where it will be parsed.
    PUSH read_txn_from_memory
    %jump(read_rlp_to_memory)

// At this point, the raw txn data is in memory.
read_txn_from_memory:
    JUMPDEST
    // stack: (empty)

    // We will peek at the first byte to determine what type of transaction this is.
    // Note that type 1 and 2 transactions have a first byte of 1 and 2, respectively.
    // Type 0 (legacy) transactions have no such prefix, but their RLP will have a
    // first byte >= 0xc0, so there is no overlap.

    PUSH 0
    %mload_current(@SEGMENT_RLP_RAW)
    %eq_const(1)
    // stack: first_byte == 1
    %jumpi(process_type_1_txn)
    // stack: (empty)

    PUSH 0
    %mload_current(@SEGMENT_RLP_RAW)
    %eq_const(2)
    // stack: first_byte == 2
    %jumpi(process_type_2_txn)
    // stack: (empty)

    // At this point, since it's not a type 1 or 2 transaction,
    // it must be a legacy (aka type 0) transaction.
    %jump(process_type_0_txn)

View File

@ -0,0 +1,189 @@
// Type 0 transactions, aka legacy transactions, have the format
// rlp([nonce, gas_price, gas_limit, destination, amount, data, v, r, s])
//
// The field v was originally encoded as
// 27 + y_parity
// but as of EIP 155 it can also be encoded as
// 35 + 2 * chain_id + y_parity
//
// If a chain_id is present in v, the signed data is
// keccak256(rlp([nonce, gas_price, gas_limit, destination, amount, data, chain_id, 0, 0]))
// otherwise, it is
// keccak256(rlp([nonce, gas_price, gas_limit, destination, amount, data]))
//
// The parser below walks the RLP in @SEGMENT_RLP_RAW field by field. Each
// step pushes the label of the next step as the return address, jumps to a
// decode routine, and the next step stores the decoded value into
// @SEGMENT_NORMALIZED_TXN.
global process_type_0_txn:
JUMPDEST
// stack: (empty)
PUSH process_txn_with_len
PUSH 0 // initial pos
// stack: pos, process_txn_with_len
%jump(decode_rlp_list_len)
// Return point of decode_rlp_list_len.
process_txn_with_len:
// stack: pos, len
// We don't actually need the length.
%stack (pos, len) -> (pos)
PUSH store_nonce
SWAP1
// stack: pos, store_nonce
%jump(decode_rlp_scalar)
// Return point of decode_rlp_scalar for the nonce field.
store_nonce:
// stack: pos, nonce
%stack (pos, nonce) -> (@TXN_FIELD_NONCE, nonce, pos)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
PUSH store_gas_price
SWAP1
// stack: pos, store_gas_price
%jump(decode_rlp_scalar)
// Return point of decode_rlp_scalar for the gas_price field.
store_gas_price:
// stack: pos, gas_price
// For legacy transactions, we set both the
// TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS and TXN_FIELD_MAX_FEE_PER_GAS
// fields to gas_price.
%stack (pos, gas_price) -> (@TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS, gas_price,
@TXN_FIELD_MAX_FEE_PER_GAS, gas_price, pos)
// Two stores: each consumes one (field, value) pair from the stack.
%mstore_current(@SEGMENT_NORMALIZED_TXN)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
PUSH store_gas_limit
SWAP1
// stack: pos, store_gas_limit
%jump(decode_rlp_scalar)
// Return point of decode_rlp_scalar for the gas_limit field. Stores
// gas_limit, then parses the optional destination and requests the amount.
store_gas_limit:
    // stack: pos, gas_limit
    %stack (pos, gas_limit) -> (@TXN_FIELD_GAS_LIMIT, gas_limit, pos)
    %mstore_current(@SEGMENT_NORMALIZED_TXN)

    // Peek at the RLP to see if the destination is the empty string, which
    // RLP encodes as the single byte 0x80 (a zero-length short string).
    // If so, there is no destination field, so skip the store_destination
    // step and leave TXN_FIELD_DESTINATION at its default value.
    // stack: pos
    DUP1
    %mload_current(@SEGMENT_RLP_RAW)
    %eq_const(0x80)
    // stack: destination_empty, pos
    %jumpi(skip_destination)

    // If we got here, there is a destination field.
    PUSH store_destination
    SWAP1
    // stack: pos, store_destination
    %jump(decode_rlp_scalar)

store_destination:
    // stack: pos, destination
    %stack (pos, destination) -> (@TXN_FIELD_DESTINATION, destination, pos)
    %mstore_current(@SEGMENT_NORMALIZED_TXN)
    // stack: pos
    %jump(parse_amount)

skip_destination:
    // stack: pos
    // Step over the 0x80 byte so that the amount is parsed from the right
    // position.
    %add_const(1)
    // stack: pos'

parse_amount:
    // stack: pos
    PUSH store_amount
    SWAP1
    // stack: pos, store_amount
    %jump(decode_rlp_scalar)
// Return point of decode_rlp_scalar for the amount field.
store_amount:
// stack: pos, amount
%stack (pos, amount) -> (@TXN_FIELD_AMOUNT, amount, pos)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
PUSH store_data_len
SWAP1
// stack: pos, store_data_len
%jump(decode_rlp_string_len)
// Return point of decode_rlp_string_len for the data field; pos is the first
// byte of the data payload.
store_data_len:
// stack: pos, data_len
%stack (pos, data_len) -> (@TXN_FIELD_DATA_LEN, data_len, pos, data_len, pos, data_len)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos, data_len, pos, data_len
ADD
// stack: new_pos, pos, data_len
// new_pos is the position just past the data, where v begins.
// Memcpy the txn data from @SEGMENT_RLP_RAW to @SEGMENT_TXN_DATA.
PUSH parse_v
%stack (parse_v, new_pos, old_pos, data_len) -> (old_pos, data_len, parse_v, new_pos)
// Build SRC = (context, @SEGMENT_RLP_RAW, old_pos) on top of the stack.
PUSH @SEGMENT_RLP_RAW
GET_CONTEXT
// Build DST = (context, @SEGMENT_TXN_DATA, 0) on top of SRC.
PUSH 0
PUSH @SEGMENT_TXN_DATA
GET_CONTEXT
// stack: DST, SRC, data_len, parse_v, new_pos
// NOTE(review): memcpy is defined elsewhere; presumably it consumes DST, SRC
// and data_len and returns to parse_v with new_pos left on the stack.
%jump(memcpy)
// Requests the v field; process_v then splits it into chain ID and y_parity.
parse_v:
// stack: pos
PUSH process_v
SWAP1
// stack: pos, process_v
%jump(decode_rlp_scalar)
process_v:
// stack: pos, v
SWAP1
// stack: v, pos
DUP1
%gt_const(28)
// stack: v > 28, v, pos
%jumpi(process_v_new_style)
// We have an old style v, so y_parity = v - 27.
// No chain ID is present, so we can leave TXN_FIELD_CHAIN_ID_PRESENT and
// TXN_FIELD_CHAIN_ID with their default values of zero.
// NOTE(review): v < 27 is not rejected here -- confirm whether it should be.
// stack: v, pos
%sub_const(27)
%stack (y_parity, pos) -> (@TXN_FIELD_Y_PARITY, y_parity, pos)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
%jump(parse_r)
process_v_new_style:
// stack: v, pos
// We have a new style v, so chain_id_present = 1,
// chain_id = (v - 35) / 2, and y_parity = (v - 35) % 2.
// NOTE(review): any v > 28 reaches this branch, including 29..34 for which
// v - 35 is negative -- confirm whether such values should be rejected.
%stack (v, pos) -> (@TXN_FIELD_CHAIN_ID_PRESENT, 1, v, pos)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: v, pos
%sub_const(35)
DUP1
// stack: v - 35, v - 35, pos
%div_const(2)
// stack: chain_id, v - 35, pos
PUSH @TXN_FIELD_CHAIN_ID
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: v - 35, pos
%mod_const(2)
// stack: y_parity, pos
PUSH @TXN_FIELD_Y_PARITY
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
// Falls through to the next label, which parses r.
// Parses the last two fields, r and s, then hands over to
// process_normalized_txn.
parse_r:
// stack: pos
PUSH store_r
SWAP1
// stack: pos, store_r
%jump(decode_rlp_scalar)
store_r:
// stack: pos, r
%stack (pos, r) -> (@TXN_FIELD_R, r, pos)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: pos
PUSH store_s
SWAP1
// stack: pos, store_s
%jump(decode_rlp_scalar)
store_s:
// stack: pos, s
// s is the last field, so pos is dropped here rather than kept.
%stack (pos, s) -> (@TXN_FIELD_S, s)
%mstore_current(@SEGMENT_NORMALIZED_TXN)
// stack: (empty)
// TODO: Write the signed txn data to memory, where it can be hashed and
// checked against the signature.
%jump(process_normalized_txn)

View File

@ -0,0 +1,12 @@
// Type 1 transactions, introduced by EIP 2930, have the format
// 0x01 || rlp([chain_id, nonce, gas_price, gas_limit, to, value, data,
// access_list, y_parity, r, s])
//
// The signed data is
// keccak256(0x01 || rlp([chain_id, nonce, gas_price, gas_limit, to, value,
// data, access_list]))
//
// Not implemented yet: reaching this routine executes PANIC.
global process_type_1_txn:
JUMPDEST
// stack: (empty)
PANIC // TODO: Unfinished

View File

@ -0,0 +1,14 @@
// Type 2 transactions, introduced by EIP 1559, have the format
// 0x02 || rlp([chain_id, nonce, max_priority_fee_per_gas, max_fee_per_gas,
// gas_limit, destination, amount, data, access_list, y_parity,
// r, s])
//
// The signed data is
// keccak256(0x02 || rlp([chain_id, nonce, max_priority_fee_per_gas,
// max_fee_per_gas, gas_limit, destination, amount,
// data, access_list]))
//
// Not implemented yet: reaching this routine executes PANIC.
global process_type_2_txn:
JUMPDEST
// stack: (empty)
PANIC // TODO: Unfinished

View File

@ -5,6 +5,7 @@ pub(crate) mod keccak_util;
mod opcodes;
mod parser;
mod stack_manipulation;
mod txn_fields;
#[cfg(test)]
mod interpreter;

View File

@ -0,0 +1,59 @@
/// Transaction fields in normalized form, i.e. independent of the transaction type.
///
/// The explicit discriminants are the values each variant takes when cast to an integer;
/// they are consecutive, starting at zero, and follow the order returned by [`Self::all`].
#[allow(dead_code)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
pub(crate) enum NormalizedTxnField {
    /// Whether a chain ID was present in the txn data. Type 0 transaction with v=27 or v=28 have
    /// no chain ID. This affects what fields get signed.
    ChainIdPresent = 0,
    ChainId = 1,
    Nonce = 2,
    MaxPriorityFeePerGas = 3,
    MaxFeePerGas = 4,
    GasLimit = 5,
    Destination = 6,
    Amount = 7,
    /// The length of the data field. The data itself is stored in another segment.
    DataLen = 8,
    YParity = 9,
    R = 10,
    S = 11,
}

impl NormalizedTxnField {
    /// Number of variants; also the length of the array returned by [`Self::all`].
    pub(crate) const COUNT: usize = 12;

    /// Every variant, listed in ascending discriminant order.
    pub(crate) fn all() -> [Self; Self::COUNT] {
        use NormalizedTxnField::*;

        [
            ChainIdPresent,
            ChainId,
            Nonce,
            MaxPriorityFeePerGas,
            MaxFeePerGas,
            GasLimit,
            Destination,
            Amount,
            DataLen,
            YParity,
            R,
            S,
        ]
    }

    /// The variable name that gets passed into kernel assembly code.
    pub(crate) fn var_name(&self) -> &'static str {
        match *self {
            Self::ChainIdPresent => "TXN_FIELD_CHAIN_ID_PRESENT",
            Self::ChainId => "TXN_FIELD_CHAIN_ID",
            Self::Nonce => "TXN_FIELD_NONCE",
            Self::MaxPriorityFeePerGas => "TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS",
            Self::MaxFeePerGas => "TXN_FIELD_MAX_FEE_PER_GAS",
            Self::GasLimit => "TXN_FIELD_GAS_LIMIT",
            Self::Destination => "TXN_FIELD_DESTINATION",
            Self::Amount => "TXN_FIELD_AMOUNT",
            Self::DataLen => "TXN_FIELD_DATA_LEN",
            Self::YParity => "TXN_FIELD_Y_PARITY",
            Self::R => "TXN_FIELD_R",
            Self::S => "TXN_FIELD_S",
        }
    }
}

View File

@ -1,4 +1,4 @@
#[allow(dead_code)] // TODO: Not all segments are used yet.
#[allow(dead_code)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
pub(crate) enum Segment {
/// Contains EVM bytecode.
@ -17,14 +17,16 @@ pub(crate) enum Segment {
/// General purpose kernel memory, used by various kernel functions.
/// In general, calling a helper function can result in this memory being clobbered.
KernelGeneral = 6,
/// Contains transaction data (after it's parsed and converted to a standard format).
TxnData = 7,
/// Contains normalized transaction fields; see `NormalizedTxnField`.
TxnFields = 7,
/// Contains the data field of a transaction.
TxnData = 8,
/// Raw RLP data.
RlpRaw = 8,
RlpRaw = 9,
}
impl Segment {
pub(crate) const COUNT: usize = 9;
pub(crate) const COUNT: usize = 10;
pub(crate) fn all() -> [Self; Self::COUNT] {
[
@ -35,6 +37,7 @@ impl Segment {
Self::Returndata,
Self::Metadata,
Self::KernelGeneral,
Self::TxnFields,
Self::TxnData,
Self::RlpRaw,
]
@ -50,6 +53,7 @@ impl Segment {
Segment::Returndata => "SEGMENT_RETURNDATA",
Segment::Metadata => "SEGMENT_METADATA",
Segment::KernelGeneral => "SEGMENT_KERNEL_GENERAL",
Segment::TxnFields => "SEGMENT_NORMALIZED_TXN",
Segment::TxnData => "SEGMENT_TXN_DATA",
Segment::RlpRaw => "SEGMENT_RLP_RAW",
}