diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs
index 74f211d6..a2996eb4 100644
--- a/evm/src/all_stark.rs
+++ b/evm/src/all_stark.rs
@@ -1,5 +1,6 @@
 use std::iter;
 
+use itertools::Itertools;
 use plonky2::field::extension::Extendable;
 use plonky2::field::types::Field;
 use plonky2::hash::hash_types::RichField;
diff --git a/evm/src/arithmetic/arithmetic_stark.rs b/evm/src/arithmetic/arithmetic_stark.rs
index 52fc2f78..1d3af2b9 100644
--- a/evm/src/arithmetic/arithmetic_stark.rs
+++ b/evm/src/arithmetic/arithmetic_stark.rs
@@ -11,7 +11,7 @@ use plonky2::plonk::circuit_builder::CircuitBuilder;
 use plonky2::util::transpose;
 use static_assertions::const_assert;
 
-use super::columns::NUM_ARITH_COLUMNS;
+use super::columns::{op_flags, NUM_ARITH_COLUMNS};
 use super::shift;
 use crate::all_stark::Table;
 use crate::arithmetic::columns::{NUM_SHARED_COLS, RANGE_COUNTER, RC_FREQUENCIES, SHARED_COLS};
@@ -208,6 +208,12 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for ArithmeticStark<F, D>
         let lv: &[P; NUM_ARITH_COLUMNS] = vars.get_local_values().try_into().unwrap();
         let nv: &[P; NUM_ARITH_COLUMNS] = vars.get_next_values().try_into().unwrap();
 
+        // Flags must be boolean.
+        for flag_idx in op_flags() {
+            let flag = lv[flag_idx];
+            yield_constr.constraint(flag * (flag - P::ONES));
+        }
+
         // Check that `OPCODE_COL` holds 0 if the operation is not a range_check.
         let opcode_constraint = (P::ONES - lv[columns::IS_RANGE_CHECK]) * lv[columns::OPCODE_COL];
         yield_constr.constraint(opcode_constraint);
@@ -248,6 +254,13 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for ArithmeticStark<F, D>
         let nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS] =
             vars.get_next_values().try_into().unwrap();
 
+        // Flags must be boolean.
+        for flag_idx in op_flags() {
+            let flag = lv[flag_idx];
+            let constraint = builder.mul_sub_extension(flag, flag, flag);
+            yield_constr.constraint(builder, constraint);
+        }
+
         // Check that `OPCODE_COL` holds 0 if the operation is not a range_check.
         let opcode_constraint = builder.arithmetic_extension(
             F::NEG_ONE,
diff --git a/evm/src/arithmetic/columns.rs b/evm/src/arithmetic/columns.rs
index bcfb29c1..dc535492 100644
--- a/evm/src/arithmetic/columns.rs
+++ b/evm/src/arithmetic/columns.rs
@@ -43,6 +43,10 @@ pub(crate) const IS_RANGE_CHECK: usize = IS_SHR + 1;
 pub(crate) const OPCODE_COL: usize = IS_RANGE_CHECK + 1;
 pub(crate) const START_SHARED_COLS: usize = OPCODE_COL + 1;
 
+pub(crate) const fn op_flags() -> Range<usize> {
+    IS_ADD..IS_RANGE_CHECK + 1
+}
+
 /// Within the Arithmetic Unit, there are shared columns which can be
 /// used by any arithmetic circuit, depending on which one is active
 /// this cycle.
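The new `flag * (flag - P::ONES)` constraints rely on the standard booleanity identity: over a field, `x * (x - 1) = 0` holds exactly when `x` is 0 or 1. A minimal standalone sketch of the identity being relied on (plain integers stand in for field elements; not part of the diff):

```rust
/// Booleanity check: p(x) = x * (x - 1) vanishes iff x is 0 or 1.
fn booleanity(x: i64) -> i64 {
    x * (x - 1)
}

fn main() {
    assert_eq!(booleanity(0), 0); // 0 is boolean
    assert_eq!(booleanity(1), 0); // 1 is boolean
    assert_ne!(booleanity(2), 0); // any other value fails the constraint
}
```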
diff --git a/evm/src/cpu/contextops.rs b/evm/src/cpu/contextops.rs
index 0afc070f..77debc75 100644
--- a/evm/src/cpu/contextops.rs
+++ b/evm/src/cpu/contextops.rs
@@ -105,7 +105,7 @@
 }
 
 /// Circuit version of `eval_packed_get`.
-/// Evalutes constraints for GET_CONTEXT.
+/// Evaluates constraints for GET_CONTEXT.
 fn eval_ext_circuit_get<F: RichField + Extendable<D>, const D: usize>(
     builder: &mut CircuitBuilder<F, D>,
     lv: &CpuColumnsView<ExtensionTarget<D>>,
diff --git a/evm/src/cpu/kernel/asm/bignum/cmp.asm b/evm/src/cpu/kernel/asm/bignum/cmp.asm
index d5abd238..cf6b547f 100644
--- a/evm/src/cpu/kernel/asm/bignum/cmp.asm
+++ b/evm/src/cpu/kernel/asm/bignum/cmp.asm
@@ -12,17 +12,19 @@ global cmp_bignum:
     // stack: len, a_start_loc, b_start_loc, retdest
     SWAP1
     // stack: a_start_loc, len, b_start_loc, retdest
-    DUP2
-    // stack: len, a_start_loc, len, b_start_loc, retdest
+    PUSH 1
+    DUP3
+    SUB
+    // stack: len-1, a_start_loc, len, b_start_loc, retdest
     ADD
-    %decrement
     // stack: a_end_loc, len, b_start_loc, retdest
     SWAP2
     // stack: b_start_loc, len, a_end_loc, retdest
-    DUP2
-    // stack: len, b_start_loc, len, a_end_loc, retdest
+    PUSH 1
+    DUP3
+    SUB
+    // stack: len-1, b_start_loc, len, a_end_loc, retdest
     ADD
-    %decrement
     // stack: b_end_loc, len, a_end_loc, retdest
     %stack (b, l, a) -> (l, a, b)
     // stack: len, a_end_loc, b_end_loc, retdest
diff --git a/evm/src/cpu/kernel/asm/bignum/shr.asm b/evm/src/cpu/kernel/asm/bignum/shr.asm
index adf95770..16d8403c 100644
--- a/evm/src/cpu/kernel/asm/bignum/shr.asm
+++ b/evm/src/cpu/kernel/asm/bignum/shr.asm
@@ -42,9 +42,9 @@ shr_loop:
     // stack: i, carry << 127 | a[i] >> 1, i, new_carry, start_loc, retdest
     %mstore_current_general
     // stack: i, new_carry, start_loc, retdest
-    DUP1
-    // stack: i, i, new_carry, start_loc, retdest
-    %decrement
+    PUSH 1
+    DUP2
+    SUB
     // stack: i-1, i, new_carry, start_loc, retdest
     SWAP1
     // stack: i, i-1, new_carry, start_loc, retdest
diff --git a/evm/src/cpu/kernel/asm/bloom_filter.asm b/evm/src/cpu/kernel/asm/bloom_filter.asm
index 0362d710..8fa84a80 100644
--- a/evm/src/cpu/kernel/asm/bloom_filter.asm
+++ b/evm/src/cpu/kernel/asm/bloom_filter.asm
@@ -142,10 +142,11 @@ logs_bloom_end:
 // Also updates the block bloom filter.
 %macro bloom_write_bit
     // stack: byte_index, byte_bit_index
-    DUP2
-    // stack: byte_bit_index, byte_index, byte_bit_index
+    PUSH 1
+    DUP3
+    // stack: byte_bit_index, 1, byte_index, byte_bit_index
     PUSH 7 SUB
-    PUSH 1 SWAP1 SHL
+    SHL
     // Updates the current txn bloom filter.
     SWAP2 POP DUP1
     %mload_kernel(@SEGMENT_TXN_BLOOM)
diff --git a/evm/src/cpu/kernel/asm/core/gas.asm b/evm/src/cpu/kernel/asm/core/gas.asm
index d5e4e9bb..2e16c373 100644
--- a/evm/src/cpu/kernel/asm/core/gas.asm
+++ b/evm/src/cpu/kernel/asm/core/gas.asm
@@ -122,7 +122,7 @@ global sys_gasprice:
 // L(n) = n - floor(n / 64)
 %macro all_but_one_64th
     // stack: n
-    DUP1 %div_const(64)
+    DUP1 %shr_const(6)
     // stack: floor(n / 64), n
     SWAP1 SUB
     // stack: n - floor(n / 64)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 6dbddddc..86b179ba 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,7 +1,7 @@
 // Load a single value from bn254 pairings memory.
 %macro mload_bn254_pairing
     // stack: offset
-    %mload_current(@SEGMENT_KERNEL_BN_PAIRING)
+    %mload_current(@SEGMENT_BN_PAIRING)
     // stack: value
 %endmacro
 
@@ -9,14 +9,14 @@
     // stack:
     PUSH $offset
     // stack: offset
-    %mload_current(@SEGMENT_KERNEL_BN_PAIRING)
+    %mload_current(@SEGMENT_BN_PAIRING)
     // stack: value
 %endmacro
 
 // Store a single value to bn254 pairings memory.
%macro mstore_bn254_pairing // stack: offset, value - %mstore_current(@SEGMENT_KERNEL_BN_PAIRING) + %mstore_current(@SEGMENT_BN_PAIRING) // stack: %endmacro @@ -24,7 +24,7 @@ // stack: value PUSH $offset // stack: offset, value - %mstore_current(@SEGMENT_KERNEL_BN_PAIRING) + %mstore_current(@SEGMENT_BN_PAIRING) // stack: %endmacro diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm index c84536d8..c1103100 100644 --- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm +++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm @@ -87,9 +87,9 @@ ecdsa_after_precompute_loop: %mul_const(2) ADD %mul_const(2) ADD %mul_const(2) ADD %stack (index, i, accx, accy, a0, a1, b0, b1, retdest) -> (index, index, i, accx, accy, a0, a1, b0, b1, retdest) %mul_const(2) %add_const(1) - %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mload_current(@SEGMENT_ECDSA_TABLE) SWAP1 %mul_const(2) - %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mload_current(@SEGMENT_ECDSA_TABLE) %stack (Px, Py, i, accx, accy, a0, a1, b0, b1, retdest) -> (Px, Py, accx, accy, ecdsa_after_precompute_loop_contd, i, a0, a1, b0, b1, retdest) %jump(secp_add_valid_points) ecdsa_after_precompute_loop_contd: @@ -97,8 +97,9 @@ ecdsa_after_precompute_loop_contd: ISZERO %jumpi(ecdsa_after_precompute_loop_end) %jump(secp_double) ecdsa_after_precompute_loop_contd2: - %stack (accx, accy, i, a0, a1, b0, b1, retdest) -> (i, accx, accy, a0, a1, b0, b1, retdest) - %decrement %jump(ecdsa_after_precompute_loop) + %stack (accx, accy, i, a0, a1, b0, b1, retdest) -> (i, 1, accx, accy, a0, a1, b0, b1, retdest) + SUB // i - 1 + %jump(ecdsa_after_precompute_loop) ecdsa_after_precompute_loop_end: // Check that the public key is not the point at infinity. See https://github.com/ethereum/eth-keys/pull/76 for discussion. DUP2 DUP2 ISZERO SWAP1 ISZERO MUL %jumpi(pk_is_infinity) diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/precomputation.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/precomputation.asm index 3cea0315..b6bed1b0 100644 --- a/evm/src/cpu/kernel/asm/curve/secp256k1/precomputation.asm +++ b/evm/src/cpu/kernel/asm/curve/secp256k1/precomputation.asm @@ -1,27 +1,27 @@ // Initial stack: Gneg, Qneg, Qx, Qy, retdest -// Compute a*G ± b*phi(G) + c*Q ± d*phi(Q) for a,b,c,d in {0,1}^4 and store its x-coordinate at location `2*(8a+4b+2c+d)` and its y-coordinate at location `2*(8a+4b+2c+d)+1` in the SEGMENT_KERNEL_ECDSA_TABLE segment. +// Compute a*G ± b*phi(G) + c*Q ± d*phi(Q) for a,b,c,d in {0,1}^4 and store its x-coordinate at location `2*(8a+4b+2c+d)` and its y-coordinate at location `2*(8a+4b+2c+d)+1` in the SEGMENT_ECDSA_TABLE segment. global secp_precompute_table: // First store G, ± phi(G), G ± phi(G) // Use Gneg for the ±, e.g., ±phi(G) is computed as `Gneg * (-phi(G)) + (1-Gneg)*phi(G)` (note only the y-coordinate needs to be filtered). 
// stack: Gneg, Qneg, Qx, Qy, retdest PUSH 32670510020758816978083085130507043184471273380659243275938904335757337482424 PUSH 17 PUSH 55066263022277343669578718895168534326250603453777594175500187360389116729240 PUSH 16 - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) %mstore_current(@SEGMENT_ECDSA_TABLE) DUP1 DUP1 %mul_const(32670510020758816978083085130507043184471273380659243275938904335757337482424) SWAP1 PUSH 1 SUB %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) ADD PUSH 9 PUSH 85340279321737800624759429340272274763154997815782306132637707972559913914315 PUSH 8 - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) %mstore_current(@SEGMENT_ECDSA_TABLE) DUP1 DUP1 %mul_const(83121579216557378445487899878180864668798711284981320763518679672151497189239) SWAP1 PUSH 1 SUB %mul_const(100652675408719987021357910538015346127426077519185866739835120963490438734674) ADD PUSH 25 - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) DUP1 %mul_const(91177636130617246552803821781935006617134368061721227770777272682868638699771) SWAP1 PUSH 1 SUB %mul_const(66837770201594535779099350687042404727408598709762866365333192677982385899440) ADD PUSH 24 - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) // Then store Q, ±phi(Q), Q ± phi(Q) %stack (Qneg, Qx, Qy, retdest) -> (4, Qx, 5, Qy, Qx, @SECP_BASE, Qneg, Qx, Qy, retdest) - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) %mstore_current(@SEGMENT_ECDSA_TABLE) // stack: Qx, @SECP_BASE, Qx, Qy, retdest PUSH @SECP_GLV_BETA MULMOD %stack (betaQx, Qneg, Qx, Qy, retdest) -> (Qneg, Qy, Qneg, betaQx, Qx, Qy, retdest) @@ -29,42 +29,42 @@ global secp_precompute_table: // stack: 1-Qneg, Qneg*Qy, betaQx, Qx, Qy, retdest DUP5 PUSH @SECP_BASE SUB MUL ADD %stack (selectQy, betaQx, Qx, Qy, retdest) -> (2, betaQx, 3, selectQy, betaQx, selectQy, Qx, Qy, precompute_table_contd, retdest) - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) %mstore_current(@SEGMENT_ECDSA_TABLE) %jump(secp_add_valid_points_no_edge_case) precompute_table_contd: %stack (x, y, retdest) -> (6, x, 7, y, retdest) - %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mstore_current(@SEGMENT_ECDSA_TABLE) %mstore_current(@SEGMENT_ECDSA_TABLE) PUSH 2 // Use a loop to store a*G ± b*phi(G) + c*Q ± d*phi(Q) for a,b,c,d in {0,1}^4. 
precompute_table_loop: // stack: i, retdest - DUP1 %increment %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + DUP1 %increment %mload_current(@SEGMENT_ECDSA_TABLE) %stack (y, i, retdest) -> (i, y, i, retdest) - %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + %mload_current(@SEGMENT_ECDSA_TABLE) PUSH precompute_table_loop_contd DUP3 DUP3 - PUSH 9 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) - PUSH 8 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + PUSH 9 %mload_current(@SEGMENT_ECDSA_TABLE) + PUSH 8 %mload_current(@SEGMENT_ECDSA_TABLE) // stack: Gx, Gy, x, y, precompute_table_loop_contd, x, y, i, retdest %jump(secp_add_valid_points) precompute_table_loop_contd: %stack (Rx, Ry, x, y, i, retdest) -> (i, 8, Rx, i, 9, Ry, x, y, i, retdest) - ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + ADD %mstore_current(@SEGMENT_ECDSA_TABLE) ADD %mstore_current(@SEGMENT_ECDSA_TABLE) DUP2 DUP2 - PUSH 17 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) - PUSH 16 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + PUSH 17 %mload_current(@SEGMENT_ECDSA_TABLE) + PUSH 16 %mload_current(@SEGMENT_ECDSA_TABLE) %stack (Gx, Gy, x, y, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_loop_contd2, x, y, i, retdest) %jump(secp_add_valid_points) precompute_table_loop_contd2: %stack (Rx, Ry, x, y, i, retdest) -> (i, 16, Rx, i, 17, Ry, x, y, i, retdest) - ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) - PUSH 25 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) - PUSH 24 %mload_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + ADD %mstore_current(@SEGMENT_ECDSA_TABLE) ADD %mstore_current(@SEGMENT_ECDSA_TABLE) + PUSH 25 %mload_current(@SEGMENT_ECDSA_TABLE) + PUSH 24 %mload_current(@SEGMENT_ECDSA_TABLE) %stack (Gx, Gy, x, y, i, retdest) -> (Gx, Gy, x, y, precompute_table_loop_contd3, i, retdest) %jump(secp_add_valid_points) precompute_table_loop_contd3: %stack (Rx, Ry, i, retdest) -> (i, 24, Rx, i, 25, Ry, i, retdest) - ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) ADD %mstore_kernel(@SEGMENT_KERNEL_ECDSA_TABLE) + ADD %mstore_current(@SEGMENT_ECDSA_TABLE) ADD %mstore_current(@SEGMENT_ECDSA_TABLE) %add_const(2) DUP1 %eq_const(8) %jumpi(precompute_table_end) %jump(precompute_table_loop) diff --git a/evm/src/cpu/kernel/asm/exp.asm b/evm/src/cpu/kernel/asm/exp.asm index 5dd67366..4b798e84 100644 --- a/evm/src/cpu/kernel/asm/exp.asm +++ b/evm/src/cpu/kernel/asm/exp.asm @@ -86,7 +86,7 @@ sys_exp_gas_loop_enter: // stack: e >> shift, shift, x, e, return_info %jumpi(sys_exp_gas_loop) // stack: shift_bits, x, e, return_info - %div_const(8) + %shr_const(3) // stack: byte_size_of_e := shift_bits / 8, x, e, return_info %mul_const(@GAS_EXPBYTE) %add_const(@GAS_EXP) diff --git a/evm/src/cpu/kernel/asm/hash/blake2/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2/compression.asm index 454e5128..ba9ffc13 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2/compression.asm @@ -21,10 +21,11 @@ compression_loop: // stack: addr, cur_block, retdest POP // stack: cur_block, retdest + PUSH 1 PUSH 0 %mload_current_general - // stack: num_blocks, cur_block, retdest - %decrement + // stack: num_blocks, 1, cur_block, retdest + SUB // stack: num_blocks - 1, cur_block, retdest DUP2 // stack: cur_block, num_blocks - 1, cur_block, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/main.asm b/evm/src/cpu/kernel/asm/hash/sha2/main.asm index b311262d..81ec3912 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/main.asm +++ 
b/evm/src/cpu/kernel/asm/hash/sha2/main.asm @@ -31,7 +31,7 @@ global sha2_pad: DUP1 // stack: num_bytes, num_bytes, retdest %add_const(8) - %div_const(64) + %shr_const(6) %increment // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm index 6a4c5e3b..d50e5c9a 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm @@ -34,7 +34,7 @@ // stack: rotr(x, 18), x, rotr(x, 7) SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) - %div_const(8) // equivalent to %shr_const(3) + %shr_const(3) // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) XOR XOR diff --git a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm index 438875fb..c412a666 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm @@ -17,11 +17,12 @@ %decrement SWAP1 // stack: length >> (8 * i), last_addr - i - 2 - %div_const(256) // equivalent to %shr_const(8) + %shr_const(8) // stack: length >> (8 * (i + 1)), last_addr - i - 2 - DUP1 - // stack: length >> (8 * (i + 1)), length >> (8 * (i + 1)), last_addr - i - 2 - %mod_const(256) + PUSH 256 + DUP2 + // stack: length >> (8 * (i + 1)), 256, length >> (8 * (i + 1)), last_addr - i - 2 + MOD // stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 DUP3 // stack: last_addr - i - 2, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 diff --git a/evm/src/cpu/kernel/asm/journal/journal.asm b/evm/src/cpu/kernel/asm/journal/journal.asm index a0e5502d..2715bc6b 100644 --- a/evm/src/cpu/kernel/asm/journal/journal.asm +++ b/evm/src/cpu/kernel/asm/journal/journal.asm @@ -199,8 +199,9 @@ %endmacro %macro pop_checkpoint + PUSH 1 %mload_context_metadata(@CTX_METADATA_CHECKPOINTS_LEN) // stack: i - %decrement + SUB %mstore_context_metadata(@CTX_METADATA_CHECKPOINTS_LEN) %endmacro diff --git a/evm/src/cpu/kernel/asm/journal/log.asm b/evm/src/cpu/kernel/asm/journal/log.asm index 0b815fae..e1794397 100644 --- a/evm/src/cpu/kernel/asm/journal/log.asm +++ b/evm/src/cpu/kernel/asm/journal/log.asm @@ -8,8 +8,9 @@ global revert_log: // stack: entry_type, ptr, retdest POP // First, reduce the number of logs. + PUSH 1 %mload_global_metadata(@GLOBAL_METADATA_LOGS_LEN) - %decrement + SUB %mstore_global_metadata(@GLOBAL_METADATA_LOGS_LEN) // stack: ptr, retdest // Second, restore payload length. diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 3a3a17a5..eef7ee1a 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -409,8 +409,7 @@ %mstore_u32 %endmacro -// Store a single byte to @SEGMENT_RLP_RAW. 
-%macro mstore_rlp +%macro swap_mstore // stack: addr, value SWAP1 MSTORE_GENERAL diff --git a/evm/src/cpu/kernel/asm/memory/metadata.asm b/evm/src/cpu/kernel/asm/memory/metadata.asm index dfbfb646..e69e292b 100644 --- a/evm/src/cpu/kernel/asm/memory/metadata.asm +++ b/evm/src/cpu/kernel/asm/memory/metadata.asm @@ -359,7 +359,7 @@ zero_hash: // stack: num_bytes %add_const(31) // stack: 31 + num_bytes - %div_const(32) + %shr_const(5) // stack: (num_bytes + 31) / 32 %endmacro @@ -372,7 +372,7 @@ zero_hash: SWAP1 // stack: num_words, num_words * GAS_MEMORY %square - %div_const(512) + %shr_const(9) // stack: num_words^2 / 512, num_words * GAS_MEMORY ADD // stack: cost = num_words^2 / 512 + num_words * GAS_MEMORY @@ -422,8 +422,9 @@ zero_hash: %endmacro %macro decrement_call_depth + PUSH 1 %mload_global_metadata(@GLOBAL_METADATA_CALL_STACK_DEPTH) - %decrement + SUB %mstore_global_metadata(@GLOBAL_METADATA_CALL_STACK_DEPTH) %endmacro diff --git a/evm/src/cpu/kernel/asm/mpt/hash/hash.asm b/evm/src/cpu/kernel/asm/mpt/hash/hash.asm index 29348f97..15fdd6a9 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash/hash.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash/hash.asm @@ -151,8 +151,8 @@ global encode_node_branch: // No value; append the empty string (0x80). // stack: value_ptr, rlp_pos', rlp_start, encode_value, cur_len, retdest - %stack (value_ptr, rlp_pos, rlp_start, encode_value) -> (rlp_pos, 0x80, rlp_pos, rlp_start) - %mstore_rlp + %stack (value_ptr, rlp_pos, rlp_start, encode_value) -> (0x80, rlp_pos, rlp_pos, rlp_start) + MSTORE_GENERAL // stack: rlp_pos', rlp_start, cur_len, retdest %increment // stack: rlp_pos'', rlp_start, cur_len, retdest @@ -192,9 +192,9 @@ encode_node_branch_prepend_prefix: SWAP1 DUP1 %sub_const(32) %jumpi(%%unpack) // Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160. // stack: result_len, result, cur_len, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest + DUP4 // rlp_pos PUSH 160 - DUP5 // rlp_pos - %mstore_rlp + MSTORE_GENERAL SWAP3 %increment SWAP3 // rlp_pos += 1 %%unpack: %stack (result_len, result, cur_len, rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest) @@ -233,9 +233,9 @@ encode_node_extension_after_hex_prefix: // If result_len != 32, result is raw RLP, with an appropriate RLP prefix already. DUP4 %sub_const(32) %jumpi(encode_node_extension_unpack) // Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160. 
+ DUP1 // rlp_pos PUSH 160 - DUP2 // rlp_pos - %mstore_rlp + MSTORE_GENERAL %increment // rlp_pos += 1 encode_node_extension_unpack: %stack (rlp_pos, rlp_start, result, result_len, node_payload_ptr, cur_len) diff --git a/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm b/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm index 6abca0b6..cd07c01f 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm @@ -326,8 +326,8 @@ encode_nonzero_receipt_type: // stack: rlp_receipt_len, txn_type, rlp_addr, value_ptr, retdest DUP3 %encode_rlp_multi_byte_string_prefix // stack: rlp_addr, txn_type, old_rlp_addr, value_ptr, retdest - DUP2 DUP2 - %mstore_rlp + DUP1 DUP3 + MSTORE_GENERAL %increment // stack: rlp_addr, txn_type, old_rlp_addr, value_ptr, retdest %stack (rlp_addr, txn_type, old_rlp_addr, value_ptr, retdest) -> (rlp_addr, value_ptr, retdest) diff --git a/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm b/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm index 7dd02c34..cee87deb 100644 --- a/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm +++ b/evm/src/cpu/kernel/asm/mpt/hex_prefix.asm @@ -27,7 +27,7 @@ first_byte: // stack: rlp_addr, num_nibbles, packed_nibbles, terminated, retdest // get the first nibble, if num_nibbles is odd, or zero otherwise SWAP2 - // stack: packed_nibbles, num_nibbbles, rlp_addr, terminated, retdest + // stack: packed_nibbles, num_nibbles, rlp_addr, terminated, retdest DUP2 DUP1 %mod_const(2) // stack: parity, num_nibbles, packed_nibbles, num_nibbles, rlp_addr, terminated, retdest @@ -50,7 +50,7 @@ first_byte: ADD // stack: first_byte, rlp_addr, retdest DUP2 - %mstore_rlp + %swap_mstore %increment // stack: rlp_addr', retdest SWAP1 @@ -88,7 +88,7 @@ rlp_header_medium: // stack: hp_len, rlp_addr, num_nibbles, packed_nibbles, terminated, retdest %add_const(0x80) // value = 0x80 + hp_len DUP2 - %mstore_rlp + %swap_mstore // stack: rlp_addr, num_nibbles, packed_nibbles, terminated, retdest // rlp_addr += 1 %increment @@ -108,14 +108,14 @@ rlp_header_large: // In practice hex-prefix length will never exceed 256, so the length of the // length will always be 1 byte in this case. + DUP2 // rlp_addr PUSH 0xb8 // value = 0xb7 + len_of_len = 0xb8 - DUP3 - %mstore_rlp + MSTORE_GENERAL // stack: rlp_addr, value, hp_len, i, rlp_addr, num_nibbles, packed_nibbles, terminated, retdest // stack: hp_len, rlp_addr, num_nibbles, packed_nibbles, terminated, retdest DUP2 %increment - %mstore_rlp + %swap_mstore // stack: rlp_addr, num_nibbles, packed_nibbles, terminated, retdest // rlp_addr += 2 diff --git a/evm/src/cpu/kernel/asm/mpt/util.asm b/evm/src/cpu/kernel/asm/mpt/util.asm index 27418b17..db13d889 100644 --- a/evm/src/cpu/kernel/asm/mpt/util.asm +++ b/evm/src/cpu/kernel/asm/mpt/util.asm @@ -11,9 +11,9 @@ %endmacro %macro initialize_rlp_segment - PUSH 0x80 PUSH @ENCODED_EMPTY_NODE_POS - %mstore_rlp + PUSH 0x80 + MSTORE_GENERAL %endmacro %macro alloc_rlp_block diff --git a/evm/src/cpu/kernel/asm/rlp/encode.asm b/evm/src/cpu/kernel/asm/rlp/encode.asm index 2319e780..dcd24515 100644 --- a/evm/src/cpu/kernel/asm/rlp/encode.asm +++ b/evm/src/cpu/kernel/asm/rlp/encode.asm @@ -29,12 +29,11 @@ global encode_rlp_256: // RLP-encode a fixed-length string with the given byte length. Assumes string < 2^(8 * len). 
global encode_rlp_fixed: // stack: len, rlp_addr, string, retdest - DUP1 + DUP2 + DUP2 %add_const(0x80) - // stack: first_byte, len, rlp_addr, string, retdest - DUP3 - // stack: rlp_addr, first_byte, len, rlp_addr, string, retdest - %mstore_rlp + // stack: first_byte, rlp_addr, len, rlp_addr, string, retdest + MSTORE_GENERAL // stack: len, rlp_addr, string, retdest SWAP1 %increment // increment rlp_addr @@ -51,19 +50,17 @@ encode_rlp_fixed_finish: // I.e. writes encode(encode(string). Assumes string < 2^(8 * len). global doubly_encode_rlp_fixed: // stack: len, rlp_addr, string, retdest - DUP1 + DUP2 + DUP2 %add_const(0x81) - // stack: first_byte, len, rlp_addr, string, retdest - DUP3 - // stack: rlp_addr, first_byte, len, rlp_addr, string, retdest - %mstore_rlp + // stack: first_byte, rlp_addr, len, rlp_addr, string, retdest + MSTORE_GENERAL // stack: len, rlp_addr, string, retdest - DUP1 + DUP2 %increment + DUP2 %add_const(0x80) - // stack: second_byte, len, original_rlp_addr, string, retdest - DUP3 %increment - // stack: rlp_addr', second_byte, len, rlp_addr, string, retdest - %mstore_rlp + // stack: second_byte, rlp_addr', len, original_rlp_addr, string, retdest + MSTORE_GENERAL // stack: len, rlp_addr, string, retdest SWAP1 %add_const(2) // advance past the two prefix bytes @@ -87,11 +84,12 @@ global encode_rlp_multi_byte_string_prefix: %jumpi(encode_rlp_multi_byte_string_prefix_large) // Medium case; prefix is 0x80 + str_len. // stack: rlp_addr, str_len, retdest - SWAP1 %add_const(0x80) - // stack: prefix, rlp_addr, retdest + PUSH 0x80 DUP2 - // stack: rlp_addr, prefix, rlp_addr, retdest - %mstore_rlp + // stack: rlp_addr, 0x80, rlp_addr, str_len, retdest + SWAP3 ADD + // stack: prefix, rlp_addr, rlp_addr, retdest + MSTORE_GENERAL // stack: rlp_addr, retdest %increment // stack: rlp_addr', retdest @@ -104,12 +102,11 @@ encode_rlp_multi_byte_string_prefix_large: %num_bytes // stack: len_of_len, rlp_addr, str_len, retdest SWAP1 - DUP2 // len_of_len + DUP1 // rlp_addr + DUP3 // len_of_len %add_const(0xb7) - // stack: first_byte, rlp_addr, len_of_len, str_len, retdest - DUP2 - // stack: rlp_addr, first_byte, rlp_addr, len_of_len, str_len, retdest - %mstore_rlp + // stack: first_byte, rlp_addr, rlp_addr, len_of_len, str_len, retdest + MSTORE_GENERAL // stack: rlp_addr, len_of_len, str_len, retdest %increment // stack: rlp_addr', len_of_len, str_len, retdest @@ -132,12 +129,11 @@ global encode_rlp_list_prefix: %jumpi(encode_rlp_list_prefix_large) // Small case: prefix is just 0xc0 + length. // stack: rlp_addr, payload_len, retdest - SWAP1 + DUP1 + SWAP2 %add_const(0xc0) - // stack: prefix, rlp_addr, retdest - DUP2 - // stack: rlp_addr, prefix, rlp_addr, retdest - %mstore_rlp + // stack: prefix, rlp_addr, rlp_addr, retdest + MSTORE_GENERAL // stack: rlp_addr, retdest %increment SWAP1 @@ -147,10 +143,10 @@ encode_rlp_list_prefix_large: // stack: rlp_addr, payload_len, retdest DUP2 %num_bytes // stack: len_of_len, rlp_addr, payload_len, retdest - DUP1 %add_const(0xf7) - // stack: first_byte, len_of_len, rlp_addr, payload_len, retdest - DUP3 // rlp_addr - %mstore_rlp + DUP2 + DUP2 %add_const(0xf7) + // stack: first_byte, rlp_addr, len_of_len, rlp_addr, payload_len, retdest + MSTORE_GENERAL // stack: len_of_len, rlp_addr, payload_len, retdest SWAP1 %increment // stack: rlp_addr', len_of_len, payload_len, retdest @@ -184,10 +180,10 @@ global prepend_rlp_list_prefix: // If we got here, we have a small list, so we prepend 0xc0 + len at rlp_address 8. 
// stack: payload_len, end_rlp_addr, start_rlp_addr, retdest - DUP1 %add_const(0xc0) - // stack: prefix_byte, payload_len, end_rlp_addr, start_rlp_addr, retdest - DUP4 %decrement // offset of prefix - %mstore_rlp + PUSH 1 DUP4 SUB // offset of prefix + DUP2 %add_const(0xc0) + // stack: prefix_byte, start_rlp_addr-1, payload_len, end_rlp_addr, start_rlp_addr, retdest + MSTORE_GENERAL // stack: payload_len, end_rlp_addr, start_rlp_addr, retdest %increment // stack: rlp_len, end_rlp_addr, start_rlp_addr, retdest @@ -204,10 +200,10 @@ prepend_rlp_list_prefix_big: DUP1 %num_bytes // stack: len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest DUP1 - DUP5 %decrement // start_rlp_addr - 1 + PUSH 1 DUP6 SUB // start_rlp_addr - 1 SUB // stack: prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest - DUP2 %add_const(0xf7) DUP2 %mstore_rlp // rlp[prefix_start_rlp_addr] = 0xf7 + len_of_len + DUP2 %add_const(0xf7) DUP2 %swap_mstore // rlp[prefix_start_rlp_addr] = 0xf7 + len_of_len // stack: prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest DUP1 %increment // start_len_rlp_addr = prefix_start_rlp_addr + 1 %stack (start_len_rlp_addr, prefix_start_rlp_addr, len_of_len, payload_len, end_rlp_addr, start_rlp_addr, retdest) diff --git a/evm/src/cpu/kernel/asm/rlp/encode_rlp_scalar.asm b/evm/src/cpu/kernel/asm/rlp/encode_rlp_scalar.asm index 8196a452..d311a57e 100644 --- a/evm/src/cpu/kernel/asm/rlp/encode_rlp_scalar.asm +++ b/evm/src/cpu/kernel/asm/rlp/encode_rlp_scalar.asm @@ -85,6 +85,14 @@ encode_rlp_scalar_small: SWAP1 JUMP +// Convenience macro to call encode_rlp_scalar and return where we left off. +// It takes swapped inputs, i.e. `scalar, rlp_addr` instead of `rlp_addr, scalar`. +%macro encode_rlp_scalar_swapped_inputs + %stack (scalar, rlp_addr) -> (rlp_addr, scalar, %%after) + %jump(encode_rlp_scalar) +%%after: +%endmacro + // Convenience macro to call encode_rlp_scalar and return where we left off. %macro encode_rlp_scalar %stack (rlp_addr, scalar) -> (rlp_addr, scalar, %%after) diff --git a/evm/src/cpu/kernel/asm/rlp/num_bytes.asm b/evm/src/cpu/kernel/asm/rlp/num_bytes.asm index b2a1b9d6..de0a7ca9 100644 --- a/evm/src/cpu/kernel/asm/rlp/num_bytes.asm +++ b/evm/src/cpu/kernel/asm/rlp/num_bytes.asm @@ -5,8 +5,8 @@ global num_bytes: DUP1 ISZERO %jumpi(return_1) // Non-deterministically guess the number of bits PROVER_INPUT(num_bits) - %stack (num_bits, x) -> (num_bits, x, num_bits) - %decrement + %stack(num_bits, x) -> (num_bits, 1, x, num_bits) + SUB SHR // stack: 1, num_bits %assert_eq_const(1) @@ -17,7 +17,10 @@ global num_bytes: SWAP1 JUMP -return_1: POP PUSH 1 SWAP1 JUMP +return_1: + // stack: x, retdest + %stack(x, retdest) -> (retdest, 1) + JUMP // Convenience macro to call num_bytes and return where we left off. %macro num_bytes diff --git a/evm/src/cpu/kernel/asm/transactions/router.asm b/evm/src/cpu/kernel/asm/transactions/router.asm index 2ecccfe0..edabfbc4 100644 --- a/evm/src/cpu/kernel/asm/transactions/router.asm +++ b/evm/src/cpu/kernel/asm/transactions/router.asm @@ -5,10 +5,8 @@ global route_txn: // stack: txn_counter, num_nibbles, retdest // First load transaction data into memory, where it will be parsed. 
- PUSH read_txn_from_memory - SWAP2 SWAP1 - PUSH update_txn_trie - // stack: update_txn_trie, tx_counter, num_nibbles, read_txn_from_memory, retdest + %stack(txn_counter, num_nibbles) -> (update_txn_trie, txn_counter, num_nibbles, read_txn_from_memory) + // stack: update_txn_trie, txn_counter, num_nibbles, read_txn_from_memory, retdest %jump(read_rlp_to_memory) // At this point, the raw txn data is in memory. diff --git a/evm/src/cpu/kernel/asm/transactions/type_0.asm b/evm/src/cpu/kernel/asm/transactions/type_0.asm index 0cd22c10..a3f3bb0d 100644 --- a/evm/src/cpu/kernel/asm/transactions/type_0.asm +++ b/evm/src/cpu/kernel/asm/transactions/type_0.asm @@ -61,7 +61,7 @@ process_v_new_style: %sub_const(35) DUP1 // stack: v - 35, v - 35, rlp_addr, retdest - %div_const(2) + %div2 // stack: chain_id, v - 35, rlp_addr, retdest %mstore_txn_field(@TXN_FIELD_CHAIN_ID) @@ -94,11 +94,11 @@ type_0_compute_signed_data: // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_MAX_FEE_PER_GAS) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_GAS_LIMIT) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_TO) @@ -108,12 +108,12 @@ type_0_compute_signed_data: %jump(after_to) zero_to: // stack: to, rlp_addr, rlp_addr_start, retdest - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest after_to: %mload_txn_field(@TXN_FIELD_VALUE) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest // Encode txn data. @@ -133,15 +133,15 @@ after_serializing_txn_data: // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_CHAIN_ID) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest PUSH 0 - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest PUSH 0 - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest finish_rlp_list: diff --git a/evm/src/cpu/kernel/asm/transactions/type_1.asm b/evm/src/cpu/kernel/asm/transactions/type_1.asm index f9142bd4..e64a4aee 100644 --- a/evm/src/cpu/kernel/asm/transactions/type_1.asm +++ b/evm/src/cpu/kernel/asm/transactions/type_1.asm @@ -48,15 +48,15 @@ type_1_compute_signed_data: // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_NONCE) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_MAX_FEE_PER_GAS) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_GAS_LIMIT) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest %mload_txn_field(@TXN_FIELD_TO) @@ -66,12 +66,12 @@ type_1_compute_signed_data: %jump(after_to) zero_to: // stack: to, rlp_addr, rlp_addr_start, retdest - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest after_to: %mload_txn_field(@TXN_FIELD_VALUE) - SWAP1 %encode_rlp_scalar + %encode_rlp_scalar_swapped_inputs // stack: rlp_addr, rlp_addr_start, retdest // Encode txn data. 
diff --git a/evm/src/cpu/kernel/asm/transactions/type_2.asm b/evm/src/cpu/kernel/asm/transactions/type_2.asm
index cd4f85e6..5074c579 100644
--- a/evm/src/cpu/kernel/asm/transactions/type_2.asm
+++ b/evm/src/cpu/kernel/asm/transactions/type_2.asm
@@ -51,19 +51,19 @@ type_2_compute_signed_data:
     // stack: rlp_addr, rlp_start, retdest
 
     %mload_txn_field(@TXN_FIELD_NONCE)
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
     %mload_txn_field(@TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS)
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
     %mload_txn_field(@TXN_FIELD_MAX_FEE_PER_GAS)
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
     %mload_txn_field(@TXN_FIELD_GAS_LIMIT)
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
     %mload_txn_field(@TXN_FIELD_TO)
@@ -73,12 +73,12 @@
     %jump(after_to)
 zero_to:
     // stack: to, rlp_addr, rlp_start, retdest
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
 after_to:
     %mload_txn_field(@TXN_FIELD_VALUE)
-    SWAP1 %encode_rlp_scalar
+    %encode_rlp_scalar_swapped_inputs
     // stack: rlp_addr, rlp_start, retdest
 
     // Encode txn data.
diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index 855eb4bb..76def0c7 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -284,9 +284,9 @@
 %macro ceil_div
     // stack: x, y
-    DUP2
-    // stack: y, x, y
-    %decrement
+    PUSH 1
+    DUP3
+    SUB // y - 1
     // stack: y - 1, x, y
     ADD
     DIV
@@ -346,7 +346,10 @@
 %endmacro
 
 %macro div2
-    %div_const(2)
+    // stack: x
+    PUSH 1
+    SHR
+    // stack: x >> 1
 %endmacro
 
 %macro iseven
diff --git a/evm/src/cpu/kernel/asm/util/math.asm b/evm/src/cpu/kernel/asm/util/math.asm
index 98f7b008..4bdf6902 100644
--- a/evm/src/cpu/kernel/asm/util/math.asm
+++ b/evm/src/cpu/kernel/asm/util/math.asm
@@ -5,7 +5,7 @@ log2_floor_helper:
     ISZERO
     %jumpi(end)
     // stack: val, counter, retdest
-    %div_const(2)
+    %div2
     // stack: val/2, counter, retdest
     SWAP1
     %increment
@@ -22,7 +22,7 @@ end:
 global log2_floor:
     // stack: val, retdest
-    %div_const(2)
+    %div2
     // stack: val/2, retdest
     PUSH 0
     // stack: 0, val/2, retdest
diff --git a/evm/src/cpu/kernel/stack/permutations.rs b/evm/src/cpu/kernel/stack/permutations.rs
index 50be605d..69d5f2e6 100644
--- a/evm/src/cpu/kernel/stack/permutations.rs
+++ b/evm/src/cpu/kernel/stack/permutations.rs
@@ -59,7 +59,7 @@ fn apply_perm<T: Eq + Clone + Debug>(permutation: Vec<Vec<usize>>, mut lst: Vec
 pub(crate) fn find_swaps<T: Eq + Clone + Debug>(lst_a: &[T], lst_b: &[T]) -> Vec<StackOp<T>> {
-    // We should check to ensure that A and B are indeed rearrangments of each other.
+    // We should check to ensure that A and B are indeed rearrangements of each other.
     assert!(is_permutation(lst_a, lst_b));
 
     let n = lst_a.len();
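`find_swaps` begins by asserting `is_permutation(lst_a, lst_b)`, i.e. multiset equality of the two stacks. A small self-contained sketch of that check (a hypothetical standalone version, not necessarily the kernel's own implementation):

```rust
use std::collections::HashMap;

/// Multiset equality: `a` is a permutation of `b` iff every element
/// occurs the same number of times in both slices.
fn is_permutation<T: std::hash::Hash + Eq>(a: &[T], b: &[T]) -> bool {
    if a.len() != b.len() {
        return false;
    }
    let mut counts: HashMap<&T, isize> = HashMap::new();
    for x in a {
        *counts.entry(x).or_insert(0) += 1; // count occurrences in `a`
    }
    for y in b {
        *counts.entry(y).or_insert(0) -= 1; // cancel them against `b`
    }
    counts.values().all(|&c| c == 0)
}

fn main() {
    assert!(is_permutation(&[1, 2, 2, 3], &[2, 3, 1, 2]));
    assert!(!is_permutation(&[1, 2, 2], &[1, 2, 3]));
}
```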
diff --git a/evm/src/cpu/memio.rs b/evm/src/cpu/memio.rs
index 2073e182..7cb4c388 100644
--- a/evm/src/cpu/memio.rs
+++ b/evm/src/cpu/memio.rs
@@ -204,7 +204,7 @@ fn eval_packed_store<P: PackedField>(
 }
 
 /// Circuit version of `eval_packed_store`.
-/// /// Evaluates constraints for MSTORE_GENERAL.
+/// Evaluates constraints for MSTORE_GENERAL.
 fn eval_ext_circuit_store<F: RichField + Extendable<D>, const D: usize>(
     builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
     lv: &CpuColumnsView<ExtensionTarget<D>>,
diff --git a/evm/src/cpu/syscalls_exceptions.rs b/evm/src/cpu/syscalls_exceptions.rs
index 501b114f..337a0a86 100644
--- a/evm/src/cpu/syscalls_exceptions.rs
+++ b/evm/src/cpu/syscalls_exceptions.rs
@@ -29,6 +29,12 @@ pub(crate) fn eval_packed<P: PackedField>(
     let filter_exception = lv.op.exception;
     let total_filter = filter_syscall + filter_exception;
 
+    // First, constrain filters to be boolean.
+    // Ensuring they are mutually exclusive is done in other modules
+    // through the `is_cpu_cycle` variable.
+    yield_constr.constraint(filter_syscall * (filter_syscall - P::ONES));
+    yield_constr.constraint(filter_exception * (filter_exception - P::ONES));
+
     // If exception, ensure we are not in kernel mode
     yield_constr.constraint(filter_exception * lv.is_kernel_mode);
 
@@ -132,6 +138,14 @@ pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
     let filter_exception = lv.op.exception;
     let total_filter = builder.add_extension(filter_syscall, filter_exception);
 
+    // First, constrain filters to be boolean.
+    // Ensuring they are mutually exclusive is done in other modules
+    // through the `is_cpu_cycle` variable.
+    let constr = builder.mul_sub_extension(filter_syscall, filter_syscall, filter_syscall);
+    yield_constr.constraint(builder, constr);
+    let constr = builder.mul_sub_extension(filter_exception, filter_exception, filter_exception);
+    yield_constr.constraint(builder, constr);
+
     // Ensure that, if exception, we are not in kernel mode
     let constr = builder.mul_extension(filter_exception, lv.is_kernel_mode);
     yield_constr.constraint(builder, constr);
diff --git a/evm/src/cross_table_lookup.rs b/evm/src/cross_table_lookup.rs
index 21f94126..428279ba 100644
--- a/evm/src/cross_table_lookup.rs
+++ b/evm/src/cross_table_lookup.rs
@@ -28,11 +28,14 @@
 //! the current and next row values -- when computing the linear combinations.
 
 use std::borrow::Borrow;
+use std::cmp::min;
 use std::fmt::Debug;
 use std::iter::repeat;
 
 use anyhow::{ensure, Result};
+use hashbrown::HashMap;
 use itertools::Itertools;
+use plonky2::field::batch_util::{batch_add_inplace, batch_multiply_inplace};
 use plonky2::field::extension::{Extendable, FieldExtension};
 use plonky2::field::packed::PackedField;
 use plonky2::field::polynomial::PolynomialValues;
@@ -46,6 +49,7 @@ use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher};
 use plonky2::plonk::plonk_common::{
     reduce_with_powers, reduce_with_powers_circuit, reduce_with_powers_ext_circuit,
 };
+use plonky2::util::ceil_div_usize;
 use plonky2::util::serialization::{Buffer, IoResult, Read, Write};
 
 use crate::all_stark::{Table, NUM_TABLES};
@@ -466,39 +470,66 @@ impl<F: Field> CrossTableLookup<F> {
         }
     }
 
-    /// Given a `Table` t and the number of challenges, returns the number of Cross-table lookup polynomials associated to t,
-    /// i.e. the number of looking and looked tables among all CTLs whose columns are taken from t.
-    pub(crate) fn num_ctl_zs(ctls: &[Self], table: Table, num_challenges: usize) -> usize {
+    /// Given a table, returns:
+    /// - the total number of helper columns for this table, over all Cross-table lookups,
+    /// - the total number of z polynomials for this table, over all Cross-table lookups,
+    /// - the number of helper columns for this table, for each Cross-table lookup.
+    pub(crate) fn num_ctl_helpers_zs_all(
+        ctls: &[Self],
+        table: Table,
+        num_challenges: usize,
+        constraint_degree: usize,
+    ) -> (usize, usize, Vec<usize>) {
+        let mut num_helpers = 0;
         let mut num_ctls = 0;
-        for ctl in ctls {
+        let mut num_helpers_by_ctl = vec![0; ctls.len()];
+        for (i, ctl) in ctls.iter().enumerate() {
             let all_tables = std::iter::once(&ctl.looked_table).chain(&ctl.looking_tables);
-            num_ctls += all_tables.filter(|twc| twc.table == table).count();
+            let num_appearances = all_tables.filter(|twc| twc.table == table).count();
+            let is_helpers = num_appearances > 2;
+            if is_helpers {
+                num_helpers_by_ctl[i] = ceil_div_usize(num_appearances, constraint_degree - 1);
+                num_helpers += num_helpers_by_ctl[i];
+            }
+
+            if num_appearances > 0 {
+                num_ctls += 1;
+            }
         }
-        num_ctls * num_challenges
+        (
+            num_helpers * num_challenges,
+            num_ctls * num_challenges,
+            num_helpers_by_ctl,
+        )
     }
 }
 
 /// Cross-table lookup data for one table.
 #[derive(Clone, Default)]
-pub(crate) struct CtlData<F: Field> {
+pub(crate) struct CtlData<'a, F: Field> {
     /// Data associated with all Z(x) polynomials for one table.
-    pub(crate) zs_columns: Vec<CtlZData<F>>,
+    pub(crate) zs_columns: Vec<CtlZData<'a, F>>,
 }
 
 /// Cross-table lookup data associated with one Z(x) polynomial.
+/// One Z(x) polynomial can be associated to multiple tables,
+/// built from the same STARK.
 #[derive(Clone)]
-pub(crate) struct CtlZData<F: Field> {
+pub(crate) struct CtlZData<'a, F: Field> {
+    /// Helper columns to verify the Z polynomial values.
+    pub(crate) helper_columns: Vec<PolynomialValues<F>>,
     /// Z polynomial values.
     pub(crate) z: PolynomialValues<F>,
     /// Cross-table lookup challenge.
     pub(crate) challenge: GrandProductChallenge<F>,
-    /// Column linear combination for the current table.
-    pub(crate) columns: Vec<Column<F>>,
-    /// Filter column for the current table. It evaluates to either 1 or 0.
-    pub(crate) filter: Option<Filter<F>>,
+    /// Vector of column linear combinations for the current tables.
+    pub(crate) columns: Vec<&'a [Column<F>]>,
+    /// Vector of filter columns for the current table.
+    /// Each filter evaluates to either 1 or 0.
+    pub(crate) filter: Vec<Option<Filter<F>>>,
 }
 
-impl<F: Field> CtlData<F> {
+impl<'a, F: Field> CtlData<'a, F> {
     /// Returns the number of cross-table lookup polynomials.
     pub(crate) fn len(&self) -> usize {
         self.zs_columns.len()
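For intuition on the counting rule in `num_ctl_helpers_zs_all`: with constraint degree 3, each helper column can absorb `constraint_degree - 1 = 2` looking (columns, filter) pairs, and helpers are only materialized once a table appears more than twice in a CTL. A standalone sketch of that arithmetic (the `ceil_div` helper here is a hypothetical stand-in for `plonky2::util::ceil_div_usize`):

```rust
// Hypothetical stand-in for plonky2's `ceil_div_usize`.
fn ceil_div(a: usize, b: usize) -> usize {
    (a + b - 1) / b
}

/// Number of helper columns for a table appearing `num_appearances`
/// times in one CTL, under the `> 2` threshold used above.
fn num_helpers(num_appearances: usize, constraint_degree: usize) -> usize {
    if num_appearances > 2 {
        ceil_div(num_appearances, constraint_degree - 1)
    } else {
        0 // two or fewer appearances are checked directly, without helpers
    }
}

fn main() {
    assert_eq!(num_helpers(2, 3), 0); // handled by the direct degree-2 constraint
    assert_eq!(num_helpers(3, 3), 2); // chunks of size at most 2: [2, 1]
    assert_eq!(num_helpers(5, 3), 3); // chunks of size at most 2: [2, 2, 1]
}
```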
@@ -509,12 +540,38 @@ impl<F: Field> CtlData<F> {
         self.zs_columns.is_empty()
     }
 
-    /// Returns all the cross-table lookup polynomials.
-    pub(crate) fn z_polys(&self) -> Vec<PolynomialValues<F>> {
-        self.zs_columns
+    /// Returns all the cross-table lookup helper polynomials.
+    pub(crate) fn ctl_helper_polys(&self) -> Vec<PolynomialValues<F>> {
+        let num_polys = self
+            .zs_columns
             .iter()
-            .map(|zs_columns| zs_columns.z.clone())
-            .collect()
+            .fold(0, |acc, z| acc + z.helper_columns.len());
+        let mut res = Vec::with_capacity(num_polys);
+        for z in &self.zs_columns {
+            res.extend(z.helper_columns.clone());
+        }
+
+        res
+    }
+
+    /// Returns all the Z cross-table-lookup polynomials.
+    pub(crate) fn ctl_z_polys(&self) -> Vec<PolynomialValues<F>> {
+        let mut res = Vec::with_capacity(self.zs_columns.len());
+        for z in &self.zs_columns {
+            res.push(z.z.clone());
+        }
+
+        res
+    }
+
+    /// Returns the number of helper columns for each STARK in each
+    /// `CtlZData`.
+    pub(crate) fn num_ctl_helper_polys(&self) -> Vec<usize> {
+        let mut res = Vec::with_capacity(self.zs_columns.len());
+        for z in &self.zs_columns {
+            res.push(z.helper_columns.len());
+        }
+
+        res
     }
 }
 
@@ -640,16 +697,46 @@ pub(crate) fn get_grand_product_challenge_set_target<
     GrandProductChallengeSet { challenges }
 }
 
+/// Returns the number of helper columns for each `Table`.
+pub(crate) fn num_ctl_helper_columns_by_table<F: Field>(
+    ctls: &[CrossTableLookup<F>],
+    constraint_degree: usize,
+) -> Vec<[usize; NUM_TABLES]> {
+    let mut res = vec![[0; NUM_TABLES]; ctls.len()];
+    for (i, ctl) in ctls.iter().enumerate() {
+        let CrossTableLookup {
+            looking_tables,
+            looked_table,
+        } = ctl;
+        let mut num_by_table = [0; NUM_TABLES];
+
+        let grouped_lookups = looking_tables.iter().group_by(|&a| a.table);
+
+        for (table, group) in grouped_lookups.into_iter() {
+            let sum = group.count();
+            if sum > 2 {
+                // We only need helper columns if there are more than 2 columns.
+                num_by_table[table as usize] = ceil_div_usize(sum, constraint_degree - 1);
+            }
+        }
+
+        res[i] = num_by_table;
+    }
+    res
+}
+
 /// Generates all the cross-table lookup data, for all tables.
 /// - `trace_poly_values` corresponds to the trace values for all tables.
 /// - `cross_table_lookups` corresponds to all the cross-table lookups, i.e. the looked and looking tables, as described in `CrossTableLookup`.
 /// - `ctl_challenges` corresponds to the challenges used for CTLs.
+/// - `constraint_degree` is the maximal constraint degree for the table.
 /// For each `CrossTableLookup`, and each looking/looked table, the partial products for the CTL are computed, and added to the said table's `CtlZData`.
-pub(crate) fn cross_table_lookup_data<F: RichField, const D: usize>(
+pub(crate) fn cross_table_lookup_data<'a, F: RichField, const D: usize>(
     trace_poly_values: &[Vec<PolynomialValues<F>>; NUM_TABLES],
-    cross_table_lookups: &[CrossTableLookup<F>],
+    cross_table_lookups: &'a [CrossTableLookup<F>],
     ctl_challenges: &GrandProductChallengeSet<F>,
-) -> [CtlData<F>; NUM_TABLES] {
+    constraint_degree: usize,
+) -> [CtlData<'a, F>; NUM_TABLES] {
     let mut ctl_data_per_table = [0; NUM_TABLES].map(|_| CtlData::default());
     for CrossTableLookup {
         looking_tables,
@@ -658,103 +745,232 @@ pub(crate) fn cross_table_lookup_data<F: RichField, const D: usize>(
         looked_table,
     } in cross_table_lookups
     {
         log::debug!("Processing CTL for {:?}", looked_table.table);
         for &challenge in &ctl_challenges.challenges {
-            let zs_looking = looking_tables.iter().map(|table| {
-                partial_sums(
-                    &trace_poly_values[table.table as usize],
-                    &table.columns,
-                    &table.filter,
-                    challenge,
-                )
-            });
-            let z_looked = partial_sums(
-                &trace_poly_values[looked_table.table as usize],
-                &looked_table.columns,
-                &looked_table.filter,
+            let helper_zs_looking = ctl_helper_zs_cols(
+                trace_poly_values,
+                looking_tables.clone(),
                 challenge,
+                constraint_degree,
             );
-            for (table, z) in looking_tables.iter().zip(zs_looking) {
-                ctl_data_per_table[table.table as usize]
-                    .zs_columns
-                    .push(CtlZData {
-                        z,
-                        challenge,
-                        columns: table.columns.clone(),
-                        filter: table.filter.clone(),
-                    });
+
+            let mut z_looked = partial_sums(
+                &trace_poly_values[looked_table.table as usize],
+                &[(&looked_table.columns, &looked_table.filter)],
+                challenge,
+                constraint_degree,
+            );
+
+            for (table, helpers_zs) in helper_zs_looking {
+                let num_helpers = helpers_zs.len() - 1;
+                let count = looking_tables
+                    .iter()
+                    .filter(|looking_table| looking_table.table as usize == table)
+                    .count();
+                let cols_filts = looking_tables.iter().filter_map(|looking_table| {
+                    if looking_table.table as usize == table {
+                        Some((&looking_table.columns, &looking_table.filter))
+                    } else {
+                        None
+                    }
+                });
+                let mut columns = Vec::with_capacity(count);
+                let mut filter = Vec::with_capacity(count);
+                for (col, filt) in cols_filts {
+                    columns.push(&col[..]);
+                    filter.push(filt.clone());
+                }
+                ctl_data_per_table[table].zs_columns.push(CtlZData {
+                    helper_columns: helpers_zs[..num_helpers].to_vec(),
+                    z: helpers_zs[num_helpers].clone(),
+                    challenge,
+                    columns,
+                    filter,
+                });
             }
+            // There is no helper column for the looking table.
+            let looked_poly = z_looked[0].clone();
             ctl_data_per_table[looked_table.table as usize]
                 .zs_columns
                 .push(CtlZData {
-                    z: z_looked,
+                    helper_columns: vec![],
+                    z: looked_poly,
                     challenge,
-                    columns: looked_table.columns.clone(),
-                    filter: looked_table.filter.clone(),
+                    columns: vec![&looked_table.columns[..]],
+                    filter: vec![looked_table.filter.clone()],
                 });
         }
     }
     ctl_data_per_table
 }
 
+type ColumnFilter<'a, F> = (&'a [Column<F>], &'a Option<Filter<F>>);
+
+/// Given a STARK's trace, and the data associated to one lookup (either CTL or range check),
+/// returns the associated helper polynomials.
+pub(crate) fn get_helper_cols<F: Field>(
+    trace: &[PolynomialValues<F>],
+    degree: usize,
+    columns_filters: &[ColumnFilter<F>],
+    challenge: GrandProductChallenge<F>,
+    constraint_degree: usize,
+) -> Vec<PolynomialValues<F>> {
+    let num_helper_columns = ceil_div_usize(columns_filters.len(), constraint_degree - 1);
+
+    let mut helper_columns = Vec::with_capacity(num_helper_columns);
+
+    let mut filter_index = 0;
+    for mut cols_filts in &columns_filters.iter().chunks(constraint_degree - 1) {
+        let (first_col, first_filter) = cols_filts.next().unwrap();
+
+        let mut filter_col = Vec::with_capacity(degree);
+        let first_combined = (0..degree)
+            .map(|d| {
+                let f = if let Some(filter) = first_filter {
+                    let f = filter.eval_table(trace, d);
+                    filter_col.push(f);
+                    f
+                } else {
+                    filter_col.push(F::ONE);
+                    F::ONE
+                };
+                if f.is_one() {
+                    let evals = first_col
+                        .iter()
+                        .map(|c| c.eval_table(trace, d))
+                        .collect::<Vec<F>>();
+                    challenge.combine(evals.iter())
+                } else {
+                    assert_eq!(f, F::ZERO, "Non-binary filter?");
+                    // Dummy value. Cannot be zero since it will be batch-inverted.
+                    F::ONE
+                }
+            })
+            .collect::<Vec<F>>();
+
+        let mut acc = F::batch_multiplicative_inverse(&first_combined);
+        for d in 0..degree {
+            if filter_col[d].is_zero() {
+                acc[d] = F::ZERO;
+            }
+        }
+
+        for (col, filt) in cols_filts {
+            let mut filter_col = Vec::with_capacity(degree);
+            let mut combined = (0..degree)
+                .map(|d| {
+                    let f = if let Some(filter) = filt {
+                        let f = filter.eval_table(trace, d);
+                        filter_col.push(f);
+                        f
+                    } else {
+                        filter_col.push(F::ONE);
+                        F::ONE
+                    };
+                    if f.is_one() {
+                        let evals = col
+                            .iter()
+                            .map(|c| c.eval_table(trace, d))
+                            .collect::<Vec<F>>();
+                        challenge.combine(evals.iter())
+                    } else {
+                        assert_eq!(f, F::ZERO, "Non-binary filter?");
+                        // Dummy value. Cannot be zero since it will be batch-inverted.
+                        F::ONE
+                    }
+                })
+                .collect::<Vec<F>>();
+
+            combined = F::batch_multiplicative_inverse(&combined);
+
+            for d in 0..degree {
+                if filter_col[d].is_zero() {
+                    combined[d] = F::ZERO;
+                }
+            }
+
+            batch_add_inplace(&mut acc, &combined);
+        }
+
+        helper_columns.push(acc.into());
+    }
+    assert_eq!(helper_columns.len(), num_helper_columns);
+
+    helper_columns
+}
+
+/// Computes helper columns and Z polynomials for all looking tables
+/// of one cross-table lookup (i.e. for one looked table).
+fn ctl_helper_zs_cols<F: Field>(
+    all_stark_traces: &[Vec<PolynomialValues<F>>; NUM_TABLES],
+    looking_tables: Vec<TableWithColumns<F>>,
+    challenge: GrandProductChallenge<F>,
+    constraint_degree: usize,
+) -> Vec<(usize, Vec<PolynomialValues<F>>)> {
+    let grouped_lookups = looking_tables.iter().group_by(|a| a.table);
+
+    grouped_lookups
+        .into_iter()
+        .map(|(table, group)| {
+            let degree = all_stark_traces[table as usize][0].len();
+            let columns_filters = group
+                .map(|table| (&table.columns[..], &table.filter))
+                .collect::<Vec<(&[Column<F>], &Option<Filter<F>>)>>();
+            (
+                table as usize,
+                partial_sums(
+                    &all_stark_traces[table as usize],
+                    &columns_filters,
+                    challenge,
+                    constraint_degree,
+                ),
+            )
+        })
+        .collect::<Vec<(usize, Vec<PolynomialValues<F>>)>>()
+}
+
 /// Computes the cross-table lookup partial sums for one table and given column linear combinations.
 /// `trace` represents the trace values for the given table.
-/// `columns` are all the column linear combinations to evaluate.
-/// `filter_column` is a column linear combination used to determine whether a row should be selected.
+/// `columns` is a vector of column linear combinations to evaluate. Each element in the vector represents columns that need to be combined.
+/// `filter_cols` are column linear combinations used to determine whether a row should be selected.
 /// `challenge` is a cross-table lookup challenge.
 /// The initial sum `s` is 0.
-/// For each row, if the `filter_column` evaluates to 1, then the rows is selected. All the column linear combinations are evaluated at said row. All those evaluations are combined using the challenge to get a value `v`.
-/// The sum is updated: `s += 1/v`, and is pushed to the vector of partial sums.
+/// For each row, if the `filter_column` evaluates to 1, then the row is selected. All the column linear combinations are evaluated at said row.
+/// The evaluations of each element of `columns` are then combined together to form a value `v`.
+/// The values `v` are grouped together, in groups of size `constraint_degree - 1` (2 in our case). For each group, we construct a helper
+/// column: `h = \sum_i 1/(v_i)`.
+///
+/// The sum is updated: `s += \sum h_i`, and is pushed to the vector of partial sums `z`.
+/// Returns the helper columns and `z`.
 fn partial_sums<F: Field>(
     trace: &[PolynomialValues<F>],
-    columns: &[Column<F>],
-    filter: &Option<Filter<F>>,
+    columns_filters: &[ColumnFilter<F>],
     challenge: GrandProductChallenge<F>,
-) -> PolynomialValues<F> {
-    let mut partial_sum = F::ZERO;
+    constraint_degree: usize,
+) -> Vec<PolynomialValues<F>> {
     let degree = trace[0].len();
-    let mut filters = Vec::with_capacity(degree);
-    let mut res = Vec::with_capacity(degree);
+    let mut z = Vec::with_capacity(degree);
 
-    for i in (0..degree).rev() {
-        if let Some(filter) = filter {
-            let filter_val = filter.eval_table(trace, i);
-            if filter_val.is_one() {
-                filters.push(true);
-            } else {
-                assert_eq!(filter_val, F::ZERO, "Non-binary filter?");
-                filters.push(false);
-            }
-        } else {
-            filters.push(false);
-        };
+    let mut helper_columns =
+        get_helper_cols(trace, degree, columns_filters, challenge, constraint_degree);
 
-        let combined = if filters[filters.len() - 1] {
-            let evals = columns
-                .iter()
-                .map(|c| c.eval_table(trace, i))
-                .collect::<Vec<F>>();
-            challenge.combine(evals.iter())
-        } else {
-            // Dummy value. Cannot be zero since it will be batch-inverted.
-            F::ONE
-        };
-        res.push(combined);
+    let x = helper_columns
+        .iter()
+        .map(|col| col.values[degree - 1])
+        .sum::<F>();
+    z.push(x);
+
+    for i in (0..degree - 1).rev() {
+        let x = helper_columns.iter().map(|col| col.values[i]).sum::<F>();
+
+        z.push(z[z.len() - 1] + x);
     }
-    res = F::batch_multiplicative_inverse(&res);
-
-    if !filters[0] {
-        res[0] = F::ZERO;
+    z.reverse();
+    if columns_filters.len() > 2 {
+        helper_columns.push(z.into());
+    } else {
+        helper_columns = vec![z.into()];
     }
-    for i in (1..degree) {
-        let mut cur_value = res[i - 1];
-        if filters[i] {
-            cur_value += res[i];
-        }
-        res[i] = cur_value;
-    }
-
-    res.reverse();
-    res.into()
+    helper_columns
 }
 
 /// Data necessary to check the cross-table lookups of a given table.
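In equations, the scheme implemented by `get_helper_cols` and `partial_sums` is a logUp-style running sum. The following is a sketch of the intended algebra (my notation, assuming constraint degree 3, so each chunk holds at most two (columns, filter) pairs); `v_i` is the challenge-combined value of pair `i` at a row and `f_i` its filter value:

```latex
% Each helper column packs one chunk of at most two pairs:
h_j \;=\; \frac{f_{2j}}{v_{2j}} \;+\; \frac{f_{2j+1}}{v_{2j+1}}
% which is enforced without division as the degree-3 identity
\quad\Longleftrightarrow\quad
v_{2j}\, v_{2j+1}\, h_j \;-\; f_{2j}\, v_{2j+1} \;-\; f_{2j+1}\, v_{2j} \;=\; 0 .
% The Z column is the running sum of the helpers over the rows:
Z(g^{n-1}) \;=\; \sum_j h_j(g^{n-1}), \qquad
Z(x) - Z(g x) \;=\; \sum_j h_j(x),
% so Z(1) accumulates \sum_{rows} \sum_i f_i / v_i, the quantity
% compared between looking and looked tables.
```

With two or fewer pairs there is no helper column and the same cleared-denominator identity is applied directly to `Z`, which is what the `columns.len() > 1` and single-column branches of `eval_cross_table_lookup_checks` below enforce.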
@@ -765,6 +981,9 @@ where
     FE: FieldExtension<D2, BaseField = F>,
     P: PackedField<Scalar = FE>,
 {
+    /// Helper columns to check that the Z polynomial
+    /// was constructed correctly.
+    pub(crate) helper_columns: Vec<P>,
     /// Evaluation of the trace polynomials at point `zeta`.
     pub(crate) local_z: P,
     /// Evaluation of the trace polynomials at point `g * zeta`
@@ -774,9 +993,9 @@ where
     /// Cross-table lookup challenges.
     pub(crate) challenges: GrandProductChallenge<F>,
     /// Column linear combinations of the `CrossTableLookup`s.
-    pub(crate) columns: &'a [Column<F>],
+    pub(crate) columns: Vec<&'a [Column<F>]>,
     /// Filter that evaluates to either 1 or 0.
-    pub(crate) filter: &'a Option<Filter<F>>,
+    pub(crate) filter: Vec<Option<Filter<F>>>,
 }
 
 impl<'a, F: RichField + Extendable<D>, const D: usize>
@@ -786,45 +1005,107 @@ impl<'a, F: RichField + Extendable<D>, const D: usize>
         cross_table_lookups: &'a [CrossTableLookup<F>],
         ctl_challenges: &'a GrandProductChallengeSet<F>,
         num_lookup_columns: &[usize; NUM_TABLES],
+        num_helper_ctl_columns: &Vec<[usize; NUM_TABLES]>,
     ) -> [Vec<Self>; NUM_TABLES] {
+        let mut total_num_helper_cols_by_table = [0; NUM_TABLES];
+        for p_ctls in num_helper_ctl_columns {
+            for j in 0..NUM_TABLES {
+                total_num_helper_cols_by_table[j] += p_ctls[j] * ctl_challenges.challenges.len();
+            }
+        }
+
         // Get all cross-table lookup polynomial openings for each STARK proof.
         let mut ctl_zs = proofs
             .iter()
             .zip(num_lookup_columns)
             .map(|(p, &num_lookup)| {
                 let openings = &p.proof.openings;
-                let ctl_zs = openings.auxiliary_polys.iter().skip(num_lookup);
-                let ctl_zs_next = openings.auxiliary_polys_next.iter().skip(num_lookup);
-                ctl_zs.zip(ctl_zs_next)
+
+                let ctl_zs = &openings.auxiliary_polys[num_lookup..];
+                let ctl_zs_next = &openings.auxiliary_polys_next[num_lookup..];
+                ctl_zs.iter().zip(ctl_zs_next).collect::<Vec<_>>()
             })
             .collect::<Vec<_>>();
 
         // Put each cross-table lookup polynomial into the correct table data: if a CTL polynomial is extracted from looking/looked table t, then we add it to the `CtlCheckVars` of table t.
+        let mut start_indices = [0; NUM_TABLES];
+        let mut z_indices = [0; NUM_TABLES];
         let mut ctl_vars_per_table = [0; NUM_TABLES].map(|_| vec![]);
-        for CrossTableLookup {
-            looking_tables,
-            looked_table,
-        } in cross_table_lookups
+        for (
+            CrossTableLookup {
+                looking_tables,
+                looked_table,
+            },
+            num_ctls,
+        ) in cross_table_lookups.iter().zip(num_helper_ctl_columns)
         {
             for &challenges in &ctl_challenges.challenges {
+                // Group looking tables by `Table`, since we bundle the looking tables taken from the same `Table` together thanks to helper columns.
+                // We want to only iterate on each `Table` once.
+                let mut filtered_looking_tables =
+                    Vec::with_capacity(min(looking_tables.len(), NUM_TABLES));
                 for table in looking_tables {
-                    let (looking_z, looking_z_next) = ctl_zs[table.table as usize].next().unwrap();
-                    ctl_vars_per_table[table.table as usize].push(Self {
+                    if !filtered_looking_tables.contains(&(table.table as usize)) {
+                        filtered_looking_tables.push(table.table as usize);
+                    }
+                }
+
+                for (i, &table) in filtered_looking_tables.iter().enumerate() {
+                    // We have first all the helper polynomials, then all the z polynomials.
+ let (looking_z, looking_z_next) = + ctl_zs[table][total_num_helper_cols_by_table[table] + z_indices[table]]; + + let count = looking_tables + .iter() + .filter(|looking_table| looking_table.table as usize == table) + .count(); + let cols_filts = looking_tables.iter().filter_map(|looking_table| { + if looking_table.table as usize == table { + Some((&looking_table.columns, &looking_table.filter)) + } else { + None + } + }); + let mut columns = Vec::with_capacity(count); + let mut filter = Vec::with_capacity(count); + for (col, filt) in cols_filts { + columns.push(&col[..]); + filter.push(filt.clone()); + } + let helper_columns = ctl_zs[table] + [start_indices[table]..start_indices[table] + num_ctls[table]] + .iter() + .map(|&(h, _)| *h) + .collect::>(); + + start_indices[table] += num_ctls[table]; + + z_indices[table] += 1; + ctl_vars_per_table[table].push(Self { + helper_columns, local_z: *looking_z, next_z: *looking_z_next, challenges, - columns: &table.columns, - filter: &table.filter, + columns, + filter, }); } - let (looked_z, looked_z_next) = ctl_zs[looked_table.table as usize].next().unwrap(); + let (looked_z, looked_z_next) = ctl_zs[looked_table.table as usize] + [total_num_helper_cols_by_table[looked_table.table as usize] + + z_indices[looked_table.table as usize]]; + + z_indices[looked_table.table as usize] += 1; + + let columns = vec![&looked_table.columns[..]]; + let filter = vec![looked_table.filter.clone()]; ctl_vars_per_table[looked_table.table as usize].push(Self { + helper_columns: vec![], local_z: *looked_z, next_z: *looked_z_next, challenges, - columns: &looked_table.columns, - filter: &looked_table.filter, + columns, + filter, }); } } @@ -832,6 +1113,61 @@ impl<'a, F: RichField + Extendable, const D: usize> } } +/// Given data associated to a lookup (either a CTL or a range-check), check the associated helper polynomials. +pub(crate) fn eval_helper_columns( + filter: &[Option>], + columns: &[Vec
<P>],
+    local_values: &[P],
+    next_values: &[P],
+    helper_columns: &[P],
+    constraint_degree: usize,
+    challenges: &GrandProductChallenge<F>,
+    consumer: &mut ConstraintConsumer<P>
, +) where + F: RichField + Extendable, + FE: FieldExtension, + P: PackedField, +{ + if !helper_columns.is_empty() { + for (j, chunk) in columns.chunks(constraint_degree - 1).enumerate() { + let fs = + &filter[(constraint_degree - 1) * j..(constraint_degree - 1) * j + chunk.len()]; + let h = helper_columns[j]; + + match chunk.len() { + 2 => { + let combin0 = challenges.combine(&chunk[0]); + let combin1 = challenges.combine(chunk[1].iter()); + + let f0 = if let Some(filter0) = &fs[0] { + filter0.eval_filter(local_values, next_values) + } else { + P::ONES + }; + let f1 = if let Some(filter1) = &fs[1] { + filter1.eval_filter(local_values, next_values) + } else { + P::ONES + }; + + consumer.constraint(combin1 * combin0 * h - f0 * combin1 - f1 * combin0); + } + 1 => { + let combin = challenges.combine(&chunk[0]); + let f0 = if let Some(filter1) = &fs[0] { + filter1.eval_filter(local_values, next_values) + } else { + P::ONES + }; + consumer.constraint(combin * h - f0); + } + + _ => todo!("Allow other constraint degrees"), + } + } + } +} + /// Checks the cross-table lookup Z polynomials for each table: /// - Checks that the CTL `Z` partial sums are correctly updated. /// - Checks that the final value of the CTL sum is the combination of all STARKs' CTL polynomials. @@ -843,6 +1179,7 @@ pub(crate) fn eval_cross_table_lookup_checks, ctl_vars: &[CtlCheckVars], consumer: &mut ConstraintConsumer
<P>
, + constraint_degree: usize, ) where F: RichField + Extendable, FE: FieldExtension, @@ -854,6 +1191,7 @@ pub(crate) fn eval_cross_table_lookup_checks>() + }) .collect::>(); - let combined = challenges.combine(evals.iter()); - let local_filter = if let Some(combin) = filter { - combin.eval_filter(local_values, next_values) - } else { - P::ONES - }; - // Check value of `Z(g^(n-1))` - consumer.constraint_last_row(*local_z * combined - local_filter); - // Check `Z(w) = Z(gw) * (filter / combination)` - consumer.constraint_transition((*local_z - *next_z) * combined - local_filter); + // Check helper columns. + eval_helper_columns( + filter, + &evals, + local_values, + next_values, + helper_columns, + constraint_degree, + challenges, + consumer, + ); + + if !helper_columns.is_empty() { + let h_sum = helper_columns.iter().fold(P::ZEROS, |acc, x| acc + *x); + // Check value of `Z(g^(n-1))` + consumer.constraint_last_row(*local_z - h_sum); + // Check `Z(w) = Z(gw) + \sum h_i` + consumer.constraint_transition(*local_z - *next_z - h_sum); + } else if columns.len() > 1 { + let combin0 = challenges.combine(&evals[0]); + let combin1 = challenges.combine(&evals[1]); + + let f0 = if let Some(filter0) = &filter[0] { + filter0.eval_filter(local_values, next_values) + } else { + P::ONES + }; + let f1 = if let Some(filter1) = &filter[1] { + filter1.eval_filter(local_values, next_values) + } else { + P::ONES + }; + + consumer + .constraint_last_row(combin0 * combin1 * *local_z - f0 * combin1 - f1 * combin0); + consumer.constraint_transition( + combin0 * combin1 * (*local_z - *next_z) - f0 * combin1 - f1 * combin0, + ); + } else { + let combin0 = challenges.combine(&evals[0]); + let f0 = if let Some(filter0) = &filter[0] { + filter0.eval_filter(local_values, next_values) + } else { + P::ONES + }; + consumer.constraint_last_row(combin0 * *local_z - f0); + consumer.constraint_transition(combin0 * (*local_z - *next_z) - f0); + } } } /// Circuit version of `CtlCheckVars`. Data necessary to check the cross-table lookups of a given table. #[derive(Clone)] -pub(crate) struct CtlCheckVarsTarget<'a, F: Field, const D: usize> { +pub(crate) struct CtlCheckVarsTarget { + ///Evaluation of the helper columns to check that the Z polyomial + /// was constructed correctly. + pub(crate) helper_columns: Vec>, /// Evaluation of the trace polynomials at point `zeta`. pub(crate) local_z: ExtensionTarget, /// Evaluation of the trace polynomials at point `g * zeta`. @@ -890,12 +1273,12 @@ pub(crate) struct CtlCheckVarsTarget<'a, F: Field, const D: usize> { /// Cross-table lookup challenges. pub(crate) challenges: GrandProductChallenge, /// Column linear combinations of the `CrossTableLookup`s. - pub(crate) columns: &'a [Column], + pub(crate) columns: Vec>>, /// Filter that evaluates to either 1 or 0. - pub(crate) filter: &'a Option>, + pub(crate) filter: Vec>>, } -impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { +impl<'a, F: Field, const D: usize> CtlCheckVarsTarget { /// Circuit version of `from_proofs`. Extracts the `CtlCheckVarsTarget` for each STARK. pub(crate) fn from_proof( table: Table, @@ -903,6 +1286,8 @@ impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { cross_table_lookups: &'a [CrossTableLookup], ctl_challenges: &'a GrandProductChallengeSet, num_lookup_columns: usize, + total_num_helper_columns: usize, + num_helper_ctl_columns: &[usize], ) -> Vec { // Get all cross-table lookup polynomial openings for each STARK proof. 
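+        // For a single proof these are the `auxiliary_polys` openings with the
+        // `num_lookup_columns` lookup helpers skipped: all CTL helper columns
+        // come first, followed by all CTL Z columns.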
let mut ctl_zs = { @@ -912,47 +1297,145 @@ impl<'a, F: Field, const D: usize> CtlCheckVarsTarget<'a, F, D> { .auxiliary_polys_next .iter() .skip(num_lookup_columns); - ctl_zs.zip(ctl_zs_next) + ctl_zs.zip(ctl_zs_next).collect::>() }; // Put each cross-table lookup polynomial into the correct table data: if a CTL polynomial is extracted from looking/looked table t, then we add it to the `CtlCheckVars` of table t. + let mut z_index = 0; + let mut start_index = 0; let mut ctl_vars = vec![]; - for CrossTableLookup { - looking_tables, - looked_table, - } in cross_table_lookups + for ( + i, + CrossTableLookup { + looking_tables, + looked_table, + }, + ) in cross_table_lookups.iter().enumerate() { for &challenges in &ctl_challenges.challenges { - for looking_table in looking_tables { + // Group looking tables by `Table`, since we bundle the looking tables taken from the same `Table` together thanks to helper columns. + + let count = looking_tables + .iter() + .filter(|looking_table| looking_table.table == table) + .count(); + let cols_filts = looking_tables.iter().filter_map(|looking_table| { if looking_table.table == table { - let (looking_z, looking_z_next) = ctl_zs.next().unwrap(); - ctl_vars.push(Self { - local_z: *looking_z, - next_z: *looking_z_next, - challenges, - columns: &looking_table.columns, - filter: &looking_table.filter, - }); + Some((&looking_table.columns, &looking_table.filter)) + } else { + None } + }); + if count > 0 { + let mut columns = Vec::with_capacity(count); + let mut filter = Vec::with_capacity(count); + for (col, filt) in cols_filts { + columns.push(col.clone()); + filter.push(filt.clone()); + } + let (looking_z, looking_z_next) = ctl_zs[total_num_helper_columns + z_index]; + let helper_columns = ctl_zs + [start_index..start_index + num_helper_ctl_columns[i]] + .iter() + .map(|(&h, _)| h) + .collect::>(); + + start_index += num_helper_ctl_columns[i]; + z_index += 1; + // let columns = group.0.clone(); + // let filter = group.1.clone(); + ctl_vars.push(Self { + helper_columns, + local_z: *looking_z, + next_z: *looking_z_next, + challenges, + columns, + filter, + }); } if looked_table.table == table { - let (looked_z, looked_z_next) = ctl_zs.next().unwrap(); + let (looked_z, looked_z_next) = ctl_zs[total_num_helper_columns + z_index]; + z_index += 1; + + let columns = vec![looked_table.columns.clone()]; + let filter = vec![looked_table.filter.clone()]; ctl_vars.push(Self { + helper_columns: vec![], local_z: *looked_z, next_z: *looked_z_next, challenges, - columns: &looked_table.columns, - filter: &looked_table.filter, + columns, + filter, }); } } } - assert!(ctl_zs.next().is_none()); + ctl_vars } } +/// Circuit version of `eval_helper_columns`. +/// Given data associated to a lookup (either a CTL or a range-check), check the associated helper polynomials. 
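+/// For a chunk of two columns whose challenge-combined values are `c_0` and `c_1`,
+/// with filters `f_0` and `f_1`, the helper column `h` holds `f_0/c_0 + f_1/c_1`;
+/// this is enforced without divisions as `h * c_0 * c_1 - f_0 * c_1 - f_1 * c_0 = 0`.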
+pub(crate) fn eval_helper_columns_circuit, const D: usize>( + builder: &mut CircuitBuilder, + filter: &[Option>], + columns: &[Vec>], + local_values: &[ExtensionTarget], + next_values: &[ExtensionTarget], + helper_columns: &[ExtensionTarget], + constraint_degree: usize, + challenges: &GrandProductChallenge, + consumer: &mut RecursiveConstraintConsumer, +) { + if !helper_columns.is_empty() { + for (j, chunk) in columns.chunks(constraint_degree - 1).enumerate() { + let fs = + &filter[(constraint_degree - 1) * j..(constraint_degree - 1) * j + chunk.len()]; + let h = helper_columns[j]; + + let one = builder.one_extension(); + match chunk.len() { + 2 => { + let combin0 = challenges.combine_circuit(builder, &chunk[0]); + let combin1 = challenges.combine_circuit(builder, &chunk[1]); + + let f0 = if let Some(filter0) = &fs[0] { + filter0.eval_filter_circuit(builder, local_values, next_values) + } else { + one + }; + let f1 = if let Some(filter1) = &fs[1] { + filter1.eval_filter_circuit(builder, local_values, next_values) + } else { + one + }; + + let constr = builder.mul_sub_extension(combin0, h, f0); + let constr = builder.mul_extension(constr, combin1); + let f1_constr = builder.mul_extension(f1, combin0); + let constr = builder.sub_extension(constr, f1_constr); + + consumer.constraint(builder, constr); + } + 1 => { + let combin = challenges.combine_circuit(builder, &chunk[0]); + let f0 = if let Some(filter1) = &fs[0] { + filter1.eval_filter_circuit(builder, local_values, next_values) + } else { + one + }; + let constr = builder.mul_sub_extension(combin, h, f0); + consumer.constraint(builder, constr); + } + + _ => todo!("Allow other constraint degrees"), + } + } + } +} + /// Circuit version of `eval_cross_table_lookup_checks`. Checks the cross-table lookup Z polynomials for each table: /// - Checks that the CTL `Z` partial sums are correctly updated. /// - Checks that the final value of the CTL sum is the combination of all STARKs' CTL polynomials. @@ -969,12 +1452,16 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< vars: &S::EvaluationFrameTarget, ctl_vars: &[CtlCheckVarsTarget], consumer: &mut RecursiveConstraintConsumer, + constraint_degree: usize, ) { let local_values = vars.get_local_values(); let next_values = vars.get_next_values(); + let one = builder.one_extension(); + for lookup_vars in ctl_vars { let CtlCheckVarsTarget { + helper_columns, local_z, next_z, challenges, @@ -982,29 +1469,77 @@ pub(crate) fn eval_cross_table_lookup_checks_circuit< filter, } = lookup_vars; - let one = builder.one_extension(); - let local_filter = if let Some(combin) = filter { - combin.eval_filter_circuit(builder, local_values, next_values) - } else { - one - }; - // Compute all linear combinations on the current table, and combine them using the challenge. let evals = columns .iter() - .map(|c| c.eval_with_next_circuit(builder, local_values, next_values)) + .map(|col| { + col.iter() + .map(|c| c.eval_with_next_circuit(builder, local_values, next_values)) + .collect::>() + }) .collect::>(); - let combined = challenges.combine_circuit(builder, &evals); + // Check helper columns. 
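+        // Each helper column batches up to `constraint_degree - 1` filtered
+        // `1/combination` terms, so the Z constraints below only need the row-wise
+        // sum of the helpers: `Z(g^(n-1)) = \sum h_i` on the last row, and
+        // `Z(w) - Z(gw) = \sum h_i(w)` on transitions.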
+        eval_helper_columns_circuit(
+            builder,
+            filter,
+            &evals,
+            local_values,
+            next_values,
+            helper_columns,
+            constraint_degree,
+            challenges,
+            consumer,
+        );
 
-        // Check value of `Z(g^(n-1))`
-        let last_row = builder.mul_sub_extension(*local_z, combined, local_filter);
-        consumer.constraint_last_row(builder, last_row);
-        // Check `Z(w) = Z(gw) * (filter / combination)`
         let z_diff = builder.sub_extension(*local_z, *next_z);
-        let lhs = builder.mul_extension(combined, z_diff);
-        let transition = builder.sub_extension(lhs, local_filter);
-        consumer.constraint_transition(builder, transition);
+        if !helper_columns.is_empty() {
+            // Check value of `Z(g^(n-1))`
+            let h_sum = builder.add_many_extension(helper_columns);
+
+            let last_row = builder.sub_extension(*local_z, h_sum);
+            consumer.constraint_last_row(builder, last_row);
+            // Check `Z(w) = Z(gw) + \sum h_i`
+
+            let transition = builder.sub_extension(z_diff, h_sum);
+            consumer.constraint_transition(builder, transition);
+        } else if columns.len() > 1 {
+            let combin0 = challenges.combine_circuit(builder, &evals[0]);
+            let combin1 = challenges.combine_circuit(builder, &evals[1]);
+
+            let f0 = if let Some(filter0) = &filter[0] {
+                filter0.eval_filter_circuit(builder, local_values, next_values)
+            } else {
+                one
+            };
+            let f1 = if let Some(filter1) = &filter[1] {
+                filter1.eval_filter_circuit(builder, local_values, next_values)
+            } else {
+                one
+            };
+
+            let combined = builder.mul_sub_extension(combin1, *local_z, f1);
+            let combined = builder.mul_extension(combined, combin0);
+            let constr = builder.arithmetic_extension(F::NEG_ONE, F::ONE, f0, combin1, combined);
+            consumer.constraint_last_row(builder, constr);
+
+            let combined = builder.mul_sub_extension(combin1, z_diff, f1);
+            let combined = builder.mul_extension(combined, combin0);
+            let constr = builder.arithmetic_extension(F::NEG_ONE, F::ONE, f0, combin1, combined);
+            consumer.constraint_transition(builder, constr);
+        } else {
+            let combin0 = challenges.combine_circuit(builder, &evals[0]);
+            let f0 = if let Some(filter0) = &filter[0] {
+                filter0.eval_filter_circuit(builder, local_values, next_values)
+            } else {
+                one
+            };
+
+            let constr = builder.mul_sub_extension(combin0, *local_z, f0);
+            consumer.constraint_last_row(builder, constr);
+            let constr = builder.mul_sub_extension(combin0, z_diff, f0);
+            consumer.constraint_transition(builder, constr);
+        }
     }
 }
 
@@ -1026,11 +1561,19 @@ pub(crate) fn verify_cross_table_lookups<F: RichField + Extendable<D>, const D:
     {
         // Get elements looking into `looked_table` that are not associated to any STARK.
         let extra_sum_vec = &ctl_extra_looking_sums[looked_table.table as usize];
+        // We want to iterate on each looking table only once.
+        let mut filtered_looking_tables = vec![];
+        for table in looking_tables {
+            if !filtered_looking_tables.contains(&(table.table as usize)) {
+                filtered_looking_tables.push(table.table as usize);
+            }
+        }
         for c in 0..config.num_challenges {
             // Compute the combination of all looking table CTL polynomial openings.
-            let looking_zs_sum = looking_tables
+
+            let looking_zs_sum = filtered_looking_tables
                 .iter()
-                .map(|table| *ctl_zs_openings[table.table as usize].next().unwrap())
+                .map(|&table| *ctl_zs_openings[table].next().unwrap())
                 .sum::<F>()
                 + extra_sum_vec[c];
 
@@ -1065,12 +1608,19 @@ pub(crate) fn verify_cross_table_lookups_circuit<F: RichField + Extendable<D>, c
     {
         // Get elements looking into `looked_table` that are not associated to any STARK.
         let extra_sum_vec = &ctl_extra_looking_sums[looked_table.table as usize];
+        // We want to iterate on each looking table only once.
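+        // Looking tables taken from the same `Table` are bundled into a single Z
+        // column per challenge, so summing over the raw `looking_tables` list
+        // would read the same opening several times.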
+ let mut filtered_looking_tables = vec![]; + for table in looking_tables { + if !filtered_looking_tables.contains(&(table.table as usize)) { + filtered_looking_tables.push(table.table as usize); + } + } for c in 0..inner_config.num_challenges { // Compute the combination of all looking table CTL polynomial openings. let mut looking_zs_sum = builder.add_many( - looking_tables + filtered_looking_tables .iter() - .map(|table| *ctl_zs_openings[table.table as usize].next().unwrap()), + .map(|&table| *ctl_zs_openings[table].next().unwrap()), ); looking_zs_sum = builder.add(looking_zs_sum, extra_sum_vec[c]); diff --git a/evm/src/keccak/keccak_stark.rs b/evm/src/keccak/keccak_stark.rs index b3e31093..63098e79 100644 --- a/evm/src/keccak/keccak_stark.rs +++ b/evm/src/keccak/keccak_stark.rs @@ -740,13 +740,14 @@ mod tests { // Fake CTL data. let ctl_z_data = CtlZData { + helper_columns: vec![PolynomialValues::zero(degree)], z: PolynomialValues::zero(degree), challenge: GrandProductChallenge { beta: F::ZERO, gamma: F::ZERO, }, columns: vec![], - filter: Some(Filter::new_simple(Column::constant(F::ZERO))), + filter: vec![Some(Filter::new_simple(Column::constant(F::ZERO)))], }; let ctl_data = CtlData { zs_columns: vec![ctl_z_data.clone(); config.num_challenges], diff --git a/evm/src/lookup.rs b/evm/src/lookup.rs index 8bf0c8f6..4b735c64 100644 --- a/evm/src/lookup.rs +++ b/evm/src/lookup.rs @@ -12,7 +12,10 @@ use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2_util::ceil_div_usize; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::cross_table_lookup::{Column, Filter}; +use crate::cross_table_lookup::{ + eval_helper_columns, eval_helper_columns_circuit, get_helper_cols, Column, Filter, + GrandProductChallenge, +}; use crate::evaluation_frame::StarkEvaluationFrame; use crate::stark::Stark; @@ -64,48 +67,38 @@ pub(crate) fn lookup_helper_columns( let num_helper_columns = lookup.num_helper_columns(constraint_degree); let mut helper_columns: Vec> = Vec::with_capacity(num_helper_columns); + let looking_cols = lookup + .columns + .iter() + .map(|col| vec![col.clone()]) + .collect::>>>(); + + let grand_challenge = GrandProductChallenge { + beta: F::ONE, + gamma: challenge, + }; + + let columns_filters = looking_cols + .iter() + .zip(lookup.filter_columns.iter()) + .map(|(col, filter)| (&col[..], filter)) + .collect::>(); // For each batch of `constraint_degree-1` columns `fi`, compute `sum 1/(f_i+challenge)` and // add it to the helper columns. - // TODO: This does one batch inversion per column. It would also be possible to do one batch inversion - // for every group of columns, but that would require building a big vector of all the columns concatenated. - // Not sure which approach is better. // Note: these are the h_k(x) polynomials in the paper, with a few differences: // * Here, the first ratio m_0(x)/phi_0(x) is not included with the columns batched up to create the // h_k polynomials; instead there's a separate helper column for it (see below). // * Here, we use 1 instead of -1 as the numerator (and subtract later). // * Here, for now, the batch size (l) is always constraint_degree - 1 = 2. 
- for (i, mut col_inds) in (&lookup.columns.iter().chunks(constraint_degree - 1)) - .into_iter() - .enumerate() - { - let first = col_inds.next().unwrap(); - - let mut column = first.eval_all_rows(trace_poly_values); - let length = column.len(); - - for x in column.iter_mut() { - *x = challenge + *x; - } - let mut acc = F::batch_multiplicative_inverse(&column); - if let Some(filter) = &lookup.filter_columns[0] { - batch_multiply_inplace(&mut acc, &filter.eval_all_rows(trace_poly_values)); - } - - for (j, ind) in col_inds.enumerate() { - let mut column = ind.eval_all_rows(trace_poly_values); - for x in column.iter_mut() { - *x = challenge + *x; - } - column = F::batch_multiplicative_inverse(&column); - let filter_idx = (constraint_degree - 1) * i + j + 1; - if let Some(filter) = &lookup.filter_columns[filter_idx] { - batch_multiply_inplace(&mut column, &filter.eval_all_rows(trace_poly_values)); - } - batch_add_inplace(&mut acc, &column); - } - - helper_columns.push(acc.into()); - } + // * Here, there are filters for the columns, to only select some rows + // in a given column. + let mut helper_columns = get_helper_cols( + trace_poly_values, + trace_poly_values[0].len(), + &columns_filters, + grand_challenge, + constraint_degree, + ); // Add `1/(table+challenge)` to the helper columns. // This is 1/phi_0(x) = 1/(x + t(x)) from the paper. @@ -168,38 +161,30 @@ pub(crate) fn eval_packed_lookups_generic>>(); + // For each chunk, check that `h_i (x+f_2i) (x+f_{2i+1}) = (x+f_2i) * filter_{2i+1} + (x+f_{2i+1}) * filter_2i` if the chunk has length 2 // or if it has length 1, check that `h_i * (x+f_2i) = filter_2i`, where x is the challenge - for (j, chunk) in lookup.columns.chunks(degree - 1).enumerate() { - let mut x = lookup_vars.local_values[start + j]; - let mut y = P::ZEROS; - let col_values = chunk - .iter() - .map(|col| col.eval_with_next(local_values, next_values)); - let filters = lookup.filter_columns - [(degree - 1) * j..(degree - 1) * j + chunk.len()] - .iter() - .map(|maybe_filter| { - if let Some(filter) = maybe_filter { - filter.eval_filter(local_values, next_values) - } else { - P::ONES - } - }) - .rev() - .collect::>(); - let last_filter_value = filters[0]; - for (val, f) in col_values.zip_eq(filters) { - x *= val + challenge; - y += (val + challenge) * f; - } - match chunk.len() { - 2 => yield_constr.constraint(x - y), - 1 => yield_constr.constraint(x - last_filter_value), - _ => todo!("Allow other constraint degrees."), - } - } + eval_helper_columns( + &lookup.filter_columns, + &lookup_columns, + local_values, + next_values, + &lookup_vars.local_values[start..start + num_helper_columns - 1], + degree, + &grand_challenge, + yield_constr, + ); + + let challenge = FE::from_basefield(challenge); // Check the `Z` polynomial. 
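+            // Toy instance of the identity being accumulated: looking values
+            // (1, 2, 2) against table (1, 2) with multiplicities (1, 2) satisfy
+            //     1/(1+a) + 1/(2+a) + 1/(2+a) == 1/(1+a) + 2/(2+a)
+            // for any challenge a; `z` sums the left-hand side through the helper
+            // columns and the multiplicity-weighted table term is subtracted.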
let z = lookup_vars.local_values[start + num_helper_columns - 1]; @@ -245,45 +230,30 @@ pub(crate) fn eval_ext_lookups_circuit< let mut start = 0; for lookup in lookups { let num_helper_columns = lookup.num_helper_columns(degree); + let col_values = lookup + .columns + .iter() + .map(|col| vec![col.eval_with_next_circuit(builder, local_values, next_values)]) + .collect::>(); + for &challenge in &lookup_vars.challenges { + let grand_challenge = GrandProductChallenge { + beta: builder.one(), + gamma: challenge, + }; + + eval_helper_columns_circuit( + builder, + &lookup.filter_columns, + &col_values, + local_values, + next_values, + &lookup_vars.local_values[start..start + num_helper_columns - 1], + degree, + &grand_challenge, + yield_constr, + ); let challenge = builder.convert_to_ext(challenge); - for (j, chunk) in lookup.columns.chunks(degree - 1).enumerate() { - let mut x = lookup_vars.local_values[start + j]; - let mut y = builder.zero_extension(); - let col_values = chunk - .iter() - .map(|k| k.eval_with_next_circuit(builder, local_values, next_values)) - .collect::>(); - let filters = lookup.filter_columns - [(degree - 1) * j..(degree - 1) * j + chunk.len()] - .iter() - .map(|maybe_filter| { - if let Some(filter) = maybe_filter { - filter.eval_filter_circuit(builder, local_values, next_values) - } else { - one - } - }) - .rev() - .collect::>(); - let last_filter_value = filters[0]; - for (&val, f) in col_values.iter().zip_eq(filters) { - let tmp = builder.add_extension(val, challenge); - x = builder.mul_extension(x, tmp); - y = builder.mul_add_extension(f, tmp, y); - } - match chunk.len() { - 2 => { - let tmp = builder.sub_extension(x, y); - yield_constr.constraint(builder, tmp) - } - 1 => { - let tmp = builder.sub_extension(x, last_filter_value); - yield_constr.constraint(builder, tmp) - } - _ => todo!("Allow other constraint degrees."), - } - } let z = lookup_vars.local_values[start + num_helper_columns - 1]; let next_z = lookup_vars.next_values[start + num_helper_columns - 1]; diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs index e596f421..1283965b 100644 --- a/evm/src/memory/memory_stark.rs +++ b/evm/src/memory/memory_stark.rs @@ -305,6 +305,10 @@ impl, const D: usize> Stark for MemoryStark, const D: usize> Stark for MemoryStark "SEGMENT_TRIE_DATA", Segment::ShiftTable => "SEGMENT_SHIFT_TABLE", Segment::JumpdestBits => "SEGMENT_JUMPDEST_BITS", - Segment::EcdsaTable => "SEGMENT_KERNEL_ECDSA_TABLE", + Segment::EcdsaTable => "SEGMENT_ECDSA_TABLE", Segment::BnWnafA => "SEGMENT_BN_WNAF_A", Segment::BnWnafB => "SEGMENT_BN_WNAF_B", Segment::BnTableQ => "SEGMENT_BN_TABLE_Q", - Segment::BnPairing => "SEGMENT_KERNEL_BN_PAIRING", + Segment::BnPairing => "SEGMENT_BN_PAIRING", Segment::AccessedAddresses => "SEGMENT_ACCESSED_ADDRESSES", Segment::AccessedStorageKeys => "SEGMENT_ACCESSED_STORAGE_KEYS", Segment::SelfDestructList => "SEGMENT_SELFDESTRUCT_LIST", diff --git a/evm/src/proof.rs b/evm/src/proof.rs index 3768f98f..4cc18677 100644 --- a/evm/src/proof.rs +++ b/evm/src/proof.rs @@ -974,7 +974,10 @@ impl, const D: usize> StarkOpeningSet { auxiliary_polys_commitment: &PolynomialBatch, quotient_commitment: &PolynomialBatch, num_lookup_columns: usize, + num_ctl_polys: &[usize], ) -> Self { + let total_num_helper_cols: usize = num_ctl_polys.iter().sum(); + // Batch evaluates polynomials on the LDE, at a point `z`. 
let eval_commitment = |z: F::Extension, c: &PolynomialBatch| { c.polynomials @@ -989,6 +992,9 @@ impl, const D: usize> StarkOpeningSet { .map(|p| p.eval(z)) .collect::>() }; + + let auxiliary_first = eval_commitment_base(F::ONE, auxiliary_polys_commitment); + let ctl_zs_first = auxiliary_first[num_lookup_columns + total_num_helper_cols..].to_vec(); // `g * zeta`. let zeta_next = zeta.scalar_mul(g); Self { @@ -996,9 +1002,7 @@ impl, const D: usize> StarkOpeningSet { next_values: eval_commitment(zeta_next, trace_commitment), auxiliary_polys: eval_commitment(zeta, auxiliary_polys_commitment), auxiliary_polys_next: eval_commitment(zeta_next, auxiliary_polys_commitment), - ctl_zs_first: eval_commitment_base(F::ONE, auxiliary_polys_commitment) - [num_lookup_columns..] - .to_vec(), + ctl_zs_first, quotient_polys: eval_commitment(zeta, quotient_commitment), } } @@ -1051,7 +1055,7 @@ pub(crate) struct StarkOpeningSetTarget { pub auxiliary_polys: Vec>, /// `ExtensionTarget`s for the opening of lookups and cross-table lookups `Z` polynomials at `g * zeta`. pub auxiliary_polys_next: Vec>, - /// /// `ExtensionTarget`s for the opening of lookups and cross-table lookups `Z` polynomials at 1. + /// `ExtensionTarget`s for the opening of lookups and cross-table lookups `Z` polynomials at 1. pub ctl_zs_first: Vec, /// `ExtensionTarget`s for the opening of quotient polynomials at `zeta`. pub quotient_polys: Vec>, diff --git a/evm/src/prover.rs b/evm/src/prover.rs index c90490b8..e6426023 100644 --- a/evm/src/prover.rs +++ b/evm/src/prover.rs @@ -135,6 +135,7 @@ where &trace_poly_values, &all_stark.cross_table_lookups, &ctl_challenges, + all_stark.arithmetic_stark.constraint_degree() ) ); @@ -375,9 +376,14 @@ where // We add CTLs to the permutation arguments so that we can batch commit to // all auxiliary polynomials. let auxiliary_polys = match lookup_helper_columns { - None => ctl_data.z_polys(), + None => { + let mut ctl_polys = ctl_data.ctl_helper_polys(); + ctl_polys.extend(ctl_data.ctl_z_polys()); + ctl_polys + } Some(mut lookup_columns) => { - lookup_columns.extend(ctl_data.z_polys()); + lookup_columns.extend(ctl_data.ctl_helper_polys()); + lookup_columns.extend(ctl_data.ctl_z_polys()); lookup_columns } }; @@ -402,6 +408,8 @@ where let alphas = challenger.get_n_challenges(config.num_challenges); + let num_ctl_polys = ctl_data.num_ctl_helper_polys(); + #[cfg(test)] { check_constraints( @@ -414,6 +422,7 @@ where alphas.clone(), degree_bits, num_lookup_columns, + &num_ctl_polys, ); } @@ -432,6 +441,7 @@ where alphas, degree_bits, num_lookup_columns, + &num_ctl_polys, config, ) ); @@ -486,6 +496,7 @@ where &auxiliary_polys_commitment, "ient_commitment, stark.num_lookup_helper_columns(config), + &num_ctl_polys, ); // Get the FRI openings and observe them. 
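+    // These include `ctl_zs_first`, the CTL Z columns opened at 1 (the first
+    // subgroup element), which `verify_cross_table_lookups` later sums across
+    // tables.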
challenger.observe_openings(&openings.to_fri_openings()); @@ -502,7 +513,7 @@ where timing, "compute openings proof", PolynomialBatch::prove_openings( - &stark.fri_instance(zeta, g, ctl_data.len(), config), + &stark.fri_instance(zeta, g, num_ctl_polys.iter().sum(), num_ctl_polys, config), &initial_merkle_trees, challenger, &fri_params, @@ -535,6 +546,7 @@ fn compute_quotient_polys<'a, F, P, C, S, const D: usize>( alphas: Vec, degree_bits: usize, num_lookup_columns: usize, + num_ctl_columns: &[usize], config: &StarkConfig, ) -> Vec> where @@ -545,6 +557,7 @@ where { let degree = 1 << degree_bits; let rate_bits = config.fri_config.rate_bits; + let total_num_helper_cols: usize = num_ctl_columns.iter().sum(); let quotient_degree_bits = log2_ceil(stark.quotient_degree_factor()); assert!( @@ -616,18 +629,35 @@ where // - for each CTL: // - the filter `Column` // - the `Column`s that form the looking/looked table. + + let mut start_index = 0; let ctl_vars = ctl_data .zs_columns .iter() .enumerate() - .map(|(i, zs_columns)| CtlCheckVars:: { - local_z: auxiliary_polys_commitment.get_lde_values_packed(i_start, step) - [num_lookup_columns + i], - next_z: auxiliary_polys_commitment.get_lde_values_packed(i_next_start, step) - [num_lookup_columns + i], - challenges: zs_columns.challenge, - columns: &zs_columns.columns, - filter: &zs_columns.filter, + .map(|(i, zs_columns)| { + let num_ctl_helper_cols = num_ctl_columns[i]; + let helper_columns = auxiliary_polys_commitment + .get_lde_values_packed(i_start, step)[num_lookup_columns + + start_index + ..num_lookup_columns + start_index + num_ctl_helper_cols] + .to_vec(); + + let ctl_vars = CtlCheckVars:: { + helper_columns, + local_z: auxiliary_polys_commitment.get_lde_values_packed(i_start, step) + [num_lookup_columns + total_num_helper_cols + i], + next_z: auxiliary_polys_commitment + .get_lde_values_packed(i_next_start, step) + [num_lookup_columns + total_num_helper_cols + i], + challenges: zs_columns.challenge, + columns: zs_columns.columns.clone(), + filter: zs_columns.filter.clone(), + }; + + start_index += num_ctl_helper_cols; + + ctl_vars }) .collect::>(); @@ -691,6 +721,7 @@ fn check_constraints<'a, F, C, S, const D: usize>( alphas: Vec, degree_bits: usize, num_lookup_columns: usize, + num_ctl_helper_cols: &[usize], ) where F: RichField + Extendable, C: GenericConfig, @@ -699,6 +730,8 @@ fn check_constraints<'a, F, C, S, const D: usize>( let degree = 1 << degree_bits; let rate_bits = 0; // Set this to higher value to check constraint degree. + let total_num_helper_cols: usize = num_ctl_helper_cols.iter().sum(); + let size = degree << rate_bits; let step = 1 << rate_bits; @@ -754,18 +787,34 @@ fn check_constraints<'a, F, C, S, const D: usize>( }); // Get the local and next row evaluations for the current STARK's CTL Z polynomials. 
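+        // The layout of `auxiliary_subgroup_evals` is assumed to be
+        // [lookup helpers | CTL helpers | CTL Zs], hence the
+        // `num_lookup_columns + total_num_helper_cols + iii` offset for the Zs.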
+ let mut start_index = 0; let ctl_vars = ctl_data .zs_columns .iter() .enumerate() - .map(|(iii, zs_columns)| CtlCheckVars:: { - local_z: auxiliary_subgroup_evals[i][num_lookup_columns + iii], - next_z: auxiliary_subgroup_evals[i_next][num_lookup_columns + iii], - challenges: zs_columns.challenge, - columns: &zs_columns.columns, - filter: &zs_columns.filter, + .map(|(iii, zs_columns)| { + let num_helper_cols = num_ctl_helper_cols[iii]; + let helper_columns = auxiliary_subgroup_evals[i][num_lookup_columns + + start_index + ..num_lookup_columns + start_index + num_helper_cols] + .to_vec(); + let ctl_vars = CtlCheckVars:: { + helper_columns, + local_z: auxiliary_subgroup_evals[i] + [num_lookup_columns + total_num_helper_cols + iii], + next_z: auxiliary_subgroup_evals[i_next] + [num_lookup_columns + total_num_helper_cols + iii], + challenges: zs_columns.challenge, + columns: zs_columns.columns.clone(), + filter: zs_columns.filter.clone(), + }; + + start_index += num_helper_cols; + + ctl_vars }) .collect::>(); + // Evaluate the polynomial combining all constraints, including those associated // to the permutation and CTL arguments. eval_vanishing_poly::( diff --git a/evm/src/recursive_verifier.rs b/evm/src/recursive_verifier.rs index 5c10e3b3..8d10b498 100644 --- a/evm/src/recursive_verifier.rs +++ b/evm/src/recursive_verifier.rs @@ -229,10 +229,24 @@ where let zero_target = builder.zero(); let num_lookup_columns = stark.num_lookup_helper_columns(inner_config); - let num_ctl_zs = - CrossTableLookup::num_ctl_zs(cross_table_lookups, table, inner_config.num_challenges); - let proof_target = - add_virtual_stark_proof(&mut builder, stark, inner_config, degree_bits, num_ctl_zs); + let (total_num_helpers, num_ctl_zs, num_helpers_by_ctl) = + CrossTableLookup::num_ctl_helpers_zs_all( + cross_table_lookups, + table, + inner_config.num_challenges, + stark.constraint_degree(), + ); + let num_ctl_helper_zs = num_ctl_zs + total_num_helpers; + + let proof_target = add_virtual_stark_proof( + &mut builder, + stark, + inner_config, + degree_bits, + num_ctl_helper_zs, + num_ctl_zs, + ); + builder.register_public_inputs( &proof_target .trace_cap @@ -257,6 +271,8 @@ where cross_table_lookups, &ctl_challenges_target, num_lookup_columns, + total_num_helpers, + &num_helpers_by_ctl, ); let init_challenger_state_target = @@ -330,6 +346,12 @@ fn verify_stark_proof_with_challenges_circuit< let zero = builder.zero(); let one = builder.one_extension(); + let num_ctl_polys = ctl_vars + .iter() + .map(|ctl| ctl.helper_columns.len()) + .sum::(); + let num_ctl_z_polys = ctl_vars.len(); + let StarkOpeningSetTarget { local_values, next_values, @@ -407,6 +429,7 @@ fn verify_stark_proof_with_challenges_circuit< builder, challenges.stark_zeta, F::primitive_root_of_unity(degree_bits), + num_ctl_polys, ctl_zs_first.len(), inner_config, ); @@ -759,6 +782,7 @@ pub(crate) fn add_virtual_stark_proof< stark: &S, config: &StarkConfig, degree_bits: usize, + num_ctl_helper_zs: usize, num_ctl_zs: usize, ) -> StarkProofTarget { let fri_params = config.fri_params(degree_bits); @@ -766,7 +790,7 @@ pub(crate) fn add_virtual_stark_proof< let num_leaves_per_oracle = vec![ S::COLUMNS, - stark.num_lookup_helper_columns(config) + num_ctl_zs, + stark.num_lookup_helper_columns(config) + num_ctl_helper_zs, stark.quotient_degree_factor() * config.num_challenges, ]; @@ -776,7 +800,13 @@ pub(crate) fn add_virtual_stark_proof< trace_cap: builder.add_virtual_cap(cap_height), auxiliary_polys_cap, quotient_polys_cap: builder.add_virtual_cap(cap_height), - 
openings: add_virtual_stark_opening_set::(builder, stark, num_ctl_zs, config), + openings: add_virtual_stark_opening_set::( + builder, + stark, + num_ctl_helper_zs, + num_ctl_zs, + config, + ), opening_proof: builder.add_virtual_fri_proof(&num_leaves_per_oracle, &fri_params), } } @@ -784,6 +814,7 @@ pub(crate) fn add_virtual_stark_proof< fn add_virtual_stark_opening_set, S: Stark, const D: usize>( builder: &mut CircuitBuilder, stark: &S, + num_ctl_helper_zs: usize, num_ctl_zs: usize, config: &StarkConfig, ) -> StarkOpeningSetTarget { @@ -791,10 +822,12 @@ fn add_virtual_stark_opening_set, S: Stark, c StarkOpeningSetTarget { local_values: builder.add_virtual_extension_targets(S::COLUMNS), next_values: builder.add_virtual_extension_targets(S::COLUMNS), - auxiliary_polys: builder - .add_virtual_extension_targets(stark.num_lookup_helper_columns(config) + num_ctl_zs), - auxiliary_polys_next: builder - .add_virtual_extension_targets(stark.num_lookup_helper_columns(config) + num_ctl_zs), + auxiliary_polys: builder.add_virtual_extension_targets( + stark.num_lookup_helper_columns(config) + num_ctl_helper_zs, + ), + auxiliary_polys_next: builder.add_virtual_extension_targets( + stark.num_lookup_helper_columns(config) + num_ctl_helper_zs, + ), ctl_zs_first: builder.add_virtual_targets(num_ctl_zs), quotient_polys: builder .add_virtual_extension_targets(stark.quotient_degree_factor() * num_challenges), diff --git a/evm/src/stark.rs b/evm/src/stark.rs index 6099abdd..6ee26f58 100644 --- a/evm/src/stark.rs +++ b/evm/src/stark.rs @@ -92,7 +92,8 @@ pub trait Stark, const D: usize>: Sync { &self, zeta: F::Extension, g: F, - num_ctl_zs: usize, + num_ctl_helpers: usize, + num_ctl_zs: Vec, config: &StarkConfig, ) -> FriInstanceInfo { let trace_oracle = FriOracleInfo { @@ -102,7 +103,7 @@ pub trait Stark, const D: usize>: Sync { let trace_info = FriPolynomialInfo::from_range(TRACE_ORACLE_INDEX, 0..Self::COLUMNS); let num_lookup_columns = self.num_lookup_helper_columns(config); - let num_auxiliary_polys = num_lookup_columns + num_ctl_zs; + let num_auxiliary_polys = num_lookup_columns + num_ctl_helpers + num_ctl_zs.len(); let auxiliary_oracle = FriOracleInfo { num_polys: num_auxiliary_polys, blinding: false, @@ -110,9 +111,10 @@ pub trait Stark, const D: usize>: Sync { let auxiliary_polys_info = FriPolynomialInfo::from_range(AUXILIARY_ORACLE_INDEX, 0..num_auxiliary_polys); + let mut start_index = num_lookup_columns; let ctl_zs_info = FriPolynomialInfo::from_range( AUXILIARY_ORACLE_INDEX, - num_lookup_columns..num_lookup_columns + num_ctl_zs, + num_lookup_columns + num_ctl_helpers..num_auxiliary_polys, ); let num_quotient_polys = self.num_quotient_polys(config); @@ -152,6 +154,7 @@ pub trait Stark, const D: usize>: Sync { builder: &mut CircuitBuilder, zeta: ExtensionTarget, g: F, + num_ctl_helper_polys: usize, num_ctl_zs: usize, inner_config: &StarkConfig, ) -> FriInstanceInfoTarget { @@ -162,7 +165,7 @@ pub trait Stark, const D: usize>: Sync { let trace_info = FriPolynomialInfo::from_range(TRACE_ORACLE_INDEX, 0..Self::COLUMNS); let num_lookup_columns = self.num_lookup_helper_columns(inner_config); - let num_auxiliary_polys = num_lookup_columns + num_ctl_zs; + let num_auxiliary_polys = num_lookup_columns + num_ctl_helper_polys + num_ctl_zs; let auxiliary_oracle = FriOracleInfo { num_polys: num_auxiliary_polys, blinding: false, @@ -172,7 +175,8 @@ pub trait Stark, const D: usize>: Sync { let ctl_zs_info = FriPolynomialInfo::from_range( AUXILIARY_ORACLE_INDEX, - num_lookup_columns..num_lookup_columns + num_ctl_zs, + 
num_lookup_columns + num_ctl_helper_polys + ..num_lookup_columns + num_ctl_helper_polys + num_ctl_zs, ); let num_quotient_polys = self.num_quotient_polys(inner_config); diff --git a/evm/src/vanishing_poly.rs b/evm/src/vanishing_poly.rs index 15444c7e..c1f2d0f9 100644 --- a/evm/src/vanishing_poly.rs +++ b/evm/src/vanishing_poly.rs @@ -42,7 +42,12 @@ pub(crate) fn eval_vanishing_poly( ); } // Evaluate the STARK constraints related to the cross-table lookups. - eval_cross_table_lookup_checks::(vars, ctl_vars, consumer); + eval_cross_table_lookup_checks::( + vars, + ctl_vars, + consumer, + stark.constraint_degree(), + ); } /// Circuit version of `eval_vanishing_poly`. @@ -66,5 +71,11 @@ pub(crate) fn eval_vanishing_poly_circuit( eval_ext_lookups_circuit::(builder, stark, vars, lookup_vars, consumer); } // Evaluate all of the STARK's constraints related to the cross-table lookups. - eval_cross_table_lookup_checks_circuit::(builder, vars, ctl_vars, consumer); + eval_cross_table_lookup_checks_circuit::( + builder, + vars, + ctl_vars, + consumer, + stark.constraint_degree(), + ); } diff --git a/evm/src/verifier.rs b/evm/src/verifier.rs index b69cd927..9eb12100 100644 --- a/evm/src/verifier.rs +++ b/evm/src/verifier.rs @@ -16,7 +16,8 @@ use crate::constraint_consumer::ConstraintConsumer; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::cross_table_lookup::{ - verify_cross_table_lookups, CtlCheckVars, GrandProductChallenge, GrandProductChallengeSet, + num_ctl_helper_columns_by_table, verify_cross_table_lookups, CtlCheckVars, + GrandProductChallenge, GrandProductChallengeSet, }; use crate::evaluation_frame::StarkEvaluationFrame; use crate::lookup::LookupCheckVars; @@ -56,11 +57,17 @@ where cross_table_lookups, } = all_stark; + let num_ctl_helper_cols = num_ctl_helper_columns_by_table( + cross_table_lookups, + all_stark.arithmetic_stark.constraint_degree(), + ); + let ctl_vars_per_table = CtlCheckVars::from_proofs( &all_proof.stark_proofs, cross_table_lookups, &ctl_challenges, &num_lookup_columns, + &num_ctl_helper_cols, ); verify_stark_proof_with_challenges( @@ -300,7 +307,12 @@ pub(crate) fn verify_stark_proof_with_challenges< config: &StarkConfig, ) -> Result<()> { log::debug!("Checking proof: {}", type_name::()); - validate_proof_shape(stark, proof, config, ctl_vars.len())?; + let num_ctl_polys = ctl_vars + .iter() + .map(|ctl| ctl.helper_columns.len()) + .sum::(); + let num_ctl_z_polys = ctl_vars.len(); + validate_proof_shape(stark, proof, config, num_ctl_polys, num_ctl_z_polys)?; let StarkOpeningSet { local_values, next_values, @@ -374,11 +386,16 @@ pub(crate) fn verify_stark_proof_with_challenges< proof.quotient_polys_cap.clone(), ]; + let num_ctl_zs = ctl_vars + .iter() + .map(|ctl| ctl.helper_columns.len()) + .collect::>(); verify_fri_proof::( &stark.fri_instance( challenges.stark_zeta, F::primitive_root_of_unity(degree_bits), - ctl_zs_first.len(), + num_ctl_polys, + num_ctl_zs, config, ), &proof.openings.to_fri_openings(), @@ -395,6 +412,7 @@ fn validate_proof_shape( stark: &S, proof: &StarkProof, config: &StarkConfig, + num_ctl_helpers: usize, num_ctl_zs: usize, ) -> anyhow::Result<()> where @@ -424,7 +442,8 @@ where let degree_bits = proof.recover_degree_bits(config); let fri_params = config.fri_params(degree_bits); let cap_height = fri_params.config.cap_height; - let num_auxiliary = num_ctl_zs + stark.num_lookup_helper_columns(config); + + let num_auxiliary = num_ctl_helpers + stark.num_lookup_helper_columns(config) 
+ num_ctl_zs; ensure!(trace_cap.height() == cap_height); ensure!(auxiliary_polys_cap.height() == cap_height); diff --git a/field/src/extension/mod.rs b/field/src/extension/mod.rs index bbbaca25..3586055e 100644 --- a/field/src/extension/mod.rs +++ b/field/src/extension/mod.rs @@ -15,7 +15,7 @@ pub trait OEF: FieldExtension { // Element W of BaseField, such that `X^d - W` is irreducible over BaseField. const W: Self::BaseField; - // Element of BaseField such that DTH_ROOT^D == 1. Implementors + // Element of BaseField such that DTH_ROOT^D == 1. Implementers // should set this to W^((p - 1)/D), where W is as above and p is // the order of the BaseField. const DTH_ROOT: Self::BaseField; diff --git a/plonky2/src/gates/selectors.rs b/plonky2/src/gates/selectors.rs index 1018ba75..be9a9da8 100644 --- a/plonky2/src/gates/selectors.rs +++ b/plonky2/src/gates/selectors.rs @@ -40,7 +40,7 @@ pub enum LookupSelectors { } /// Returns selector polynomials for each LUT. We have two constraint domains (remember that gates are stored upside down): -/// - [last_lut_row, first_lut_row] (Sum and RE transition contraints), +/// - [last_lut_row, first_lut_row] (Sum and RE transition constraints), /// - [last_lu_row, last_lut_row - 1] (LDC column transition constraints). /// We also add two more: /// - {first_lut_row + 1} where we check the initial values of sum and RE (which are 0), diff --git a/plonky2/src/hash/poseidon_goldilocks.rs b/plonky2/src/hash/poseidon_goldilocks.rs index 3e089e64..1fd5af13 100644 --- a/plonky2/src/hash/poseidon_goldilocks.rs +++ b/plonky2/src/hash/poseidon_goldilocks.rs @@ -323,7 +323,7 @@ mod poseidon12_mds { let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]); // This where the multiplication in frequency domain is done. More precisely, and with - // the appropriate permuations in between, the sequence of + // the appropriate permutations in between, the sequence of // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication --> // 3 point iFFTs --> multiplication by (inverse) twiddle factors // is "squashed" into one step composed of the functions "block1", "block2" and "block3".