Remove plonky2_evm post-migration to zk_evm monorepo (#1530)

Robin Salen 2024-02-17 11:04:07 -05:00 committed by GitHub
parent a7b985ce39
commit 3e579b6d43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
335 changed files with 12 additions and 69991 deletions

@@ -51,14 +51,6 @@ jobs:
CARGO_INCREMENTAL: 1
RUST_BACKTRACE: 1
- name: Check in evm subdirectory
run: cargo check --manifest-path evm/Cargo.toml
env:
RUSTFLAGS: -Copt-level=3 -Cdebug-assertions -Coverflow-checks=y -Cdebuginfo=0
RUST_LOG: 1
CARGO_INCREMENTAL: 1
RUST_BACKTRACE: 1
- name: Run cargo test
run: cargo test --workspace
env:

@@ -1,5 +1,5 @@
[workspace]
members = ["evm", "field", "maybe_rayon", "plonky2", "starky", "util"]
members = ["field", "maybe_rayon", "plonky2", "starky", "util"]
resolver = "2"
[profile.release]

@@ -179,8 +179,14 @@ Plonky2's default hash function is Poseidon, configured with 8 full rounds, 22 partial rounds
## Links
- [System Zero](https://github.com/0xPolygonZero/system-zero), a zkVM built on top of Starky (no longer maintained)
- [Waksman](https://github.com/0xPolygonZero/plonky2-waksman), Plonky2 gadgets for permutation checking using Waksman networks (no longer maintained)
- [Insertion](https://github.com/0xPolygonZero/plonky2-insertion), Plonky2 gadgets for insertion into a list (no longer maintained)
- [u32](https://github.com/0xPolygonZero/plonky2-u32), Plonky2 gadgets for u32 arithmetic (no longer actively maintained)
- [ECDSA](https://github.com/0xPolygonZero/plonky2-ecdsa), Plonky2 gadgets for the ECDSA algorithm (no longer actively maintained)
#### Actively maintained
- [Polygon Zero's zkEVM](https://github.com/0xPolygonZero/zk_evm), an efficient Type 1 zkEVM built on top of Starky and plonky2
#### No longer maintained
- [System Zero](https://github.com/0xPolygonZero/system-zero), a zkVM built on top of Starky
- [Waksman](https://github.com/0xPolygonZero/plonky2-waksman), Plonky2 gadgets for permutation checking using Waksman networks
- [Insertion](https://github.com/0xPolygonZero/plonky2-insertion), Plonky2 gadgets for insertion into a list
- [u32](https://github.com/0xPolygonZero/plonky2-u32), Plonky2 gadgets for u32 arithmetic
- [ECDSA](https://github.com/0xPolygonZero/plonky2-ecdsa), Plonky2 gadgets for the ECDSA algorithm

@@ -1 +0,0 @@
../../.cargo/katex-header.html

@@ -1,71 +0,0 @@
[package]
name = "plonky2_evm"
description = "Implementation of STARKs for the Ethereum Virtual Machine"
version = "0.1.1"
license = "MIT OR Apache-2.0"
authors = ["Daniel Lubarov <daniel@lubarov.com>", "William Borgeaud <williamborgeaud@gmail.com>"]
readme = "README.md"
repository = "https://github.com/0xPolygonZero/plonky2"
keywords = ["EVM", "STARK", "Ethereum"]
categories = ["cryptography"]
edition = "2021"
[dependencies]
anyhow = "1.0.40"
bytes = "1.4.0"
env_logger = "0.10.0"
eth_trie_utils = { git = "https://github.com/0xPolygonZero/eth_trie_utils.git", rev = "7fc3c3f54b3cec9c6fc5ffc5230910bd1cb77f76" }
ethereum-types = "0.14.0"
hex = { version = "0.4.3", optional = true }
hex-literal = "0.4.1"
itertools = "0.11.0"
keccak-hash = "0.10.0"
log = "0.4.14"
plonky2_maybe_rayon = { path = "../maybe_rayon" }
num = "0.4.0"
num-bigint = "0.4.3"
once_cell = "1.13.0"
pest = "2.1.3"
pest_derive = "2.1.0"
plonky2 = { path = "../plonky2", features = ["timing"] }
plonky2_util = { path = "../util" }
starky = { path = "../starky" }
rand = "0.8.5"
rand_chacha = "0.3.1"
rlp = "0.5.1"
rlp-derive = "0.1.0"
serde = { version = "1.0.144", features = ["derive"] }
static_assertions = "1.1.0"
hashbrown = { version = "0.14.0" }
tiny-keccak = "2.0.2"
serde_json = "1.0"
[target.'cfg(not(target_env = "msvc"))'.dependencies]
jemallocator = "0.5.0"
[dev-dependencies]
criterion = "0.5.1"
hex = "0.4.3"
ripemd = "0.1.3"
sha2 = "0.10.6"
[features]
default = ["parallel"]
asmtools = ["hex"]
parallel = [
"plonky2/parallel",
"plonky2_maybe_rayon/parallel",
"starky/parallel"
]
[[bin]]
name = "assemble"
required-features = ["asmtools"]
[[bench]]
name = "stack_manipulation"
harness = false
# Display math equations properly in documentation
[package.metadata.docs.rs]
rustdoc-args = ["--html-in-header", ".cargo/katex-header.html"]

@@ -1,176 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

@@ -1,19 +0,0 @@
The MIT License (MIT)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

@@ -1,36 +0,0 @@
# Provable Stateless ZK-EVM
Included here is a stateless, recursive ZK-EVM client implemented using Plonky2. It currently supports the full Merkle Patricia Trie and has all Shanghai opcodes implemented.
## Performance
This implementation is able to provide transaction-level proofs which are then recursively aggregated into a block proof. This means that proofs for a block can be efficiently distributed across a cluster of computers. As these proofs use Plonky2, they are CPU- and memory-bound. The ability to scale horizontally across transactions increases the total performance of the system dramatically. End-to-end workflows are currently in progress to support this proving mode against live EVM networks.
Furthermore, the implementation itself is highly optimized to provide fast proving times on generally available cloud instances and does not require GPUs or special hardware.
## Ethereum Compatibility
The aim of this module is to initially provide full Ethereum compatibility. Today, all [EVM tests](https://github.com/0xPolygonZero/evm-tests) for the Shanghai hardfork are implemented. Work is progressing on supporting the upcoming [Cancun](https://github.com/0xPolygonZero/plonky2/labels/cancun) EVM changes. Furthermore, this prover uses the full Ethereum state tree and hashing modes.
## Audits
Audits for the ZK-EVM will begin on November 27th, 2023. See the [Audit RC1 Milestone](https://github.com/0xPolygonZero/plonky2/milestone/2?closed=1). This README will be updated with the proper branches and hashes when the audit has commenced.
## Documentation / Specification
The current specification is located in the [/spec](/spec) directory, with the most currently up-to-date PDF [available here](https://github.com/0xPolygonZero/plonky2/blob/main/evm/spec/zkevm.pdf). Further documentation will be made over the coming months.
## License
Copyright (c) 2023 PT Services DMCC
Licensed under either of:
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
The SPDX license identifier for this project is `MIT OR Apache-2.0`.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@@ -1,75 +0,0 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use plonky2_evm::cpu::kernel::assemble_to_bytes;
fn criterion_benchmark(c: &mut Criterion) {
rotl_group(c);
rotr_group(c);
insert_group(c);
delete_group(c);
replace_group(c);
shuffle_group(c);
misc_group(c);
}
fn rotl_group(c: &mut Criterion) {
let mut group = c.benchmark_group("rotl");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (b, c, d, e, f, g, h, a)"))
});
}
fn rotr_group(c: &mut Criterion) {
let mut group = c.benchmark_group("rotr");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (h, a, b, c, d, e, f, g)"))
});
}
fn insert_group(c: &mut Criterion) {
let mut group = c.benchmark_group("insert");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (a, b, c, d, 123, e, f, g, h)"))
});
}
fn delete_group(c: &mut Criterion) {
let mut group = c.benchmark_group("delete");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (a, b, c, e, f, g, h)"))
});
}
fn replace_group(c: &mut Criterion) {
let mut group = c.benchmark_group("replace");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (a, b, c, 5, e, f, g, h)"))
});
}
fn shuffle_group(c: &mut Criterion) {
let mut group = c.benchmark_group("shuffle");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, d, e, f, g, h) -> (g, d, h, a, f, e, b, c)"))
});
}
fn misc_group(c: &mut Criterion) {
let mut group = c.benchmark_group("misc");
group.sample_size(10);
group.bench_function(BenchmarkId::from_parameter(8), |b| {
b.iter(|| assemble("%stack (a, b, c, a, e, f, g, h) -> (g, 1, h, g, f, 3, b, b)"))
});
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
fn assemble(code: &str) {
assemble_to_bytes(&[code.into()]);
}

evm/spec/.gitignore

@@ -1,7 +0,0 @@
## Files generated by pdflatex, bibtex, etc.
*.aux
*.log
*.out
*.toc
*.bbl
*.blg

@@ -1,20 +0,0 @@
DOCNAME=zkevm

all: pdf

.PHONY: clean

quick:
	pdflatex $(DOCNAME).tex

pdf:
	pdflatex $(DOCNAME).tex
	bibtex $(DOCNAME).aux
	pdflatex $(DOCNAME).tex
	pdflatex $(DOCNAME).tex

view: pdf
	open $(DOCNAME).pdf

clean:
	rm -f *.blg *.bbl *.aux *.log

@@ -1,30 +0,0 @@
@misc{stark,
author = {Eli Ben-Sasson and
Iddo Bentov and
Yinon Horesh and
Michael Riabzev},
title = {Scalable, transparent, and post-quantum secure computational integrity},
howpublished = {Cryptology ePrint Archive, Report 2018/046},
year = {2018},
note = {\url{https://ia.cr/2018/046}},
}
@misc{plonk,
author = {Ariel Gabizon and
Zachary J. Williamson and
Oana Ciobotaru},
title = {PLONK: Permutations over Lagrange-bases for Oecumenical Noninteractive arguments of Knowledge},
howpublished = {Cryptology ePrint Archive, Report 2019/953},
year = {2019},
note = {\url{https://ia.cr/2019/953}},
}
@article{yellowpaper,
title={Ethereum: A secure decentralised generalised transaction ledger},
author={Wood, Gavin and others},
journal={Ethereum project yellow paper},
volume={151},
number={2014},
pages={1--32},
year={2014}
}

@@ -1,285 +0,0 @@
\section{CPU logic}
\label{cpulogic}
The CPU is in charge of coordinating the different STARKs, proving the correct execution of the instructions it reads and guaranteeing
that the final state of the EVM corresponds to the starting state after executing the input transaction. All design choices were made
to make sure these properties can be adequately translated into constraints of degree at most 3 while minimizing the size of the different
table traces (number of columns and number of rows).
In this section, we will detail some of these choices.
\subsection{Kernel}
The kernel is in charge of the proving logic. This section aims to provide a high-level overview of this logic. For details about any specific part of the logic, one can consult the various ``asm'' files in the \href{https://github.com/0xPolygonZero/plonky2/tree/main/evm/src/cpu/kernel}{``kernel'' folder}.
We prove one transaction at a time. These proofs can later be aggregated recursively to prove a block. Proof aggregation is however not in the scope of this section. Here, we assume that we have an initial state of the EVM, and we wish to prove that a single transaction was correctly executed, leading to a correct update of the state.
Since we process one transaction at a time, a few intermediary values need to be provided by the prover. Indeed, to prove that the registers in the EVM state are correctly updated, we need to have access to their initial values. When aggregating proofs, we can also constrain those values to match from one transaction to the next. Let us consider the example of the transaction number. Let $n$ be the number of transactions executed so far in the current block. If the current proof is not a dummy one (we are indeed executing a transaction), then the transaction number should be updated: $n := n+1$. Otherwise, the number remains unchanged. We can easily constrain this update. When aggregating the previous transaction proof ($lhs$) with the current one ($rhs$), we also need to check that the output transaction number of $lhs$ is the same as the input transaction number of $rhs$.
Those prover provided values are stored in memory prior to entering the kernel, and are used in the kernel to assert correct updates. The list of prover provided values necessary to the kernel is the following:
\begin{enumerate}
\item the previous transaction number: $t_n$,
\item the gas used before executing the current transaction: $g\_u_0$,
\item the gas used after executing the current transaction: $g\_u_1$,
\item the state, transaction and receipts MPTs before executing the current transaction: $\texttt{tries}_0$,
\item the hash of all MPTs before executing the current transaction: $\texttt{digests}_0$,
\item the hash of all MPTs after executing the current transaction: $\texttt{digests}_1$,
\item the RLP encoding of the transaction.
\end{enumerate}
\paragraph*{Initialization:} The first step consists in initializing:
\begin{itemize}
\item The shift table: it maps the number of bit shifts $s$ with its shifted value $1 << s$. Note that $0 \leq s \leq 255$.
\item The initial MPTs: the initial state, transaction and receipt tries $\texttt{tries}_0$ are loaded from memory and hashed. The hashes are then compared to $\texttt{digests}_0$.
\item We load the transaction number $t_n$ and the current gas used $g\_u_0$ from memory.
\end{itemize}
If no transaction is provided, we can halt after this initialization. Otherwise, we start processing the transaction. The transaction is provided as its RLP encoding. We can deduce the various transaction fields (such as its type or the transfer value) from its encoding. Based on this, the kernel updates the state trie by executing the transaction. Processing the transaction also includes updating the transactions MPT with the transaction at hand.
The processing of the transaction returns a boolean ``success'' that indicates whether the transaction was executed successfully, along with the leftover gas.
The following step is then to update the receipts MPT. Here, we update the transaction's bloom filter. We store ``success'', the leftover gas, the transaction bloom filter and the logs in memory. We also store some additional information that facilitates the RLP encoding of the receipts later.
If there are any withdrawals, they are performed at this stage.
Finally, once the three MPTs have been updated, we need to carry out final checks:
\begin{itemize}
\item the gas used after the execution is equal to $g\_u_1$,
\item the new transaction number is $n+1$ if there was a transaction,
\item the three MPTs are hashed and checked against $\texttt{digests}_1$.
\end{itemize}
Once those final checks are performed, the program halts.
\subsection{Simple opcodes vs. syscalls}
For simplicity and efficiency, EVM opcodes are categorized into two groups: ``simple opcodes'' and ``syscalls''. Simple opcodes are generated directly in Rust, in \href{https://github.com/0xPolygonZero/plonky2/blob/main/evm/src/witness/operation.rs}{operation.rs}. Every call to a simple opcode adds exactly one row to the \href{https://github.com/0xPolygonZero/plonky2/blob/main/evm/spec/tables/cpu.tex}{cpu table}. Syscalls are more complex structures written with simple opcodes, in the kernel.
Whenever we encounter a syscall, we switch to kernel mode and execute its associated code. At the end of each syscall, we run EXIT\_KERNEL, which resets the kernel mode to its state right before the syscall. It also sets the PC to point to the opcode right after the syscall.
Exceptions are handled differently for simple opcodes and syscalls. When necessary, simple opcodes throw an exception (see \ref{exceptions}). This activates the ``exception flag'' in the CPU and runs the exception operations. On the other hand, syscalls handle exceptions in the kernel directly.
\subsection{Privileged instructions}
To simplify and speed up proving, the zkEVM supports custom, privileged instructions that can only be executed by the kernel.
Any appearance of those privileged instructions in a contract's bytecode, for instance, would result in an unprovable state.
In what follows, we denote by $p_{BN}$ the characteristic of the BN254 curve base field, the curve for which Ethereum supports the
ecAdd, ecMul and ecPairing precompiles.
\begin{enumerate}[align=left]
\item[0x0C.] \texttt{ADDFP254}. Pops 2 elements from the stack interpreted as BN254 base field elements, and pushes their addition modulo $p_{BN}$ onto the stack.
\item[0x0D.] \texttt{MULFP254}. Pops 2 elements from the stack interpreted as BN254 base field elements, and pushes their product modulo $p_{BN}$ onto the stack.
\item[0x0E.] \texttt{SUBFP254}. Pops 2 elements from the stack interpreted as BN254 base field elements, and pushes their difference modulo $p_{BN}$ onto the stack.
This instruction behaves similarly to the SUB (0x03) opcode, in that we subtract the second element of the stack from the initial (top) one.
\item[0x0F.] \texttt{SUBMOD}. Pops 3 elements from the stack, and pushes the modular difference of the first two elements of the stack by the third one.
It is similar to the SUB instruction, with an extra pop for the custom modulus.
\item[0x21.] \texttt{KECCAK\_GENERAL}. Pops 2 elements (a Memory address, followed by a length $\ell$) and pushes the hash of the memory portion starting at the
constructed address and of length $\ell$. It is similar to the KECCAK256 (0x20) instruction, but can be applied to any memory section (i.e. even privileged ones).
\item[0x49.] \texttt{PROVER\_INPUT}. Pushes a single prover input onto the stack.
\item[0xC0-0xDF.] \texttt{MSTORE\_32BYTES}. Pops 2 elements from the stack (a Memory address, and then a value), and pushes
a new address onto the stack. The value is decomposed into bytes and written to memory, starting from the fetched address. The new address being pushed is computed as the
initial address plus the length of the byte sequence written to memory. Note that, similarly to the PUSH (0x60-0x7F) instructions, there are 32 MSTORE\_32BYTES instructions, each
corresponding to a target byte length (length 0 is ignored, for the same reasons as MLOAD\_32BYTES, see below). Writing to memory an integer fitting in $n$ bytes with a length $\ell < n$ will
result in the integer being truncated. On the other hand, specifying a length $\ell$ greater than the byte size of the value being written will result in padding with zeroes. This
process is heavily used when resetting memory sections (by calling MSTORE\_32BYTES\_32 with the value 0). A sketch of this truncation/padding behavior follows this list.
\item[0xF6.] \texttt{GET\_CONTEXT}. Pushes the current context onto the stack. The kernel always has context 0.
\item[0xF7.] \texttt{SET\_CONTEXT}. Pops the top element of the stack and updates the current context to this value. It is usually used when calling another contract or precompile,
to distinguish the caller from the callee.
\item[0xF8.] \texttt{MLOAD\_32BYTES}. Pops 2 elements from the stack (a Memory address, and then a length $\ell$), and pushes
a value onto the stack. The pushed value corresponds to the U256 integer read from the big-endian sequence of length $\ell$ starting at the fetched memory address. Note that an
empty length is not valid, nor is a length greater than 32 (as a U256 consists of at most 32 bytes). Violating these conditions will result in an unverifiable proof.
\item[0xF9.] \texttt{EXIT\_KERNEL}. Pops 1 element from the stack. This instruction is used at the end of a syscall, before proceeding to the rest of the execution logic.
The popped element, \textit{kexit\_info}, contains several pieces of information like the current program counter, the current amount of gas used, and whether we are in kernel (i.e. privileged) mode or not.
\item[0xFB.] \texttt{MLOAD\_GENERAL}. Pops 1 element (a Memory address), and pushes the value stored at this memory
address onto the stack. It can read any memory location, general (similarly to the MLOAD (0x51) instruction) or privileged.
\item[0xFC.] \texttt{MSTORE\_GENERAL}. Pops 2 elements (a value and a Memory address), and writes the popped value from
the stack at the fetched address. It can write to any memory location, general (similarly to the MSTORE (0x52) / MSTORE8 (0x53) instructions) or privileged.
\end{enumerate}
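As referenced above, here is a small illustrative Rust sketch of the MSTORE\_32BYTES truncation/padding semantics. The helper name and the flat, byte-addressed memory model are assumptions for illustration only, not the kernel's actual memory representation.

```rust
// Illustrative sketch, not the kernel's code: `memory` stands in for a
// flat, byte-addressed segment; `value` is a 256-bit word in big-endian.
fn mstore_32bytes(memory: &mut Vec<u8>, addr: usize, value: [u8; 32], len: usize) -> usize {
    assert!((1..=32).contains(&len), "length 0 is ignored; at most 32 bytes");
    if memory.len() < addr + len {
        memory.resize(addr + len, 0);
    }
    // Keep the `len` least-significant bytes of the big-endian value:
    // a value needing more than `len` bytes is truncated, a smaller
    // value is implicitly left-padded with zeroes.
    memory[addr..addr + len].copy_from_slice(&value[32 - len..]);
    addr + len // the new address pushed onto the stack
}
```

For instance, resetting a 32-byte region amounts to `mstore_32bytes(&mut mem, addr, [0u8; 32], 32)`, matching the MSTORE\_32BYTES\_32-with-value-0 idiom mentioned above.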
\subsection{Memory addresses}
\label{memoryaddresses}
Kernel operations deal with memory addresses as single U256 elements.
However, when processing the operations to generate the proof witness, the CPU will decompose these into three components:
\begin{itemize}
\item[context.] The context of the memory address. The Kernel context is special, and has value 0.
\item[segment.] The segment of the memory address, corresponding to a specific section given a context (e.g. MPT data, global metadata, etc.).
\item[virtual.] The offset of the memory address, within a segment given a context.
\end{itemize}
To easily retrieve these components, we scale them so that they can represent a memory address as:
$$ \mathrm{addr} = 2^{64} \cdot \mathrm{context} + 2^{32} \cdot \mathrm{segment} + \mathrm{offset}$$
This makes it easy to retrieve each component individually once a Memory address has been decomposed into 32-bit limbs.
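A minimal sketch of this packing, assuming each component fits in 32 bits (helper names are illustrative, not taken from the codebase):

```rust
// Pack (context, segment, offset) into addr = 2^64*ctx + 2^32*seg + off.
fn pack_addr(context: u64, segment: u64, offset: u64) -> u128 {
    debug_assert!(context < 1 << 32 && segment < 1 << 32 && offset < 1 << 32);
    ((context as u128) << 64) + ((segment as u128) << 32) + offset as u128
}

// Each component is exactly one 32-bit limb of the packed address.
fn unpack_addr(addr: u128) -> (u64, u64, u64) {
    let offset = (addr & 0xffff_ffff) as u64;          // low limb
    let segment = ((addr >> 32) & 0xffff_ffff) as u64; // next limb
    let context = (addr >> 64) as u64;
    (context, segment, offset)
}
```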
\subsection{Stack handling}
\label{stackhandling}
\subsubsection{Top of the stack}
The majority of memory operations involve the stack. The stack is a segment in memory, and stack operations (popping or pushing) use the memory channels.
Every CPU instruction performs between 0 and 3 pops, and may push at most once. However, for efficiency purposes, we hold the top of the stack in
the first memory channel \texttt{current\_row.mem\_channels[0]}, only writing it in memory if necessary.
\paragraph*{Motivation:}
See \href{https://github.com/0xPolygonZero/plonky2/issues/1149}{this issue}.
\paragraph*{Top reading and writing:}
When a CPU instruction modifies the stack, it must update the top of the stack accordingly. There are three cases.
\begin{itemize}
\item \textbf{The instruction pops and pushes:} The new top of the stack is stored in \texttt{next\_row.mem\_channels[0]}; it may be computed by the instruction,
or it could be read from memory. In either case, the instruction is responsible for setting \texttt{next\_row.mem\_channels[0]}'s flags and address columns correctly.
After use, the previous top of the stack is discarded and doesn't need to be written in memory.
\item \textbf{The instruction pushes, but doesn't pop:} The new top of the stack is stored in \texttt{next\_row.mem\_channels[0]}; it may be computed by the instruction,
or it could be read from memory. In either case, the instruction is responsible for setting \texttt{next\_row.mem\_channels[0]}'s flags and address columns correctly.
If the stack wasn't empty (\texttt{current\_row.stack\_len > 0}), the instruction performs a memory read in \texttt{current\_row.partial\_channel}. \texttt{current\_row.partial\_channel}
shares its values with \texttt{current\_row.mem\_channels[0]} (which holds the current top of the stack). If the stack was empty, \texttt{current\_row.partial\_channel}
is disabled.
\item \textbf{The instruction pops, but doesn't push:} After use, the current top of the stack is discarded and doesn't need to be written in memory.
If the stack isn't empty now (\texttt{current\_row.stack\_len > num\_pops}), the new top of the stack is set in \texttt{next\_row.mem\_channels[0]}
with a memory read from the stack segment. If the stack is now empty, \texttt{next\_row.mem\_channels[0]} is disabled.
\end{itemize}
In the last two cases, there is an edge case if \texttt{current\_row.stack\_len} is equal to a \texttt{special\_len}. For a strictly pushing instruction,
this happens if the stack is empty, and \texttt{special\_len = 0}. For a strictly popping instruction, this happens if the next stack is empty, i.e. if
all remaining elements are popped, and \texttt{special\_len = num\_pops}. Note that we do not need to check for values below \texttt{num\_pops}, since this
would be a stack underflow exception which is handled separately.
The edge case is detected with the compound flag
$$\texttt{1 - not\_special\_len * stack\_inv\_aux},$$
where $$\texttt{not\_special\_len = current\_row.stack\_len - special\_len}$$
and \texttt{stack\_inv\_aux} is constrained to be the modular inverse of \texttt{not\_special\_len} if it's non-zero, or 0 otherwise. The flag is 1
if \texttt{stack\_len} is equal to \texttt{special\_len}, and 0 otherwise.
This logic can be found in code in the \texttt{eval\_packed\_one} function of \href{https://github.com/0xPolygonZero/plonky2/blob/main/evm/src/cpu/stack.rs}{stack.rs}.
The function multiplies all of the stack constraints with the degree 1 filter associated with the current instruction.
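As a concrete illustration, here is a minimal Rust sketch of this inverse-based flag over the 64-bit Plonky2 field (plain integer arithmetic, not the actual constraint code in stack.rs; helper names are hypothetical):

```rust
const P: u128 = 0xffff_ffff_0000_0001; // the Plonky2 prime, 2^64 - 2^32 + 1

// Fermat inversion x^(p-2) mod p; maps 0 to 0, as stack_inv_aux requires.
fn field_inv(x: u128) -> u128 {
    let (mut base, mut exp, mut acc) = (x % P, P - 2, 1u128);
    while exp > 0 {
        if exp & 1 == 1 { acc = acc * base % P; }
        base = base * base % P;
        exp >>= 1;
    }
    if x % P == 0 { 0 } else { acc }
}

// flag = 1 - not_special_len * stack_inv_aux:
// equals 1 exactly when stack_len == special_len, and 0 otherwise.
fn special_len_flag(stack_len: u64, special_len: u64) -> u128 {
    let not_special_len = ((P + stack_len as u128) - special_len as u128) % P;
    let stack_inv_aux = field_inv(not_special_len);
    (P + 1 - not_special_len * stack_inv_aux % P) % P
}
```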
\paragraph*{Operation flag merging:}
To reduce the total number of columns, many operation flags are merged together (e.g. \texttt{DUP} and \texttt{SWAP}) and are distinguished with the binary decomposition of their opcodes.
The filter for a merged operation is now of degree 2: for example, \texttt{is\_swap = dup\_swap * opcode\_bits[4]} since the 4th bit is set to 1 for a \texttt{SWAP} and 0 for a \texttt{DUP}.
If the two instructions have different stack behaviors, this can be a problem: \texttt{eval\_packed\_one}'s constraints are already of degree 3 and it can't support degree 2 filters.
When this happens, stack constraints are defined manually in the operation's dedicated file (e.g. \texttt{dup\_swap.rs}). Implementation details vary case-by-case and can be found in the files.
\subsubsection{Stack length checking}
The CPU must make sure that the stack length never goes below zero and, in user mode, never grows beyond the maximum stack size. When this happens, an honest prover should trigger the
corresponding exception. If a malicious prover doesn't trigger the exception, constraints must fail the proof.
\paragraph*{Stack underflow:}
There is no explicit constraint checking for stack underflow. An underflow happens when the CPU tries to pop the empty stack, which would perform a memory read at virtual address \texttt{-1}.
Such a read cannot succeed: in Memory, the range-check argument requires the gap between two consecutive addresses to be lower than the length of the Memory trace. Since the prime of the Plonky2 field is 64 bits long,
this would require a Memory trace longer than $2^{32}$.
\paragraph*{Stack overflow:}
An instruction can only push at most once, meaning that an overflow occurs whenever the stack length is exactly one more than the maximum stack size ($1024+1$) in user mode.
To constrain this, the column \texttt{stack\_len\_bounds\_aux} contains:
\begin{itemize}
\item[--] the modular inverse of \texttt{stack\_len - 1025} if we're in user mode and \texttt{stack\_len $\neq$ 1025},
\item[--] 0 if \texttt{stack\_len = 1025} or if we're in kernel mode.
\end{itemize}
Then overflow can be checked with the flag
$$\texttt{(1 - is\_kernel\_mode) - stack\_len * stack\_len\_bounds\_aux}.$$
The flag is 1 if \texttt{stack\_len = 1025} and we're in user mode, and 0 otherwise.
Because \texttt{stack\_len\_bounds\_aux} is a shared general column, we only check this constraint after an instruction that can actually trigger an overflow,
i.e. a pushing, non-popping instruction.
\subsection{Gas handling}
\subsubsection{Out of gas errors}
The CPU table has a ``gas'' register that keeps track of the gas used by the transaction so far.
The crucial invariant in our out-of-gas checking method is that, at any point in the program's execution, we have not used more gas than we have available; that is, ``gas'' is at most the gas allocation for the transaction (which is stored separately by the kernel). We assume that the gas allocation will never be $2^{32}$ or more, so if ``gas'' does not fit in one limb, then we've run out of gas.
When a native instruction (one that is not a syscall) is executed, a constraint ensures that the ``gas'' register is increased by the correct amount. This is not automatic for syscalls; the syscall handler itself must calculate and charge the appropriate amount.
If everything goes smoothly and we have not run out of gas, ``gas'' should be no more than the gas allowance at the point that we terminate (via STOP, REVERT, a stack overflow, or any other exceptional halt). Indeed, because we assume that the gas overflow handler is invoked \textit{as soon as} we've run out of gas, all these termination methods verify that $\texttt{gas} \leq \texttt{allowance}$, and jump to \texttt{exc\_out\_of\_gas} if this is not the case. This is also true for the out-of-gas handler, which checks that:
\begin{enumerate}
\item we have not yet run out of gas
\item we are about to run out of gas
\end{enumerate}
and ``PANIC'' if either of those statements does not hold.
When we do run out of gas, however, this event must be handled. Syscalls are responsible for checking that their execution would not cause the transaction to run out of gas. If the syscall detects that it would need to charge more gas than available, it aborts the transaction (or the current code) by jumping to \texttt{fault\_exception}. In fact, \texttt{fault\_exception} is in charge of handling all exceptional halts in the kernel.
Native instructions do this differently. If the prover notices that execution of the instruction would cause an out-of-gas error, it must jump to the appropriate handler instead of executing the instruction. (The handler contains special code that PANICs if the prover invoked it incorrectly.)
\subsubsection{Overflow}
We must be careful to ensure that ``gas'' does not overflow to prevent denial of service attacks.
Note that a syscall cannot be the instruction that causes an overflow. This is because every syscall is required to verify that its execution does not cause us to exceed the gas limit. Upon entry into a syscall, a constraint verifies that $\texttt{gas} < 2^{32}$. Some syscalls may have to be careful to ensure that the gas check is performed correctly (for example, that overflow modulo $2^{256}$ does not occur). So we can assume that upon entry and exit out of a syscall, $\texttt{gas} < 2^{32}$.
Similarly, native instructions alone cannot cause wraparound. The most expensive instruction, JUMPI, costs 10 gas. Even if we were to execute $2^{32}$ consecutive JUMPI instructions, the maximum length of a trace, we are nowhere close to consuming $2^{64} - 2^{32} + 1$ (= Goldilocks prime) gas.
The final scenario we must tackle is an expensive syscall followed by many expensive native instructions. Upon exit from a syscall, $\texttt{gas} < 2^{32}$. Again, even if that syscall is followed by $2^{32}$ native instructions of cost 10, we do not see wraparound modulo Goldilocks.
\subsection{Exceptions}
\label{exceptions}
Sometimes, when executing user code (i.e. contract or transaction code), the EVM halts exceptionally (i.e. outside of a STOP, a RETURN or a REVERT).
When this happens, the CPU table invokes a special instruction with a dedicated operation flag \texttt{exception}.
Exceptions can only happen in user mode; triggering an exception in kernel mode would make the proof unverifiable.
No matter the exception, the handling is the same:
\begin{itemize}
\item The opcode which would trigger the exception is not executed. The operation flag set is \texttt{exception} instead of the opcode's flag.
\item We push a value to the stack which contains: the current program counter (to retrieve the faulty opcode), and the current value of \texttt{gas\_used}.
The program counter is then set to the corresponding exception handler in the kernel (e.g. \texttt{exc\_out\_of\_gas}).
\item The exception handler verifies that the given exception would indeed be triggered by the faulty opcode. If this is not the case (if the exception has already happened or if it doesn't happen after executing
the faulty opcode), then the kernel panics: there was an issue during witness generation.
\item The kernel consumes the remaining gas and returns from the current context with \texttt{success} set to 0 to indicate an execution failure.
\end{itemize}
Here is the list of the possible exceptions:
\begin{enumerate}[align=left]
\item[\textbf{Out of gas:}] Raised when a native instruction (i.e. not a syscall) in user mode pushes the amount of gas used over the current gas limit.
When this happens, the EVM jumps to \texttt{exc\_out\_of\_gas}. The kernel then checks that the consumed gas is currently below the gas limit,
and that adding the gas cost of the faulty instruction pushes it over the limit.
If the exception is not raised, the prover will panic when returning from the execution: the remaining gas is checked to be positive after STOP, RETURN or REVERT.
\item[\textbf{Invalid opcode:}] Raised when the read opcode is invalid. It means either that it doesn't exist, or that it's a privileged instruction and
thus not available in user mode. When this happens, the EVM jumps to \texttt{exc\_invalid\_opcode}. The kernel then checks that the given opcode is indeed invalid.
If the exception is not raised, decoding constraints ensure no operation flag is set to 1, which would make it a padding row. Halting constraints would then make the proof
unverifiable.
\item[\textbf{Stack underflow:}] Raised when an instruction which pops from the stack is called when the stack doesn't have enough elements.
When this happens, the EVM jumps to \texttt{exc\_stack\_underflow}. The kernel then checks that the current stack length is smaller than the minimum
stack length required by the faulty opcode.
If the exception is not raised, the popping memory operation's address offset would underflow, and the Memory range check would require the Memory trace to be too
large ($>2^{32}$).
\item[\textbf{Invalid JUMP destination:}] Raised when the program counter jumps to an invalid location (i.e. not a JUMPDEST). When this happens, the EVM jumps to
\texttt{exc\_invalid\_jump\_destination}. The kernel then checks that the opcode is a JUMP, and that the destination is not a JUMPDEST by checking the
JUMPDEST segment.
If the exception is not raised, jumping constraints will fail the proof.
\item[\textbf{Invalid JUMPI destination:}] Same as the above, for JUMPI.
\item[\textbf{Stack overflow:}] Raised when a pushing instruction in user mode pushes the stack over 1024. When this happens, the EVM jumps
to \texttt{exc\_stack\_overflow}. The kernel then checks that the current stack length is exactly equal to 1024 (since an instruction can only
push once at most), and that the faulty instruction is pushing.
If the exception is not raised, stack constraints ensure that a stack length of 1025 in user mode will fail the proof.
\end{enumerate}

View File

@ -1,159 +0,0 @@
\section{STARK framework}
\label{framework}
\subsection{Cost model}
Our zkEVM is designed for efficient verification by STARKs \cite{stark}, particularly by an AIR with degree 3 constraints. In this model, the prover bottleneck is typically constructing Merkle trees, particularly constructing the tree containing low-degree extensions of witness polynomials.
\subsection{Field selection}
\label{field}
Our zkEVM is designed to have its execution traces encoded in a particular prime field $\mathbb{F}_p$, with $p = 2^{64} - 2^{32} + 1$. A nice property of this field is that it can represent the results of many common \texttt{u32} operations. For example, (widening) \texttt{u32} multiplication has a maximum value of $(2^{32} - 1)^2$, which is less than $p$. In fact, a \texttt{u32} multiply-add has a maximum value of $p - 1$, so the result can be represented with a single field element, although if we were to add a carry-in bit, this would no longer hold.
This field also enables a very efficient reduction method. Observe that
$$
2^{64} \equiv 2^{32} - 1 \pmod p
$$
and consequently
\begin{align*}
2^{96} &\equiv 2^{32} (2^{32} - 1) \pmod p \\
&\equiv 2^{64} - 2^{32} \pmod p \\
&\equiv -1 \pmod p.
\end{align*}
To reduce a 128-bit number $n$, we first rewrite $n$ as $n_0 + 2^{64} n_1 + 2^{96} n_2$, where $n_0$ is 64 bits and $n_1, n_2$ are 32 bits each. Then
\begin{align*}
n &\equiv n_0 + 2^{64} n_1 + 2^{96} n_2 \pmod p \\
&\equiv n_0 + (2^{32} - 1) n_1 - n_2 \pmod p
\end{align*}
After computing $(2^{32} - 1) n_1$, which can be done with a shift and subtraction, we add the first two terms, subtracting $p$ if overflow occurs. We then subtract $n_2$, adding $p$ if underflow occurs.
At this point we have reduced $n$ to a \texttt{u64}. This partial reduction is adequate for most purposes, but if we needed the result in canonical form, we would perform a final conditional subtraction.
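For concreteness, here is a hedged Rust sketch of this partial reduction; it follows the steps above and is not necessarily the code used in the repository:

```rust
// Partial reduction of a 128-bit n modulo p = 2^64 - 2^32 + 1.
// Write n = n0 + 2^64*n1 + 2^96*n2 with n0 on 64 bits and n1, n2 on 32 bits,
// then use 2^64 ≡ 2^32 - 1 (mod p) and 2^96 ≡ -1 (mod p).
const P: u64 = 0xffff_ffff_0000_0001;
const EPSILON: u64 = 0xffff_ffff; // 2^32 - 1 = 2^64 mod p

fn reduce128(n: u128) -> u64 {
    let n0 = n as u64;
    let n1 = (n >> 64) as u64 & EPSILON; // bits 64..96
    let n2 = (n >> 96) as u64;           // bits 96..128
    // t = n0 + (2^32 - 1) * n1, subtracting p if overflow occurs.
    let (mut t, overflow) = n0.overflowing_add(n1 * EPSILON);
    if overflow {
        t = t.wrapping_sub(P);
    }
    // t - n2, adding p if underflow occurs.
    let (mut r, underflow) = t.overflowing_sub(n2);
    if underflow {
        r = r.wrapping_add(P);
    }
    r // partially reduced: congruent to n mod p, fits in a u64
}
```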
\subsection{Cross-table lookups}
\label{ctl}
The various STARK tables carry out independent operations, but on shared values. We need to check that the shared values are identical in all the STARKs that require them. This is where cross-table lookups (CTLs) come in handy.
Suppose STARK $S_1$ requires an operation -- say $Op$ -- that is carried out by another STARK $S_2$. Then $S_1$ writes the input and output of $Op$ in its own table, and provides the inputs to $S_2$. $S_2$ also writes the inputs and outputs in its rows, and the table's constraints check that $Op$ is carried out correctly. We then need to ensure that the inputs and outputs are the same in $S_1$ and $S_2$.
In other words, we need to ensure that the rows -- reduced to the input and output columns -- of $S_1$ calling $Op$ are permutations of the rows of $S_2$ that carry out $Op$. Our CTL protocol is based on logUp and is similar to our range-checks.
To prove this, the first step is to only select the rows of interest in $S_1$ and $S_2$, and filter out the rest. Let $f^1$ be the filter for $S_1$ and $f^2$ the filter for $S_2$. $f^1$ and $f^2$ are constrained to be in $\{0, 1\}$. $f^1 = 1$ (resp. $f^2 = 1$) whenever the row at hand carries out $Op$ in $S_1$ (resp. in $S_2$), and 0 otherwise. Let also $(\alpha, \beta)$ be two random challenges.
The idea is to create subtables $S_1'$ and $S_2'$ of $S_1$ and $S_2$ respectively, such that $f^1 = 1$ and $f^2 = 1$ for all their rows. The columns in the subtables are limited to the ones whose values must be identical (the inputs and outputs of $Op$ in our example).
Note that for design and constraint reasons, filters are limited to (at most) degree 2 combinations of columns.
Let $\{c^{1, i}\}_{i=1}^m$ be the columns in $S_1'$ and $\{c^{2,i}\}_{i=1}^m$ be the columns in $S_2'$.
The prover defines a ``running sum'' $Z$ for $S_1'$ such that:
\begin{gather*}
Z^{S_1}_{n-1} = \frac{1}{\sum_{j=0}^{m-1} \alpha^j \cdot c^{1, j}_{n-1} + \beta} \\
Z^{S_1}_i = Z^{S_1}_{i+1} + f^1_i \cdot \frac{1}{\sum_{j=0}^{m-1} \alpha^j \cdot c^{1, j}_i + \beta}
\end{gather*}
The second equation ``selects'' the terms of interest thanks to $f^1$ and filters out the rest.
Similarly, the prover constructs a running sum $Z^{S_2}$ for $S_2$. Note that $Z$ is computed ``upside down'': we start with $Z_{n-1}$ and the final sum is in $Z_0$.
On top of the constraints to check that the running sums were correctly constructed, the verifier checks that $Z^{S_1}_0 = Z^{S_2}_0$.
This ensures that the columns in $S_1'$ and the columns in $S_2'$ are permutations of each other.
In other words, the CTL argument is a logUp lookup argument where $S_1'$ is the looking table, $S_2'$ is the looked table, and $S_1' = S_2'$ (all the multiplicities are 1).
For more details about logUp, see the next section.
To sum up, for each STARK $S$, the prover:
\begin{enumerate}
\item constructs a running sum $Z_i^l$ for each table looking into $S$ (called looking sums here),
\item constructs a running sum $Z^S$ for $S$ (called looked sum here),
\item sends the final value for each running sum $Z_{i, 0}^l$ and $Z^S_0$ to the verifier,
\item sends a commitment to $Z_i^l$ and $Z^S$ to the verifier.
\end{enumerate}
Then, for each STARK $S$, the verifier:
\begin{enumerate}
\item computes the sum $Z = \sum_i Z_{i, 0}^l$,
\item checks that $Z = Z^S_0$,
\item checks that each $Z_i^l$ and $Z^S$ was correctly constructed.
\end{enumerate}
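As a rough, prover-side illustration of steps 1 and 2, here is a hypothetical Rust sketch computing the final running-sum value of one filtered table (all multiplicities equal to 1, as in the CTL argument above). The flat row representation and names are illustrative only, not the starky implementation:

```rust
const P: u128 = 0xffff_ffff_0000_0001; // the Plonky2 prime

// Fermat inversion x^(p-2) mod p.
fn field_inv(x: u128) -> u128 {
    let (mut base, mut exp, mut acc) = (x % P, P - 2, 1u128);
    while exp > 0 {
        if exp & 1 == 1 { acc = acc * base % P; }
        base = base * base % P;
        exp >>= 1;
    }
    acc
}

/// Final value Z_0 for rows of already-reduced column values.
fn ctl_final_sum(rows: &[Vec<u128>], filter: &[u128], alpha: u128, beta: u128) -> u128 {
    let mut z = 0u128;
    for (row, &f) in rows.iter().zip(filter) {
        if f == 1 {
            // combine = sum_j alpha^j * c_j + beta
            let mut combine = beta % P;
            let mut pow = 1u128;
            for &c in row {
                combine = (combine + pow * (c % P)) % P;
                pow = pow * alpha % P;
            }
            z = (z + field_inv(combine)) % P;
        }
    }
    z // the check: ctl_final_sum(looking, ...) == ctl_final_sum(looked, ...)
}
```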
\subsection{Range-checks}
\label{rc}
In most cases, tables deal with U256 words, split into 32-bit limbs (to avoid overflowing the field). To prevent a malicious prover from cheating, it is crucial to range-check those limbs.
\subsubsection{What to range-check?}
One can note that every element that ever appears on the stack has been pushed. Therefore, enforcing a range-check on pushed elements is enough to range-check all elements on the stack. Similarly, all elements in memory must have been written prior, and therefore it is enough to range-check memory writes. However, range-checking the PUSH and MSTORE opcodes is not sufficient.
\begin{enumerate}
\item Pushes and memory writes for ``MSTORE\_32BYTES'' are range-checked in ``BytePackingStark''.
\item Syscalls, exceptions and prover inputs are range-checked in ``ArithmeticStark''.
\item The inputs and outputs of binary and ternary arithmetic operations are range-checked in ``ArithmeticStark''.
\item The inputs' bits of logic operations are checked to be either 1 or 0 in ``LogicStark''. Since ``LogicStark'' only deals with bitwise operations, this is enough to have range-checked outputs as well.
\item The inputs of Keccak operations are range-checked in ``KeccakStark''. The output digest is written as bytes in ``KeccakStark''. Those bytes are used to reconstruct the associated 32-bit limbs checked against the limbs in ``CpuStark''. This implicitly ensures that the output is range-checked.
\end{enumerate}
Note that some operations do not require a range-check:
\begin{enumerate}
\item ``MSTORE\_GENERAL'' reads the value to write from the stack. Thus, the written value was already range-checked by a previous push.
\item ``EQ'' reads two -- already range-checked -- elements on the stack, and checks they are equal. The output is either 0 or 1, and does therefore not need to be checked.
\item ``NOT'' reads one -- already range-checked -- element. The result is constrained to be equal to $\texttt{0xFFFFFFFF} - \texttt{input}$, which implicitly enforces the range check.
\item ``PC'': the program counter cannot be greater than $2^{32}$ in user mode. Indeed, the user code cannot be longer than $2^{32}$, and jumps are constrained to be JUMPDESTs. Moreover, in kernel mode, every jump is towards a location within the kernel, and the kernel code is smaller than $2^{32}$. These two points implicitly enforce $PC$'s range check.
\item ``GET\_CONTEXT'', ``DUP'' and ``SWAP'' all read and push values that were already written in memory. The pushed values were therefore already range-checked.
\end{enumerate}
Range-checks are performed on the range $[0, 2^{16} - 1]$, to limit the trace length.
\subsubsection{Lookup Argument}
To enforce the range-checks, we leverage \href{https://eprint.iacr.org/2022/1530.pdf}{logUp}, a lookup argument by Ulrich Haböck. Given a looking table $s = (s_1, ..., s_n)$ and a looked table $t = (t_1, ..., t_r)$, the goal is to prove that
$$\forall 1 \leq i \leq n, \exists 1 \leq j \leq r \text{ such that } s_i = t_j$$
In our case, $t = (0, .., 2^{16} - 1)$ and $s$ is composed of all the columns in each STARK that must be range-checked.
The logUp paper explains that proving the previous assertion is actually equivalent to proving that there exists a sequence $l$ such that:
$$ \sum_{i=1}^n \frac{1}{X - s_i} = \sum_{j=1}^r \frac{l_j}{X-t_j}$$
The values of $s$ can be stored in $c$ different columns of length $n$ each. In that case, the equality becomes:
$$\sum_{k=1}^c \sum_{i=1}^n \frac{1}{X - s_i^k} = \sum_{j=1}^r \frac{l_j}{X-t_j}$$
The `multiplicity' $m_i$ of value $t_i$ is defined as the number of times $t_i$ appears in $s$. In other words:
$$m_i = |\{j \,;\, s_j = t_i\}|$$
Multiplicities provide a valid sequence of values in the previously stated equation. Thus, if we store the multiplicities, and are provided with a challenge $\alpha$, we can prove the lookup argument by ensuring:
$$\sum_{k=1}^c \sum_{i=1}^n \frac{1}{\alpha - s_i^k} = \sum_{j=1}^r \frac{m_j}{\alpha-t_j}$$
However, the resulting equation has too high a degree. To circumvent this issue, Haböck suggests providing helper columns $h_i$ and $d$ such that at a given row $i$:
\begin{gather*}
h_i^k = \frac{1}{\alpha + s_i^k } \forall 1 \leq k \leq c \\
d_i = \frac{1}{\alpha + t_i}
\end{gather*}
The $h$ helper columns can be batched together to save columns. We can batch at most $\texttt{constraint\_degree} - 1$ helper functions together. In our case, we batch them 2 by 2. At row $i$, we now have:
\begin{align*}
h_i^k = \frac{1}{\alpha + s_i^{2k}} + \frac{1}{\alpha + s_i^{2k+1}} \forall 1 \leq k \leq c/2 \\
\end{align*}
If $c$ is odd, then we have one extra helper column:
$$h_i^{c/2+1} = \frac{1}{\alpha + s_i^{c}}$$
For clarity, we will assume that $c$ is even in what follows.
Let $g$ be a generator of a subgroup of order $n$. We extrapolate $h, m$ and $d$ to get polynomials such that, for $f \in \{h^k, m, d\}$: $f(g^i) = f_i$.
We can define the following polynomial:
$$ Z(g^j) := \sum_{i=0}^{j-1} \Big[\sum_{k=1}^{c/2} h^k(g^i) - m(g^i) \cdot d(g^i)\Big]$$
\subsubsection{Constraints}
With these definitions and a challenge $\alpha$, we can finally check that the assertion holds with the following constraints:
\begin{gather*}
Z(1) = 0 \\
Z(g \cdot x) = Z(x) + \sum_{k=1}^{c/2} h^k(x) - m(x) \cdot d(x)
\end{gather*}
These ensure that the running sum $Z$ is correctly built from the helper columns and the multiplicities.
We also need to ensure that $h^k$ is well constructed for all $1 \leq k \leq c/2$:
$$
h^k(x) \cdot (\alpha + s^{2k}(x)) \cdot (\alpha + s^{2k+1}(x)) = (\alpha + s^{2k}(x)) + (\alpha + s^{2k+1}(x))
$$
Note: if $c$ is odd, we have one unbatched helper column $h^{c/2+1}$ for which we need a last constraint:
$$
h^{c/2+1}(x) \cdot (\alpha + s^{c}(x)) = 1
$$
Finally, the verifier needs to ensure that the table $t$ was also correctly computed. In each STARK, $t$ is computed starting from 0 and adding at most 1 at each row. This construction is constrained as follows:
\begin{enumerate}
\item $t(1) = 0$
\item $(t(g^{i+1}) - t(g^{i})) \cdot ((t(g^{i+1}) - t(g^{i})) - 1) = 0$
\item $t(g^{n-1}) = 2^{16} - 1$
\end{enumerate}
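To make the argument concrete, here is an illustrative Rust sketch of the underlying balance equation at a random challenge: the sum of 1/(alpha + s_i) over all looking values must equal the sum of m_t/(alpha + t) over the looked table t = (0, ..., 2^16 - 1). This is plain prover-side arithmetic, not the constraint system itself, and the names are hypothetical:

```rust
const P: u128 = 0xffff_ffff_0000_0001; // the Plonky2 prime

// Fermat inversion x^(p-2) mod p.
fn field_inv(x: u128) -> u128 {
    let (mut base, mut exp, mut acc) = (x % P, P - 2, 1u128);
    while exp > 0 {
        if exp & 1 == 1 { acc = acc * base % P; }
        base = base * base % P;
        exp >>= 1;
    }
    acc
}

fn range_check_balanced(s: &[u16], alpha: u128) -> bool {
    // Multiplicity m_t = number of occurrences of t in s; using u16 inputs
    // makes the claimed range [0, 2^16 - 1] hold by construction here.
    let mut m = vec![0u128; 1 << 16];
    for &v in s {
        m[v as usize] += 1;
    }
    let looking = s.iter()
        .fold(0u128, |acc, &v| (acc + field_inv((alpha + v as u128) % P)) % P);
    let looked = m.iter().enumerate().fold(0u128, |acc, (t, &mt)| {
        (acc + mt % P * field_inv((alpha + t as u128) % P)) % P
    });
    looking == looked
}
```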

@@ -1,3 +0,0 @@
\section{Introduction}
TODO

@@ -1,94 +0,0 @@
\section{Merkle Patricia Tries}
\label{tries}
The \emph{EVM World state} is a representation of the different accounts at a particular time, as well as the last processed transactions together with their receipts. The world state is represented using \emph{Merkle Patricia Tries} (MPTs) \cite[App.~D]{yellowpaper}, and there are three different tries: the state trie, the transaction trie and the receipt trie.
For each transaction we need to show that the prover knows preimages of the hashed initial and final EVM states. When the kernel starts execution, it stores these three tries within the {\tt Segment::TrieData} segment. The prover loads the initial tries from the inputs into memory. Subsequently, the tries are modified during transaction execution, inserting new nodes or deleting existing nodes.
An MPT is composed of five different node types: branch, extension, leaf, empty and digest nodes. Branch and leaf nodes might contain a payload whose format depends on the particular trie. The nodes are encoded, primarily using RLP encoding and Hex-prefix encoding (see \cite{yellowpaper} App. B and C, respectively). The resulting encoding is then hashed, following a strategy similar to that of normal Merkle trees, to generate the trie hashes.
Insertion and deletion are performed in the same way as in other MPT implementations. The only difference concerns the insertion of extension nodes: we create a new node with the new data instead of modifying the existing one. In the rest of this section we describe how the MPTs are represented in memory, how they are given as input, and how MPTs are hashed.
\subsection{Internal memory format}
The tries are stored in kernel memory, specifically in the {\tt Segment::TrieData} segment. Each node type is stored as follows:
\begin{enumerate}
\item An empty node is encoded as $(\texttt{MPT\_NODE\_EMPTY})$.
\item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, c_1, \dots, c_{16}, v)$, where each $c_i$ is a pointer to a child node, and $v$ is a pointer to a value. If a branch node has no associated value, then $v = 0$, i.e. the null pointer.
\item An extension node is encoded as $(\texttt{MPT\_NODE\_EXTENSION}, k, c)$, where $k$ represents the part of the key associated with this extension, and is encoded as a 2-tuple $(\texttt{packed\_nibbles}, \texttt{num\_nibbles})$. $c$ is a pointer to a child node.
\item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, v)$, where $k$ is a 2-tuple as above, and $v$ is a pointer to a value.
\item A digest node is encoded as $(\texttt{MPT\_NODE\_HASH}, d)$, where $d$ is a Keccak256 digest.
\end{enumerate}
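For instance, a leaf node whose remaining key consists of the three nibbles \texttt{0xabc} and whose payload lives at offset $p$ of the segment is stored as $(\texttt{MPT\_NODE\_LEAF}, (\texttt{0xabc}, 3), p)$.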
On the other hand, the values, or payloads, are represented differently depending on the particular trie.
\subsubsection{State trie}
The state trie payload contains the account data. Each account is stored in 4 contiguous memory addresses containing
\begin{enumerate}
\item the nonce,
\item the balance,
\item a pointer to the account's storage trie,
\item a hash of the account's code.
\end{enumerate}
The storage trie payload in turn is a single word.
\subsubsection{Transaction Trie}
The transaction trie nodes contain the length of the RLP encoded transaction, followed by the bytes of the RLP encoding of the transaction.
\subsubsection{Receipt Trie}
The payload of the receipts trie is a receipt. Each receipt is stored as
\begin{enumerate}
\item the length in words of the payload,
\item the status,
\item the cumulative gas used,
\item the bloom filter, stored as 256 words,
\item the number of topics,
\item the topics,
\item the data length,
\item the data.
\end{enumerate}
\subsection{Prover input format}
The initial state of each trie is given by the prover as a nondeterministic input tape. This tape has a slightly different format:
\begin{enumerate}
\item An empty node is encoded as $(\texttt{MPT\_NODE\_EMPTY})$.
\item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, v_?, c_1, \dots, c_{16})$. Here $v_?$ consists of a flag indicating whether a value is present, followed by the actual value payload if one is present. Each $c_i$ is the encoding of a child node.
\item An extension node is encoded as $(\texttt{MPT\_NODE\_EXTENSION}, k, c)$, where $k$ represents the part of the key associated with this extension, and is encoded as a 2-tuple $(\texttt{packed\_nibbles}, \texttt{num\_nibbles})$. $c$ is the encoding of a child node.
\item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, v)$, where $k$ is a 2-tuple as above, and $v$ is a value payload.
\item A digest node is encoded as $(\texttt{MPT\_NODE\_HASH}, d)$, where $d$ is a Keccak256 digest.
\end{enumerate}
Nodes are thus given in depth-first order, enabling natural recursive methods for encoding and decoding this format.
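As an illustration, the following Rust sketch decodes such a tape recursively. The tag values are hypothetical and the payload is simplified to a single word; the actual decoding is performed by the kernel.
\begin{verbatim}
use ethereum_types::U256;

// Hypothetical tag values, for illustration only.
const MPT_NODE_EMPTY: u64 = 0;
const MPT_NODE_BRANCH: u64 = 1;
const MPT_NODE_EXTENSION: u64 = 2;
const MPT_NODE_LEAF: u64 = 3;
const MPT_NODE_HASH: u64 = 4;

enum Node {
    Empty,
    Branch { value: Option<U256>, children: Vec<Node> },
    Extension { packed_nibbles: U256, num_nibbles: U256, child: Box<Node> },
    Leaf { packed_nibbles: U256, num_nibbles: U256, value: U256 },
    Hash(U256),
}

/// Decodes one node from the tape, consuming its children recursively
/// (hence the depth-first order of the input).
fn decode<I: Iterator<Item = U256>>(tape: &mut I) -> Node {
    match tape.next().unwrap().as_u64() {
        MPT_NODE_EMPTY => Node::Empty,
        MPT_NODE_BRANCH => {
            // v_?: a presence flag, followed by the payload if present.
            let has_value = !tape.next().unwrap().is_zero();
            let value = if has_value { Some(tape.next().unwrap()) } else { None };
            let mut children = Vec::with_capacity(16);
            for _ in 0..16 {
                children.push(decode(tape));
            }
            Node::Branch { value, children }
        }
        MPT_NODE_EXTENSION => {
            let packed_nibbles = tape.next().unwrap();
            let num_nibbles = tape.next().unwrap();
            let child = Box::new(decode(tape));
            Node::Extension { packed_nibbles, num_nibbles, child }
        }
        MPT_NODE_LEAF => {
            let packed_nibbles = tape.next().unwrap();
            let num_nibbles = tape.next().unwrap();
            let value = tape.next().unwrap();
            Node::Leaf { packed_nibbles, num_nibbles, value }
        }
        MPT_NODE_HASH => Node::Hash(tape.next().unwrap()),
        _ => panic!("invalid node tag"),
    }
}
\end{verbatim}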
The payload of state and receipt tries is given in the natural sequential way. The transaction and receipt payloads contain variable-size data, so their input format is slightly different. The prover input for a transaction is its RLP encoding preceded by its length. Receipts are given in the natural sequential way, except that topics and data are preceded by their respective lengths.
\subsection{Encoding and Hashing}
Encoding is done recursively starting from the trie root. Leaf, branch and extension nodes are encoded as the RLP encoding of a list containing the hex prefix encoding of the node key as well as
\begin{description}
\item[Leaf Node:] the encoding of the payload,
\item[Branch Node:] the hash or encoding of the 16 children and the encoding of the payload,
\item[Extension Node:] the hash or encoding of the child.
\end{description}
For the rest of the nodes we have:
\begin{description}
\item[Empty Node:] the encoding of an empty node is {\tt 0x80},
\item[Digest Node:] the encoding of a digest node stored as $({\tt MPT\_NODE\_HASH}, d)$ is $d$.
\end{description}
The payloads in turn are RLP encoded as follows
\begin{description}
\item[State Trie:] Encoded as a list containing nonce, balance, storage trie hash and code hash.
\item[Storage Trie:] the RLP encoding of the value (the value is thus doubly RLP-encoded),
\item[Transaction Trie:] The RLP encoded transaction.
\item[Receipt Trie:] Depending on the transaction type it's encoded as ${\sf RLP}({\sf RLP}({\tt receipt}))$ for Legacy transactions or ${\sf RLP}({\tt txn\_type}||{\sf RLP}({\tt receipt}))$ for transactions of type 1 or 2. Each receipt is encoded as a list containing:
\begin{enumerate}
\item the status,
\item the cumulative gas used,
\item the bloom filter, stored as a list of length 256,
\item the list of topics,
\item the data string.
\end{enumerate}
\end{description}
Once a node is encoded it is written to the {\tt Segment::RlpRaw} segment as a sequence of bytes. Then the RLP encoded data is hashed if the length of the data is more than 32 bytes. Otherwise we return the encoding. Further details can be found in the \href{https://github.com/0xPolygonZero/plonky2/tree/main/evm/src/cpu/mpt/hash}{mpt hash folder}.
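The final hash-or-inline rule can be summarized by the following sketch, using the \texttt{keccak-hash} crate (the helper name is ours):
\begin{verbatim}
use keccak_hash::keccak;

/// Nodes whose RLP encoding exceeds 32 bytes are replaced by their
/// Keccak256 digest; shorter encodings are embedded directly.
fn hash_or_inline(rlp: &[u8]) -> Vec<u8> {
    if rlp.len() > 32 {
        keccak(rlp).as_bytes().to_vec()
    } else {
        rlp.to_vec()
    }
}
\end{verbatim}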

View File

@ -1,10 +0,0 @@
\section{Tables}
\label{tables}
\input{tables/cpu}
\input{tables/arithmetic}
\input{tables/byte-packing}
\input{tables/logic}
\input{tables/memory}
\input{tables/keccak-f}
\input{tables/keccak-sponge}

View File

@ -1,54 +0,0 @@
\subsection{Arithmetic}
\label{arithmetic}
Each row of the arithmetic table corresponds to a binary or ternary arithmetic operation. Each of these operations has an associated flag $f_{op}$ in the table, such that $f_{\texttt{op}} = 1$ whenever the operation is $\texttt{op}$ and 0 otherwise. The full list of operations carried out by the table is as follows:
\paragraph*{Binary operations:} \begin{itemize}
\item basic operations: ``add'', ``mul'', ``sub'' and ``div'',
\item comparisons: ``lt'' and ``gt'',
\item shifts: ``shr'' and ``shl'',
\item ``byte'': given $x_1, x_2$, returns the $x_1$-th ``byte'' in $x_2$,
\item modular operations: ``mod'', ``AddFp254'', ``MulFp254'' and ``SubFp254'',
\item range-check: no operation is performed, as this is only used to range-check the input and output limbs in the range [$0, 2^{16} - 1$].
\end{itemize}
For ``mod'', the second input is the modulus. ``AddFp254'', ``MulFp254'' and ``SubFp254'' are modular operations modulo ``Fp254'' -- the prime of the BN curve's base field.
\paragraph*{Ternary operations:} There are three ternary operations: modular addition ``AddMod'', modular multiplication ``MulMod'' and modular subtraction ``SubMod''.
Besides the flags, the arithmetic table needs to store the inputs, the output, and some auxiliary values necessary for the constraints. The input and output values are range-checked to ensure their canonical representation. Inputs are 256-bit words. To avoid having too large a range-check, inputs are therefore split into sixteen 16-bit limbs, and range-checked in the range $[0, 2^{16}-1]$.
Overall, the table comprises the following columns:
\begin{itemize}
\item 17 columns for the operation flags $f_{op}$,
\item 1 column $op$ containing the opcode,
\item 16 columns for the 16-bit limbs $x_{0, i}$ of the first input $x_{0}$,
\item 16 columns for the 16-bit limbs $x_{1, i}$ of the second input $x_{1}$,
\item 16 columns for the 16-bit limbs $x_{2, i}$ of the third input $x_{2}$,
\item 16 columns for the 16-bit limbs $r_i$ of the output $r$,
\item 32 columns for auxiliary values $\texttt{aux}_i$,
\item 1 column $\texttt{range\_counter}$ containing values in the range [$0, 2^{16}-1$], for the range-check,
\item 1 column storing the frequency of appearance of each value in the range $[0, 2^{16} - 1]$.
\end{itemize}
\paragraph{Note on $op$:} The opcode column is only used for range-checks. For optimization purposes, we check all arithmetic operations against the cpu table together. To ensure correctness, we also check that the operation's opcode corresponds to its behavior. But the range-check is not associated with a unique operation: any operation in the cpu table might require its values to be checked. Thus, the arithmetic table cannot know its opcode in advance: it needs to store the value provided by the cpu table.
\subsubsection{Auxiliary columns}
The way auxiliary values are leveraged to efficiently check correctness is not trivial, but it is explained in detail in each dedicated file. Overall, five files explain the implementations of the various checks. Refer to:
\begin{enumerate}
\item ``mul.rs'' for details on multiplications.
\item ``addcy.rs'' for details on addition, subtraction, ``lt'' and ``gt''.
\item ``modular.rs'' for details on how modular operations are checked. Note that even though ``div'' and ``mod'' are generated and checked in a separate file, they leverage the logic for modular operations described in ``modular.rs''.
\item ``byte.rs'' for details on how ``byte'' is checked.
\item ``shift.rs'' for details on how shifts are checked.
\end{enumerate}
\paragraph*{Note on ``lt'' and ``gt'':} For ``lt'' and ``gt'', auxiliary columns hold the difference $d$ between the two inputs $x_1, x_2$. We can then treat them similarly to subtractions by ensuring that $x_1 - x_2 = d$ for ``lt'' and $x_2 - x_1 = d$ for ``gt''. An auxiliary column $cy$ is used for the carry in additions and subtractions. In the comparisons case, it holds the overflow flag. Contrary to subtractions, the output of ``lt'' and ``gt'' operations is not $d$ but $cy$.
\paragraph*{Note on ``div'':} It might be unclear why ``div'' and ``mod'' are dealt with in the same file.
Given numerator and denominator $x_1, x_2$, we compute, as for other modular operations, the quotient $q$ and remainder $\texttt{rem}$ such that:
$$x_1 = q \cdot x_2 + \texttt{rem}$$
We then set the associated auxiliary columns to $\texttt{rem}$ and the output to $q$.
This is why ``div'' is essentially a modulo operation, and can be addressed in almost the same way as ``mod''. The only difference is that in the ``mod'' case, the output is $\texttt{rem}$ and the auxiliary value is $q$.
\paragraph{Note on shifts:} ``shr'' and ``shl'' are internally constrained as ``div'' and ``mul'' respectively with shifted operands. Indeed, given inputs $s, x$, the output should be $x >> s$ for ``shr'' (resp. $x << s$ for ``shl''). Since shifts are binary operations, we can use the third input columns to store $s_{\texttt{shifted}} = 1 << s$. Then, we can use the ``div'' logic (resp. ``mul'' logic) to ensure that the output is $\frac{x}{s_{\texttt{shifted}}}$ (resp. $x * s_{\texttt{shifted}}$).
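As a standalone illustration of this reduction (using the \texttt{ethereum-types} crate; this is a sketch of the semantics, not the table code):
\begin{verbatim}
use ethereum_types::U256;

// shl(s, x) = x * (1 << s) and shr(s, x) = x / (1 << s), with
// EVM semantics: a shift displacement of 256 or more yields 0.
fn shl_via_mul(s: U256, x: U256) -> U256 {
    if s >= U256::from(256) {
        return U256::zero();
    }
    let s_shifted = U256::one() << s.as_usize(); // s_shifted = 1 << s
    x.overflowing_mul(s_shifted).0 // multiplication wraps mod 2^256
}

fn shr_via_div(s: U256, x: U256) -> U256 {
    if s >= U256::from(256) {
        return U256::zero();
    }
    let s_shifted = U256::one() << s.as_usize();
    x / s_shifted
}
\end{verbatim}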

View File

@ -1,59 +0,0 @@
\subsection{Byte Packing}
\label{byte-packing}
The BytePacking STARK module is used for reading and writing non-empty byte sequences of length at most 32 from and to memory.
The ``packing'' term highlights that reading a sequence from memory will pack the bytes into an EVM word (i.e. U256), while
the ``unpacking'' operation consists in breaking down an EVM word into its byte sequence and writing it to memory.
This allows faster memory copies between two memory locations, as well as faster memory reset
(see \href{https://github.com/0xPolygonZero/plonky2/blob/main/evm/src/cpu/kernel/asm/memory/memcpy.asm}{memcpy.asm} and
\href{https://github.com/0xPolygonZero/plonky2/blob/main/evm/src/cpu/kernel/asm/memory/memset.asm}{memset.asm} modules).
The `BytePackingStark' table has one row per packing/unpacking operation.
Each row contains the following columns:
\begin{enumerate}
\item 5 columns containing information on the initial memory address from which the sequence starts
(namely a flag differentiating read and write operations, address context, segment and offset values, as well as timestamp),
\item 32 columns $b_i$ indicating the length of the byte sequence ($b_i = 1$ if the length is $i+1$, and $b_i = 0$ otherwise),
\item 32 columns $v_i$ indicating the values of the bytes that have been read or written during a sequence,
\item 2 columns $r_i$ needed for range-checking the byte values.
\end{enumerate}
\paragraph{Notes on columns generation:}
Whenever a byte unpacking operation is called, the value $\texttt{val}$ is read from the stack, but because the EVM and the STARKs use different endianness, we need to convert $\texttt{val}$ to a little-endian byte sequence. Only then do we resize it to the appropriate length, and prune extra zeros and higher bytes in the process. Finally, we reverse the byte order and write this new sequence into the $v_i$ columns of the table.
Whenever the operation is a byte packing, the bytes are read one by one from memory and stored in the $v_i$ columns of the BytePackingStark table.
Note that because of the different endianness on the memory and EVM sides, we write bytes starting with the last one.
The $b_i$ columns hold a boolean value. $b_i = 1$ whenever we are currently reading or writing the $i$-th element in the byte sequence, and $b_i = 0$ otherwise.
\paragraph{Cross-table lookups:}
The read or written bytes need to be checked against both the cpu and the memory tables. Whenever we call $\texttt{MSTORE\_32BYTES}$, $\texttt{MLOAD\_32BYTES}$ or $\texttt{PUSH}$ on the cpu side, we make use of `BytePackingStark' to make sure we are carrying out the correct operation on the correct values. For this, we check that the following values correspond:
\begin{enumerate}
\item the address (comprising the context, the segment, and the virtual address),
\item the length of the byte sequence,
\item the timestamp,
\item the value (either written to or read from the stack)
\end{enumerate}
The address here corresponds to the address of the first byte.
On the other hand, we need to make sure that the read and write operations correspond to the values read or stored on the memory side. We therefore need a CTL for each byte, checking that the following values are identical in `MemoryStark' and `BytePackingStark':
\begin{enumerate}
\item a flag indicating whether the operation is a read or a write,
\item the address (context, segment and virtual address),
\item the byte (followed by 0s to make sure the memory address contains a byte and not a U256 word),
\item the timestamp
\end{enumerate}
Note that the virtual address has to be recomputed based on the length of the sequence of bytes. The virtual address for the $i$-th byte is written as:
$$ \texttt{virt} + \sum_{j=0}^{31} b_j * j - i$$
where $\sum_{j=0}^{31} b_j * j$ is equal to $\texttt{sequence\_length} - 1$.
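For instance, for a sequence of 5 bytes ($b_4 = 1$), the $0$-th byte is accessed at address $\texttt{virt} + 4$ and the last one at $\texttt{virt}$, consistent with the reversed byte order mentioned above.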
\paragraph*{Note on range-check:} Range-checking is necessary whenever we do a memory unpacking operation that will
write values to memory. These values are constrained by the range-check to be 8-bit values, i.e. between 0 and 255 inclusive.
While range-checking values read from memory is not necessary, because we use the same $\texttt{byte\_values}$ columns for both read
and write operations, this extra condition is enforced throughout the whole trace regardless of the operation type.

View File

@ -1,73 +0,0 @@
\subsection{CPU}
\label{cpu}
The CPU is the central component of the zkEVM. Like any CPU, it reads instructions, executes them and modifies the state (registers and the memory)
accordingly. The constraining of some complex instructions (e.g. Keccak hashing) is delegated to other tables.
This section will only briefly present the CPU and its columns. Details about the CPU logic will be provided later.
\subsubsection{CPU flow}
An execution run can be decomposed into two distinct parts:
\begin{itemize}
\item \textbf{CPU cycles:} The bulk of the execution. In each row, the CPU reads the current code at the program counter (PC) address, and executes it. The current code can be the kernel code,
or whichever code is being executed in the current context (transaction code or contract code). Executing an instruction consists in modifying the registers, possibly
performing some memory operations, and updating the PC.
\item \textbf{Padding:} At the end of the execution, we need to pad the length of the CPU trace to the next power of two. When the program counter reaches the special halting label
in the kernel, execution halts. Constraints ensure that every subsequent row is a padding row and that execution cannot resume.
\end{itemize}
In the CPU cycles phase, the CPU can switch between different contexts, which correspond to the different environments of the possible calls. Context 0 is the kernel itself, which
handles initialization (input processing, transaction parsing, transaction trie updating...) and termination (receipt creation, final trie checks...) before and after executing the transaction. Subsequent contexts are created when
executing user code (transaction or contract code). In a non-zero user context, syscalls may be executed, which are specific instructions written in the kernel. They don't change the context
but change the code context, which is where the instructions are read from.
\subsubsection{CPU columns}
\paragraph*{Registers:} \begin{itemize}
\item \texttt{context}: Indicates which context we are in. 0 for the kernel, and a positive integer for every user context. Incremented by 1 at every call.
\item \texttt{code\_context}: Indicates in which context the code to execute resides. It's equal to \texttt{context} in user mode, but is always 0 in kernel mode.
\item \texttt{program\_counter}: The address of the instruction to be read and executed.
\item \texttt{stack\_len}: The current length of the stack.
\item \texttt{is\_kernel\_mode}: Boolean indicating whether we are in kernel (i.e. privileged) mode. This means we are executing kernel code, and we have access to
privileged instructions.
\item \texttt{gas}: The current amount of gas used in the current context. It is eventually checked to be below the current gas limit. Must fit in 32 bits.
\item \texttt{clock}: Monotonic counter which starts at 0 and is incremented by 1 at each row. Used to enforce correct ordering of memory accesses.
\item \texttt{opcode\_bits}: 8 boolean columns, which are the bit decomposition of the opcode being read at the current PC.
\end{itemize}
\paragraph*{Operation flags:} Boolean flags. During CPU cycles phase, each row executes a single instruction, which sets one and only one operation flag. No flag is set during
padding. The decoding constraints ensure that the flag set corresponds to the opcode being read.
There isn't a 1-to-1 correspondence between instructions and flags. For efficiency, the same flag can be set by different, unrelated instructions (e.g. \texttt{eq\_iszero}, which represents
the \texttt{EQ} and the \texttt{ISZERO} instructions). When there is a need to differentiate them in constraints, we filter them with their respective opcode: since the first bit of \texttt{EQ}'s opcode
(resp. \texttt{ISZERO}'s opcode) is 0 (resp. 1), we can filter a constraint for an EQ instruction with \texttt{eq\_iszero * (1 - opcode\_bits[0])}
(resp. \texttt{eq\_iszero * opcode\_bits[0]}).
\paragraph*{Memory columns:} The CPU interacts with the EVM memory via its memory channels. At each row, a memory channel can execute a write, a read, or be disabled. A full memory channel is composed of:
\begin{itemize}
\item \texttt{used}: Boolean flag. If it's set to 1, a memory operation is executed in this channel at this row. If it's set to 0, no operation is done but its columns might be reused for other purposes.
\item \texttt{is\_read}: Boolean flag indicating if a memory operation is a read or a write.
\item 3 \texttt{address} columns. A memory address is made of three parts: \texttt{context}, \texttt{segment} and \texttt{virtual}.
\item 8 \texttt{value} columns. EVM words are 256 bits long, and they are broken down in 8 32-bit limbs.
\end{itemize}
The last memory channel is a partial channel: it doesn't have its own \texttt{value} columns and shares them with the first full memory channel. This allows us to save eight columns.
\paragraph*{General columns:} There are 8 shared general columns. Depending on the instruction, they are used differently:
\begin{itemize}
\item \texttt{Exceptions}: When raising an exception, the first three general columns are the bit decomposition of the exception code.
They are used to jump to the correct exception handler.
\item \texttt{Logic}: For EQ, and ISZERO operations, it's easy to check that the result is 1 if \texttt{input0} and \texttt{input1} are equal. It's more difficult
to prove that, if the result is 0, the inputs are actually unequal. To prove it, each general column contains the modular inverse of $(\texttt{input0}_i - \texttt{input1}_i)$
for each limb $i$ (or 0 if the limbs are equal). Then the quantity $\texttt{general}_i * (\texttt{input0}_i - \texttt{input1}_i)$ will be 1 if and only if $\texttt{general}_i$ is
indeed the modular inverse, which is only possible if the difference is non-zero (see the sketch after this list).
\item \texttt{Jumps}: For jumps, we use the first two columns: \texttt{should\_jump} and \texttt{cond\_sum\_pinv}. \texttt{should\_jump} conditions whether the EVM should jump: it's
1 for a JUMP, and $\texttt{condition} \neq 0$ for a JUMPI. To check if the condition is actually non-zero for a JUMPI, \texttt{cond\_sum\_pinv} stores the modular inverse of
\texttt{condition} (or 0 if it's zero).
\item \texttt{Shift}: For shifts, the logic differs depending on whether the displacement is lower than $2^{32}$, i.e. if it fits in a single value limb.
To check if this is not the case, we must check that at least one of the seven high limbs is not zero. The general column \texttt{high\_limb\_sum\_inv} holds the modular inverse
of the sum of the seven high limbs, and is used to check it's non-zero like the previous cases.
Contrary to the logic operations, we do not need to check limbs individually: each limb has been range-checked to 32 bits, meaning that it's not possible for the sum to
overflow and be zero if some of the limbs are non-zero.
\item \texttt{Stack}: \texttt{stack\_inv}, \texttt{stack\_inv\_aux} and \texttt{stack\_inv\_aux\_2} are used by popping-only (resp. pushing-only) instructions to check if the stack is empty after (resp. was empty
before) the instruction. \texttt{stack\_len\_bounds\_aux} is used to check that the stack doesn't overflow in user mode. We use the last four columns to prevent conflicts with the other general columns.
See \ref{stackhandling} for more details.
\end{itemize}
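To make the inverse trick used above concrete, here is a standalone sketch over the Goldilocks field (naive arithmetic; not the actual constraint code):
\begin{verbatim}
// The prover supplies w = x^{-1} (or 0 when x = 0); the circuit then
// checks that x * w equals 1 exactly when x is non-zero.
const P: u128 = 0xFFFF_FFFF_0000_0001; // Goldilocks prime

fn pow_mod(mut b: u128, mut e: u128) -> u128 {
    let mut acc = 1u128;
    while e > 0 {
        if e & 1 == 1 { acc = acc * b % P; }
        b = b * b % P;
        e >>= 1;
    }
    acc
}

/// What an honest prover writes in the general column.
fn witness_inverse(x: u128) -> u128 {
    if x % P == 0 { 0 } else { pow_mod(x % P, P - 2) }
}

fn main() {
    for x in [0u128, 7, 123_456_789] {
        let w = witness_inverse(x);
        let flag = x % P * w % P; // 1 iff x != 0, else 0
        assert_eq!(flag, u128::from(x % P != 0));
    }
}
\end{verbatim}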

View File

@ -1,65 +0,0 @@
\subsection{Keccak-f}
\label{keccak-f}
This table computes the Keccak-f[1600] permutation.
\subsubsection{Keccak-f Permutation}
To explain how this table is structured, we first need to detail how the permutation is computed. \href{https://keccak.team/keccak_specs_summary.html}{This page} gives a pseudo-code for the permutation. Our implementation differs slightly -- but remains equivalent -- for optimization and constraint degree reasons.
Let:
\begin{itemize}
\item $S$ be the sponge width ($S=25$ in our case)
\item $\texttt{NUM\_ROUNDS}$ be the number of Keccak rounds ($\texttt{NUM\_ROUNDS} = 24$)
\item $RC$ a vector of round constants of size $\texttt{NUM\_ROUNDS}$
\item $I$ be the input of the permutation, comprised of $S$ 64-bit elements
\end{itemize}
The first step is to reshape $I$ into a $5 \times 5$ matrix. We initialize the state $A$ of the sponge with $I$: $$A[x, y] := I[x, y] \text{ } \forall x, y \in \{0..4\}$$
We store $A$ in the table, and subdivide each 64-bit element into two 32-bit limbs.
Then, for each round $i$, we proceed as follows:
\begin{enumerate}
\item First, we define $C[x] := \texttt{xor}_{i=0}^4 A[x, i]$. We store $C$ as bits in the table. This is because we need to apply a rotation on its elements' bits and carry out \texttt{ xor } operations in the next step.
\item Then, we store a second vector $C'$ in bits, such that: $$C'[x, z] = C[x, z] \texttt{ xor } C[x-1, z] \texttt{ xor } C[x+1, z-1]$$.
\item We then need to store the updated value of $A$: $$A'[x, y] = A[x, y] \texttt{ xor } C[x, y] \texttt{ xor } C'[x, y]$$ Note that this is equivalent to the equation in the official Keccak-f description: $$A'[x, y] = A[x, y] \texttt{ xor } C[x-1, z] \texttt{ xor } C[x+1, z-1]$$.
\item The previous three points correspond to the $\theta$ step in Keccak-f. We can now move on to the $\rho$ and $\pi$ steps. These steps are written as: $$B[y, 2\times x + 3 \times y] := \texttt{rot}(A'[x, y], r[x, y])$$ where $\texttt{rot(a, s)}$ is the bitwise cyclic shift operation, and $r$ is the matrix of rotation offsets. We do not need to store $B$: $B$'s bits are only a permutation of $A'$'s bits.
\item The $\chi$ step updates the state once again, and we store the new values: $$A''[x, y] := B[x, y] \texttt{ xor } (\texttt{not }B[x+1, y] \texttt{ and } B[x+2, y])$$ Because of the way we carry out constraints (as explained below), we do not need to store the individual bits for $A''$: we only need the 32-bit limbs.
\item The final step, $\iota$, consists in updating the first element of the state as follows: $$A'''[0, 0] = A''[0, 0] \texttt{ xor } RC[i]$$ where $$A'''[x, y] = A''[x, y] \forall (x, y) \neq (0, 0)$$ Since only the first element is updated, we only need to store $A'''[0, 0]$ of this updated state. The remaining elements are fetched from $A''$. However, because of the bitwise $\texttt{xor}$ operation, we do need columns for the bits of $A''[0, 0]$.
\end{enumerate}
Note that all permutation elements are 64-bit long. But they are stored as 32-bit limbs so that we do not overflow the field.
It is also important to note that all bitwise logic operations ($\texttt{ xor }$, $\texttt{ not }$ and $\texttt{ and}$) are checked in this table. This is why we need to store the bits of most elements. The logic table can only carry out eight 32-bit logic operations per row. Thus, leveraging it here would drastically increase the number of logic rows, and incur too much overhead in proving time.
\subsubsection{Columns}
Using the notations from the previous section, we can now list the columns in the table:
\begin{enumerate}
\item $\texttt{NUM\_ROUNDS} = 24$ columns $c_i$ to determine which round is currently being computed. $c_i = 1$ when we are in the $i$-th round, and 0 otherwise. These columns' purpose is to ensure that the correct round constants are used at each round.
\item $1$ column $t$ which stores the timestamp at which the Keccak operation was called in the cpu. This column enables us to ensure that inputs and outputs are consistent between the cpu, keccak-sponge and keccak-f tables.
\item $5 \times 5 \times 2 = 50$ columns to store the elements of $A$. As a reminder, each 64-bit element is divided into two 32-bit limbs, and $A$ comprises $S = 25$ elements.
\item $5 \times 64 = 320$ columns to store the bits of the vector $C$.
\item $5 \times 64 = 320$ columns to store the bits of the vector $C'$.
\item $5 \times 5 \times 64 = 1600$ columns to store the bits of $A'$.
\item $5 \times 5 \times 2 = 50$ columns to store the 32-bit limbs of $A''$.
\item $64$ columns to store the bits of $A''[0, 0]$.
\item $2$ columns to store the two limbs of $A'''[0, 0]$.
\end{enumerate}
In total, this table comprises 2,431 columns.
\subsubsection{Constraints}
Some constraints checking that the elements are computed correctly are not straightforward. Let us detail them here.
First, it is important to highlight the fact that a $\texttt{xor}$ between two elements is of degree 2. Indeed, for $x \texttt{ xor } y$, the constraint is $x + y - 2 \times x \times y$, which is of degree 2. This implies that a $\texttt{xor}$ between 3 elements is of degree 3, which is the maximal constraint degree for our STARKs.
We can check that $C'[x, z] = C[x, z] \texttt{ xor } C[x - 1, z] \texttt{ xor } C[x + 1, z - 1]$. However, we cannot directly check that $C[x] = \texttt{xor}_{i=0}^4 A[x, i]$, as it would be a degree 5 constraint. Instead, we use $C'$ for this constraint. We see that:
$$\texttt{xor}_{i=0}^4 A'[x, i, z] = C'[x, z]$$
This implies that the difference $d = \sum_{i=0}^4 A'[x, i, z] - C'[x, z]$ is either 0, 2 or 4. We can therefore enforce the following degree 3 constraint instead:
$$d \times (d - 2) \times (d - 4) = 0$$
Additionally, we have to check that $A'$ is well constructed. We know that $A'$ should be such that $A'[x, y, z] = A[x, y, z] \texttt{ xor } C[x, z] \texttt{ xor } C'[x, z]$. Since we do not have the bits of $A$ elements but the bits of $A'$ elements, we check the equivalent degree 3 constraint:
$$A[x, y, z] = A'[x, y, z] \texttt{ xor } C[x, z] \texttt { xor } C'[x, z]$$
Finally, the constraints for the remaining elements, $A''$ and $A'''$, are straightforward: $A''$ is a three-element bitwise $\texttt{xor}$ where all bits involved are already stored, and $A'''[0, 0]$ is the output of a simple bitwise $\texttt{xor}$ with a round constant.
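For instance, if the five bits $A'[x, 0, z], \dots, A'[x, 4, z]$ are $(1, 1, 0, 1, 0)$, their sum is 3 while their \texttt{xor} (i.e. $C'[x, z]$) is 1, so $d = 2$ and the constraint $d (d - 2) (d - 4) = 0$ is indeed satisfied.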

View File

@ -1,66 +0,0 @@
\subsection{KeccakSponge}
\label{keccak-sponge}
This table computes the Keccak256 hash, a sponge-based hash built on top of the Keccak-f[1600] permutation. An instance of KeccakSponge takes as input a Memory address $a$,
a length $l$, and computes the Keccak256 digest of the memory segment starting at $a$ and of size $l$. An instance can span many rows, each individual row being a single call to
the Keccak table. Note that all the read elements must be bytes; the proof will be unverifiable if this is not the case. Following the Keccak specifications, the input string is padded to the next multiple of 136 bytes.
Each row contains the following columns:
\begin{itemize}
\item Read bytes:
\begin{itemize}
\item 3 address columns: \texttt{context}, \texttt{segment} and the offset \texttt{virt} of $a$.
\item \texttt{timestamp}: the timestamp which will be used for all memory reads of this instance.
\item \texttt{already\_absorbed\_bytes}: keeps track of how many bytes have been hashed in the current instance. At the end of an instance, we should have absorbed $l$ bytes in total.
\item \texttt{KECCAK\_RATE\_BYTES} \texttt{block\_bytes} columns: the bytes being absorbed at this row. They are read from memory and will be XORed to the rate part of the current state.
\end{itemize}
\item Input columns:
\begin{itemize}
\item \texttt{KECCAK\_RATE\_U32S} \texttt{original\_rate\_u32s} columns: hold the rate part of the state before XORing it with \texttt{block\_bytes}. At the beginning of an instance, they are initialized with 0.
\item \texttt{KECCAK\_RATE\_U32S} \texttt{xored\_rate\_u32s} columns: hold the original rate XORed with \texttt{block\_bytes}.
\item \texttt{KECCAK\_CAPACITY\_U32S} \texttt{original\_capacity\_u32s} columns: hold the capacity part of the state before applying the Keccak permutation.
\end{itemize}
\item Output columns:
\begin{itemize}
\item \texttt{KECCAK\_DIGEST\_BYTES} \texttt{updated\_digest\_state\_bytes} columns: the beginning of the output state after applying the Keccak permutation. At the last row of an instance, they hold the computed hash.
They are decomposed in bytes for endianness reasons.
\item \texttt{KECCAK\_WIDTH\_MINUS\_DIGEST\_U32S} \texttt{partial\_updated\_state\_u32s} columns: the rest of the output state. They are discarded for the final digest, but are used between instance rows.
\end{itemize}
\item Helper columns:
\begin{itemize}
\item \texttt{is\_full\_input\_block}: indicates if the current row has a full input block, i.e. \texttt{block\_bytes} contains only bytes read from memory and no padding bytes.
\item \texttt{KECCAK\_RATE\_BYTES} \texttt{is\_final\_input\_len} columns: in the final row of an instance, indicate where the final read byte is. If the $i$-th column is set to 1, it means that
all bytes after the $i$-th are padding bytes. In a full input block, all columns are set to 0.
\end{itemize}
\end{itemize}
For each instance, constraints ensure that:
\begin{itemize}
\item at each row:
\begin{itemize}
\item \texttt{is\_full\_input\_block} and \texttt{is\_final\_input\_len} columns are all binary.
\item Only one column in \texttt{is\_full\_input\_block} and \texttt{is\_final\_input\_len} is set to 1.
\item \texttt{xored\_rate\_u32s} is \texttt{original\_rate\_u32s} XOR \texttt{block\_bytes}.
\item The CTL with Keccak ensures that (\texttt{updated\_digest\_state\_bytes}, \texttt{partial\_updated\_state\_u32s}) is the Keccak permutation output of (\texttt{xored\_rate\_u32s}, \texttt{original\_capacity\_u32s}).
\end{itemize}
\item at the first row:
\begin{itemize}
\item \texttt{original\_rate\_u32s} is all 0.
\item \texttt{already\_absorbed\_bytes} is 0.
\end{itemize}
\item at each full input row (i.e. \texttt{is\_full\_input\_block} is 1, all \texttt{is\_final\_input\_len} columns are 0):
\begin{itemize}
\item \texttt{context}, \texttt{segment}, \texttt{virt} and \texttt{timestamp} are unchanged in the next row.
\item Next \texttt{already\_absorbed\_bytes} is current \texttt{already\_absorbed\_bytes} + \texttt{KECCAK\_RATE\_BYTES}.
\item Next (\texttt{original\_rate\_u32s}, \texttt{original\_capacity\_u32s}) is current (\texttt{updated\_digest\_state\_bytes}, \texttt{partial\_updated\_state\_u32s}).
\item The CTL with Memory ensures that \texttt{block\_bytes} is filled with contiguous memory elements [$a$ + \texttt{already\_absorbed\_bytes}, $a$ + \texttt{already\_absorbed\_bytes} + \texttt{KECCAK\_RATE\_BYTES} - 1].
\end{itemize}
\item at the final row (i.e. \texttt{is\_full\_input\_block} is 0, \texttt{is\_final\_input\_len}'s $i$-th column is 1 for a certain $i$, the rest are 0):
\begin{itemize}
\item The CTL with Memory ensures that \texttt{block\_bytes} is filled with contiguous memory elements [$a$ + \texttt{already\_absorbed\_bytes}, $a$ + \texttt{already\_absorbed\_bytes} + $i$ - 1]. The rest are padding bytes.
\item The CTL with CPU ensures that \texttt{context}, \texttt{segment}, \texttt{virt} and \texttt{timestamp} match the \texttt{KECCAK\_GENERAL} call.
\item The CTL with CPU ensures that $l$ = \texttt{already\_absorbed\_bytes} + $i$.
\item The CTL with CPU ensures that \texttt{updated\_digest\_state\_bytes} is the output of the \texttt{KECCAK\_GENERAL} call.
\end{itemize}
\end{itemize}
The trace is padded to the next power of two with dummy rows, whose \texttt{is\_full\_input\_block} and \texttt{is\_final\_input\_len} columns are all 0.

View File

@ -1,18 +0,0 @@
\subsection{Logic}
\label{logic}
Each row of the logic table corresponds to one bitwise logic operation: either AND, OR or XOR. Each input for these operations is represented as 256 bits, while the output is stored as eight 32-bit limbs.
Each row therefore contains the following columns:
\begin{enumerate}
\item $f_{\texttt{and}}$, an ``is and'' flag, which should be 1 for an AND operation and 0 otherwise,
\item $f_{\texttt{or}}$, an ``is or'' flag, which should be 1 for an OR operation and 0 otherwise,
\item $f_{\texttt{xor}}$, an ``is xor'' flag, which should be 1 for a XOR operation and 0 otherwise,
\item 256 columns $x_{1, i}$ for the bits of the first input $x_1$,
\item 256 columns $x_{2, i}$ for the bits of the second input $x_2$,
\item 8 columns $r_i$ for the 32-bit limbs of the output $r$.
\end{enumerate}
Note that we need all three flags because we need to be able to distinguish between an operation row and a padding row -- where all flags are set to 0.
The subdivision into bits is required for the two inputs as the table carries out bitwise operations. The result, on the other hand, is represented in 32-bit limbs since we do not need individual bits and can therefore save the remaining 248 columns. Moreover, the output is checked against the cpu, which stores values in the same way.

View File

@ -1,87 +0,0 @@
\subsection{Memory}
\label{memory}
For simplicity, let's treat addresses and values as individual field elements. The generalization to multi-element addresses and values is straightforward.
Each row of the memory table corresponds to a single memory operation (a read or a write), and contains the following columns:
\begin{enumerate}
\item $a$, the target address
\item $r$, an ``is read'' flag, which should be 1 for a read or 0 for a write
\item $v$, the value being read or written
\item $\tau$, the timestamp of the operation
\end{enumerate}
The memory table should be ordered by $(a, \tau)$. Note that the correctness of the memory could be checked as follows:
\begin{enumerate}
\item Verify the ordering by checking that $(a_i, \tau_i) \leq (a_{i+1}, \tau_{i+1})$ for each consecutive pair.
\item Enumerate the purportedly-ordered log while tracking the ``current'' value of $v$.
\begin{enumerate}
\item Upon observing an address which doesn't match that of the previous row, if the address is zero-initialized
and if the operation is a read, check that $v = 0$.
\item Upon observing a write, don't constrain $v$.
\item Upon observing a read at timestamp $\tau_i$ which isn't the first operation at this address, check that $v_i = v_{i-1}$.
\end{enumerate}
\end{enumerate}
The ordering check is slightly involved since we are comparing multiple columns. To facilitate this, we add an additional column $e$, in which the prover indicates whether the address changed between two consecutive rows. An honest prover will set
$$
e_i \leftarrow \begin{cases}
1 & \text{if } a_i \neq a_{i + 1}, \\
0 & \text{otherwise}.
\end{cases}
$$
We also introduce a range-check column $c$, which should hold:
$$
c_i \leftarrow \begin{cases}
a_{i + 1} - a_i - 1 & \text{if } e_i = 1, \\
\tau_{i+1} - \tau_i & \text{otherwise}.
\end{cases}
$$
The extra $-1$ ensures that the address actually changed if $e_i = 1$.
We then impose the following transition constraints:
\begin{enumerate}
\item $e_i (e_i - 1) = 0$,
\item $(1 - e_i) (a_{i + 1} - a_i) = 0$,
\item $c_i < 2^{32}$.
\end{enumerate}
The third constraint emulates a comparison between two addresses or timestamps by bounding their difference; this assumes that all addresses and timestamps fit in 32 bits and that the field is larger than that.
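The following standalone Rust sketch mirrors this procedure directly, assuming all addresses are zero-initialized and eliding the 32-bit range-check on differences:
\begin{verbatim}
#[derive(Clone, Copy)]
struct MemOp {
    addr: u64,      // a: target address
    is_read: bool,  // r: read (true) or write (false)
    value: u64,     // v: value read or written
    timestamp: u64, // tau
}

/// Replays a memory log the way the constraints above do: sort by
/// (addr, timestamp), then every read must return the last written
/// value, or 0 if the address was never written.
fn check_memory_log(mut log: Vec<MemOp>) -> bool {
    log.sort_by_key(|op| (op.addr, op.timestamp));
    let mut current: Option<(u64, u64)> = None; // (addr, last value)
    for op in log {
        let same_addr = current.map_or(false, |(a, _)| a == op.addr);
        if op.is_read {
            let expected = if same_addr { current.unwrap().1 } else { 0 };
            if op.value != expected {
                return false;
            }
        }
        current = Some((op.addr, op.value));
    }
    true
}
\end{verbatim}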
\subsubsection{Virtual memory}
In the EVM, each contract call has its own address space. Within that address space, there are separate segments for code, main memory, stack memory, calldata, and returndata. Thus each address actually has three components:
\begin{enumerate}
\item an execution context, representing a contract call,
\item a segment ID, used to separate code, main memory, stack memory, and so forth,
\item a virtual address.
\end{enumerate}
The comparisons now involve several columns, which requires some minor adaptations to the technique described above; we will leave these as an exercise to the reader.
Note that an additional constraint check is required: whenever we change the context or the segment, the virtual address must be range-checked to $2^{32}$.
Without this check, addresses could start at $-1$ (i.e. $p - 1$) and then increase properly.
\subsubsection{Timestamps}
Memory operations are sorted by address $a$ and timestamp $\tau$. For a memory operation in the CPU, we have:
$$\tau = \texttt{NUM\_CHANNELS} \times \texttt{cycle} + \texttt{channel}.$$
Since a memory channel can hold at most one memory operation per cycle, every CPU memory operation's timestamp is unique.
Note that it doesn't mean that all memory operations have unique timestamps. There are two exceptions:
\begin{itemize}
\item Before the CPU cycles, we write some global metadata in memory. These extra operations are done at timestamp $\tau = 0$.
\item Some tables other than CPU can generate memory operations, like KeccakSponge. When this happens, these operations all have the timestamp of the CPU row of the instruction which invoked the table (for KeccakSponge, KECCAK\_GENERAL).
\end{itemize}
\subsubsection{Memory initialization}
By default, all memory is zero-initialized. However, to save numerous writes, we allow some specific segments to be initialized with arbitrary values.
\begin{itemize}
\item The read-only kernel code (in segment 0, context 0) is initialized with its correct values. It's checked by hashing the segment and verifying
that the hash value matches a verifier-provided one.
\item The code segment (segment 0) in other contexts is initialized with externally-provided account code, then checked against the account code hash.
If the code is meant to be executed, there is a soundness concern: if the code is malformed and ends with an incomplete PUSH, then the missing bytes must
be 0 according to the Ethereum specs. To prevent the issue, we manually write 33 zeros (at most 32 bytes for the PUSH argument, and an extra one for
the post-PUSH PC value).
\item The ``TrieData'' segment is initialized with the input tries. The stored tries are hashed and checked against the provided initial hash. Note that the length of the segment and the pointers -- within the ``TrieData'' segment -- for the three tries are provided as prover inputs. The length is then checked against a value computed when hashing the tries.
\end{itemize}

Binary file not shown.

View File

@ -1,61 +0,0 @@
\documentclass[12pt]{article}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{cite}
\usepackage{draftwatermark}
\usepackage[margin=1.5in]{geometry}
\usepackage{hyperref}
\usepackage{makecell}
\usepackage{mathtools}
\usepackage{tabularx}
\usepackage{enumitem}
\usepackage[textwidth=1.25in]{todonotes}
% Scale for DRAFT watermark.
\SetWatermarkFontSize{24cm}
\SetWatermarkScale{5}
\SetWatermarkLightness{0.92}
% Hyperlink colors.
\hypersetup{
colorlinks=true,
linkcolor=blue,
citecolor=blue,
urlcolor=blue,
}
% We want all section autorefs to say "Section".
\def\sectionautorefname{Section}
\let\subsectionautorefname\sectionautorefname
\let\subsubsectionautorefname\sectionautorefname
% \abs{...}, \floor{...} and \ceil{...}
\DeclarePairedDelimiter\abs{\lvert}{\rvert}
\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
\title{The Polygon Zero zkEVM}
%\author{Polygon Zero Team}
\date{DRAFT\\\today}
\begin{document}
\maketitle
\begin{abstract}
We describe the design of Polygon Zero's zkEVM, ...
\end{abstract}
\newpage
{\hypersetup{hidelinks} \tableofcontents}
\newpage
\input{introduction}
\input{framework}
\input{tables}
\input{mpts}
\input{cpulogic}
\bibliography{bibliography}{}
\bibliographystyle{ieeetr}
\end{document}

View File

@ -1,300 +0,0 @@
use core::ops::Deref;
use plonky2::field::extension::Extendable;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use starky::config::StarkConfig;
use starky::cross_table_lookup::{CrossTableLookup, TableIdx, TableWithColumns};
use starky::evaluation_frame::StarkFrame;
use starky::stark::Stark;
use crate::arithmetic::arithmetic_stark;
use crate::arithmetic::arithmetic_stark::ArithmeticStark;
use crate::byte_packing::byte_packing_stark::{self, BytePackingStark};
use crate::cpu::cpu_stark;
use crate::cpu::cpu_stark::CpuStark;
use crate::cpu::membus::NUM_GP_CHANNELS;
use crate::keccak::keccak_stark;
use crate::keccak::keccak_stark::KeccakStark;
use crate::keccak_sponge::columns::KECCAK_RATE_BYTES;
use crate::keccak_sponge::keccak_sponge_stark;
use crate::keccak_sponge::keccak_sponge_stark::KeccakSpongeStark;
use crate::logic;
use crate::logic::LogicStark;
use crate::memory::memory_stark;
use crate::memory::memory_stark::MemoryStark;
/// Structure containing all STARKs and the cross-table lookups.
#[derive(Clone)]
pub struct AllStark<F: RichField + Extendable<D>, const D: usize> {
pub(crate) arithmetic_stark: ArithmeticStark<F, D>,
pub(crate) byte_packing_stark: BytePackingStark<F, D>,
pub(crate) cpu_stark: CpuStark<F, D>,
pub(crate) keccak_stark: KeccakStark<F, D>,
pub(crate) keccak_sponge_stark: KeccakSpongeStark<F, D>,
pub(crate) logic_stark: LogicStark<F, D>,
pub(crate) memory_stark: MemoryStark<F, D>,
pub(crate) cross_table_lookups: Vec<CrossTableLookup<F>>,
}
impl<F: RichField + Extendable<D>, const D: usize> Default for AllStark<F, D> {
/// Returns an `AllStark` containing all the STARKs initialized with default values.
fn default() -> Self {
Self {
arithmetic_stark: ArithmeticStark::default(),
byte_packing_stark: BytePackingStark::default(),
cpu_stark: CpuStark::default(),
keccak_stark: KeccakStark::default(),
keccak_sponge_stark: KeccakSpongeStark::default(),
logic_stark: LogicStark::default(),
memory_stark: MemoryStark::default(),
cross_table_lookups: all_cross_table_lookups(),
}
}
}
impl<F: RichField + Extendable<D>, const D: usize> AllStark<F, D> {
pub(crate) fn num_lookups_helper_columns(&self, config: &StarkConfig) -> [usize; NUM_TABLES] {
[
self.arithmetic_stark.num_lookup_helper_columns(config),
self.byte_packing_stark.num_lookup_helper_columns(config),
self.cpu_stark.num_lookup_helper_columns(config),
self.keccak_stark.num_lookup_helper_columns(config),
self.keccak_sponge_stark.num_lookup_helper_columns(config),
self.logic_stark.num_lookup_helper_columns(config),
self.memory_stark.num_lookup_helper_columns(config),
]
}
}
pub type EvmStarkFrame<T, U, const N: usize> = StarkFrame<T, U, N, 0>;
/// Associates STARK tables with a unique index.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Table {
Arithmetic = 0,
BytePacking = 1,
Cpu = 2,
Keccak = 3,
KeccakSponge = 4,
Logic = 5,
Memory = 6,
}
impl Deref for Table {
type Target = TableIdx;
fn deref(&self) -> &Self::Target {
// Hacky way to implement `Deref` for `Table` so that we don't have to
// call `Table::Foo as usize`, but perhaps too ugly to be worth it.
[&0, &1, &2, &3, &4, &5, &6][*self as TableIdx]
}
}
/// Number of STARK tables.
pub(crate) const NUM_TABLES: usize = Table::Memory as usize + 1;
impl Table {
/// Returns all STARK table indices.
pub(crate) const fn all() -> [Self; NUM_TABLES] {
[
Self::Arithmetic,
Self::BytePacking,
Self::Cpu,
Self::Keccak,
Self::KeccakSponge,
Self::Logic,
Self::Memory,
]
}
}
/// Returns all the `CrossTableLookups` used for proving the EVM.
pub(crate) fn all_cross_table_lookups<F: Field>() -> Vec<CrossTableLookup<F>> {
vec![
ctl_arithmetic(),
ctl_byte_packing(),
ctl_keccak_sponge(),
ctl_keccak_inputs(),
ctl_keccak_outputs(),
ctl_logic(),
ctl_memory(),
]
}
/// `CrossTableLookup` for `ArithmeticStark`, to connect it with the `Cpu` module.
fn ctl_arithmetic<F: Field>() -> CrossTableLookup<F> {
CrossTableLookup::new(
vec![cpu_stark::ctl_arithmetic_base_rows()],
arithmetic_stark::ctl_arithmetic_rows(),
)
}
/// `CrossTableLookup` for `BytePackingStark`, to connect it with the `Cpu` module.
fn ctl_byte_packing<F: Field>() -> CrossTableLookup<F> {
let cpu_packing_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_byte_packing(),
Some(cpu_stark::ctl_filter_byte_packing()),
);
let cpu_unpacking_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_byte_unpacking(),
Some(cpu_stark::ctl_filter_byte_unpacking()),
);
let cpu_push_packing_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_byte_packing_push(),
Some(cpu_stark::ctl_filter_byte_packing_push()),
);
let cpu_jumptable_read_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_jumptable_read(),
Some(cpu_stark::ctl_filter_syscall_exceptions()),
);
let byte_packing_looked = TableWithColumns::new(
*Table::BytePacking,
byte_packing_stark::ctl_looked_data(),
Some(byte_packing_stark::ctl_looked_filter()),
);
CrossTableLookup::new(
vec![
cpu_packing_looking,
cpu_unpacking_looking,
cpu_push_packing_looking,
cpu_jumptable_read_looking,
],
byte_packing_looked,
)
}
/// `CrossTableLookup` for `KeccakStark` inputs, to connect it with the `KeccakSponge` module.
/// `KeccakSpongeStark` looks into `KeccakStark` to give the inputs of the sponge.
/// Its consistency with the 'output' CTL is ensured through a timestamp column on the `KeccakStark` side.
fn ctl_keccak_inputs<F: Field>() -> CrossTableLookup<F> {
let keccak_sponge_looking = TableWithColumns::new(
*Table::KeccakSponge,
keccak_sponge_stark::ctl_looking_keccak_inputs(),
Some(keccak_sponge_stark::ctl_looking_keccak_filter()),
);
let keccak_looked = TableWithColumns::new(
*Table::Keccak,
keccak_stark::ctl_data_inputs(),
Some(keccak_stark::ctl_filter_inputs()),
);
CrossTableLookup::new(vec![keccak_sponge_looking], keccak_looked)
}
/// `CrossTableLookup` for `KeccakStark` outputs, to connect it with the `KeccakSponge` module.
/// `KeccakSpongeStark` looks into `KeccakStark` to give the outputs of the sponge.
fn ctl_keccak_outputs<F: Field>() -> CrossTableLookup<F> {
let keccak_sponge_looking = TableWithColumns::new(
*Table::KeccakSponge,
keccak_sponge_stark::ctl_looking_keccak_outputs(),
Some(keccak_sponge_stark::ctl_looking_keccak_filter()),
);
let keccak_looked = TableWithColumns::new(
*Table::Keccak,
keccak_stark::ctl_data_outputs(),
Some(keccak_stark::ctl_filter_outputs()),
);
CrossTableLookup::new(vec![keccak_sponge_looking], keccak_looked)
}
/// `CrossTableLookup` for `KeccakSpongeStark` to connect it with the `Cpu` module.
fn ctl_keccak_sponge<F: Field>() -> CrossTableLookup<F> {
let cpu_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_keccak_sponge(),
Some(cpu_stark::ctl_filter_keccak_sponge()),
);
let keccak_sponge_looked = TableWithColumns::new(
*Table::KeccakSponge,
keccak_sponge_stark::ctl_looked_data(),
Some(keccak_sponge_stark::ctl_looked_filter()),
);
CrossTableLookup::new(vec![cpu_looking], keccak_sponge_looked)
}
/// `CrossTableLookup` for `LogicStark` to connect it with the `Cpu` and `KeccakSponge` modules.
fn ctl_logic<F: Field>() -> CrossTableLookup<F> {
let cpu_looking = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_logic(),
Some(cpu_stark::ctl_filter_logic()),
);
let mut all_lookers = vec![cpu_looking];
for i in 0..keccak_sponge_stark::num_logic_ctls() {
let keccak_sponge_looking = TableWithColumns::new(
*Table::KeccakSponge,
keccak_sponge_stark::ctl_looking_logic(i),
Some(keccak_sponge_stark::ctl_looking_logic_filter()),
);
all_lookers.push(keccak_sponge_looking);
}
let logic_looked =
TableWithColumns::new(*Table::Logic, logic::ctl_data(), Some(logic::ctl_filter()));
CrossTableLookup::new(all_lookers, logic_looked)
}
/// `CrossTableLookup` for `MemoryStark` to connect it with all the modules which need memory accesses.
fn ctl_memory<F: Field>() -> CrossTableLookup<F> {
let cpu_memory_code_read = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_code_memory(),
Some(cpu_stark::ctl_filter_code_memory()),
);
let cpu_memory_gp_ops = (0..NUM_GP_CHANNELS).map(|channel| {
TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_gp_memory(channel),
Some(cpu_stark::ctl_filter_gp_memory(channel)),
)
});
let cpu_push_write_ops = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_partial_memory::<F>(),
Some(cpu_stark::ctl_filter_partial_memory()),
);
let cpu_set_context_write = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_memory_old_sp_write_set_context::<F>(),
Some(cpu_stark::ctl_filter_set_context()),
);
let cpu_set_context_read = TableWithColumns::new(
*Table::Cpu,
cpu_stark::ctl_data_memory_new_sp_read_set_context::<F>(),
Some(cpu_stark::ctl_filter_set_context()),
);
let keccak_sponge_reads = (0..KECCAK_RATE_BYTES).map(|i| {
TableWithColumns::new(
*Table::KeccakSponge,
keccak_sponge_stark::ctl_looking_memory(i),
Some(keccak_sponge_stark::ctl_looking_memory_filter(i)),
)
});
let byte_packing_ops = (0..32).map(|i| {
TableWithColumns::new(
*Table::BytePacking,
byte_packing_stark::ctl_looking_memory(i),
Some(byte_packing_stark::ctl_looking_memory_filter(i)),
)
});
let all_lookers = vec![
cpu_memory_code_read,
cpu_push_write_ops,
cpu_set_context_write,
cpu_set_context_read,
]
.into_iter()
.chain(cpu_memory_gp_ops)
.chain(keccak_sponge_reads)
.chain(byte_packing_ops)
.collect();
let memory_looked = TableWithColumns::new(
*Table::Memory,
memory_stark::ctl_data(),
Some(memory_stark::ctl_filter()),
);
CrossTableLookup::new(all_lookers, memory_looked)
}

View File

@ -1,355 +0,0 @@
//! Support for EVM instructions ADD, SUB, LT and GT
//!
//! This module verifies EVM instructions ADD, SUB, LT and GT (i.e. for
//! unsigned inputs). Each of these instructions can be verified using
//! the "add with carry out" equation
//!
//! X + Y = Z + CY * 2^256
//!
//! by an appropriate assignment of "inputs" and "outputs" to the
//! variables X, Y, Z and CY. Specifically,
//!
//! ADD: X + Y, inputs X, Y, output Z, ignore CY
//! SUB: Z - X, inputs X, Z, output Y, ignore CY
//! GT: X > Z, inputs X, Z, output CY, auxiliary output Y
//! LT: Z < X, inputs Z, X, output CY, auxiliary output Y
use ethereum_types::U256;
use itertools::Itertools;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::arithmetic::columns::*;
use crate::arithmetic::utils::u256_to_array;
/// Generate row for ADD, SUB, GT and LT operations.
pub(crate) fn generate<F: PrimeField64>(
lv: &mut [F],
filter: usize,
left_in: U256,
right_in: U256,
) {
u256_to_array(&mut lv[INPUT_REGISTER_0], left_in);
u256_to_array(&mut lv[INPUT_REGISTER_1], right_in);
u256_to_array(&mut lv[INPUT_REGISTER_2], U256::zero());
match filter {
IS_ADD => {
let (result, cy) = left_in.overflowing_add(right_in);
u256_to_array(&mut lv[AUX_INPUT_REGISTER_0], U256::from(cy as u32));
u256_to_array(&mut lv[OUTPUT_REGISTER], result);
}
IS_SUB => {
let (diff, cy) = left_in.overflowing_sub(right_in);
u256_to_array(&mut lv[AUX_INPUT_REGISTER_0], U256::from(cy as u32));
u256_to_array(&mut lv[OUTPUT_REGISTER], diff);
}
IS_LT => {
let (diff, cy) = left_in.overflowing_sub(right_in);
u256_to_array(&mut lv[AUX_INPUT_REGISTER_0], diff);
u256_to_array(&mut lv[OUTPUT_REGISTER], U256::from(cy as u32));
}
IS_GT => {
let (diff, cy) = right_in.overflowing_sub(left_in);
u256_to_array(&mut lv[AUX_INPUT_REGISTER_0], diff);
u256_to_array(&mut lv[OUTPUT_REGISTER], U256::from(cy as u32));
}
_ => panic!("unexpected operation filter"),
};
}
/// 2^-16 mod (2^64 - 2^32 + 1)
const GOLDILOCKS_INVERSE_65536: u64 = 18446462594437939201;
/// Constrains x + y == z + cy*2^256, assuming filter != 0.
///
/// Set `is_two_row_op=true` to allow the code to be called from the
/// two-row `modular` code (for checking that the modular output is
/// reduced).
///
/// NB: This function ONLY verifies that cy is 0 or 1 when
/// is_two_row_op=false; when is_two_row_op=true the caller must
/// verify for itself.
///
/// Note that the digits of `x + y` are in `[0, 2*(2^16-1)]`
/// (i.e. they are the sums of two 16-bit numbers), whereas the digits
/// of `z` can only be in `[0, 2^16-1]`. In the function we check that:
///
/// \sum_i (x_i + y_i) * 2^(16*i) = \sum_i z_i * 2^(16*i) + given_cy*2^256.
///
/// If `N_LIMBS = 1`, then this amounts to verifying that either `x_0
/// + y_0 = z_0` or `x_0 + y_0 == z_0 + cy*2^16` (this is `t` on line
/// 127ff). Ok. Now assume the constraints are valid for `N_LIMBS =
/// n-1`. Then by induction,
///
/// \sum_{i=0}^{n-1} (x_i + y_i) * 2^(16*i) + (x_n + y_n)*2^(16*n) ==
/// \sum_{i=0}^{n-1} z_i * 2^(16*i) + cy_{n-1}*2^(16*n) + z_n*2^(16*n)
/// + cy_n*2^(16*n)
///
/// is true if `(x_n + y_n)*2^(16*n) == cy_{n-1}*2^(16*n) +
/// z_n*2^(16*n) + cy_n*2^(16*n)` (again, this is `t` on line 127ff)
/// with the last `cy_n` checked against the `given_cy` given as input.
pub(crate) fn eval_packed_generic_addcy<P: PackedField>(
yield_constr: &mut ConstraintConsumer<P>,
filter: P,
x: &[P],
y: &[P],
z: &[P],
given_cy: &[P],
is_two_row_op: bool,
) {
debug_assert!(
x.len() == N_LIMBS && y.len() == N_LIMBS && z.len() == N_LIMBS && given_cy.len() == N_LIMBS
);
let overflow = P::Scalar::from_canonical_u64(1u64 << LIMB_BITS);
let overflow_inv = P::Scalar::from_canonical_u64(GOLDILOCKS_INVERSE_65536);
debug_assert!(
overflow * overflow_inv == P::Scalar::ONE,
"only works with LIMB_BITS=16 and F=Goldilocks"
);
let mut cy = P::ZEROS;
for ((&xi, &yi), &zi) in x.iter().zip_eq(y).zip_eq(z) {
// Verify that (xi + yi) - zi is either 0 or 2^LIMB_BITS
let t = cy + xi + yi - zi;
if is_two_row_op {
yield_constr.constraint_transition(filter * t * (overflow - t));
} else {
yield_constr.constraint(filter * t * (overflow - t));
}
// cy <-- 0 or 1
// NB: this is multiplication by a constant, so doesn't
// increase the degree of the constraint.
cy = t * overflow_inv;
}
if is_two_row_op {
// NB: Mild hack: We don't check that given_cy[0] is 0 or 1
// when is_two_row_op is true, because that's only the case
// when this function is called from
// modular::modular_constr_poly(), where (1) this condition
// has already been checked, and (2) checking it here would
// exceed the degree budget, since given_cy[0] is already degree 2.
yield_constr.constraint_transition(filter * (cy - given_cy[0]));
for i in 1..N_LIMBS {
yield_constr.constraint_transition(filter * given_cy[i]);
}
} else {
yield_constr.constraint(filter * given_cy[0] * (given_cy[0] - P::ONES));
yield_constr.constraint(filter * (cy - given_cy[0]));
for i in 1..N_LIMBS {
yield_constr.constraint(filter * given_cy[i]);
}
}
}
pub(crate) fn eval_packed_generic<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_add = lv[IS_ADD];
let is_sub = lv[IS_SUB];
let is_lt = lv[IS_LT];
let is_gt = lv[IS_GT];
let in0 = &lv[INPUT_REGISTER_0];
let in1 = &lv[INPUT_REGISTER_1];
let out = &lv[OUTPUT_REGISTER];
let aux = &lv[AUX_INPUT_REGISTER_0];
// Each call is an instance of the identity x + y == z + cy*2^256:
// ADD: in0 + in1 == out + aux*2^256 (aux is the carry, ignored by ADD).
eval_packed_generic_addcy(yield_constr, is_add, in0, in1, out, aux, false);
// SUB: in1 + out == in0 + aux*2^256, i.e. out == in0 - in1 (aux is the borrow).
eval_packed_generic_addcy(yield_constr, is_sub, in1, out, in0, aux, false);
// LT: in1 + aux == in0 + out*2^256, so out == 1 exactly when in0 < in1.
eval_packed_generic_addcy(yield_constr, is_lt, in1, aux, in0, out, false);
// GT: in0 + aux == in1 + out*2^256, so out == 1 exactly when in0 > in1.
eval_packed_generic_addcy(yield_constr, is_gt, in0, aux, in1, out, false);
}
#[allow(clippy::needless_collect)]
pub(crate) fn eval_ext_circuit_addcy<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
filter: ExtensionTarget<D>,
x: &[ExtensionTarget<D>],
y: &[ExtensionTarget<D>],
z: &[ExtensionTarget<D>],
given_cy: &[ExtensionTarget<D>],
is_two_row_op: bool,
) {
debug_assert!(
x.len() == N_LIMBS && y.len() == N_LIMBS && z.len() == N_LIMBS && given_cy.len() == N_LIMBS
);
// 2^LIMB_BITS in the base field
let overflow_base = F::from_canonical_u64(1 << LIMB_BITS);
// 2^LIMB_BITS in the extension field as an ExtensionTarget
let overflow = builder.constant_extension(F::Extension::from(overflow_base));
// 2^-LIMB_BITS in the base field.
let overflow_inv = F::from_canonical_u64(GOLDILOCKS_INVERSE_65536);
let mut cy = builder.zero_extension();
for ((&xi, &yi), &zi) in x.iter().zip_eq(y).zip_eq(z) {
// t0 = cy + xi + yi
let t0 = builder.add_many_extension([cy, xi, yi]);
// t = t0 - zi
let t = builder.sub_extension(t0, zi);
// t1 = overflow - t
let t1 = builder.sub_extension(overflow, t);
// t2 = t * t1
let t2 = builder.mul_extension(t, t1);
let filtered_limb_constraint = builder.mul_extension(filter, t2);
if is_two_row_op {
yield_constr.constraint_transition(builder, filtered_limb_constraint);
} else {
yield_constr.constraint(builder, filtered_limb_constraint);
}
cy = builder.mul_const_extension(overflow_inv, t);
}
let good_cy = builder.sub_extension(cy, given_cy[0]);
let cy_filter = builder.mul_extension(filter, good_cy);
// Check given carry is one bit
let bit_constr = builder.mul_sub_extension(given_cy[0], given_cy[0], given_cy[0]);
let bit_filter = builder.mul_extension(filter, bit_constr);
if is_two_row_op {
yield_constr.constraint_transition(builder, cy_filter);
for i in 1..N_LIMBS {
let t = builder.mul_extension(filter, given_cy[i]);
yield_constr.constraint_transition(builder, t);
}
} else {
yield_constr.constraint(builder, bit_filter);
yield_constr.constraint(builder, cy_filter);
for i in 1..N_LIMBS {
let t = builder.mul_extension(filter, given_cy[i]);
yield_constr.constraint(builder, t);
}
}
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_add = lv[IS_ADD];
let is_sub = lv[IS_SUB];
let is_lt = lv[IS_LT];
let is_gt = lv[IS_GT];
let in0 = &lv[INPUT_REGISTER_0];
let in1 = &lv[INPUT_REGISTER_1];
let out = &lv[OUTPUT_REGISTER];
let aux = &lv[AUX_INPUT_REGISTER_0];
// Same input/output assignments as in `eval_packed_generic` above.
eval_ext_circuit_addcy(builder, yield_constr, is_add, in0, in1, out, aux, false);
eval_ext_circuit_addcy(builder, yield_constr, is_sub, in1, out, in0, aux, false);
eval_ext_circuit_addcy(builder, yield_constr, is_lt, in1, aux, in0, out, false);
eval_ext_circuit_addcy(builder, yield_constr, is_gt, in0, aux, in1, out, false);
}
#[cfg(test)]
mod tests {
use plonky2::field::goldilocks_field::GoldilocksField;
use plonky2::field::types::{Field, Sample};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use starky::constraint_consumer::ConstraintConsumer;
use super::*;
use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
// TODO: Should be able to refactor this test to apply to all operations.
#[test]
fn generate_eval_consistency_not_addcy() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// If the operation filters are all zero, then the
// constraints should be met even if all other values are
// garbage.
lv[IS_ADD] = F::ZERO;
lv[IS_SUB] = F::ZERO;
lv[IS_LT] = F::ZERO;
lv[IS_GT] = F::ZERO;
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
F::ONE,
F::ONE,
F::ONE,
);
eval_packed_generic(&lv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, F::ZERO);
}
}
#[test]
fn generate_eval_consistency_addcy() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
const N_ITERS: usize = 1000;
for _ in 0..N_ITERS {
for op_filter in [IS_ADD, IS_SUB, IS_LT, IS_GT] {
// set entire row to random 16-bit values
let mut lv = [F::default(); NUM_ARITH_COLUMNS]
.map(|_| F::from_canonical_u16(rng.gen::<u16>()));
// Set the operation filter and ensure all constraints are
// satisfied. We have to explicitly set the other operation
// filters to zero, since a single call to
// `eval_packed_generic` checks all four operations.
lv[IS_ADD] = F::ZERO;
lv[IS_SUB] = F::ZERO;
lv[IS_LT] = F::ZERO;
lv[IS_GT] = F::ZERO;
lv[op_filter] = F::ONE;
let left_in = U256::from(rng.gen::<[u8; 32]>());
let right_in = U256::from(rng.gen::<[u8; 32]>());
generate(&mut lv, op_filter, left_in, right_in);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
F::ONE,
F::ONE,
F::ONE,
);
eval_packed_generic(&lv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, F::ZERO);
}
let expected = match op_filter {
IS_ADD => left_in.overflowing_add(right_in).0,
IS_SUB => left_in.overflowing_sub(right_in).0,
IS_LT => U256::from((left_in < right_in) as u8),
IS_GT => U256::from((left_in > right_in) as u8),
_ => panic!("unrecognised operation"),
};
let mut expected_limbs = [F::ZERO; N_LIMBS];
u256_to_array(&mut expected_limbs, expected);
assert!(expected_limbs
.iter()
.zip(&lv[OUTPUT_REGISTER])
.all(|(x, y)| x == y));
}
}
}
}

View File

@ -1,511 +0,0 @@
use core::marker::PhantomData;
use core::ops::Range;
use plonky2::field::extension::{Extendable, FieldExtension};
use plonky2::field::packed::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::util::transpose;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use starky::cross_table_lookup::TableWithColumns;
use starky::evaluation_frame::StarkEvaluationFrame;
use starky::lookup::{Column, Filter, Lookup};
use starky::stark::Stark;
use static_assertions::const_assert;
use super::columns::{op_flags, NUM_ARITH_COLUMNS};
use super::shift;
use crate::all_stark::{EvmStarkFrame, Table};
use crate::arithmetic::columns::{NUM_SHARED_COLS, RANGE_COUNTER, RC_FREQUENCIES, SHARED_COLS};
use crate::arithmetic::{addcy, byte, columns, divmod, modular, mul, Operation};
/// Creates a vector of `Columns` to link the 16-bit columns of the arithmetic table,
/// split into groups of N_LIMBS at a time in `regs`, with the corresponding 32-bit
/// columns of the CPU table. Does this for all ops in `ops`.
///
/// This is done by taking pairs of columns (x, y) of the arithmetic
/// table and combining them as x + y*2^16 to ensure they equal the
/// corresponding 32-bit number in the CPU table.
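///
/// For example (illustrative), the 16-bit limb pair (x, y) =
/// (0x1234, 0x5678) combines to x + y * 2^16 = 0x56781234, matching
/// the corresponding 32-bit limb on the CPU side.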
fn cpu_arith_data_link<F: Field>(
combined_ops: &[(usize, u8)],
regs: &[Range<usize>],
) -> Vec<Column<F>> {
let limb_base = F::from_canonical_u64(1 << columns::LIMB_BITS);
let mut res = vec![Column::linear_combination(
combined_ops
.iter()
.map(|&(col, code)| (col, F::from_canonical_u8(code))),
)];
// The inner for loop below assumes N_LIMBS is even.
const_assert!(columns::N_LIMBS % 2 == 0);
for reg_cols in regs {
// Loop below assumes we're operating on a "register" of N_LIMBS columns.
debug_assert_eq!(reg_cols.len(), columns::N_LIMBS);
for i in 0..(columns::N_LIMBS / 2) {
let c0 = reg_cols.start + 2 * i;
let c1 = reg_cols.start + 2 * i + 1;
res.push(Column::linear_combination([(c0, F::ONE), (c1, limb_base)]));
}
}
res
}
/// Returns the `TableWithColumns` for `ArithmeticStark` rows where one of the arithmetic operations has been called.
pub(crate) fn ctl_arithmetic_rows<F: Field>() -> TableWithColumns<F> {
// We scale each filter flag with the associated opcode value.
// If an arithmetic operation is happening on the CPU side,
// the CTL will enforce that the reconstructed opcode value
// from the opcode bits matches.
// These opcodes are missing the syscall and prover_input opcodes,
// since `IS_RANGE_CHECK` can be associated with multiple opcodes.
// For `IS_RANGE_CHECK`, the opcodes are written in OPCODE_COL,
// and we use that column for scaling and the CTL checks.
// Note that we ensure in the STARK's constraints that the
// value in `OPCODE_COL` is 0 if `IS_RANGE_CHECK` = 0.
const COMBINED_OPS: [(usize, u8); 16] = [
(columns::IS_ADD, 0x01),
(columns::IS_MUL, 0x02),
(columns::IS_SUB, 0x03),
(columns::IS_DIV, 0x04),
(columns::IS_MOD, 0x06),
(columns::IS_ADDMOD, 0x08),
(columns::IS_MULMOD, 0x09),
(columns::IS_ADDFP254, 0x0c),
(columns::IS_MULFP254, 0x0d),
(columns::IS_SUBFP254, 0x0e),
(columns::IS_SUBMOD, 0x0f),
(columns::IS_LT, 0x10),
(columns::IS_GT, 0x11),
(columns::IS_BYTE, 0x1a),
(columns::IS_SHL, 0x1b),
(columns::IS_SHR, 0x1c),
];
const REGISTER_MAP: [Range<usize>; 4] = [
columns::INPUT_REGISTER_0,
columns::INPUT_REGISTER_1,
columns::INPUT_REGISTER_2,
columns::OUTPUT_REGISTER,
];
let mut filter_cols = COMBINED_OPS.to_vec();
filter_cols.push((columns::IS_RANGE_CHECK, 0x01));
let filter = Some(Filter::new_simple(Column::sum(
filter_cols.iter().map(|(c, _v)| *c),
)));
let mut all_combined_cols = COMBINED_OPS.to_vec();
all_combined_cols.push((columns::OPCODE_COL, 0x01));
// Create the arithmetic table whose columns are those of the
// operations listed in `COMBINED_OPS`, with inputs and outputs given
// by `REGISTER_MAP`; each element of `REGISTER_MAP` is a range of
// columns corresponding to a 256-bit input or output register. (The
// operation flags, summed in `filter` above, select the active rows.)
TableWithColumns::new(
*Table::Arithmetic,
cpu_arith_data_link(&all_combined_cols, &REGISTER_MAP),
filter,
)
}
/// Structure representing the `Arithmetic` STARK, which carries out all the arithmetic operations.
#[derive(Copy, Clone, Default)]
pub(crate) struct ArithmeticStark<F, const D: usize> {
pub f: PhantomData<F>,
}
pub(crate) const RANGE_MAX: usize = 1usize << 16; // Range check strict upper bound
impl<F: RichField, const D: usize> ArithmeticStark<F, D> {
/// Expects input in *column*-major layout
fn generate_range_checks(&self, cols: &mut [Vec<F>]) {
debug_assert!(cols.len() == columns::NUM_ARITH_COLUMNS);
let n_rows = cols[0].len();
debug_assert!(cols.iter().all(|col| col.len() == n_rows));
for i in 0..RANGE_MAX {
cols[columns::RANGE_COUNTER][i] = F::from_canonical_usize(i);
}
for i in RANGE_MAX..n_rows {
cols[columns::RANGE_COUNTER][i] = F::from_canonical_usize(RANGE_MAX - 1);
}
// Generate the frequencies column.
for col in SHARED_COLS {
for i in 0..n_rows {
let x = cols[col][i].to_canonical_u64() as usize;
assert!(
x < RANGE_MAX,
"column value {} exceeds the max range value {}",
x,
RANGE_MAX
);
cols[RC_FREQUENCIES][x] += F::ONE;
}
}
}
pub(crate) fn generate_trace(&self, operations: Vec<Operation>) -> Vec<PolynomialValues<F>> {
// The number of rows reserved is the smallest value that's
// guaranteed to avoid a reallocation: The only ops that use
// two rows are the modular operations and DIV, so the only
// way to reach capacity is when every op is modular or DIV
// (which is obviously unlikely in normal
// circumstances). (Also need at least RANGE_MAX rows to
// accommodate range checks.)
let max_rows = std::cmp::max(2 * operations.len(), RANGE_MAX);
let mut trace_rows = Vec::with_capacity(max_rows);
for op in operations {
let (row1, maybe_row2) = op.to_rows();
trace_rows.push(row1);
if let Some(row2) = maybe_row2 {
trace_rows.push(row2);
}
}
// Pad the trace with zero rows if it doesn't have enough rows
// to accommodate the range check columns. Also make sure the
// trace length is a power of two.
let padded_len = trace_rows.len().next_power_of_two();
for _ in trace_rows.len()..std::cmp::max(padded_len, RANGE_MAX) {
trace_rows.push(vec![F::ZERO; columns::NUM_ARITH_COLUMNS]);
}
let mut trace_cols = transpose(&trace_rows);
self.generate_range_checks(&mut trace_cols);
trace_cols.into_iter().map(PolynomialValues::new).collect()
}
}
impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for ArithmeticStark<F, D> {
type EvaluationFrame<FE, P, const D2: usize> = EvmStarkFrame<P, FE, NUM_ARITH_COLUMNS>
where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>;
type EvaluationFrameTarget =
EvmStarkFrame<ExtensionTarget<D>, ExtensionTarget<D>, NUM_ARITH_COLUMNS>;
fn eval_packed_generic<FE, P, const D2: usize>(
&self,
vars: &Self::EvaluationFrame<FE, P, D2>,
yield_constr: &mut ConstraintConsumer<P>,
) where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>,
{
let lv: &[P; NUM_ARITH_COLUMNS] = vars.get_local_values().try_into().unwrap();
let nv: &[P; NUM_ARITH_COLUMNS] = vars.get_next_values().try_into().unwrap();
// Flags must be boolean.
for flag_idx in op_flags() {
let flag = lv[flag_idx];
yield_constr.constraint(flag * (flag - P::ONES));
}
// Only a single flag must be activated at once.
let all_flags = op_flags().map(|i| lv[i]).sum::<P>();
yield_constr.constraint(all_flags * (all_flags - P::ONES));
// Check that `OPCODE_COL` holds 0 if the operation is not a range_check.
let opcode_constraint = (P::ONES - lv[columns::IS_RANGE_CHECK]) * lv[columns::OPCODE_COL];
yield_constr.constraint(opcode_constraint);
// Check the range column: First value must be 0, last row
// must be 2^16-1, and intermediate rows must increment by 0
// or 1.
let rc1 = lv[columns::RANGE_COUNTER];
let rc2 = nv[columns::RANGE_COUNTER];
yield_constr.constraint_first_row(rc1);
let incr = rc2 - rc1;
yield_constr.constraint_transition(incr * incr - incr);
let range_max = P::Scalar::from_canonical_u64((RANGE_MAX - 1) as u64);
yield_constr.constraint_last_row(rc1 - range_max);
// Evaluate constraints for the MUL operation.
mul::eval_packed_generic(lv, yield_constr);
// Evaluate constraints for ADD, SUB, LT and GT operations.
addcy::eval_packed_generic(lv, yield_constr);
// Evaluate constraints for DIV and MOD operations.
divmod::eval_packed(lv, nv, yield_constr);
// Evaluate constraints for ADDMOD, SUBMOD, MULMOD and for FP254 modular operations.
modular::eval_packed(lv, nv, yield_constr);
// Evaluate constraints for the BYTE operation.
byte::eval_packed(lv, yield_constr);
// Evaluate constraints for SHL and SHR operations.
shift::eval_packed_generic(lv, nv, yield_constr);
}
fn eval_ext_circuit(
&self,
builder: &mut CircuitBuilder<F, D>,
vars: &Self::EvaluationFrameTarget,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS] =
vars.get_local_values().try_into().unwrap();
let nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS] =
vars.get_next_values().try_into().unwrap();
// Flags must be boolean.
for flag_idx in op_flags() {
let flag = lv[flag_idx];
let constraint = builder.mul_sub_extension(flag, flag, flag);
yield_constr.constraint(builder, constraint);
}
// Only a single flag must be activated at once.
let all_flags = builder.add_many_extension(op_flags().map(|i| lv[i]));
let constraint = builder.mul_sub_extension(all_flags, all_flags, all_flags);
yield_constr.constraint(builder, constraint);
// Check that `OPCODE_COL` holds 0 if the operation is not a range_check.
let opcode_constraint = builder.arithmetic_extension(
F::NEG_ONE,
F::ONE,
lv[columns::IS_RANGE_CHECK],
lv[columns::OPCODE_COL],
lv[columns::OPCODE_COL],
);
yield_constr.constraint(builder, opcode_constraint);
// Check the range column: First value must be 0, last row
// must be 2^16-1, and intermediate rows must increment by 0
// or 1.
let rc1 = lv[columns::RANGE_COUNTER];
let rc2 = nv[columns::RANGE_COUNTER];
yield_constr.constraint_first_row(builder, rc1);
let incr = builder.sub_extension(rc2, rc1);
let t = builder.mul_sub_extension(incr, incr, incr);
yield_constr.constraint_transition(builder, t);
let range_max =
builder.constant_extension(F::Extension::from_canonical_usize(RANGE_MAX - 1));
let t = builder.sub_extension(rc1, range_max);
yield_constr.constraint_last_row(builder, t);
// Evaluate constraints for the MUL operation.
mul::eval_ext_circuit(builder, lv, yield_constr);
// Evaluate constraints for ADD, SUB, LT and GT operations.
addcy::eval_ext_circuit(builder, lv, yield_constr);
// Evaluate constraints for DIV and MOD operations.
divmod::eval_ext_circuit(builder, lv, nv, yield_constr);
// Evaluate constraints for ADDMOD, SUBMOD, MULMOD and for FP254 modular operations.
modular::eval_ext_circuit(builder, lv, nv, yield_constr);
// Evaluate constraints for the BYTE operation.
byte::eval_ext_circuit(builder, lv, yield_constr);
// Evaluate constraints for SHL and SHR operations.
shift::eval_ext_circuit(builder, lv, nv, yield_constr);
}
fn constraint_degree(&self) -> usize {
3
}
fn lookups(&self) -> Vec<Lookup<F>> {
vec![Lookup {
columns: Column::singles(SHARED_COLS).collect(),
table_column: Column::single(RANGE_COUNTER),
frequencies_column: Column::single(RC_FREQUENCIES),
filter_columns: vec![None; NUM_SHARED_COLS],
}]
}
fn requires_ctls(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use ethereum_types::U256;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::plonk::config::{GenericConfig, PoseidonGoldilocksConfig};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use starky::stark_testing::{test_stark_circuit_constraints, test_stark_low_degree};
use super::{columns, ArithmeticStark};
use crate::arithmetic::columns::OUTPUT_REGISTER;
use crate::arithmetic::*;
#[test]
fn degree() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = ArithmeticStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_low_degree(stark)
}
#[test]
fn circuit() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = ArithmeticStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_circuit_constraints::<F, C, S, D>(stark)
}
#[test]
fn basic_trace() {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = ArithmeticStark<F, D>;
let stark = S {
f: Default::default(),
};
// 123 + 456 == 579
let add = Operation::binary(BinaryOperator::Add, U256::from(123), U256::from(456));
// (123 * 456) % 1007 == 703
let mulmod = Operation::ternary(
TernaryOperator::MulMod,
U256::from(123),
U256::from(456),
U256::from(1007),
);
// (1234 + 567) % 1007 == 794
let addmod = Operation::ternary(
TernaryOperator::AddMod,
U256::from(1234),
U256::from(567),
U256::from(1007),
);
// 123 * 456 == 56088
let mul = Operation::binary(BinaryOperator::Mul, U256::from(123), U256::from(456));
// 128 / 13 == 9
let div = Operation::binary(BinaryOperator::Div, U256::from(128), U256::from(13));
// 128 < 13 == 0
let lt1 = Operation::binary(BinaryOperator::Lt, U256::from(128), U256::from(13));
// 13 < 128 == 1
let lt2 = Operation::binary(BinaryOperator::Lt, U256::from(13), U256::from(128));
// 128 < 128 == 0
let lt3 = Operation::binary(BinaryOperator::Lt, U256::from(128), U256::from(128));
// 128 % 13 == 11
let modop = Operation::binary(BinaryOperator::Mod, U256::from(128), U256::from(13));
// byte(30, 0xABCD) = 0xAB
let byte = Operation::binary(BinaryOperator::Byte, U256::from(30), U256::from(0xABCD));
let ops: Vec<Operation> = vec![add, mulmod, addmod, mul, modop, lt1, lt2, lt3, div, byte];
let pols = stark.generate_trace(ops);
// Trace should always have NUM_ARITH_COLUMNS columns and at least
// RANGE_MAX rows (operation rows padded to a power of two). In this
// case the ops above fill only 14 rows, so we should have RANGE_MAX rows.
assert!(
pols.len() == columns::NUM_ARITH_COLUMNS
&& pols.iter().all(|v| v.len() == super::RANGE_MAX)
);
// Each operation has a single word answer that we can check
let expected_output = [
// Row (some ops take two rows), expected
(0, 579), // ADD_OUTPUT
(1, 703),
(3, 794),
(5, 56088),
(6, 11),
(8, 0),
(9, 1),
(10, 0),
(11, 9),
(13, 0xAB),
];
for (row, expected) in expected_output {
// First register should match expected value...
let first = OUTPUT_REGISTER.start;
let out = pols[first].values[row].to_canonical_u64();
assert_eq!(
out, expected,
"expected column {} on row {} to be {} but it was {}",
first, row, expected, out,
);
// ...other registers should be zero
let rest = OUTPUT_REGISTER.start + 1..OUTPUT_REGISTER.end;
assert!(pols[rest].iter().all(|v| v.values[row] == F::ZERO));
}
}
#[test]
fn big_traces() {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = ArithmeticStark<F, D>;
let stark = S {
f: Default::default(),
};
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let ops = (0..super::RANGE_MAX)
.map(|_| {
Operation::binary(
BinaryOperator::Mul,
U256::from(rng.gen::<[u8; 32]>()),
U256::from(rng.gen::<[u8; 32]>()),
)
})
.collect::<Vec<_>>();
let pols = stark.generate_trace(ops);
// Trace should always have NUM_ARITH_COLUMNS columns and at least
// RANGE_MAX rows. In this case there are RANGE_MAX operations with
// one row each, so the trace has exactly RANGE_MAX rows.
assert!(
pols.len() == columns::NUM_ARITH_COLUMNS
&& pols.iter().all(|v| v.len() == super::RANGE_MAX)
);
let ops = (0..super::RANGE_MAX)
.map(|_| {
Operation::ternary(
TernaryOperator::MulMod,
U256::from(rng.gen::<[u8; 32]>()),
U256::from(rng.gen::<[u8; 32]>()),
U256::from(rng.gen::<[u8; 32]>()),
)
})
.collect::<Vec<_>>();
let pols = stark.generate_trace(ops);
// Trace should always have NUM_ARITH_COLUMNS columns and at least
// RANGE_MAX rows. In this case there are RANGE_MAX operations with
// two rows each, so the trace has 2*RANGE_MAX rows.
assert!(
pols.len() == columns::NUM_ARITH_COLUMNS
&& pols.iter().all(|v| v.len() == 2 * super::RANGE_MAX)
);
}
}

View File

@ -1,502 +0,0 @@
//! Support for the EVM BYTE instruction
//!
//! This crate verifies the EVM BYTE instruction, defined as follows:
//!
//! INPUTS: 256-bit values I and X = \sum_{i=0}^31 X_i B^i,
//! where B = 2^8 and 0 <= X_i < B for all i.
//!
//! OUTPUT: X_{31-I} if 0 <= I < 32, otherwise 0.
//!
//! NB: index I=0 corresponds to byte X_31, i.e. the most significant
//! byte. This is exactly the opposite of what anyone would expect; who
//! knows what the EVM designers were thinking. Anyway, if anything
//! below seems confusing, first check whether you're counting from
//! the wrong end of X, as the spec requires.
//!
//! Wlog consider 0 <= I < 32, so I has five bits b0,...,b4. We are
//! given X as an array of 16-bit limbs; write X := \sum_{i=0}^15 Y_i
//! 2^{16i} where 0 <= Y_i < 2^16.
//!
//! The technique (hat tip to Jacqui for the idea) is to store a tree
//! of limbs of X that are selected according to the bits in I. The
//! main observation is that each bit `bi` halves the number of
//! candidate bytes that we might return: If b4 is 0, then I < 16 and
//! the possible bytes are in the top half of X: Y_8,..,Y_15
//! (corresponding to bytes X_16,..,X_31), and if b4 is 1 then I >= 16
//! and the possible bytes are the bottom half of X: Y_0,..,Y_7
//! (corresponding to bytes X_0,..,X_15).
//!
//! Let Z_0,..,Z_7 be the bytes selected in the first step. Then, in
//! the next step, if b3 is 0, we select Z_4,..,Z_7 and if it's 1 we
//! select Z_0,..,Z_3. Together, b4 and b3 divide the bytes of X into
//! 4 equal-sized chunks of 4 limbs, and the byte we're after will be
//! among the 4 selected limbs.
//!
//! Repeating for b2 and b1, we reduce to a single 16-bit limb
//! L=x+y*256; the desired byte will be x if b0 is 1 and y if b0
//! is 0.
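//!
//! Illustrative walk-through (not part of the original comment): take
//! I = 30, with bits (b4,...,b0) = (1,1,1,1,0). The desired byte is
//! X_{31-30} = X_1, which lives in limb Y_0 = X_0 + X_1 * 2^8.
//! Following the tree: b4 = 1 selects Y_0,..,Y_7; b3 = 1 selects
//! Y_0,..,Y_3; b2 = 1 selects Y_0, Y_1; b1 = 1 selects L = Y_0 =
//! x + y*256 with x = X_0 and y = X_1; finally b0 = 0 selects
//! y = X_1, as required.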
//!
//! -*-
//!
//! To prove that the bytes x and y are in the range [0, 2^8) (rather
//! than [0, 2^16), which is all the range-checker guarantees) we do
//! the following (hat tip to Jacqui for this trick too): Instead of
//! storing x and y, we store w = 256 * x and y. Then, to verify that
//! x, y < 256 and the last limb L = x + y * 256, we check that
//! L = w / 256 + y * 256.
//!
//! The proof of why verifying that L = w / 256 + y * 256
//! suffices is as follows:
//!
//! 1. The given L, w and y are range-checked to be less than 2^16.
//! 2. y * 256 ∈ {0, 256, 512, ..., 2^24 - 512, 2^24 - 256}
//! 3. w / 256 = L - y * 256 ∈ {-2^24 + 256, -2^24 + 257, ..., 2^16 - 2, 2^16 - 1}
//! 4. By inspection, for w < 2^16, if w / 256 < 2^16 or
//! w / 256 >= P - 2^24 + 256 (i.e. if w / 256 falls in the range
//! of point 3 above), then w = 256 * m for some 0 <= m < 256.
//! 5. Hence w / 256 ∈ {0, 1, ..., 255}
//! 6. Hence y * 256 = L - w / 256 ∈ {-255, -254, ..., 2^16 - 1}
//! 7. Taking the intersection of ranges in 2. and 6. we see that
//! y * 256 ∈ {0, 256, 512, ..., 2^16 - 256}
//! 8. Hence y ∈ {0, 1, ..., 255}
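//!
//! Numeric example (illustrative, not part of the original comment):
//! if the selected limb is L = 0xABCD, then x = 0xCD and y = 0xAB; we
//! store w = 256 * x = 0xCD00 together with y, and the check
//! L = w / 256 + y * 256 becomes 0xABCD == 0xCD + 0xAB00, which holds.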
use core::ops::Range;
use ethereum_types::U256;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use static_assertions::const_assert;
use crate::arithmetic::columns::*;
use crate::arithmetic::utils::u256_to_array;
// Give meaningful names to the columns of AUX_INPUT_REGISTER_0 that
// we're using
const BYTE_IDX_DECOMP: Range<usize> = AUX_INPUT_REGISTER_0.start..AUX_INPUT_REGISTER_0.start + 6;
const BYTE_IDX_DECOMP_HI: usize = AUX_INPUT_REGISTER_0.start + 5;
const BYTE_LAST_LIMB_LO: usize = AUX_INPUT_REGISTER_0.start + 6;
const BYTE_LAST_LIMB_HI: usize = AUX_INPUT_REGISTER_0.start + 7;
const BYTE_IDX_IS_LARGE: usize = AUX_INPUT_REGISTER_0.start + 8;
const BYTE_IDX_HI_LIMB_SUM_INV_0: usize = AUX_INPUT_REGISTER_0.start + 9;
const BYTE_IDX_HI_LIMB_SUM_INV_1: usize = AUX_INPUT_REGISTER_0.start + 10;
const BYTE_IDX_HI_LIMB_SUM_INV_2: usize = AUX_INPUT_REGISTER_0.start + 11;
const BYTE_IDX_HI_LIMB_SUM_INV_3: usize = AUX_INPUT_REGISTER_0.start + 12;
/// Decompose `idx` into bits and bobs and store in `idx_decomp`.
///
/// Specifically, write
///
/// idx = idx0_lo5 + idx0_hi * 2^5 + \sum_i idx[i] * 2^(16i),
///
/// where `0 <= idx0_lo5 < 32` and `0 <= idx0_hi < 2^11`. Store the
/// 5 bits of `idx0_lo5` in `idx_decomp[0..5]`; we don't explicitly need
/// the higher 11 bits of the first limb, so we put them in
/// `idx_decomp[5]`. The rest of `idx_decomp` is set to 0.
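///
/// For example (illustrative): idx = 100 = 0b110_0100 gives the low
/// five bits [0, 0, 1, 0, 0] (i.e. idx0_lo5 = 4) in idx_decomp[0..5],
/// and idx0_hi = 100 >> 5 = 3 in idx_decomp[5]; indeed 4 + 3 * 32 = 100.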
fn set_idx_decomp<F: PrimeField64>(idx_decomp: &mut [F], idx: &U256) {
debug_assert!(idx_decomp.len() == 6);
for i in 0..5 {
idx_decomp[i] = F::from_bool(idx.bit(i));
}
idx_decomp[5] = F::from_canonical_u16((idx.low_u64() as u16) >> 5);
}
pub(crate) fn generate<F: PrimeField64>(lv: &mut [F], idx: U256, val: U256) {
u256_to_array(&mut lv[INPUT_REGISTER_0], idx);
u256_to_array(&mut lv[INPUT_REGISTER_1], val);
set_idx_decomp(&mut lv[BYTE_IDX_DECOMP], &idx);
let idx0_hi = lv[BYTE_IDX_DECOMP_HI];
let hi_limb_sum = lv[INPUT_REGISTER_0][1..]
.iter()
.fold(idx0_hi, |acc, &x| acc + x);
let hi_limb_sum_inv = hi_limb_sum
.try_inverse()
.unwrap_or(F::ONE)
.to_canonical_u64();
// It's a bit silly that we have to split this value, which
// doesn't need to be range-checked, into 16-bit limbs so that it
// can be range-checked; but the rigidity of the range-checking
// mechanism means we can't optionally switch it off for some
// instructions.
lv[BYTE_IDX_HI_LIMB_SUM_INV_0] = F::from_canonical_u16(hi_limb_sum_inv as u16);
lv[BYTE_IDX_HI_LIMB_SUM_INV_1] = F::from_canonical_u16((hi_limb_sum_inv >> 16) as u16);
lv[BYTE_IDX_HI_LIMB_SUM_INV_2] = F::from_canonical_u16((hi_limb_sum_inv >> 32) as u16);
lv[BYTE_IDX_HI_LIMB_SUM_INV_3] = F::from_canonical_u16((hi_limb_sum_inv >> 48) as u16);
lv[BYTE_IDX_IS_LARGE] = F::from_bool(!hi_limb_sum.is_zero());
// Set the tree values according to the low 5 bits of idx, even
// when idx >= 32.
// Use the bits of idx0 to build a multiplexor that selects
// the correct byte of val. Each level of the tree uses one
// bit to halve the set of possible bytes from the previous
// level. The tree stores limbs rather than bytes though, so
// the last value must be handled specially.
// Morally, the offset at level i is 2^i * bit[i], but because of
// the reversed indexing and the separate handling of the last
// element, the offset is 2^i * ( ! bit[i + 1] ). (The !bit
// corresponds to calculating 31 - idx, which on 5 bits is just
// bitwise NOT.)
// `lvl_len` is the number of elements of the current level of the
// "tree". One can think of `val_limbs` as level 0, with length =
// N_LIMBS = 16.
const_assert!(N_LIMBS == 16); // Enforce assumption
// Build the tree of limbs from the low 5 bits of idx:
let mut i = 3; // tree level, from 3 downto 0.
let mut src = INPUT_REGISTER_1.start; // val_limbs start
let mut dest = AUX_INPUT_REGISTER_1.start; // tree start
loop {
let lvl_len = 1 << i;
// pick which half of src becomes the new tree level
let offset = (!idx.bit(i + 1) as usize) * lvl_len;
src += offset;
// copy new tree level to dest
lv.copy_within(src..src + lvl_len, dest);
if i == 0 {
break;
}
// next src is this new tree level
src = dest;
// next dest is after this new tree level
dest += lvl_len;
i -= 1;
}
// Handle the last bit; i.e. pick a byte of the final limb.
let t = lv[dest].to_canonical_u64();
let lo = t as u8 as u64;
let hi = t >> 8;
// Store 256 * lo rather than lo:
lv[BYTE_LAST_LIMB_LO] = F::from_canonical_u64(lo << 8);
lv[BYTE_LAST_LIMB_HI] = F::from_canonical_u64(hi);
let tree = &mut lv[AUX_INPUT_REGISTER_1];
let output = if idx.bit(0) {
tree[15] = F::from_canonical_u64(lo);
lo.into()
} else {
tree[15] = F::from_canonical_u64(hi);
hi.into()
};
u256_to_array(
&mut lv[OUTPUT_REGISTER],
if idx < 32.into() {
output
} else {
U256::zero()
},
);
}
pub(crate) fn eval_packed<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_byte = lv[IS_BYTE];
let idx = &lv[INPUT_REGISTER_0];
let val = &lv[INPUT_REGISTER_1];
let out = &lv[OUTPUT_REGISTER];
let idx_decomp = &lv[AUX_INPUT_REGISTER_0];
let tree = &lv[AUX_INPUT_REGISTER_1];
// low 5 bits of the first limb of idx:
let mut idx0_lo5 = P::ZEROS;
for i in 0..5 {
let bit = idx_decomp[i];
yield_constr.constraint(is_byte * (bit * bit - bit));
idx0_lo5 += bit * P::Scalar::from_canonical_u64(1 << i);
}
// Verify that idx0_hi is the high (11) bits of the first limb of
// idx (in particular idx0_hi is at most 11 bits, since idx[0] is
// at most 16 bits).
let idx0_hi = idx_decomp[5] * P::Scalar::from_canonical_u64(32u64);
yield_constr.constraint(is_byte * (idx[0] - (idx0_lo5 + idx0_hi)));
// Verify the layers of the tree
// NB: Each of the bit values is negated in place to account for
// the reversed indexing.
let bit = idx_decomp[4];
for i in 0..8 {
let limb = bit * val[i] + (P::ONES - bit) * val[i + 8];
yield_constr.constraint(is_byte * (tree[i] - limb));
}
let bit = idx_decomp[3];
for i in 0..4 {
let limb = bit * tree[i] + (P::ONES - bit) * tree[i + 4];
yield_constr.constraint(is_byte * (tree[i + 8] - limb));
}
let bit = idx_decomp[2];
for i in 0..2 {
let limb = bit * tree[i + 8] + (P::ONES - bit) * tree[i + 10];
yield_constr.constraint(is_byte * (tree[i + 12] - limb));
}
let bit = idx_decomp[1];
let limb = bit * tree[12] + (P::ONES - bit) * tree[13];
yield_constr.constraint(is_byte * (tree[14] - limb));
// Check byte decomposition of last limb:
let base8 = P::Scalar::from_canonical_u64(1 << 8);
let lo_byte = lv[BYTE_LAST_LIMB_LO];
let hi_byte = lv[BYTE_LAST_LIMB_HI];
yield_constr.constraint(is_byte * (lo_byte + base8 * (base8 * hi_byte - limb)));
let bit = idx_decomp[0];
let t = bit * lo_byte + (P::ONES - bit) * base8 * hi_byte;
yield_constr.constraint(is_byte * (base8 * tree[15] - t));
let expected_out_byte = tree[15];
// Sum all higher limbs; sum will be non-zero iff idx >= 32.
let hi_limb_sum = lv[BYTE_IDX_DECOMP_HI] + idx[1..].iter().copied().sum::<P>();
let idx_is_large = lv[BYTE_IDX_IS_LARGE];
// idx_is_large is 0 or 1
yield_constr.constraint(is_byte * (idx_is_large * idx_is_large - idx_is_large));
// If hi_limb_sum is nonzero, then idx_is_large must be one.
yield_constr.constraint(is_byte * hi_limb_sum * (idx_is_large - P::ONES));
let hi_limb_sum_inv = lv[BYTE_IDX_HI_LIMB_SUM_INV_0]
+ lv[BYTE_IDX_HI_LIMB_SUM_INV_1] * P::Scalar::from_canonical_u64(1 << 16)
+ lv[BYTE_IDX_HI_LIMB_SUM_INV_2] * P::Scalar::from_canonical_u64(1 << 32)
+ lv[BYTE_IDX_HI_LIMB_SUM_INV_3] * P::Scalar::from_canonical_u64(1 << 48);
// If idx_is_large is 1, then hi_limb_sum_inv must be the inverse
// of hi_limb_sum, hence hi_limb_sum is non-zero, hence idx is
// indeed "large".
//
// Otherwise, if idx_is_large is 0, then hi_limb_sum * hi_limb_sum_inv
// is zero, which is only possible if hi_limb_sum is zero, since
// hi_limb_sum_inv is non-zero.
yield_constr.constraint(is_byte * (hi_limb_sum * hi_limb_sum_inv - idx_is_large));
let out_byte = out[0];
let check = out_byte - (P::ONES - idx_is_large) * expected_out_byte;
yield_constr.constraint(is_byte * check);
// Check that the rest of the output limbs are zero
for i in 1..N_LIMBS {
yield_constr.constraint(is_byte * out[i]);
}
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_byte = lv[IS_BYTE];
let idx = &lv[INPUT_REGISTER_0];
let val = &lv[INPUT_REGISTER_1];
let out = &lv[OUTPUT_REGISTER];
let idx_decomp = &lv[AUX_INPUT_REGISTER_0];
let tree = &lv[AUX_INPUT_REGISTER_1];
// low 5 bits of the first limb of idx:
let mut idx0_lo5 = builder.zero_extension();
for i in 0..5 {
let bit = idx_decomp[i];
let t = builder.mul_sub_extension(bit, bit, bit);
let t = builder.mul_extension(t, is_byte);
yield_constr.constraint(builder, t);
let scale = F::Extension::from(F::from_canonical_u64(1 << i));
let scale = builder.constant_extension(scale);
idx0_lo5 = builder.mul_add_extension(bit, scale, idx0_lo5);
}
// Verify that idx0_hi is the high (11) bits of the first limb of
// idx (in particular idx0_hi is at most 11 bits, since idx[0] is
// at most 16 bits).
let t = F::Extension::from(F::from_canonical_u64(32));
let t = builder.constant_extension(t);
let t = builder.mul_add_extension(idx_decomp[5], t, idx0_lo5);
let t = builder.sub_extension(idx[0], t);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
// Verify the layers of the tree
// NB: Each of the bit values is negated in place to account for
// the reversed indexing.
let one = builder.one_extension();
let bit = idx_decomp[4];
for i in 0..8 {
let t = builder.mul_extension(bit, val[i]);
let u = builder.sub_extension(one, bit);
let v = builder.mul_add_extension(u, val[i + 8], t);
let t = builder.sub_extension(tree[i], v);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
}
let bit = idx_decomp[3];
for i in 0..4 {
let t = builder.mul_extension(bit, tree[i]);
let u = builder.sub_extension(one, bit);
let v = builder.mul_add_extension(u, tree[i + 4], t);
let t = builder.sub_extension(tree[i + 8], v);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
}
let bit = idx_decomp[2];
for i in 0..2 {
let t = builder.mul_extension(bit, tree[i + 8]);
let u = builder.sub_extension(one, bit);
let v = builder.mul_add_extension(u, tree[i + 10], t);
let t = builder.sub_extension(tree[i + 12], v);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
}
let bit = idx_decomp[1];
let t = builder.mul_extension(bit, tree[12]);
let u = builder.sub_extension(one, bit);
let limb = builder.mul_add_extension(u, tree[13], t);
let t = builder.sub_extension(tree[14], limb);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
// Check byte decomposition of last limb:
let base8 = F::Extension::from(F::from_canonical_u64(1 << 8));
let base8 = builder.constant_extension(base8);
let lo_byte = lv[BYTE_LAST_LIMB_LO];
let hi_byte = lv[BYTE_LAST_LIMB_HI];
let t = builder.mul_sub_extension(base8, hi_byte, limb);
let t = builder.mul_add_extension(base8, t, lo_byte);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
let bit = idx_decomp[0];
let nbit = builder.sub_extension(one, bit);
let t = builder.mul_many_extension([nbit, base8, hi_byte]);
let t = builder.mul_add_extension(bit, lo_byte, t);
let t = builder.mul_sub_extension(base8, tree[15], t);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
let expected_out_byte = tree[15];
// Sum all higher limbs; sum will be non-zero iff idx >= 32.
let mut hi_limb_sum = lv[BYTE_IDX_DECOMP_HI];
for i in 1..N_LIMBS {
hi_limb_sum = builder.add_extension(hi_limb_sum, idx[i]);
}
// idx_is_large is 0 or 1
let idx_is_large = lv[BYTE_IDX_IS_LARGE];
let t = builder.mul_sub_extension(idx_is_large, idx_is_large, idx_is_large);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
// If hi_limb_sum is nonzero, then idx_is_large must be one.
let t = builder.sub_extension(idx_is_large, one);
let t = builder.mul_many_extension([is_byte, hi_limb_sum, t]);
yield_constr.constraint(builder, t);
// If idx_is_large is 1, then hi_limb_sum_inv must be the inverse
// of hi_limb_sum, hence hi_limb_sum is non-zero, hence idx is
// indeed "large".
//
// Otherwise, if idx_is_large is 0, then hi_limb_sum * hi_limb_sum_inv
// is zero, which is only possible if hi_limb_sum is zero, since
// hi_limb_sum_inv is non-zero.
let base16 = F::from_canonical_u64(1 << 16);
let hi_limb_sum_inv = builder.mul_const_add_extension(
base16,
lv[BYTE_IDX_HI_LIMB_SUM_INV_3],
lv[BYTE_IDX_HI_LIMB_SUM_INV_2],
);
let hi_limb_sum_inv =
builder.mul_const_add_extension(base16, hi_limb_sum_inv, lv[BYTE_IDX_HI_LIMB_SUM_INV_1]);
let hi_limb_sum_inv =
builder.mul_const_add_extension(base16, hi_limb_sum_inv, lv[BYTE_IDX_HI_LIMB_SUM_INV_0]);
let t = builder.mul_sub_extension(hi_limb_sum, hi_limb_sum_inv, idx_is_large);
let t = builder.mul_extension(is_byte, t);
yield_constr.constraint(builder, t);
let out_byte = out[0];
let t = builder.sub_extension(one, idx_is_large);
let t = builder.mul_extension(t, expected_out_byte);
let check = builder.sub_extension(out_byte, t);
let t = builder.mul_extension(is_byte, check);
yield_constr.constraint(builder, t);
// Check that the rest of the output limbs are zero
for i in 1..N_LIMBS {
let t = builder.mul_extension(is_byte, out[i]);
yield_constr.constraint(builder, t);
}
}
#[cfg(test)]
mod tests {
use plonky2::field::goldilocks_field::GoldilocksField;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use super::*;
use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
type F = GoldilocksField;
fn verify_output(lv: &[F], expected_byte: u64) {
let out_byte = lv[OUTPUT_REGISTER][0].to_canonical_u64();
assert!(out_byte == expected_byte);
for j in 1..N_LIMBS {
assert!(lv[OUTPUT_REGISTER][j] == F::ZERO);
}
}
#[test]
fn generate_eval_consistency() {
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
const N_ITERS: usize = 1000;
for _ in 0..N_ITERS {
// set entire row to random 16-bit values
let mut lv =
[F::default(); NUM_ARITH_COLUMNS].map(|_| F::from_canonical_u16(rng.gen::<u16>()));
lv[IS_BYTE] = F::ONE;
let val = U256::from(rng.gen::<[u8; 32]>());
for i in 0..32 {
let idx = i.into();
generate(&mut lv, idx, val);
// Check correctness
let out_byte = val.byte(31 - i) as u64;
verify_output(&lv, out_byte);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
F::ONE,
F::ONE,
F::ONE,
);
eval_packed(&lv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, F::ZERO);
}
}
// Check that output is zero when the index is big.
let big_indices = [32.into(), 33.into(), val, U256::max_value()];
for idx in big_indices {
generate(&mut lv, idx, val);
verify_output(&lv, 0);
}
}
}
}

View File

@ -1,119 +0,0 @@
//! Arithmetic unit
use core::ops::Range;
pub(crate) const LIMB_BITS: usize = 16;
const EVM_REGISTER_BITS: usize = 256;
/// Return the number of LIMB_BITS limbs that are in an EVM
/// register-sized number, panicking if LIMB_BITS doesn't divide
/// the EVM register size.
const fn n_limbs() -> usize {
if EVM_REGISTER_BITS % LIMB_BITS != 0 {
panic!("limb size must divide EVM register size");
}
let n = EVM_REGISTER_BITS / LIMB_BITS;
if n % 2 == 1 {
panic!("number of limbs must be even");
}
n
}
/// Number of LIMB_BITS limbs that are in an EVM register-sized number.
pub(crate) const N_LIMBS: usize = n_limbs();
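// For example, with LIMB_BITS = 16 this gives N_LIMBS = 256 / 16 = 16
// limbs per EVM register.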
pub(crate) const IS_ADD: usize = 0;
pub(crate) const IS_MUL: usize = IS_ADD + 1;
pub(crate) const IS_SUB: usize = IS_MUL + 1;
pub(crate) const IS_DIV: usize = IS_SUB + 1;
pub(crate) const IS_MOD: usize = IS_DIV + 1;
pub(crate) const IS_ADDMOD: usize = IS_MOD + 1;
pub(crate) const IS_MULMOD: usize = IS_ADDMOD + 1;
pub(crate) const IS_ADDFP254: usize = IS_MULMOD + 1;
pub(crate) const IS_MULFP254: usize = IS_ADDFP254 + 1;
pub(crate) const IS_SUBFP254: usize = IS_MULFP254 + 1;
pub(crate) const IS_SUBMOD: usize = IS_SUBFP254 + 1;
pub(crate) const IS_LT: usize = IS_SUBMOD + 1;
pub(crate) const IS_GT: usize = IS_LT + 1;
pub(crate) const IS_BYTE: usize = IS_GT + 1;
pub(crate) const IS_SHL: usize = IS_BYTE + 1;
pub(crate) const IS_SHR: usize = IS_SHL + 1;
pub(crate) const IS_RANGE_CHECK: usize = IS_SHR + 1;
/// Column that stores the opcode if the operation is a range check.
pub(crate) const OPCODE_COL: usize = IS_RANGE_CHECK + 1;
pub(crate) const START_SHARED_COLS: usize = OPCODE_COL + 1;
pub(crate) const fn op_flags() -> Range<usize> {
IS_ADD..IS_RANGE_CHECK + 1
}
/// Within the Arithmetic Unit, there are shared columns which can be
/// used by any arithmetic circuit, depending on which one is active
/// this cycle.
///
/// Modular arithmetic takes 11 * N_LIMBS columns which is split across
/// two rows, the first with 6 * N_LIMBS columns and the second with
/// 5 * N_LIMBS columns. (There are hence N_LIMBS "wasted columns" in
/// the second row.)
pub(crate) const NUM_SHARED_COLS: usize = 6 * N_LIMBS;
pub(crate) const SHARED_COLS: Range<usize> = START_SHARED_COLS..START_SHARED_COLS + NUM_SHARED_COLS;
pub(crate) const INPUT_REGISTER_0: Range<usize> = START_SHARED_COLS..START_SHARED_COLS + N_LIMBS;
pub(crate) const INPUT_REGISTER_1: Range<usize> =
INPUT_REGISTER_0.end..INPUT_REGISTER_0.end + N_LIMBS;
pub(crate) const INPUT_REGISTER_2: Range<usize> =
INPUT_REGISTER_1.end..INPUT_REGISTER_1.end + N_LIMBS;
pub(crate) const OUTPUT_REGISTER: Range<usize> =
INPUT_REGISTER_2.end..INPUT_REGISTER_2.end + N_LIMBS;
// NB: Only one of AUX_INPUT_REGISTER_[01] or AUX_INPUT_REGISTER_DBL
// will be used for a given operation since they overlap
pub(crate) const AUX_INPUT_REGISTER_0: Range<usize> =
OUTPUT_REGISTER.end..OUTPUT_REGISTER.end + N_LIMBS;
pub(crate) const AUX_INPUT_REGISTER_1: Range<usize> =
AUX_INPUT_REGISTER_0.end..AUX_INPUT_REGISTER_0.end + N_LIMBS;
pub(crate) const AUX_INPUT_REGISTER_DBL: Range<usize> =
OUTPUT_REGISTER.end..OUTPUT_REGISTER.end + 2 * N_LIMBS;
// The auxiliary input columns overlap the general input columns
// because they correspond to the values in the second row for modular
// operations.
const AUX_REGISTER_0: Range<usize> = START_SHARED_COLS..START_SHARED_COLS + N_LIMBS;
const AUX_REGISTER_1: Range<usize> = AUX_REGISTER_0.end..AUX_REGISTER_0.end + 2 * N_LIMBS;
const AUX_REGISTER_2: Range<usize> = AUX_REGISTER_1.end..AUX_REGISTER_1.end + 2 * N_LIMBS - 1;
// Each element c of {MUL,MODULAR}_AUX_REGISTER satisfies
// -2^20 <= c <= 2^20; AUX_COEFF_ABS_MAX is added as an offset so that
// every checked value is positive in the range checks.
pub(crate) const AUX_COEFF_ABS_MAX: i64 = 1 << 20;
// MUL takes 5 * N_LIMBS = 80 columns
pub(crate) const MUL_AUX_INPUT_LO: Range<usize> = AUX_INPUT_REGISTER_0;
pub(crate) const MUL_AUX_INPUT_HI: Range<usize> = AUX_INPUT_REGISTER_1;
// MULMOD takes 4 * N_LIMBS + 3 * 2*N_LIMBS + N_LIMBS = 176 columns
// but split over two rows of 96 columns and 80 columns.
//
// ADDMOD, SUBMOD, MOD and DIV are currently implemented in terms of
// the general modular code, so they also take 144 columns (also split
// over two rows).
pub(crate) const MODULAR_INPUT_0: Range<usize> = INPUT_REGISTER_0;
pub(crate) const MODULAR_INPUT_1: Range<usize> = INPUT_REGISTER_1;
pub(crate) const MODULAR_MODULUS: Range<usize> = INPUT_REGISTER_2;
pub(crate) const MODULAR_OUTPUT: Range<usize> = OUTPUT_REGISTER;
pub(crate) const MODULAR_QUO_INPUT: Range<usize> = AUX_INPUT_REGISTER_DBL;
pub(crate) const MODULAR_OUT_AUX_RED: Range<usize> = AUX_REGISTER_0;
// NB: Last value is not used in AUX, it is used in MOD_IS_ZERO
pub(crate) const MODULAR_MOD_IS_ZERO: usize = AUX_REGISTER_1.start;
pub(crate) const MODULAR_AUX_INPUT_LO: Range<usize> = AUX_REGISTER_1.start + 1..AUX_REGISTER_1.end;
pub(crate) const MODULAR_AUX_INPUT_HI: Range<usize> = AUX_REGISTER_2;
// Must be set to MOD_IS_ZERO for DIV and SHR operations i.e. MOD_IS_ZERO * (lv[IS_DIV] + lv[IS_SHR]).
pub(crate) const MODULAR_DIV_DENOM_IS_ZERO: usize = AUX_REGISTER_2.end;
/// The counter column (used for the range check) starts from 0 and increments.
pub(crate) const RANGE_COUNTER: usize = START_SHARED_COLS + NUM_SHARED_COLS;
/// The frequencies column used in logUp.
pub(crate) const RC_FREQUENCIES: usize = RANGE_COUNTER + 1;
/// Number of columns in `ArithmeticStark`.
pub(crate) const NUM_ARITH_COLUMNS: usize = START_SHARED_COLS + NUM_SHARED_COLS + 2;
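// Illustrative tally (derived from the layout above): 17 operation flags
// + OPCODE_COL + 96 shared columns + RANGE_COUNTER + RC_FREQUENCIES
// = 18 + 96 + 2 = 116 columns in total.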

View File

@ -1,378 +0,0 @@
//! Support for EVM instructions DIV and MOD.
//!
//! The logic for verifying them is detailed in the `modular` submodule.
use core::ops::Range;
use ethereum_types::U256;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::PrimeField64;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::arithmetic::columns::*;
use crate::arithmetic::modular::{
generate_modular_op, modular_constr_poly, modular_constr_poly_ext_circuit,
};
use crate::arithmetic::utils::*;
/// Generates the output and auxiliary values for modular operations,
/// assuming the input, modulus and output limbs are already set.
pub(crate) fn generate_divmod<F: PrimeField64>(
lv: &mut [F],
nv: &mut [F],
filter: usize,
input_limbs_range: Range<usize>,
modulus_range: Range<usize>,
) {
let input_limbs = read_value_i64_limbs::<N_LIMBS, _>(lv, input_limbs_range);
let pol_input = pol_extend(input_limbs);
let (out, quo_input) = generate_modular_op(lv, nv, filter, pol_input, modulus_range);
debug_assert!(
&quo_input[N_LIMBS..].iter().all(|&x| x == F::ZERO),
"expected top half of quo_input to be zero"
);
// Initialise the whole (double) register to zero; the low half will
// be overwritten via lv[AUX_INPUT_REGISTER_0] below.
for i in MODULAR_QUO_INPUT {
lv[i] = F::ZERO;
}
match filter {
IS_DIV | IS_SHR => {
debug_assert!(
lv[OUTPUT_REGISTER]
.iter()
.zip(&quo_input[..N_LIMBS])
.all(|(x, y)| x == y),
"computed output doesn't match expected"
);
lv[AUX_INPUT_REGISTER_0].copy_from_slice(&out);
}
IS_MOD => {
debug_assert!(
lv[OUTPUT_REGISTER].iter().zip(&out).all(|(x, y)| x == y),
"computed output doesn't match expected"
);
lv[AUX_INPUT_REGISTER_0].copy_from_slice(&quo_input[..N_LIMBS]);
}
_ => panic!("expected filter to be IS_DIV, IS_SHR or IS_MOD but it was {filter}"),
};
}
/// Generate the output and auxiliary values for modular operations.
pub(crate) fn generate<F: PrimeField64>(
lv: &mut [F],
nv: &mut [F],
filter: usize,
input0: U256,
input1: U256,
result: U256,
) {
debug_assert!(lv.len() == NUM_ARITH_COLUMNS);
u256_to_array(&mut lv[INPUT_REGISTER_0], input0);
u256_to_array(&mut lv[INPUT_REGISTER_1], input1);
u256_to_array(&mut lv[OUTPUT_REGISTER], result);
generate_divmod(lv, nv, filter, INPUT_REGISTER_0, INPUT_REGISTER_1);
}
/// Verify that num = quo * den + rem and 0 <= rem < den.
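/// For example (illustrative): for DIV with num = 128 and den = 13,
/// the prover supplies quo = 9 and rem = 11, and indeed
/// 128 == 9 * 13 + 11 with 0 <= 11 < 13.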
pub(crate) fn eval_packed_divmod_helper<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
nv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
filter: P,
num_range: Range<usize>,
den_range: Range<usize>,
quo_range: Range<usize>,
rem_range: Range<usize>,
) {
debug_assert!(quo_range.len() == N_LIMBS);
debug_assert!(rem_range.len() == N_LIMBS);
// Forbid this operation on the last row, since its constraints
// read the next row via `constraint_transition`.
yield_constr.constraint_last_row(filter);
let num = &lv[num_range];
let den = read_value(lv, den_range);
let quo = {
let mut quo = [P::ZEROS; 2 * N_LIMBS];
quo[..N_LIMBS].copy_from_slice(&lv[quo_range]);
quo
};
let rem = read_value(lv, rem_range);
let mut constr_poly = modular_constr_poly(lv, nv, yield_constr, filter, rem, den, quo);
let input = num;
pol_sub_assign(&mut constr_poly, input);
for &c in constr_poly.iter() {
yield_constr.constraint_transition(filter * c);
}
}
pub(crate) fn eval_packed<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
nv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
eval_packed_divmod_helper(
lv,
nv,
yield_constr,
lv[IS_DIV],
INPUT_REGISTER_0,
INPUT_REGISTER_1,
OUTPUT_REGISTER,
AUX_INPUT_REGISTER_0,
);
eval_packed_divmod_helper(
lv,
nv,
yield_constr,
lv[IS_MOD],
INPUT_REGISTER_0,
INPUT_REGISTER_1,
AUX_INPUT_REGISTER_0,
OUTPUT_REGISTER,
);
}
pub(crate) fn eval_ext_circuit_divmod_helper<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
filter: ExtensionTarget<D>,
num_range: Range<usize>,
den_range: Range<usize>,
quo_range: Range<usize>,
rem_range: Range<usize>,
) {
yield_constr.constraint_last_row(builder, filter);
let num = &lv[num_range];
let den = read_value(lv, den_range);
let quo = {
let zero = builder.zero_extension();
let mut quo = [zero; 2 * N_LIMBS];
quo[..N_LIMBS].copy_from_slice(&lv[quo_range]);
quo
};
let rem = read_value(lv, rem_range);
let mut constr_poly =
modular_constr_poly_ext_circuit(lv, nv, builder, yield_constr, filter, rem, den, quo);
let input = num;
pol_sub_assign_ext_circuit(builder, &mut constr_poly, input);
for &c in constr_poly.iter() {
let t = builder.mul_extension(filter, c);
yield_constr.constraint_transition(builder, t);
}
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
eval_ext_circuit_divmod_helper(
builder,
lv,
nv,
yield_constr,
lv[IS_DIV],
INPUT_REGISTER_0,
INPUT_REGISTER_1,
OUTPUT_REGISTER,
AUX_INPUT_REGISTER_0,
);
eval_ext_circuit_divmod_helper(
builder,
lv,
nv,
yield_constr,
lv[IS_MOD],
INPUT_REGISTER_0,
INPUT_REGISTER_1,
AUX_INPUT_REGISTER_0,
OUTPUT_REGISTER,
);
}
#[cfg(test)]
mod tests {
use plonky2::field::goldilocks_field::GoldilocksField;
use plonky2::field::types::{Field, Sample};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use starky::constraint_consumer::ConstraintConsumer;
use super::*;
use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
const N_RND_TESTS: usize = 1000;
const MODULAR_OPS: [usize; 2] = [IS_MOD, IS_DIV];
// TODO: Should be able to refactor this test to apply to all operations.
#[test]
fn generate_eval_consistency_not_modular() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
let nv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// if `IS_MOD == 0`, then the constraints should be met even
// if all values are garbage (and similarly for the other operations).
for op in MODULAR_OPS {
lv[op] = F::ZERO;
}
// Since SHR uses the logic for DIV, `IS_SHR` should also be set to 0 here.
lv[IS_SHR] = F::ZERO;
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ONE,
);
eval_packed(&lv, &nv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
#[test]
fn generate_eval_consistency() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
for op_filter in MODULAR_OPS {
for i in 0..N_RND_TESTS {
// set inputs to random values
let mut lv = [F::default(); NUM_ARITH_COLUMNS]
.map(|_| F::from_canonical_u16(rng.gen::<u16>()));
let mut nv = [F::default(); NUM_ARITH_COLUMNS]
.map(|_| F::from_canonical_u16(rng.gen::<u16>()));
// Reset operation columns, then select one
for op in MODULAR_OPS {
lv[op] = F::ZERO;
}
// Since SHR uses the logic for DIV, `IS_SHR` should also be set to 0 here.
lv[IS_SHR] = F::ZERO;
lv[op_filter] = F::ONE;
let input0 = U256::from(rng.gen::<[u8; 32]>());
let input1 = {
// Sample a random "modulus" (without this, the branch
// below would have no effect and input1 would always be zero).
let mut modulus_limbs = rng.gen::<[u8; 32]>();
// For the second half of the tests, zero out the bytes of
// the "modulus" from a random position onwards, so the tests
// also cover moduli with many zero digits.
if i > N_RND_TESTS / 2 {
// 1 <= start < modulus_limbs.len()
let start = (rng.gen::<usize>() % (modulus_limbs.len() - 1)) + 1;
for mi in modulus_limbs.iter_mut().skip(start) {
*mi = 0u8;
}
}
U256::from(modulus_limbs)
};
let result = if input1 == U256::zero() {
U256::zero()
} else if op_filter == IS_DIV {
input0 / input1
} else {
input0 % input1
};
generate(&mut lv, &mut nv, op_filter, input0, input1, result);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ZERO,
GoldilocksField::ZERO,
);
eval_packed(&lv, &nv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
}
}
#[test]
fn zero_modulus() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
for op_filter in MODULAR_OPS {
for _i in 0..N_RND_TESTS {
for corrupt_constraints in [false, true] {
// set inputs to random values and the modulus to zero;
// the output is defined to be zero when modulus is zero.
let mut lv = [F::default(); NUM_ARITH_COLUMNS]
.map(|_| F::from_canonical_u16(rng.gen::<u16>()));
let mut nv = [F::default(); NUM_ARITH_COLUMNS]
.map(|_| F::from_canonical_u16(rng.gen::<u16>()));
// Reset operation columns, then select one
for op in MODULAR_OPS {
lv[op] = F::ZERO;
}
// Since SHR uses the logic for DIV, `IS_SHR` should also be set to 0 here.
lv[IS_SHR] = F::ZERO;
lv[op_filter] = F::ONE;
let input0 = U256::from(rng.gen::<[u8; 32]>());
let input1 = U256::zero();
generate(&mut lv, &mut nv, op_filter, input0, input1, U256::zero());
// check that the correct output was generated
assert!(lv[OUTPUT_REGISTER].iter().all(|&c| c == F::ZERO));
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ZERO,
GoldilocksField::ZERO,
);
eval_packed(&lv, &nv, &mut constraint_consumer);
if corrupt_constraints {
// Corrupt one output limb by setting it to a non-zero value.
let random_oi = OUTPUT_REGISTER.start + rng.gen::<usize>() % N_LIMBS;
lv[random_oi] = F::from_canonical_u16(rng.gen_range(1..u16::MAX));
eval_packed(&lv, &nv, &mut constraint_consumer);
// Check that at least one of the constraints was non-zero.
assert!(constraint_consumer
.accumulators()
.iter()
.any(|&acc| acc != F::ZERO));
} else {
assert!(constraint_consumer
.accumulators()
.iter()
.all(|&acc| acc == F::ZERO));
}
}
}
}
}
}
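For reference, the zero-divisor convention these tests pin down matches EVM semantics: DIV and MOD return 0 on a zero divisor instead of trapping. A minimal self-contained sketch, with `u64` standing in for `U256` and ad-hoc function names:

fn evm_div(x: u64, m: u64) -> u64 {
    if m == 0 { 0 } else { x / m } // EVM defines x / 0 = 0
}

fn evm_mod(x: u64, m: u64) -> u64 {
    if m == 0 { 0 } else { x % m } // EVM defines x % 0 = 0
}

fn main() {
    assert_eq!(evm_div(17, 5), 3);
    assert_eq!(evm_mod(17, 5), 2);
    // The case exercised by `zero_modulus` above:
    assert_eq!(evm_div(17, 0), 0);
    assert_eq!(evm_mod(17, 0), 0);
}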

View File

@ -1,350 +0,0 @@
use ethereum_types::U256;
use plonky2::field::types::PrimeField64;
use self::columns::{
INPUT_REGISTER_0, INPUT_REGISTER_1, INPUT_REGISTER_2, OPCODE_COL, OUTPUT_REGISTER,
};
use self::utils::u256_to_array;
use crate::arithmetic::columns::IS_RANGE_CHECK;
use crate::extension_tower::BN_BASE;
use crate::util::{addmod, mulmod, submod};
mod addcy;
mod byte;
mod divmod;
mod modular;
mod mul;
mod shift;
mod utils;
pub mod arithmetic_stark;
pub(crate) mod columns;
/// An enum representing different binary operations.
///
/// `Shl` and `Shr` are handled differently, by leveraging `Mul` and `Div` respectively.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum BinaryOperator {
Add,
Mul,
Sub,
Div,
Mod,
Lt,
Gt,
AddFp254,
MulFp254,
SubFp254,
Byte,
Shl, // simulated with MUL
Shr, // simulated with DIV
}
impl BinaryOperator {
/// Computes the result of a binary arithmetic operation given two inputs.
pub(crate) fn result(&self, input0: U256, input1: U256) -> U256 {
match self {
BinaryOperator::Add => input0.overflowing_add(input1).0,
BinaryOperator::Mul => input0.overflowing_mul(input1).0,
BinaryOperator::Shl => {
if input0 < U256::from(256usize) {
input1 << input0
} else {
U256::zero()
}
}
BinaryOperator::Sub => input0.overflowing_sub(input1).0,
BinaryOperator::Div => {
if input1.is_zero() {
U256::zero()
} else {
input0 / input1
}
}
BinaryOperator::Shr => {
if input0 < U256::from(256usize) {
input1 >> input0
} else {
U256::zero()
}
}
BinaryOperator::Mod => {
if input1.is_zero() {
U256::zero()
} else {
input0 % input1
}
}
BinaryOperator::Lt => U256::from((input0 < input1) as u8),
BinaryOperator::Gt => U256::from((input0 > input1) as u8),
BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE),
BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE),
BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE),
BinaryOperator::Byte => {
if input0 >= 32.into() {
U256::zero()
} else {
input1.byte(31 - input0.as_usize()).into()
}
}
}
}
/// Maps a binary arithmetic operation to its associated flag column in the trace.
pub(crate) const fn row_filter(&self) -> usize {
match self {
BinaryOperator::Add => columns::IS_ADD,
BinaryOperator::Mul => columns::IS_MUL,
BinaryOperator::Sub => columns::IS_SUB,
BinaryOperator::Div => columns::IS_DIV,
BinaryOperator::Mod => columns::IS_MOD,
BinaryOperator::Lt => columns::IS_LT,
BinaryOperator::Gt => columns::IS_GT,
BinaryOperator::AddFp254 => columns::IS_ADDFP254,
BinaryOperator::MulFp254 => columns::IS_MULFP254,
BinaryOperator::SubFp254 => columns::IS_SUBFP254,
BinaryOperator::Byte => columns::IS_BYTE,
BinaryOperator::Shl => columns::IS_SHL,
BinaryOperator::Shr => columns::IS_SHR,
}
}
}
/// An enum representing different ternary operations.
#[allow(clippy::enum_variant_names)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum TernaryOperator {
AddMod,
MulMod,
SubMod,
}
impl TernaryOperator {
/// Computes the result of a ternary arithmetic operation given three inputs.
pub(crate) fn result(&self, input0: U256, input1: U256, input2: U256) -> U256 {
match self {
TernaryOperator::AddMod => addmod(input0, input1, input2),
TernaryOperator::MulMod => mulmod(input0, input1, input2),
TernaryOperator::SubMod => submod(input0, input1, input2),
}
}
/// Maps a ternary arithmetic operation to its associated flag column in the trace.
pub(crate) const fn row_filter(&self) -> usize {
match self {
TernaryOperator::AddMod => columns::IS_ADDMOD,
TernaryOperator::MulMod => columns::IS_MULMOD,
TernaryOperator::SubMod => columns::IS_SUBMOD,
}
}
}
/// An enum representing arithmetic operations that can be either binary or ternary.
#[allow(clippy::enum_variant_names)]
#[derive(Debug)]
pub(crate) enum Operation {
BinaryOperation {
operator: BinaryOperator,
input0: U256,
input1: U256,
result: U256,
},
TernaryOperation {
operator: TernaryOperator,
input0: U256,
input1: U256,
input2: U256,
result: U256,
},
RangeCheckOperation {
input0: U256,
input1: U256,
input2: U256,
opcode: U256,
result: U256,
},
}
impl Operation {
/// Creates a binary operator with given inputs.
///
/// NB: This works as you would expect, EXCEPT for SHL and SHR,
/// whose inputs need a small amount of preprocessing. Specifically,
/// to create `SHL(shift, value)`, call (note the reversal of
/// argument order):
///
/// `Operation::binary(BinaryOperator::Shl, value, 1 << shift)`
///
/// Similarly, to create `SHR(shift, value)`, call
///
/// `Operation::binary(BinaryOperator::Shr, value, 1 << shift)`
///
/// See witness/operation.rs::append_shift() for an example (indeed
/// the only call site for such inputs).
pub(crate) fn binary(operator: BinaryOperator, input0: U256, input1: U256) -> Self {
let result = operator.result(input0, input1);
Self::BinaryOperation {
operator,
input0,
input1,
result,
}
}
/// Creates a ternary operator with given inputs.
pub(crate) fn ternary(
operator: TernaryOperator,
input0: U256,
input1: U256,
input2: U256,
) -> Self {
let result = operator.result(input0, input1, input2);
Self::TernaryOperation {
operator,
input0,
input1,
input2,
result,
}
}
pub(crate) const fn range_check(
input0: U256,
input1: U256,
input2: U256,
opcode: U256,
result: U256,
) -> Self {
Self::RangeCheckOperation {
input0,
input1,
input2,
opcode,
result,
}
}
/// Gets the result of an arithmetic operation.
pub(crate) fn result(&self) -> U256 {
match self {
Operation::BinaryOperation { result, .. } => *result,
Operation::TernaryOperation { result, .. } => *result,
_ => panic!("This function should not be called for range checks."),
}
}
/// Convert operation into one or two rows of the trace.
///
/// Morally these types should be [F; NUM_ARITH_COLUMNS], but we
/// use vectors because that's what utils::transpose (which consumes
/// the result of this function as part of the range check code)
/// expects.
///
/// Shifts are simulated rather than native: `binary_op_to_rows` passes
/// an `is_shl` flag down to `shift::generate` to distinguish SHL and SHR,
/// which are simulated through MUL and DIV respectively.
fn to_rows<F: PrimeField64>(&self) -> (Vec<F>, Option<Vec<F>>) {
match *self {
Operation::BinaryOperation {
operator,
input0,
input1,
result,
} => binary_op_to_rows(operator, input0, input1, result),
Operation::TernaryOperation {
operator,
input0,
input1,
input2,
result,
} => ternary_op_to_rows(operator.row_filter(), input0, input1, input2, result),
Operation::RangeCheckOperation {
input0,
input1,
input2,
opcode,
result,
} => range_check_to_rows(input0, input1, input2, opcode, result),
}
}
}
/// Converts a ternary arithmetic operation to one or two rows of the `ArithmeticStark` table.
fn ternary_op_to_rows<F: PrimeField64>(
row_filter: usize,
input0: U256,
input1: U256,
input2: U256,
_result: U256,
) -> (Vec<F>, Option<Vec<F>>) {
let mut row1 = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
let mut row2 = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
row1[row_filter] = F::ONE;
modular::generate(&mut row1, &mut row2, row_filter, input0, input1, input2);
(row1, Some(row2))
}
/// Converts a binary arithmetic operation to one or two rows of the `ArithmeticStark` table.
fn binary_op_to_rows<F: PrimeField64>(
op: BinaryOperator,
input0: U256,
input1: U256,
result: U256,
) -> (Vec<F>, Option<Vec<F>>) {
let mut row = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
row[op.row_filter()] = F::ONE;
match op {
BinaryOperator::Add | BinaryOperator::Sub | BinaryOperator::Lt | BinaryOperator::Gt => {
addcy::generate(&mut row, op.row_filter(), input0, input1);
(row, None)
}
BinaryOperator::Mul => {
mul::generate(&mut row, input0, input1);
(row, None)
}
BinaryOperator::Shl => {
let mut nv = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
shift::generate(&mut row, &mut nv, true, input0, input1, result);
(row, None)
}
BinaryOperator::Div | BinaryOperator::Mod => {
let mut nv = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
divmod::generate(&mut row, &mut nv, op.row_filter(), input0, input1, result);
(row, Some(nv))
}
BinaryOperator::Shr => {
let mut nv = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
shift::generate(&mut row, &mut nv, false, input0, input1, result);
(row, Some(nv))
}
BinaryOperator::AddFp254 | BinaryOperator::MulFp254 | BinaryOperator::SubFp254 => {
ternary_op_to_rows::<F>(op.row_filter(), input0, input1, BN_BASE, result)
}
BinaryOperator::Byte => {
byte::generate(&mut row, input0, input1);
(row, None)
}
}
}
fn range_check_to_rows<F: PrimeField64>(
input0: U256,
input1: U256,
input2: U256,
opcode: U256,
result: U256,
) -> (Vec<F>, Option<Vec<F>>) {
let mut row = vec![F::ZERO; columns::NUM_ARITH_COLUMNS];
row[IS_RANGE_CHECK] = F::ONE;
row[OPCODE_COL] = F::from_canonical_u64(opcode.as_u64());
u256_to_array(&mut row[INPUT_REGISTER_0], input0);
u256_to_array(&mut row[INPUT_REGISTER_1], input1);
u256_to_array(&mut row[INPUT_REGISTER_2], input2);
u256_to_array(&mut row[OUTPUT_REGISTER], result);
(row, None)
}
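The argument reversal documented on `Operation::binary` is easy to see on a toy scale. In the sketch below (`u128` in place of `U256`; the name `shl_via_mul` is illustrative, not the crate's API), the caller supplies `1 << shift` as the second operand, and the shift reduces to a wrapping multiplication, exactly the `C = A*B (mod 2^width)` relation the MUL table proves:

fn shl_via_mul(value: u128, pow_of_two: u128) -> u128 {
    value.wrapping_mul(pow_of_two)
}

fn main() {
    // Caller-side preprocessing: B = 1 << shift (0 once shift >= width).
    let shift = 4u32;
    let b = if shift >= 128 { 0 } else { 1u128 << shift };
    assert_eq!(shl_via_mul(3, b), 48); // 3 << 4
    assert_eq!(shl_via_mul(u128::MAX, 2), u128::MAX - 1); // wraps mod 2^128
}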

File diff suppressed because it is too large

View File

@ -1,320 +0,0 @@
//! Support for the EVM MUL instruction.
//!
//! This crate verifies an EVM MUL instruction, which takes two
//! 256-bit inputs A and B, and produces a 256-bit output C satisfying
//!
//! C = A*B (mod 2^256),
//!
//! i.e. C is the lower half of the usual long multiplication
//! A*B. Inputs A and B, and output C, are given as arrays of 16-bit
//! limbs. For example, if the limbs of A are a[0]...a[15], then
//!
//! A = \sum_{i=0}^15 a[i] β^i,
//!
//! where β = 2^16 = 2^LIMB_BITS. To verify that A, B and C satisfy
//! the equation we proceed as follows. Define
//!
//! a(x) = \sum_{i=0}^15 a[i] x^i
//!
//! (so A = a(β)) and similarly for b(x) and c(x). Then A*B = C (mod
//! 2^256) if and only if there exists q such that the polynomial
//!
//! a(x) * b(x) - c(x) - x^16 * q(x)
//!
//! is zero when evaluated at x = β, i.e. it is divisible by (x - β);
//! equivalently, there exists a polynomial s (representing the
//! carries from the long multiplication) such that
//!
//! a(x) * b(x) - c(x) - x^16 * q(x) - (x - β) * s(x) == 0
//!
//! As we only need the lower half of the product, we can omit q(x)
//! since it is multiplied by the modulus β^16 = 2^256. Thus we only
//! need to verify
//!
//! a(x) * b(x) - c(x) - (x - β) * s(x) == 0
//!
//! In the code below, this "constraint polynomial" is constructed in
//! the variable `constr_poly`. It must be identically zero for the
//! multiplication operation to be verified, or, equivalently, each of
//! its coefficients must be zero. The variable names of the
//! constituent polynomials are (writing N for N_LIMBS=16):
//!
//! a(x) = \sum_{i=0}^{N-1} input0[i] * x^i
//! b(x) = \sum_{i=0}^{N-1} input1[i] * x^i
//! c(x) = \sum_{i=0}^{N-1} output[i] * x^i
//! s(x) = \sum_{i=0}^{2N-3} aux[i] * x^i
//!
//! Because A, B and C are 256-bit numbers, the degrees of a, b and c
//! are (at most) 15. Thus deg(a*b) <= 30 and deg(s) <= 29; however,
//! as we're only verifying the lower half of A*B, we only need to
//! know s(x) up to degree 14 (so that (x - β)*s(x) has degree 15). On
//! the other hand, the coefficients of s(x) can be as large as
//! 16*(β-2) or 20 bits.
//!
//! Note that, unlike for the general modular multiplication (see the
//! file `modular.rs`), we don't need to check that output is reduced,
//! since any value of output is less than β^16 and is hence reduced.
use ethereum_types::U256;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::arithmetic::columns::*;
use crate::arithmetic::utils::*;
/// Given the two limbs of `left_in` and `right_in`, computes `left_in * right_in`.
pub(crate) fn generate_mul<F: PrimeField64>(lv: &mut [F], left_in: [i64; 16], right_in: [i64; 16]) {
const MASK: i64 = (1i64 << LIMB_BITS) - 1i64;
// Input and output have 16-bit limbs
let mut output_limbs = [0i64; N_LIMBS];
// Column-wise pen-and-paper long multiplication on 16-bit limbs.
// First calculate the coefficients of a(x)*b(x) (in unreduced_prod),
// then do carry propagation to obtain C = c(β) = a(β)*b(β).
let mut cy = 0i64;
let mut unreduced_prod = pol_mul_lo(left_in, right_in);
for col in 0..N_LIMBS {
let t = unreduced_prod[col] + cy;
cy = t >> LIMB_BITS;
output_limbs[col] = t & MASK;
}
// In principle, the last cy could be dropped because this is
// multiplication modulo 2^256. However, we need it below for
// aux_limbs to handle the fact that unreduced_prod will
// inevitably contain one digit's worth that is > 2^256.
lv[OUTPUT_REGISTER].copy_from_slice(&output_limbs.map(|c| F::from_canonical_i64(c)));
pol_sub_assign(&mut unreduced_prod, &output_limbs);
let mut aux_limbs = pol_remove_root_2exp::<LIMB_BITS, _, N_LIMBS>(unreduced_prod);
aux_limbs[N_LIMBS - 1] = -cy;
for c in aux_limbs.iter_mut() {
// we store the unsigned offset value c + 2^20
*c += AUX_COEFF_ABS_MAX;
}
debug_assert!(aux_limbs.iter().all(|&c| c.abs() <= 2 * AUX_COEFF_ABS_MAX));
lv[MUL_AUX_INPUT_LO].copy_from_slice(&aux_limbs.map(|c| F::from_canonical_u16(c as u16)));
lv[MUL_AUX_INPUT_HI]
.copy_from_slice(&aux_limbs.map(|c| F::from_canonical_u16((c >> 16) as u16)));
}
pub(crate) fn generate<F: PrimeField64>(lv: &mut [F], left_in: U256, right_in: U256) {
// TODO: It would probably be clearer/cleaner to read the U256
// into an [i64;N] and then copy that to the lv table.
u256_to_array(&mut lv[INPUT_REGISTER_0], left_in);
u256_to_array(&mut lv[INPUT_REGISTER_1], right_in);
u256_to_array(&mut lv[INPUT_REGISTER_2], U256::zero());
let input0 = read_value_i64_limbs(lv, INPUT_REGISTER_0);
let input1 = read_value_i64_limbs(lv, INPUT_REGISTER_1);
generate_mul(lv, input0, input1);
}
pub(crate) fn eval_packed_generic_mul<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
filter: P,
left_in_limbs: [P; 16],
right_in_limbs: [P; 16],
yield_constr: &mut ConstraintConsumer<P>,
) {
let output_limbs = read_value::<N_LIMBS, _>(lv, OUTPUT_REGISTER);
let base = P::Scalar::from_canonical_u64(1 << LIMB_BITS);
let aux_limbs = {
// MUL_AUX_INPUT was offset by 2^20 in generation, so we undo
// that here
let offset = P::Scalar::from_canonical_u64(AUX_COEFF_ABS_MAX as u64);
let mut aux_limbs = read_value::<N_LIMBS, _>(lv, MUL_AUX_INPUT_LO);
let aux_limbs_hi = &lv[MUL_AUX_INPUT_HI];
for (lo, &hi) in aux_limbs.iter_mut().zip(aux_limbs_hi) {
*lo += hi * base - offset;
}
aux_limbs
};
// Constraint poly holds the coefficients of the polynomial that
// must be identically zero for this multiplication to be
// verified.
//
// These two lines set constr_poly to the polynomial a(x)b(x) - c(x),
// where a, b and c are the polynomials
//
// a(x) = \sum_i input0_limbs[i] * x^i
// b(x) = \sum_i input1_limbs[i] * x^i
// c(x) = \sum_i output_limbs[i] * x^i
//
// This polynomial should equal (x - β)*s(x) where s is
//
// s(x) = \sum_i aux_limbs[i] * x^i
//
let mut constr_poly = pol_mul_lo(left_in_limbs, right_in_limbs);
pol_sub_assign(&mut constr_poly, &output_limbs);
// This subtracts (x - β) * s(x) from constr_poly.
pol_sub_assign(&mut constr_poly, &pol_adjoin_root(aux_limbs, base));
// At this point constr_poly holds the coefficients of the
// polynomial a(x)b(x) - c(x) - (x - β)*s(x). The
// multiplication is valid if and only if all of those
// coefficients are zero.
for &c in &constr_poly {
yield_constr.constraint(filter * c);
}
}
pub(crate) fn eval_packed_generic<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_mul = lv[IS_MUL];
let input0_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_0);
let input1_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_1);
eval_packed_generic_mul(lv, is_mul, input0_limbs, input1_limbs, yield_constr);
}
pub(crate) fn eval_ext_mul_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
filter: ExtensionTarget<D>,
left_in_limbs: [ExtensionTarget<D>; 16],
right_in_limbs: [ExtensionTarget<D>; 16],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let output_limbs = read_value::<N_LIMBS, _>(lv, OUTPUT_REGISTER);
let aux_limbs = {
// MUL_AUX_INPUT was offset by 2^20 in generation, so we undo
// that here
let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << LIMB_BITS));
let offset =
builder.constant_extension(F::Extension::from_canonical_u64(AUX_COEFF_ABS_MAX as u64));
let mut aux_limbs = read_value::<N_LIMBS, _>(lv, MUL_AUX_INPUT_LO);
let aux_limbs_hi = &lv[MUL_AUX_INPUT_HI];
for (lo, &hi) in aux_limbs.iter_mut().zip(aux_limbs_hi) {
//*lo = lo + hi * base - offset;
let t = builder.mul_sub_extension(hi, base, offset);
*lo = builder.add_extension(*lo, t);
}
aux_limbs
};
let mut constr_poly = pol_mul_lo_ext_circuit(builder, left_in_limbs, right_in_limbs);
pol_sub_assign_ext_circuit(builder, &mut constr_poly, &output_limbs);
// This subtracts (x - β) * s(x) from constr_poly.
let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << LIMB_BITS));
let rhs = pol_adjoin_root_ext_circuit(builder, aux_limbs, base);
pol_sub_assign_ext_circuit(builder, &mut constr_poly, &rhs);
// At this point constr_poly holds the coefficients of the
// polynomial a(x)b(x) - c(x) - (x - β)*s(x). The
// multiplication is valid if and only if all of those
// coefficients are zero.
for &c in &constr_poly {
let filter = builder.mul_extension(filter, c);
yield_constr.constraint(builder, filter);
}
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_mul = lv[IS_MUL];
let input0_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_0);
let input1_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_1);
eval_ext_mul_circuit(
builder,
lv,
is_mul,
input0_limbs,
input1_limbs,
yield_constr,
);
}
#[cfg(test)]
mod tests {
use plonky2::field::goldilocks_field::GoldilocksField;
use plonky2::field::types::{Field, Sample};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use starky::constraint_consumer::ConstraintConsumer;
use super::*;
use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
const N_RND_TESTS: usize = 1000;
// TODO: Should be able to refactor this test to apply to all operations.
#[test]
fn generate_eval_consistency_not_mul() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// if `IS_MUL == 0`, then the constraints should be met even
// if all values are garbage.
lv[IS_MUL] = F::ZERO;
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ONE,
);
eval_packed_generic(&lv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
#[test]
fn generate_eval_consistency_mul() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// set `IS_MUL == 1` and ensure all constraints are satisfied.
lv[IS_MUL] = F::ONE;
for _i in 0..N_RND_TESTS {
// set inputs to random values
for (ai, bi) in INPUT_REGISTER_0.zip(INPUT_REGISTER_1) {
lv[ai] = F::from_canonical_u16(rng.gen());
lv[bi] = F::from_canonical_u16(rng.gen());
}
let left_in = U256::from(rng.gen::<[u8; 32]>());
let right_in = U256::from(rng.gen::<[u8; 32]>());
generate(&mut lv, left_in, right_in);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ONE,
);
eval_packed_generic(&lv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
}
}
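The identity in the module comment can be checked end to end on a toy configuration. This standalone sketch (my own reconstruction, using 4 limbs of 4 bits so β = 16, instead of 16 limbs of 16 bits) redoes the carry propagation from `generate_mul` and verifies, coefficient by coefficient, that a(x)b(x) - c(x) is exactly the telescoping carry term; the module's s(x) is this carry vector up to sign and the final-carry adjustment:

const N: usize = 4;
const LIMB_BITS: u32 = 4;
const BASE: i64 = 1 << LIMB_BITS; // β = 16

fn pol_mul_lo(a: [i64; N], b: [i64; N]) -> [i64; N] {
    let mut res = [0i64; N];
    for deg in 0..N {
        for i in 0..=deg {
            res[deg] += a[i] * b[deg - i];
        }
    }
    res
}

fn eval(p: [i64; N], x: i64) -> i64 {
    p.iter().rev().fold(0, |acc, &c| acc * x + c)
}

fn main() {
    let (a, b) = ([3, 7, 1, 2], [5, 0, 9, 4]); // limbs, least significant first
    let unreduced = pol_mul_lo(a, b);

    // Carry propagation, as in `generate_mul`: for each column,
    // unreduced[i] + s[i-1] = c[i] + β·s[i].
    let (mut c, mut s, mut cy) = ([0i64; N], [0i64; N], 0i64);
    for col in 0..N {
        let t = unreduced[col] + cy;
        cy = t >> LIMB_BITS;
        c[col] = t & (BASE - 1);
        s[col] = cy;
    }

    // Coefficient-wise: (a·b)[i] - c[i] = β·s[i] - s[i-1], with s[-1] = 0.
    // Summed against β^i this telescopes to cy·β^N, hence A·B ≡ C (mod β^N).
    for i in 0..N {
        let prev = if i == 0 { 0 } else { s[i - 1] };
        assert_eq!(unreduced[i] - c[i], BASE * s[i] - prev);
    }
    assert_eq!(eval(c, BASE), eval(a, BASE) * eval(b, BASE) % BASE.pow(N as u32));
}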

View File

@ -1,338 +0,0 @@
//! Support for the EVM SHL and SHR instructions.
//!
//! This crate verifies an EVM shift instruction, which takes two
//! 256-bit inputs S and A, and produces a 256-bit output C satisfying
//!
//! C = A << S (mod 2^256) for SHL or
//! C = A >> S (mod 2^256) for SHR.
//!
//! The way this computation is carried out is by providing a third input
//! B = 1 << S (mod 2^256)
//! and then computing:
//! C = A * B (mod 2^256) for SHL or
//! C = A / B (mod 2^256) for SHR
//!
//! Inputs A, S, and B, and output C, are given as arrays of 16-bit
//! limbs. For example, if the limbs of A are a[0]...a[15], then
//!
//! A = \sum_{i=0}^15 a[i] β^i,
//!
//! where β = 2^16 = 2^LIMB_BITS. To verify that A, S, B and C satisfy
//! the equations, we proceed similarly to MUL for SHL and to DIV for SHR.
use ethereum_types::U256;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::PrimeField64;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use super::{divmod, mul};
use crate::arithmetic::columns::*;
use crate::arithmetic::utils::*;
/// Generates a shift operation (either SHL or SHR).
/// The inputs are stored in the form `(shift, input, 1 << shift)`.
/// NB: if `shift >= 256`, then the third register holds 0.
/// We leverage the functions in mul.rs and divmod.rs to carry out
/// the computation.
pub(crate) fn generate<F: PrimeField64>(
lv: &mut [F],
nv: &mut [F],
is_shl: bool,
shift: U256,
input: U256,
result: U256,
) {
// We use the multiplication logic to generate SHL
// TODO: It would probably be clearer/cleaner to read the U256
// into an [i64;N] and then copy that to the lv table.
// The first input is the shift we need to apply.
u256_to_array(&mut lv[INPUT_REGISTER_0], shift);
// The second register holds the input which needs shifting.
u256_to_array(&mut lv[INPUT_REGISTER_1], input);
u256_to_array(&mut lv[OUTPUT_REGISTER], result);
// If `shift >= 256`, the shifted displacement is set to 0.
// Compute 1 << shift and store it in the third input register.
let shifted_displacement = if shift > U256::from(255u64) {
U256::zero()
} else {
U256::one() << shift
};
u256_to_array(&mut lv[INPUT_REGISTER_2], shifted_displacement);
let input0 = read_value_i64_limbs(lv, INPUT_REGISTER_1); // input
let input1 = read_value_i64_limbs(lv, INPUT_REGISTER_2); // 1 << shift
if is_shl {
// We generate the multiplication input0 * input1 using mul.rs.
mul::generate_mul(lv, input0, input1);
} else {
// If the operation is SHR, we compute `input / shifted_displacement` if `shifted_displacement != 0`;
// otherwise, the output is 0. We use the logic in divmod.rs to achieve that.
divmod::generate_divmod(lv, nv, IS_SHR, INPUT_REGISTER_1, INPUT_REGISTER_2);
}
}
/// Evaluates the constraints for an SHL opcode.
/// The logic is the same as the one for MUL. The only difference is that
/// the inputs are in `INPUT_REGISTER_1` and `INPUT_REGISTER_2` instead of
/// `INPUT_REGISTER_0` and `INPUT_REGISTER_1`.
fn eval_packed_shl<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_shl = lv[IS_SHL];
let input0_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_1);
let shifted_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_2);
mul::eval_packed_generic_mul(lv, is_shl, input0_limbs, shifted_limbs, yield_constr);
}
/// Evaluates the constraints for an SHR opcode.
/// The logic is the same as the one for DIV. The only difference is that
/// the inputs are in `INPUT_REGISTER_1` and `INPUT_REGISTER_2` instead of
/// `INPUT_REGISTER_0` and `INPUT_REGISTER_1`.
fn eval_packed_shr<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
nv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let quo_range = OUTPUT_REGISTER;
let rem_range = AUX_INPUT_REGISTER_0;
let filter = lv[IS_SHR];
divmod::eval_packed_divmod_helper(
lv,
nv,
yield_constr,
filter,
INPUT_REGISTER_1,
INPUT_REGISTER_2,
quo_range,
rem_range,
);
}
pub(crate) fn eval_packed_generic<P: PackedField>(
lv: &[P; NUM_ARITH_COLUMNS],
nv: &[P; NUM_ARITH_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
eval_packed_shl(lv, yield_constr);
eval_packed_shr(lv, nv, yield_constr);
}
fn eval_ext_circuit_shl<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_shl = lv[IS_SHL];
let input0_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_1);
let shifted_limbs = read_value::<N_LIMBS, _>(lv, INPUT_REGISTER_2);
mul::eval_ext_mul_circuit(
builder,
lv,
is_shl,
input0_limbs,
shifted_limbs,
yield_constr,
);
}
fn eval_ext_circuit_shr<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let filter = lv[IS_SHR];
let quo_range = OUTPUT_REGISTER;
let rem_range = AUX_INPUT_REGISTER_0;
divmod::eval_ext_circuit_divmod_helper(
builder,
lv,
nv,
yield_constr,
filter,
INPUT_REGISTER_1,
INPUT_REGISTER_2,
quo_range,
rem_range,
);
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
nv: &[ExtensionTarget<D>; NUM_ARITH_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
eval_ext_circuit_shl(builder, lv, yield_constr);
eval_ext_circuit_shr(builder, lv, nv, yield_constr);
}
#[cfg(test)]
mod tests {
use plonky2::field::goldilocks_field::GoldilocksField;
use plonky2::field::types::{Field, Sample};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use starky::constraint_consumer::ConstraintConsumer;
use super::*;
use crate::arithmetic::columns::NUM_ARITH_COLUMNS;
const N_RND_TESTS: usize = 1000;
// TODO: Should be able to refactor this test to apply to all operations.
#[test]
fn generate_eval_consistency_not_shift() {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
let nv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// if `IS_SHL == 0` and `IS_SHR == 0`, then the constraints should be met even
// if all values are garbage.
lv[IS_SHL] = F::ZERO;
lv[IS_SHR] = F::ZERO;
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ONE,
);
eval_packed_generic(&lv, &nv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
fn generate_eval_consistency_shift(is_shl: bool) {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
let mut nv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// set `IS_SHL == 1` or `IS_SHR == 1` and ensure all constraints are satisfied.
if is_shl {
lv[IS_SHL] = F::ONE;
lv[IS_SHR] = F::ZERO;
} else {
// Set `IS_DIV` to 0 in this case, since we're using the logic of DIV for SHR.
lv[IS_DIV] = F::ZERO;
lv[IS_SHL] = F::ZERO;
lv[IS_SHR] = F::ONE;
}
for _i in 0..N_RND_TESTS {
let shift = U256::from(rng.gen::<u8>());
let mut full_input = U256::from(0);
// set inputs to random values
for ai in INPUT_REGISTER_1 {
lv[ai] = F::from_canonical_u16(rng.gen());
full_input =
U256::from(lv[ai].to_canonical_u64()) + full_input * U256::from(1 << 16);
}
let output = if is_shl {
full_input << shift
} else {
full_input >> shift
};
generate(&mut lv, &mut nv, is_shl, shift, full_input, output);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ZERO,
);
eval_packed_generic(&lv, &nv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
}
#[test]
fn generate_eval_consistency_shl() {
generate_eval_consistency_shift(true);
}
#[test]
fn generate_eval_consistency_shr() {
generate_eval_consistency_shift(false);
}
fn generate_eval_consistency_shift_over_256(is_shl: bool) {
type F = GoldilocksField;
let mut rng = ChaCha8Rng::seed_from_u64(0x6feb51b7ec230f25);
let mut lv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
let mut nv = [F::default(); NUM_ARITH_COLUMNS].map(|_| F::sample(&mut rng));
// set `IS_SHL == 1` or `IS_SHR == 1` and ensure all constraints are satisfied.
if is_shl {
lv[IS_SHL] = F::ONE;
lv[IS_SHR] = F::ZERO;
} else {
// Set `IS_DIV` to 0 in this case, since we're using the logic of DIV for SHR.
lv[IS_DIV] = F::ZERO;
lv[IS_SHL] = F::ZERO;
lv[IS_SHR] = F::ONE;
}
for _i in 0..N_RND_TESTS {
let mut shift = U256::from(rng.gen::<usize>());
while shift > U256::MAX - 256 {
shift = U256::from(rng.gen::<usize>());
}
shift += U256::from(256);
let mut full_input = U256::from(0);
// set inputs to random values
for ai in INPUT_REGISTER_1 {
lv[ai] = F::from_canonical_u16(rng.gen());
full_input =
U256::from(lv[ai].to_canonical_u64()) + full_input * U256::from(1 << 16);
}
let output = 0.into();
generate(&mut lv, &mut nv, is_shl, shift, full_input, output);
let mut constraint_consumer = ConstraintConsumer::new(
vec![GoldilocksField(2), GoldilocksField(3), GoldilocksField(5)],
GoldilocksField::ONE,
GoldilocksField::ONE,
GoldilocksField::ZERO,
);
eval_packed_generic(&lv, &nv, &mut constraint_consumer);
for &acc in &constraint_consumer.accumulators() {
assert_eq!(acc, GoldilocksField::ZERO);
}
}
}
#[test]
fn generate_eval_consistency_shl_over_256() {
generate_eval_consistency_shift_over_256(true);
}
#[test]
fn generate_eval_consistency_shr_over_256() {
generate_eval_consistency_shift_over_256(false);
}
}
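A compact model of the scheme in the module comment, with `u32` standing in for `U256` and ad-hoc names: precompute B = 1 << S (zero once S reaches the width), then SHL is a wrapping MUL by B and SHR is a DIV by B under the divide-by-zero-gives-zero rule, which is what the over-256 tests above exercise:

fn shifted_displacement(shift: u32) -> u32 {
    if shift >= 32 { 0 } else { 1u32 << shift } // B = 1 << S, or 0
}

fn shl(a: u32, s: u32) -> u32 {
    a.wrapping_mul(shifted_displacement(s)) // C = A * B (mod 2^width)
}

fn shr(a: u32, s: u32) -> u32 {
    let b = shifted_displacement(s);
    if b == 0 { 0 } else { a / b } // C = A / B, div-by-zero gives 0
}

fn main() {
    assert_eq!(shl(0b1011, 4), 0b1011_0000);
    assert_eq!(shr(0b1011_0000, 4), 0b1011);
    // Oversized shifts: B = 0, so SHL multiplies by zero and SHR hits
    // the zero-divisor rule; both return 0.
    assert_eq!(shl(123, 300), 0);
    assert_eq!(shr(123, 300), 0);
}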

View File

@ -1,343 +0,0 @@
use core::ops::{Add, AddAssign, Mul, Neg, Range, Shr, Sub, SubAssign};
use ethereum_types::U256;
use plonky2::field::extension::Extendable;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use static_assertions::const_assert;
use crate::arithmetic::columns::{LIMB_BITS, N_LIMBS};
/// Return an array of `N` zeros of type T.
pub(crate) fn pol_zero<T, const N: usize>() -> [T; N]
where
T: Copy + Default,
{
// TODO: This should really be T::zero() from num::Zero, because
// default() doesn't guarantee to initialise to zero (though in
// our case it always does). However I couldn't work out how to do
// that without touching half of the entire crate because it
// involves replacing Field::is_zero() with num::Zero::is_zero()
// which is used everywhere. Hence Default::default() it is.
[T::default(); N]
}
/// a(x) += b(x), but must have deg(a) >= deg(b).
pub(crate) fn pol_add_assign<T>(a: &mut [T], b: &[T])
where
T: AddAssign + Copy + Default,
{
debug_assert!(a.len() >= b.len(), "expected {} >= {}", a.len(), b.len());
for (a_item, b_item) in a.iter_mut().zip(b) {
*a_item += *b_item;
}
}
pub(crate) fn pol_add_assign_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: &mut [ExtensionTarget<D>],
b: &[ExtensionTarget<D>],
) {
debug_assert!(a.len() >= b.len(), "expected {} >= {}", a.len(), b.len());
for (a_item, b_item) in a.iter_mut().zip(b) {
*a_item = builder.add_extension(*a_item, *b_item);
}
}
/// Return a(x) + b(x); returned array is bigger than necessary to
/// make the interface consistent with `pol_mul_wide`.
pub(crate) fn pol_add<T>(a: [T; N_LIMBS], b: [T; N_LIMBS]) -> [T; 2 * N_LIMBS - 1]
where
T: Add<Output = T> + Copy + Default,
{
let mut sum = pol_zero();
for i in 0..N_LIMBS {
sum[i] = a[i] + b[i];
}
sum
}
pub(crate) fn pol_add_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; N_LIMBS],
b: [ExtensionTarget<D>; N_LIMBS],
) -> [ExtensionTarget<D>; 2 * N_LIMBS - 1] {
let zero = builder.zero_extension();
let mut sum = [zero; 2 * N_LIMBS - 1];
for i in 0..N_LIMBS {
sum[i] = builder.add_extension(a[i], b[i]);
}
sum
}
/// Return a(x) - b(x); returned array is bigger than necessary to
/// make the interface consistent with `pol_mul_wide`.
pub(crate) fn pol_sub<T>(a: [T; N_LIMBS], b: [T; N_LIMBS]) -> [T; 2 * N_LIMBS - 1]
where
T: Sub<Output = T> + Copy + Default,
{
let mut diff = pol_zero();
for i in 0..N_LIMBS {
diff[i] = a[i] - b[i];
}
diff
}
pub(crate) fn pol_sub_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; N_LIMBS],
b: [ExtensionTarget<D>; N_LIMBS],
) -> [ExtensionTarget<D>; 2 * N_LIMBS - 1] {
let zero = builder.zero_extension();
let mut diff = [zero; 2 * N_LIMBS - 1];
for i in 0..N_LIMBS {
diff[i] = builder.sub_extension(a[i], b[i]);
}
diff
}
/// a(x) -= b(x), but must have deg(a) >= deg(b).
pub(crate) fn pol_sub_assign<T>(a: &mut [T], b: &[T])
where
T: SubAssign + Copy,
{
debug_assert!(a.len() >= b.len(), "expected {} >= {}", a.len(), b.len());
for (a_item, b_item) in a.iter_mut().zip(b) {
*a_item -= *b_item;
}
}
pub(crate) fn pol_sub_assign_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: &mut [ExtensionTarget<D>],
b: &[ExtensionTarget<D>],
) {
debug_assert!(a.len() >= b.len(), "expected {} >= {}", a.len(), b.len());
for (a_item, b_item) in a.iter_mut().zip(b) {
*a_item = builder.sub_extension(*a_item, *b_item);
}
}
/// Given polynomials a(x) and b(x), return a(x)*b(x).
///
/// NB: The caller is responsible for ensuring that no undesired
/// overflow occurs during the calculation of the coefficients of the
/// product.
pub(crate) fn pol_mul_wide<T>(a: [T; N_LIMBS], b: [T; N_LIMBS]) -> [T; 2 * N_LIMBS - 1]
where
T: AddAssign + Copy + Mul<Output = T> + Default,
{
let mut res = [T::default(); 2 * N_LIMBS - 1];
for (i, &ai) in a.iter().enumerate() {
for (j, &bj) in b.iter().enumerate() {
res[i + j] += ai * bj;
}
}
res
}
pub(crate) fn pol_mul_wide_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; N_LIMBS],
b: [ExtensionTarget<D>; N_LIMBS],
) -> [ExtensionTarget<D>; 2 * N_LIMBS - 1] {
let zero = builder.zero_extension();
let mut res = [zero; 2 * N_LIMBS - 1];
for (i, &ai) in a.iter().enumerate() {
for (j, &bj) in b.iter().enumerate() {
res[i + j] = builder.mul_add_extension(ai, bj, res[i + j]);
}
}
res
}
/// As for `pol_mul_wide` but the first argument has 2N elements and
/// hence the result has 3N-1.
pub(crate) fn pol_mul_wide2<T>(a: [T; 2 * N_LIMBS], b: [T; N_LIMBS]) -> [T; 3 * N_LIMBS - 1]
where
T: AddAssign + Copy + Mul<Output = T> + Default,
{
let mut res = [T::default(); 3 * N_LIMBS - 1];
for (i, &ai) in a.iter().enumerate() {
for (j, &bj) in b.iter().enumerate() {
res[i + j] += ai * bj;
}
}
res
}
pub(crate) fn pol_mul_wide2_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; 2 * N_LIMBS],
b: [ExtensionTarget<D>; N_LIMBS],
) -> [ExtensionTarget<D>; 3 * N_LIMBS - 1] {
let zero = builder.zero_extension();
let mut res = [zero; 3 * N_LIMBS - 1];
for (i, &ai) in a.iter().enumerate() {
for (j, &bj) in b.iter().enumerate() {
res[i + j] = builder.mul_add_extension(ai, bj, res[i + j]);
}
}
res
}
/// Given a(x) and b(x), return a(x)*b(x) mod 2^256.
pub(crate) fn pol_mul_lo<T, const N: usize>(a: [T; N], b: [T; N]) -> [T; N]
where
T: AddAssign + Copy + Default + Mul<Output = T>,
{
let mut res = pol_zero();
for deg in 0..N {
// Invariant: i + j = deg
for i in 0..=deg {
let j = deg - i;
res[deg] += a[i] * b[j];
}
}
res
}
pub(crate) fn pol_mul_lo_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; N_LIMBS],
b: [ExtensionTarget<D>; N_LIMBS],
) -> [ExtensionTarget<D>; N_LIMBS] {
let zero = builder.zero_extension();
let mut res = [zero; N_LIMBS];
for deg in 0..N_LIMBS {
for i in 0..=deg {
let j = deg - i;
res[deg] = builder.mul_add_extension(a[i], b[j], res[deg]);
}
}
res
}
/// Adjoin M - N zeros to a, returning [a[0], a[1], ..., a[N-1], 0, 0, ..., 0].
pub(crate) fn pol_extend<T, const N: usize, const M: usize>(a: [T; N]) -> [T; M]
where
T: Copy + Default,
{
assert_eq!(M, 2 * N - 1);
let mut zero_extend = pol_zero();
zero_extend[..N].copy_from_slice(&a);
zero_extend
}
/// Given polynomial a(x) = \sum_{i=0}^{N-2} a[i] x^i and an element
/// `root`, return b = (x - root) * a(x).
pub(crate) fn pol_adjoin_root<T, U, const N: usize>(a: [T; N], root: U) -> [T; N]
where
T: Add<Output = T> + Copy + Default + Mul<Output = T> + Sub<Output = T>,
U: Copy + Mul<T, Output = T> + Neg<Output = U>,
{
// \sum_i res[i] x^i = (x - root) \sum_i a[i] x^i. Comparing
// coefficients, res[0] = -root*a[0] and
// res[i] = a[i-1] - root * a[i]
let mut res = [T::default(); N];
res[0] = -root * a[0];
for deg in 1..N {
res[deg] = a[deg - 1] - (root * a[deg]);
}
res
}
pub(crate) fn pol_adjoin_root_ext_circuit<
F: RichField + Extendable<D>,
const D: usize,
const N: usize,
>(
builder: &mut CircuitBuilder<F, D>,
a: [ExtensionTarget<D>; N],
root: ExtensionTarget<D>,
) -> [ExtensionTarget<D>; N] {
let zero = builder.zero_extension();
let mut res = [zero; N];
// res[0] = NEG_ONE * root * a[0] + ZERO * zero
res[0] = builder.mul_extension_with_const(F::NEG_ONE, root, a[0]);
for deg in 1..N {
// res[deg] = NEG_ONE * root * a[deg] + ONE * a[deg - 1]
res[deg] = builder.arithmetic_extension(F::NEG_ONE, F::ONE, root, a[deg], a[deg - 1]);
}
res
}
/// Given polynomial a(x) = \sum_{i=0}^{N-1} a[i] x^i and a root of `a`
/// of the form 2^EXP, return q(x) satisfying a(x) = (x - root) * q(x).
///
/// NB: We do not verify that a(2^EXP) = 0; if this doesn't hold the
/// result is basically junk.
///
/// NB: The result could be returned in N-1 elements, but we return
/// N and set the last element to zero since the calling code
/// happens to require a result zero-extended to N elements.
pub(crate) fn pol_remove_root_2exp<const EXP: usize, T, const N: usize>(a: [T; N]) -> [T; N]
where
T: Copy + Default + Neg<Output = T> + Shr<usize, Output = T> + Sub<Output = T>,
{
// By assumption β := 2^EXP is a root of `a`, i.e. (x - β) divides
// `a`; if we write
//
// a(x) = \sum_{i=0}^{N-1} a[i] x^i
// = (x - β) \sum_{i=0}^{N-2} q[i] x^i
//
// then by comparing coefficients it is easy to see that
//
// q[0] = -a[0] / β and q[i] = (q[i-1] - a[i]) / β
//
// for 0 < i <= N-1 (and the divisions are exact).
let mut q = [T::default(); N];
q[0] = -(a[0] >> EXP);
// NB: Last element of q is deliberately left equal to zero.
for deg in 1..N - 1 {
q[deg] = (q[deg - 1] - a[deg]) >> EXP;
}
q
}
/// Read the range `value_idxs` of values from `lv` into an array of
/// length `N`. Panics if the length of the range is not `N`.
pub(crate) fn read_value<const N: usize, T: Copy>(lv: &[T], value_idxs: Range<usize>) -> [T; N] {
lv[value_idxs].try_into().unwrap()
}
/// Read the range `value_idxs` of values from `lv` into an array of
/// length `N`, interpreting the values as `i64`s. Panics if the
/// length of the range is not `N`.
pub(crate) fn read_value_i64_limbs<const N: usize, F: PrimeField64>(
lv: &[F],
value_idxs: Range<usize>,
) -> [i64; N] {
let limbs: [_; N] = lv[value_idxs].try_into().unwrap();
limbs.map(|c| c.to_canonical_u64() as i64)
}
#[inline]
/// Turn a 64-bit integer into 4 16-bit limbs and convert them to field elements.
fn u64_to_array<F: Field>(out: &mut [F], x: u64) {
const_assert!(LIMB_BITS == 16);
debug_assert!(out.len() == 4);
out[0] = F::from_canonical_u16(x as u16);
out[1] = F::from_canonical_u16((x >> 16) as u16);
out[2] = F::from_canonical_u16((x >> 32) as u16);
out[3] = F::from_canonical_u16((x >> 48) as u16);
}
/// Turn a 256-bit integer into 16 16-bit limbs and convert them to field elements.
// TODO: Refactor/replace u256_limbs in evm/src/util.rs
pub(crate) fn u256_to_array<F: Field>(out: &mut [F], x: U256) {
const_assert!(N_LIMBS == 16);
debug_assert!(out.len() == N_LIMBS);
u64_to_array(&mut out[0..4], x.0[0]);
u64_to_array(&mut out[4..8], x.0[1]);
u64_to_array(&mut out[8..12], x.0[2]);
u64_to_array(&mut out[12..16], x.0[3]);
}
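The comparing-coefficients derivations above for `pol_adjoin_root` and `pol_remove_root_2exp` are inverse to each other, which a small round trip makes concrete. A self-contained sketch over plain `i64` with 4 limbs, mirroring the formulas rather than the crate's generic signatures:

const N: usize = 4;

// res[0] = -root·a[0], res[i] = a[i-1] - root·a[i]; requires a[N-1] = 0
// so that (x - root)·a(x) still fits in N coefficients.
fn pol_adjoin_root(a: [i64; N], root: i64) -> [i64; N] {
    let mut res = [0i64; N];
    res[0] = -root * a[0];
    for deg in 1..N {
        res[deg] = a[deg - 1] - root * a[deg];
    }
    res
}

// q[0] = -a[0]/β, q[i] = (q[i-1] - a[i])/β with β = 2^EXP; the divisions
// are exact precisely when β is a root of a, and q[N-1] is left at zero.
fn pol_remove_root_2exp<const EXP: usize>(a: [i64; N]) -> [i64; N] {
    let mut q = [0i64; N];
    q[0] = -(a[0] >> EXP);
    for deg in 1..N - 1 {
        q[deg] = (q[deg - 1] - a[deg]) >> EXP;
    }
    q
}

fn main() {
    let q = [7, -3, 5, 0]; // degree <= N-2, as pol_adjoin_root requires
    let a = pol_adjoin_root(q, 1 << 4); // a(x) = (x - 16)·q(x), so a(16) = 0
    assert_eq!(pol_remove_root_2exp::<4>(a), q); // recover q exactly
}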

View File

@ -1,12 +0,0 @@
use std::{env, fs};
use hex::encode;
use plonky2_evm::cpu::kernel::assemble_to_bytes;
fn main() {
let mut args = env::args();
args.next();
let file_contents: Vec<_> = args.map(|path| fs::read_to_string(path).unwrap()).collect();
let assembled = assemble_to_bytes(&file_contents[..]);
println!("{}", encode(assembled));
}

View File

@ -1,440 +0,0 @@
//! This crate enforces the correctness of reading and writing sequences
//! of bytes in Big-Endian ordering from and to the memory.
//!
//! The trace layout consists of one row for an `N` byte sequence (where 32 ≥ `N` > 0).
//!
//! At each row the `i`-th byte flag will be activated to indicate a sequence of
//! length i+1.
//!
//! The length of a sequence can be retrieved for CTLs as:
//!
//! sequence_length = \sum_{i=0}^31 b[i] * (i + 1)
//!
//! where b[i] is the `i`-th byte flag.
//!
//! Because of the discrepancy in endianness between the different tables, the byte sequences
//! are actually written in the trace in reverse order from the order they are provided.
//! We only store the virtual address `virt` of the first byte, and the virtual address for byte `i`
//! can be recovered as:
//! virt_i = virt + sequence_length - 1 - i
//!
//! Note that, when writing a sequence of bytes to memory, both the `U256` value and the
//! corresponding sequence length are being read from the stack. Because of the endianness
//! discrepancy mentioned above, we first convert the value to a byte sequence in Little-Endian,
//! then resize the sequence to prune unneeded zeros before reversing the sequence order.
//! This means that the higher-order bytes will be thrown away during the process, if the value
//! is greater than 256^length, and as a result a different value will be stored in memory.
use core::marker::PhantomData;
use itertools::Itertools;
use plonky2::field::extension::{Extendable, FieldExtension};
use plonky2::field::packed::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::timed;
use plonky2::util::timing::TimingTree;
use plonky2::util::transpose;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use starky::evaluation_frame::StarkEvaluationFrame;
use starky::lookup::{Column, Filter, Lookup};
use starky::stark::Stark;
use super::NUM_BYTES;
use crate::all_stark::EvmStarkFrame;
use crate::byte_packing::columns::{
index_len, value_bytes, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL, IS_READ, LEN_INDICES_COLS,
NUM_COLUMNS, RANGE_COUNTER, RC_FREQUENCIES, TIMESTAMP,
};
use crate::witness::memory::MemoryAddress;
/// Strict upper bound for the individual bytes range-check.
const BYTE_RANGE_MAX: usize = 1usize << 8;
/// Creates the vector of `Columns` for `BytePackingStark` corresponding to the final packed limbs being read/written.
/// `CpuStark` will look into these columns, as the CPU needs the output of byte packing.
pub(crate) fn ctl_looked_data<F: Field>() -> Vec<Column<F>> {
// Reconstruct the u32 limbs composing the final `U256` word
// being read/written from the underlying byte values. For each,
// we pack 4 consecutive bytes and shift them accordingly to
// obtain the corresponding limb.
let outputs: Vec<Column<F>> = (0..8)
.map(|i| {
let range = value_bytes(i * 4)..value_bytes(i * 4) + 4;
Column::linear_combination(
range
.enumerate()
.map(|(j, c)| (c, F::from_canonical_u64(1 << (8 * j)))),
)
})
.collect();
let sequence_len: Column<F> = Column::linear_combination(
(0..NUM_BYTES).map(|i| (index_len(i), F::from_canonical_usize(i + 1))),
);
Column::singles([IS_READ, ADDR_CONTEXT, ADDR_SEGMENT, ADDR_VIRTUAL])
.chain([sequence_len])
.chain(Column::singles(&[TIMESTAMP]))
.chain(outputs)
.collect()
}
/// CTL filter for the `BytePackingStark` looked table.
pub(crate) fn ctl_looked_filter<F: Field>() -> Filter<F> {
// The CPU table is only interested in our sequence end rows,
// since those contain the final limbs of our packed int.
Filter::new_simple(Column::sum((0..NUM_BYTES).map(index_len)))
}
/// Column linear combination for the `BytePackingStark` table reading/writing the `i`th byte sequence from `MemoryStark`.
pub(crate) fn ctl_looking_memory<F: Field>(i: usize) -> Vec<Column<F>> {
let mut res = Column::singles([IS_READ, ADDR_CONTEXT, ADDR_SEGMENT]).collect_vec();
// Compute the virtual address: `ADDR_VIRTUAL` + `sequence_len` - 1 - i.
let sequence_len_minus_one = (0..NUM_BYTES)
.map(|j| (index_len(j), F::from_canonical_usize(j)))
.collect::<Vec<_>>();
let mut addr_virt_cols = vec![(ADDR_VIRTUAL, F::ONE)];
addr_virt_cols.extend(sequence_len_minus_one);
let addr_virt = Column::linear_combination_with_constant(
addr_virt_cols,
F::NEG_ONE * F::from_canonical_usize(i),
);
res.push(addr_virt);
// The i'th input byte being read/written.
res.push(Column::single(value_bytes(i)));
// Since we're reading a single byte, the higher limbs must be zero.
res.extend((1..8).map(|_| Column::zero()));
res.push(Column::single(TIMESTAMP));
res
}
/// CTL filter for reading/writing the `i`th byte of the byte sequence from/to memory.
pub(crate) fn ctl_looking_memory_filter<F: Field>(i: usize) -> Filter<F> {
Filter::new_simple(Column::sum((i..NUM_BYTES).map(index_len)))
}
/// Information about a byte packing operation needed for witness generation.
#[derive(Clone, Debug)]
pub(crate) struct BytePackingOp {
/// Whether this is a read (packing) or write (unpacking) operation.
pub(crate) is_read: bool,
/// The base address at which inputs are read/written.
pub(crate) base_address: MemoryAddress,
/// The timestamp at which inputs are read/written.
pub(crate) timestamp: usize,
/// The byte sequence that was read/written.
/// Its length is required to be at most 32.
pub(crate) bytes: Vec<u8>,
}
#[derive(Copy, Clone, Default)]
pub(crate) struct BytePackingStark<F, const D: usize> {
pub(crate) f: PhantomData<F>,
}
impl<F: RichField + Extendable<D>, const D: usize> BytePackingStark<F, D> {
pub(crate) fn generate_trace(
&self,
ops: Vec<BytePackingOp>,
min_rows: usize,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
// Generate most of the trace in row-major form.
let trace_rows = timed!(
timing,
"generate trace rows",
self.generate_trace_rows(ops, min_rows)
);
let trace_row_vecs: Vec<_> = trace_rows.into_iter().map(|row| row.to_vec()).collect();
let mut trace_cols = transpose(&trace_row_vecs);
self.generate_range_checks(&mut trace_cols);
trace_cols.into_iter().map(PolynomialValues::new).collect()
}
fn generate_trace_rows(
&self,
ops: Vec<BytePackingOp>,
min_rows: usize,
) -> Vec<[F; NUM_COLUMNS]> {
let base_len: usize = ops.iter().map(|op| usize::from(!op.bytes.is_empty())).sum();
let num_rows = core::cmp::max(base_len.max(BYTE_RANGE_MAX), min_rows).next_power_of_two();
let mut rows = Vec::with_capacity(num_rows);
for op in ops {
if !op.bytes.is_empty() {
rows.push(self.generate_row_for_op(op));
}
}
for _ in rows.len()..num_rows {
rows.push(self.generate_padding_row());
}
rows
}
fn generate_row_for_op(&self, op: BytePackingOp) -> [F; NUM_COLUMNS] {
let BytePackingOp {
is_read,
base_address,
timestamp,
bytes,
} = op;
let MemoryAddress {
context,
segment,
virt,
} = base_address;
let mut row = [F::ZERO; NUM_COLUMNS];
row[IS_READ] = F::from_bool(is_read);
row[ADDR_CONTEXT] = F::from_canonical_usize(context);
row[ADDR_SEGMENT] = F::from_canonical_usize(segment);
// We store the initial virtual address. But for the CTLs,
// we start with virt + sequence_len - 1.
row[ADDR_VIRTUAL] = F::from_canonical_usize(virt);
row[TIMESTAMP] = F::from_canonical_usize(timestamp);
row[index_len(bytes.len() - 1)] = F::ONE;
for (i, &byte) in bytes.iter().rev().enumerate() {
row[value_bytes(i)] = F::from_canonical_u8(byte);
}
row
}
const fn generate_padding_row(&self) -> [F; NUM_COLUMNS] {
[F::ZERO; NUM_COLUMNS]
}
/// Expects input in *column*-major layout
fn generate_range_checks(&self, cols: &mut [Vec<F>]) {
debug_assert!(cols.len() == NUM_COLUMNS);
let n_rows = cols[0].len();
debug_assert!(cols.iter().all(|col| col.len() == n_rows));
for i in 0..BYTE_RANGE_MAX {
cols[RANGE_COUNTER][i] = F::from_canonical_usize(i);
}
for i in BYTE_RANGE_MAX..n_rows {
cols[RANGE_COUNTER][i] = F::from_canonical_usize(BYTE_RANGE_MAX - 1);
}
// For each column c in cols, generate the range-check
// permutations and put them in the corresponding range-check
// columns rc_c and rc_c+1.
for col in 0..NUM_BYTES {
for i in 0..n_rows {
let c = value_bytes(col);
let x = cols[c][i].to_canonical_u64() as usize;
assert!(
x < BYTE_RANGE_MAX,
"column value {} exceeds the max range value {}",
x,
BYTE_RANGE_MAX
);
cols[RC_FREQUENCIES][x] += F::ONE;
}
}
}
}
impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for BytePackingStark<F, D> {
type EvaluationFrame<FE, P, const D2: usize> = EvmStarkFrame<P, FE, NUM_COLUMNS>
where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>;
type EvaluationFrameTarget = EvmStarkFrame<ExtensionTarget<D>, ExtensionTarget<D>, NUM_COLUMNS>;
fn eval_packed_generic<FE, P, const D2: usize>(
&self,
vars: &Self::EvaluationFrame<FE, P, D2>,
yield_constr: &mut ConstraintConsumer<P>,
) where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>,
{
let local_values: &[P; NUM_COLUMNS] = vars.get_local_values().try_into().unwrap();
let next_values: &[P; NUM_COLUMNS] = vars.get_next_values().try_into().unwrap();
// Check the range column: First value must be 0, last row
// must be 255, and intermediate rows must increment by 0
// or 1.
let rc1 = local_values[RANGE_COUNTER];
let rc2 = next_values[RANGE_COUNTER];
yield_constr.constraint_first_row(rc1);
let incr = rc2 - rc1;
yield_constr.constraint_transition(incr * incr - incr);
let range_max = P::Scalar::from_canonical_u64((BYTE_RANGE_MAX - 1) as u64);
yield_constr.constraint_last_row(rc1 - range_max);
let one = P::ONES;
// We filter active columns by summing all the byte indices.
// Constraining each of them to be boolean is done later on below.
let current_filter = local_values[LEN_INDICES_COLS].iter().copied().sum::<P>();
yield_constr.constraint(current_filter * (current_filter - one));
// The filter column must start with one.
yield_constr.constraint_first_row(current_filter - one);
// The is_read flag must be boolean.
let current_is_read = local_values[IS_READ];
yield_constr.constraint(current_is_read * (current_is_read - one));
// Each byte index must be boolean.
for i in 0..NUM_BYTES {
let idx_i = local_values[index_len(i)];
yield_constr.constraint(idx_i * (idx_i - one));
}
// Only padding rows have their filter turned off.
let next_filter = next_values[LEN_INDICES_COLS].iter().copied().sum::<P>();
yield_constr.constraint_transition(next_filter * (next_filter - current_filter));
// Check that all limbs after final length are 0.
for i in 0..NUM_BYTES - 1 {
// If the length is i+1, then value_bytes(i+1),...,value_bytes(NUM_BYTES-1) must be 0.
for j in i + 1..NUM_BYTES {
yield_constr.constraint(local_values[index_len(i)] * local_values[value_bytes(j)]);
}
}
}
fn eval_ext_circuit(
&self,
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
vars: &Self::EvaluationFrameTarget,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let local_values: &[ExtensionTarget<D>; NUM_COLUMNS] =
vars.get_local_values().try_into().unwrap();
let next_values: &[ExtensionTarget<D>; NUM_COLUMNS] =
vars.get_next_values().try_into().unwrap();
// Check the range column: First value must be 0, last row
// must be 255, and intermediate rows must increment by 0
// or 1.
let rc1 = local_values[RANGE_COUNTER];
let rc2 = next_values[RANGE_COUNTER];
yield_constr.constraint_first_row(builder, rc1);
let incr = builder.sub_extension(rc2, rc1);
let t = builder.mul_sub_extension(incr, incr, incr);
yield_constr.constraint_transition(builder, t);
let range_max =
builder.constant_extension(F::Extension::from_canonical_usize(BYTE_RANGE_MAX - 1));
let t = builder.sub_extension(rc1, range_max);
yield_constr.constraint_last_row(builder, t);
// We filter active columns by summing all the byte indices.
// Constraining each of them to be boolean is done later on below.
let current_filter = builder.add_many_extension(&local_values[LEN_INDICES_COLS]);
let constraint = builder.mul_sub_extension(current_filter, current_filter, current_filter);
yield_constr.constraint(builder, constraint);
// The filter column must start with one.
let constraint = builder.add_const_extension(current_filter, F::NEG_ONE);
yield_constr.constraint_first_row(builder, constraint);
// The is_read flag must be boolean.
let current_is_read = local_values[IS_READ];
let constraint =
builder.mul_sub_extension(current_is_read, current_is_read, current_is_read);
yield_constr.constraint(builder, constraint);
// Each byte index must be boolean.
for i in 0..NUM_BYTES {
let idx_i = local_values[index_len(i)];
let constraint = builder.mul_sub_extension(idx_i, idx_i, idx_i);
yield_constr.constraint(builder, constraint);
}
// Only padding rows have their filter turned off.
let next_filter = builder.add_many_extension(&next_values[LEN_INDICES_COLS]);
let constraint = builder.sub_extension(next_filter, current_filter);
let constraint = builder.mul_extension(next_filter, constraint);
yield_constr.constraint_transition(builder, constraint);
// Check that all limbs after final length are 0.
for i in 0..NUM_BYTES - 1 {
// If the length is i+1, then value_bytes(i+1),...,value_bytes(NUM_BYTES-1) must be 0.
for j in i + 1..NUM_BYTES {
let constr =
builder.mul_extension(local_values[index_len(i)], local_values[value_bytes(j)]);
yield_constr.constraint(builder, constr);
}
}
}
fn constraint_degree(&self) -> usize {
3
}
fn lookups(&self) -> Vec<Lookup<F>> {
vec![Lookup {
columns: Column::singles(value_bytes(0)..value_bytes(0) + NUM_BYTES).collect(),
table_column: Column::single(RANGE_COUNTER),
frequencies_column: Column::single(RC_FREQUENCIES),
filter_columns: vec![None; NUM_BYTES],
}]
}
fn requires_ctls(&self) -> bool {
true
}
}
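// A hypothetical standalone check (not part of the original file) of the
// range-counter transition constraint above: `incr * incr - incr == 0`
// holds exactly when `incr` is 0 or 1, so consecutive counter values may
// only repeat or step up by one; together with the first-row (0) and
// last-row (255) constraints this pins the column to 0..=255.
#[test]
fn counter_increment_is_boolean() {
    for incr in -3i64..=3 {
        let constraint = incr * incr - incr;
        assert_eq!(constraint == 0, incr == 0 || incr == 1);
    }
}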
#[cfg(test)]
pub(crate) mod tests {
use anyhow::Result;
use plonky2::plonk::config::{GenericConfig, PoseidonGoldilocksConfig};
use starky::stark_testing::{test_stark_circuit_constraints, test_stark_low_degree};
use crate::byte_packing::byte_packing_stark::BytePackingStark;
#[test]
fn test_stark_degree() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = BytePackingStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_low_degree(stark)
}
#[test]
fn test_stark_circuit() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = BytePackingStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_circuit_constraints::<F, C, S, D>(stark)
}
}
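The two formulas from the module comment, sequence_length = \sum_{i=0}^31 b[i] * (i + 1) and virt_i = virt + sequence_length - 1 - i, can be evaluated directly. A numeric sketch for a hypothetical 5-byte sequence based at virtual address 100:

const NUM_BYTES: usize = 32;

fn main() {
    // One row packs a 5-byte sequence: only index_len(4) is set.
    let mut len_flags = [0u64; NUM_BYTES];
    len_flags[4] = 1;

    // sequence_length = Σ b[i]·(i + 1)
    let sequence_length: u64 = len_flags
        .iter()
        .enumerate()
        .map(|(i, &b)| b * (i as u64 + 1))
        .sum();
    assert_eq!(sequence_length, 5);

    // Bytes are written reversed, so byte i lives at
    // virt_i = virt + sequence_length - 1 - i.
    let virt = 100u64;
    let addrs: Vec<u64> = (0..sequence_length)
        .map(|i| virt + sequence_length - 1 - i)
        .collect();
    assert_eq!(addrs, vec![104, 103, 102, 101, 100]);
}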

View File

@ -1,42 +0,0 @@
//! Byte packing registers.
use core::ops::Range;
use crate::byte_packing::NUM_BYTES;
/// 1 if this is a READ operation, and 0 if this is a WRITE operation.
pub(crate) const IS_READ: usize = 0;
pub(super) const LEN_INDICES_START: usize = IS_READ + 1;
// There are `NUM_BYTES` columns used to represent the length of
// the input byte sequence for a (un)packing operation.
// index_len(i) is 1 iff the length is i+1.
pub(crate) const fn index_len(i: usize) -> usize {
debug_assert!(i < NUM_BYTES);
LEN_INDICES_START + i
}
// Note: Those are used to obtain the length of a sequence of bytes being processed.
pub(crate) const LEN_INDICES_COLS: Range<usize> = LEN_INDICES_START..LEN_INDICES_START + NUM_BYTES;
pub(crate) const ADDR_CONTEXT: usize = LEN_INDICES_START + NUM_BYTES;
pub(crate) const ADDR_SEGMENT: usize = ADDR_CONTEXT + 1;
pub(crate) const ADDR_VIRTUAL: usize = ADDR_SEGMENT + 1;
pub(crate) const TIMESTAMP: usize = ADDR_VIRTUAL + 1;
// 32 byte limbs hold a total of 256 bits.
const BYTES_VALUES_START: usize = TIMESTAMP + 1;
// There are `NUM_BYTES` columns used to store the values of the bytes
// that are being read/written for an (un)packing operation.
pub(crate) const fn value_bytes(i: usize) -> usize {
debug_assert!(i < NUM_BYTES);
BYTES_VALUES_START + i
}
/// The counter column (used for the range check) starts from 0 and increments.
pub(crate) const RANGE_COUNTER: usize = BYTES_VALUES_START + NUM_BYTES;
/// The frequencies column used in logUp.
pub(crate) const RC_FREQUENCIES: usize = RANGE_COUNTER + 1;
/// Number of columns in `BytePackingStark`.
pub(crate) const NUM_COLUMNS: usize = RANGE_COUNTER + 2;

View File

@ -1,10 +0,0 @@
//! Byte packing / unpacking unit for the EVM.
//!
//! This module handles reading / writing to memory byte sequences of
//! length at most 32 in Big-Endian ordering.
pub mod byte_packing_stark;
pub mod columns;
/// Maximum number of bytes being processed by a byte (un)packing operation.
pub(crate) const NUM_BYTES: usize = 32;
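// A hedged illustration (not part of the original module) of the Big-Endian
// convention: the first byte of a sequence is the most significant. `pack_be`
// is hypothetical and only mirrors what the STARK constrains.
#[allow(dead_code)]
pub(crate) fn pack_be(bytes: &[u8]) -> ethereum_types::U256 {
debug_assert!(bytes.len() <= NUM_BYTES);
bytes
.iter()
.fold(ethereum_types::U256::zero(), |acc, &b| {
(acc << 8u64) + ethereum_types::U256::from(b)
})
}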

View File

@ -1,94 +0,0 @@
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::CpuColumnsView;
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
// The MSTORE_32BYTES opcodes are differentiated from MLOAD_32BYTES
// by their fifth bit being set to 0.
let filter = lv.op.m_op_32bytes * (lv.opcode_bits[5] - P::ONES);
// The address to write to is stored in the first memory channel.
// It contains virt, segment, ctx in its first 3 limbs, and 0 otherwise.
// The new address is identical, except for its `virtual` limb, which is increased by the corresponding `len` offset.
let new_addr = nv.mem_channels[0].value;
let written_addr = lv.mem_channels[0].value;
// Read len from opcode bits and constrain the pushed new offset.
let len_bits: P = lv.opcode_bits[..5]
.iter()
.enumerate()
.map(|(i, &bit)| bit * P::Scalar::from_canonical_u64(1 << i))
.sum();
let len = len_bits + P::ONES;
// Check that `virt` is increased properly.
yield_constr.constraint(filter * (new_addr[0] - written_addr[0] - len));
// Check that `segment` and `ctx` do not change.
yield_constr.constraint(filter * (new_addr[1] - written_addr[1]));
yield_constr.constraint(filter * (new_addr[2] - written_addr[2]));
// Check that the rest of the returned address is null.
for &limb in &new_addr[3..] {
yield_constr.constraint(filter * limb);
}
}
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// The MSTORE_32BYTES opcodes are differentiated from MLOAD_32BYTES
// by their fifth bit being set to 0.
let filter =
builder.mul_sub_extension(lv.op.m_op_32bytes, lv.opcode_bits[5], lv.op.m_op_32bytes);
// The address to write to is stored in the first memory channel.
// It contains virt, segment, ctx in its first 3 limbs, and 0 otherwise.
// The new address is identical, except for its `virtual` limb, which is increased by the corresponding `len` offset.
let new_addr = nv.mem_channels[0].value;
let written_addr = lv.mem_channels[0].value;
// Read len from opcode bits and constrain the pushed new offset.
let len_bits = lv.opcode_bits[..5].iter().enumerate().fold(
builder.zero_extension(),
|cumul, (i, &bit)| {
builder.mul_const_add_extension(F::from_canonical_u64(1 << i), bit, cumul)
},
);
// Check that `virt` is increased properly.
let diff = builder.sub_extension(new_addr[0], written_addr[0]);
let diff = builder.sub_extension(diff, len_bits);
let constr = builder.mul_sub_extension(filter, diff, filter);
yield_constr.constraint(builder, constr);
// Check that `segment` and `ctx` do not change.
{
let diff = builder.sub_extension(new_addr[1], written_addr[1]);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
let diff = builder.sub_extension(new_addr[2], written_addr[2]);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
// Check that the rest of the returned address is null.
for &limb in &new_addr[3..] {
let constr = builder.mul_extension(filter, limb);
yield_constr.constraint(builder, constr);
}
}
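// A small hypothetical check (not in the original file) of the encoding the
// constraints above rely on: the low five opcode bits store `len - 1`.
#[cfg(test)]
mod len_encoding_sketch {
#[test]
fn low_bits_encode_len_minus_one() {
for len in 1u64..=32 {
let bits: Vec<u64> = (0..5).map(|i| (len - 1) >> i & 1).collect();
let reconstructed: u64 = bits.iter().enumerate().map(|(i, &b)| b << i).sum();
assert_eq!(reconstructed + 1, len);
}
}
}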

View File

@ -1,37 +0,0 @@
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::CpuColumnsView;
/// Check the correct updating of `clock`.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
// The clock is 0 at the beginning.
yield_constr.constraint_first_row(lv.clock);
// The clock is incremented by 1 at each row.
yield_constr.constraint_transition(nv.clock - lv.clock - P::ONES);
}
/// Circuit version of `eval_packed`.
/// Check the correct updating of `clock`.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// The clock is 0 at the beginning.
yield_constr.constraint_first_row(builder, lv.clock);
// The clock is incremented by 1 at each row.
{
let new_clock = builder.add_const_extension(lv.clock, F::ONE);
let constr = builder.sub_extension(nv.clock, new_clock);
yield_constr.constraint_transition(builder, constr);
}
}

View File

@ -1,157 +0,0 @@
use core::borrow::{Borrow, BorrowMut};
use core::fmt::{Debug, Formatter};
use core::mem::{size_of, transmute};
/// General purpose columns, which can have different meanings depending on what CTL or other
/// operation is occurring at this row.
#[derive(Clone, Copy)]
pub(crate) union CpuGeneralColumnsView<T: Copy> {
exception: CpuExceptionView<T>,
logic: CpuLogicView<T>,
jumps: CpuJumpsView<T>,
shift: CpuShiftView<T>,
stack: CpuStackView<T>,
}
impl<T: Copy> CpuGeneralColumnsView<T> {
/// View of the columns used for exceptions: they are the exception code bits.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn exception(&self) -> &CpuExceptionView<T> {
unsafe { &self.exception }
}
/// Mutable view of the column required for exceptions: they are the exception code bits.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn exception_mut(&mut self) -> &mut CpuExceptionView<T> {
unsafe { &mut self.exception }
}
/// View of the columns required for logic operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn logic(&self) -> &CpuLogicView<T> {
unsafe { &self.logic }
}
/// Mutable view of the columns required for logic operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn logic_mut(&mut self) -> &mut CpuLogicView<T> {
unsafe { &mut self.logic }
}
/// View of the columns required for jump operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn jumps(&self) -> &CpuJumpsView<T> {
unsafe { &self.jumps }
}
/// Mutable view of the columns required for jump operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn jumps_mut(&mut self) -> &mut CpuJumpsView<T> {
unsafe { &mut self.jumps }
}
/// View of the columns required for shift operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn shift(&self) -> &CpuShiftView<T> {
unsafe { &self.shift }
}
/// Mutable view of the columns required for shift operations.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn shift_mut(&mut self) -> &mut CpuShiftView<T> {
unsafe { &mut self.shift }
}
/// View of the columns required for the stack top.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn stack(&self) -> &CpuStackView<T> {
unsafe { &self.stack }
}
/// Mutable view of the columns required for the stack top.
/// SAFETY: Each view is a valid interpretation of the underlying array.
pub(crate) fn stack_mut(&mut self) -> &mut CpuStackView<T> {
unsafe { &mut self.stack }
}
}
impl<T: Copy + PartialEq> PartialEq<Self> for CpuGeneralColumnsView<T> {
#[allow(clippy::unconditional_recursion)] // false positive
fn eq(&self, other: &Self) -> bool {
let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
let other_arr: &[T; NUM_SHARED_COLUMNS] = other.borrow();
self_arr == other_arr
}
}
impl<T: Copy + Eq> Eq for CpuGeneralColumnsView<T> {}
impl<T: Copy + Debug> Debug for CpuGeneralColumnsView<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let self_arr: &[T; NUM_SHARED_COLUMNS] = self.borrow();
Debug::fmt(self_arr, f)
}
}
impl<T: Copy> Borrow<[T; NUM_SHARED_COLUMNS]> for CpuGeneralColumnsView<T> {
fn borrow(&self) -> &[T; NUM_SHARED_COLUMNS] {
unsafe { transmute(self) }
}
}
impl<T: Copy> BorrowMut<[T; NUM_SHARED_COLUMNS]> for CpuGeneralColumnsView<T> {
fn borrow_mut(&mut self) -> &mut [T; NUM_SHARED_COLUMNS] {
unsafe { transmute(self) }
}
}
/// View of the first three `CpuGeneralColumns` containing exception code bits.
#[derive(Copy, Clone)]
pub(crate) struct CpuExceptionView<T: Copy> {
/// Exception code as little-endian bits.
pub(crate) exc_code_bits: [T; 3],
}
/// View of the `CpuGeneralColumns` storing pseudo-inverses used to prove logic operations.
#[derive(Copy, Clone)]
pub(crate) struct CpuLogicView<T: Copy> {
/// Pseudoinverse of `(input0 - input1)`. Used to prove that they are unequal. Assumes 32-bit limbs.
pub(crate) diff_pinv: [T; 8],
}
/// View of the first two `CpuGeneralColumns` storing a flag and a pseudoinverse used to prove jumps.
#[derive(Copy, Clone)]
pub(crate) struct CpuJumpsView<T: Copy> {
/// A flag indicating whether a jump should occur.
pub(crate) should_jump: T,
/// Pseudoinverse of `cond.iter().sum()`. Used to check `should_jump`.
pub(crate) cond_sum_pinv: T,
}
/// View of the first `CpuGeneralColumns` storing a pseudoinverse used to prove shift operations.
#[derive(Copy, Clone)]
pub(crate) struct CpuShiftView<T: Copy> {
/// For a shift displacement, this is the inverse of
/// `sum(displacement[1..])`, or zero if that sum is zero.
pub(crate) high_limb_sum_inv: T,
}
/// View of the last four `CpuGeneralColumns` storing stack-related variables. The first three are used
/// for conditionally enabling and disabling channels when reading the next `stack_top`, and the fourth one
/// is used to check for stack overflow.
#[derive(Copy, Clone)]
pub(crate) struct CpuStackView<T: Copy> {
_unused: [T; 4],
/// Pseudoinverse of `stack_len - num_pops`.
pub(crate) stack_inv: T,
/// stack_inv * stack_len.
pub(crate) stack_inv_aux: T,
/// Used to reduce the degree of stack constraints when needed.
pub(crate) stack_inv_aux_2: T,
/// Pseudoinverse of `nv.stack_len - (MAX_USER_STACK_SIZE + 1)` to check for stack overflow.
pub(crate) stack_len_bounds_aux: T,
}
/// Number of columns shared by all the views of `CpuGeneralColumnsView`.
/// `u8` is guaranteed to have a `size_of` of 1.
pub(crate) const NUM_SHARED_COLUMNS: usize = size_of::<CpuGeneralColumnsView<u8>>();
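// A hedged sketch (not in the original file): all five views alias the same
// NUM_SHARED_COLUMNS-element array, so a write through one view is observable
// through the `Borrow` impl. The test module below is hypothetical.
#[cfg(test)]
mod aliasing_sketch {
use core::borrow::Borrow;
use super::{CpuGeneralColumnsView, CpuStackView, NUM_SHARED_COLUMNS};
#[test]
fn stack_view_aliases_underlying_array() {
let mut view = CpuGeneralColumnsView {
stack: CpuStackView {
_unused: [0u8; 4],
stack_inv: 0,
stack_inv_aux: 0,
stack_inv_aux_2: 0,
stack_len_bounds_aux: 0,
},
};
view.stack_mut().stack_inv = 7;
let arr: &[u8; NUM_SHARED_COLUMNS] = view.borrow();
// The written value shows up in the flat array underlying the union.
assert!(arr.contains(&7));
}
}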

View File

@ -1,168 +0,0 @@
use core::borrow::{Borrow, BorrowMut};
use core::fmt::Debug;
use core::mem::{size_of, transmute};
use core::ops::{Index, IndexMut};
use plonky2::field::types::Field;
use crate::cpu::columns::general::CpuGeneralColumnsView;
use crate::cpu::columns::ops::OpsColumnsView;
use crate::cpu::membus::NUM_GP_CHANNELS;
use crate::memory;
use crate::util::{indices_arr, transmute_no_compile_time_size_checks};
mod general;
/// Cpu operation flags.
pub(crate) mod ops;
/// 32-bit limbs of the value stored in the current memory channel.
pub type MemValue<T> = [T; memory::VALUE_LIMBS];
/// View of the columns required for one memory channel.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) struct MemoryChannelView<T: Copy> {
/// 1 if this row includes a memory operation in the `i`th channel of the memory bus, otherwise
/// 0.
pub used: T,
/// 1 if a read is performed on the `i`th channel of the memory bus, otherwise 0.
pub is_read: T,
/// Context of the memory operation in the `i`th channel of the memory bus.
pub addr_context: T,
/// Segment of the memory operation in the `i`th channel of the memory bus.
pub addr_segment: T,
/// Virtual address of the memory operation in the `i`th channel of the memory bus.
pub addr_virtual: T,
/// Value, subdivided into 32-bit limbs, stored in the `i`th channel of the memory bus.
pub value: MemValue<T>,
}
// A more lightweight channel, sharing values with the 0-th memory channel
// (which contains the top of the stack).
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) struct PartialMemoryChannelView<T: Copy> {
pub used: T,
pub is_read: T,
pub addr_context: T,
pub addr_segment: T,
pub addr_virtual: T,
}
/// View of all the columns in `CpuStark`.
#[repr(C)]
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub(crate) struct CpuColumnsView<T: Copy> {
/// If CPU cycle: Current context.
pub context: T,
/// If CPU cycle: Context for code memory channel.
pub code_context: T,
/// If CPU cycle: The program counter for the current instruction.
pub program_counter: T,
/// If CPU cycle: The stack length.
pub stack_len: T,
/// If CPU cycle: We're in kernel (privileged) mode.
pub is_kernel_mode: T,
/// If CPU cycle: Gas counter.
pub gas: T,
/// If CPU cycle: flags for EVM instructions (a few cannot be shared; see the comments in
/// `OpsColumnsView`).
pub op: OpsColumnsView<T>,
/// If CPU cycle: the opcode, broken up into bits in little-endian order.
pub opcode_bits: [T; 8],
/// Columns shared by various operations.
pub(crate) general: CpuGeneralColumnsView<T>,
/// CPU clock.
pub(crate) clock: T,
/// Memory bus channels in the CPU.
/// Full channels comprise 13 columns.
pub mem_channels: [MemoryChannelView<T>; NUM_GP_CHANNELS],
/// The partial channel comprises only 5 columns.
pub(crate) partial_channel: PartialMemoryChannelView<T>,
}
/// Total number of columns in `CpuStark`.
/// `u8` is guaranteed to have a `size_of` of 1.
pub(crate) const NUM_CPU_COLUMNS: usize = size_of::<CpuColumnsView<u8>>();
impl<F: Field> Default for CpuColumnsView<F> {
fn default() -> Self {
Self::from([F::ZERO; NUM_CPU_COLUMNS])
}
}
impl<T: Copy> From<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
fn from(value: [T; NUM_CPU_COLUMNS]) -> Self {
unsafe { transmute_no_compile_time_size_checks(value) }
}
}
impl<T: Copy> From<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
fn from(value: CpuColumnsView<T>) -> Self {
unsafe { transmute_no_compile_time_size_checks(value) }
}
}
impl<T: Copy> Borrow<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
fn borrow(&self) -> &CpuColumnsView<T> {
unsafe { transmute(self) }
}
}
impl<T: Copy> BorrowMut<CpuColumnsView<T>> for [T; NUM_CPU_COLUMNS] {
fn borrow_mut(&mut self) -> &mut CpuColumnsView<T> {
unsafe { transmute(self) }
}
}
impl<T: Copy> Borrow<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
fn borrow(&self) -> &[T; NUM_CPU_COLUMNS] {
unsafe { transmute(self) }
}
}
impl<T: Copy> BorrowMut<[T; NUM_CPU_COLUMNS]> for CpuColumnsView<T> {
fn borrow_mut(&mut self) -> &mut [T; NUM_CPU_COLUMNS] {
unsafe { transmute(self) }
}
}
impl<T: Copy, I> Index<I> for CpuColumnsView<T>
where
[T]: Index<I>,
{
type Output = <[T] as Index<I>>::Output;
fn index(&self, index: I) -> &Self::Output {
let arr: &[T; NUM_CPU_COLUMNS] = self.borrow();
<[T] as Index<I>>::index(arr, index)
}
}
impl<T: Copy, I> IndexMut<I> for CpuColumnsView<T>
where
[T]: IndexMut<I>,
{
fn index_mut(&mut self, index: I) -> &mut Self::Output {
let arr: &mut [T; NUM_CPU_COLUMNS] = self.borrow_mut();
<[T] as IndexMut<I>>::index_mut(arr, index)
}
}
const fn make_col_map() -> CpuColumnsView<usize> {
let indices_arr = indices_arr::<NUM_CPU_COLUMNS>();
unsafe { transmute::<[usize; NUM_CPU_COLUMNS], CpuColumnsView<usize>>(indices_arr) }
}
/// Mapping between [0..NUM_CPU_COLUMNS-1] and the CPU columns.
pub(crate) const COL_MAP: CpuColumnsView<usize> = make_col_map();
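// A hypothetical illustration (not in the original file): COL_MAP lets
// constraint code use named fields while CTL helpers need flat indices.
#[cfg(test)]
mod col_map_sketch {
use super::COL_MAP;
#[test]
fn named_fields_map_to_flat_indices() {
// `context` is the first field of the `#[repr(C)]` view, so it maps to 0.
assert_eq!(COL_MAP.context, 0);
assert_eq!(COL_MAP.code_context, 1);
}
}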

View File

@ -1,89 +0,0 @@
use core::borrow::{Borrow, BorrowMut};
use core::mem::{size_of, transmute};
use core::ops::{Deref, DerefMut};
use crate::util::transmute_no_compile_time_size_checks;
/// Structure representing the flags for the various opcodes.
#[repr(C)]
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
pub(crate) struct OpsColumnsView<T: Copy> {
/// Combines ADD, MUL, SUB, DIV, MOD, LT, GT and BYTE flags.
pub binary_op: T,
/// Combines ADDMOD, MULMOD and SUBMOD flags.
pub ternary_op: T,
/// Combines ADD_FP254, MUL_FP254 and SUB_FP254 flags.
pub fp254_op: T,
/// Combines EQ and ISZERO flags.
pub eq_iszero: T,
/// Combines AND, OR and XOR flags.
pub logic_op: T,
/// Combines NOT and POP flags.
pub not_pop: T,
/// Combines SHL and SHR flags.
pub shift: T,
/// Combines JUMPDEST and KECCAK_GENERAL flags.
pub jumpdest_keccak_general: T,
/// Combines JUMP and JUMPI flags.
pub jumps: T,
/// Combines PUSH and PROVER_INPUT flags.
pub push_prover_input: T,
/// Combines DUP and SWAP flags.
pub dup_swap: T,
/// Combines GET_CONTEXT and SET_CONTEXT flags.
pub context_op: T,
/// Combines MSTORE_32BYTES and MLOAD_32BYTES.
pub m_op_32bytes: T,
/// Flag for EXIT_KERNEL.
pub exit_kernel: T,
/// Combines MSTORE_GENERAL and MLOAD_GENERAL flags.
pub m_op_general: T,
/// Combines PC and PUSH0 flags.
pub pc_push0: T,
/// Flag for syscalls.
pub syscall: T,
/// Flag for exceptions.
pub exception: T,
}
/// Number of operation flag columns in `OpsColumnsView`.
/// `u8` is guaranteed to have a `size_of` of 1.
pub(crate) const NUM_OPS_COLUMNS: usize = size_of::<OpsColumnsView<u8>>();
impl<T: Copy> From<[T; NUM_OPS_COLUMNS]> for OpsColumnsView<T> {
fn from(value: [T; NUM_OPS_COLUMNS]) -> Self {
unsafe { transmute_no_compile_time_size_checks(value) }
}
}
impl<T: Copy> From<OpsColumnsView<T>> for [T; NUM_OPS_COLUMNS] {
fn from(value: OpsColumnsView<T>) -> Self {
unsafe { transmute_no_compile_time_size_checks(value) }
}
}
impl<T: Copy> Borrow<OpsColumnsView<T>> for [T; NUM_OPS_COLUMNS] {
fn borrow(&self) -> &OpsColumnsView<T> {
unsafe { transmute(self) }
}
}
impl<T: Copy> BorrowMut<OpsColumnsView<T>> for [T; NUM_OPS_COLUMNS] {
fn borrow_mut(&mut self) -> &mut OpsColumnsView<T> {
unsafe { transmute(self) }
}
}
impl<T: Copy> Deref for OpsColumnsView<T> {
type Target = [T; NUM_OPS_COLUMNS];
fn deref(&self) -> &Self::Target {
unsafe { transmute(self) }
}
}
impl<T: Copy> DerefMut for OpsColumnsView<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
unsafe { transmute(self) }
}
}
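// A hedged sketch (not in the original file): `Deref` lets the flags be
// scanned as a flat array, which is how one-flag-per-cycle checks are written.
#[cfg(test)]
mod ops_deref_sketch {
use super::{OpsColumnsView, NUM_OPS_COLUMNS};
#[test]
fn flags_iterate_as_an_array() {
let mut flags = [false; NUM_OPS_COLUMNS];
flags[0] = true; // `binary_op` is the first flag under `#[repr(C)]`.
let view = OpsColumnsView::from(flags);
assert!(view.binary_op);
assert_eq!(view.iter().filter(|&&f| f).count(), 1);
}
}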

View File

@ -1,344 +0,0 @@
use itertools::izip;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use super::columns::ops::OpsColumnsView;
use super::cpu_stark::{disable_unused_channels, disable_unused_channels_circuit};
use crate::cpu::columns::CpuColumnsView;
use crate::memory::segments::Segment;
// If true, the instruction will keep the current context for the next row.
// If false, next row's context is handled manually.
const KEEPS_CONTEXT: OpsColumnsView<bool> = OpsColumnsView {
binary_op: true,
ternary_op: true,
fp254_op: true,
eq_iszero: true,
logic_op: true,
not_pop: true,
shift: true,
jumpdest_keccak_general: true,
push_prover_input: true,
jumps: true,
pc_push0: true,
dup_swap: true,
context_op: false,
m_op_32bytes: true,
exit_kernel: true,
m_op_general: true,
syscall: true,
exception: true,
};
fn eval_packed_keep<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
for (op, keeps_context) in izip!(lv.op.into_iter(), KEEPS_CONTEXT.into_iter()) {
if keeps_context {
yield_constr.constraint_transition(op * (nv.context - lv.context));
}
}
// context_op is hybrid; we evaluate it separately.
let is_get_context = lv.op.context_op * (lv.opcode_bits[0] - P::ONES);
yield_constr.constraint_transition(is_get_context * (nv.context - lv.context));
}
fn eval_ext_circuit_keep<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
for (op, keeps_context) in izip!(lv.op.into_iter(), KEEPS_CONTEXT.into_iter()) {
if keeps_context {
let diff = builder.sub_extension(nv.context, lv.context);
let constr = builder.mul_extension(op, diff);
yield_constr.constraint_transition(builder, constr);
}
}
// context_op is hybrid; we evaluate it separately.
let is_get_context =
builder.mul_sub_extension(lv.op.context_op, lv.opcode_bits[0], lv.op.context_op);
let diff = builder.sub_extension(nv.context, lv.context);
let constr = builder.mul_extension(is_get_context, diff);
yield_constr.constraint_transition(builder, constr);
}
/// Evaluates constraints for GET_CONTEXT.
fn eval_packed_get<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
// If the opcode is GET_CONTEXT, then lv.opcode_bits[0] = 0.
let filter = lv.op.context_op * (P::ONES - lv.opcode_bits[0]);
let new_stack_top = nv.mem_channels[0].value;
// Context is scaled by 2^64, hence stored in the 3rd limb.
yield_constr.constraint(filter * (new_stack_top[2] - lv.context));
for (_, &limb) in new_stack_top.iter().enumerate().filter(|(i, _)| *i != 2) {
yield_constr.constraint(filter * limb);
}
// Constrain new stack length.
yield_constr.constraint(filter * (nv.stack_len - (lv.stack_len + P::ONES)));
// Unused channels.
disable_unused_channels(lv, filter, vec![1], yield_constr);
yield_constr.constraint(filter * nv.mem_channels[0].used);
}
/// Circuit version of `eval_packed_get`.
/// Evaluates constraints for GET_CONTEXT.
fn eval_ext_circuit_get<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// If the opcode is GET_CONTEXT, then lv.opcode_bits[0] = 0.
let prod = builder.mul_extension(lv.op.context_op, lv.opcode_bits[0]);
let filter = builder.sub_extension(lv.op.context_op, prod);
let new_stack_top = nv.mem_channels[0].value;
// Context is scaled by 2^64, hence stored in the 3rd limb.
{
let diff = builder.sub_extension(new_stack_top[2], lv.context);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
for (_, &limb) in new_stack_top.iter().enumerate().filter(|(i, _)| *i != 2) {
let constr = builder.mul_extension(filter, limb);
yield_constr.constraint(builder, constr);
}
// Constrain new stack length.
{
let new_len = builder.add_const_extension(lv.stack_len, F::ONE);
let diff = builder.sub_extension(nv.stack_len, new_len);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
// Unused channels.
disable_unused_channels_circuit(builder, lv, filter, vec![1], yield_constr);
{
let constr = builder.mul_extension(filter, nv.mem_channels[0].used);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates constraints for `SET_CONTEXT`.
fn eval_packed_set<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let filter = lv.op.context_op * lv.opcode_bits[0];
let stack_top = lv.mem_channels[0].value;
// The next row's context is read from stack_top.
yield_constr.constraint(filter * (stack_top[2] - nv.context));
for (_, &limb) in stack_top.iter().enumerate().filter(|(i, _)| *i != 2) {
yield_constr.constraint(filter * limb);
}
// The old SP is decremented (since the new context was popped) and stored in memory.
// The new SP is loaded from memory.
// This is all done with CTLs: nothing is constrained here.
// Constrain stack_inv_aux_2.
let new_top_channel = nv.mem_channels[0];
yield_constr.constraint(
lv.op.context_op
* (lv.general.stack().stack_inv_aux * lv.opcode_bits[0]
- lv.general.stack().stack_inv_aux_2),
);
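// I.e. on context_op rows, stack_inv_aux_2 = stack_inv_aux * opcode_bits[0]:
// it equals stack_inv_aux for SET_CONTEXT and 0 for GET_CONTEXT, so the
// channel-2 copy check below only fires on SET_CONTEXT.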
// The new top is loaded in memory channel 2, if the stack isn't empty (see eval_packed).
for (&limb_new_top, &limb_read_top) in new_top_channel
.value
.iter()
.zip(lv.mem_channels[2].value.iter())
{
yield_constr.constraint(
lv.op.context_op * lv.general.stack().stack_inv_aux_2 * (limb_new_top - limb_read_top),
);
}
// Unused channels.
disable_unused_channels(lv, filter, vec![1], yield_constr);
yield_constr.constraint(filter * new_top_channel.used);
}
/// Circuit version of `eval_packed_set`.
/// Evaluates constraints for SET_CONTEXT.
fn eval_ext_circuit_set<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let filter = builder.mul_extension(lv.op.context_op, lv.opcode_bits[0]);
let stack_top = lv.mem_channels[0].value;
// The next row's context is read from stack_top.
{
let diff = builder.sub_extension(stack_top[2], nv.context);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
for (_, &limb) in stack_top.iter().enumerate().filter(|(i, _)| *i != 2) {
let constr = builder.mul_extension(filter, limb);
yield_constr.constraint(builder, constr);
}
// The old SP is decremented (since the new context was popped) and stored in memory.
// The new SP is loaded from memory.
// This is all done with CTLs: nothing is constrained here.
// Constrain stack_inv_aux_2.
let new_top_channel = nv.mem_channels[0];
{
let diff = builder.mul_sub_extension(
lv.general.stack().stack_inv_aux,
lv.opcode_bits[0],
lv.general.stack().stack_inv_aux_2,
);
let constr = builder.mul_extension(lv.op.context_op, diff);
yield_constr.constraint(builder, constr);
}
// The new top is loaded in memory channel 2, if the stack isn't empty (see eval_packed).
for (&limb_new_top, &limb_read_top) in new_top_channel
.value
.iter()
.zip(lv.mem_channels[2].value.iter())
{
let diff = builder.sub_extension(limb_new_top, limb_read_top);
let prod = builder.mul_extension(lv.general.stack().stack_inv_aux_2, diff);
let constr = builder.mul_extension(lv.op.context_op, prod);
yield_constr.constraint(builder, constr);
}
// Unused channels.
disable_unused_channels_circuit(builder, lv, filter, vec![1], yield_constr);
{
let constr = builder.mul_extension(filter, new_top_channel.used);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates the constraints for the GET and SET opcodes.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
eval_packed_keep(lv, nv, yield_constr);
eval_packed_get(lv, nv, yield_constr);
eval_packed_set(lv, nv, yield_constr);
// Stack constraints.
// Both operations use memory channel 2. The operations are similar enough that
// we can constrain both at the same time.
let filter = lv.op.context_op;
let channel = lv.mem_channels[2];
// For get_context, we check if lv.stack_len is 0. For set_context, we check if nv.stack_len is 0.
// However, for get_context, we can deduce lv.stack_len from nv.stack_len since the operation only pushes.
let stack_len = nv.stack_len - (P::ONES - lv.opcode_bits[0]);
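// Concretely: for GET_CONTEXT (bit 0 = 0) this is nv.stack_len - 1, which
// equals lv.stack_len since the operation pushed one value; for SET_CONTEXT
// (bit 0 = 1) it is nv.stack_len itself.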
// Constrain stack_inv_aux. It's 0 if the relevant stack is empty, 1 otherwise.
yield_constr.constraint(
filter * (stack_len * lv.general.stack().stack_inv - lv.general.stack().stack_inv_aux),
);
// Enable or disable the channel.
yield_constr.constraint(filter * (lv.general.stack().stack_inv_aux - channel.used));
let new_filter = filter * lv.general.stack().stack_inv_aux;
// It's a write for get_context, a read for set_context.
yield_constr.constraint(new_filter * (channel.is_read - lv.opcode_bits[0]));
// In both cases, next row's context works.
yield_constr.constraint(new_filter * (channel.addr_context - nv.context));
// Same segment for both.
yield_constr.constraint(
new_filter
* (channel.addr_segment - P::Scalar::from_canonical_usize(Segment::Stack.unscale())),
);
// The address is one less than stack_len.
let addr_virtual = stack_len - P::ONES;
yield_constr.constraint(new_filter * (channel.addr_virtual - addr_virtual));
}
/// Circuit version of `eval_packed`.
/// Evaluates the constraints for the GET and SET opcodes.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
eval_ext_circuit_keep(builder, lv, nv, yield_constr);
eval_ext_circuit_get(builder, lv, nv, yield_constr);
eval_ext_circuit_set(builder, lv, nv, yield_constr);
// Stack constraints.
// Both operations use memory channel 2. The operations are similar enough that
// we can constrain both at the same time.
let filter = lv.op.context_op;
let channel = lv.mem_channels[2];
// For get_context, we check if lv.stack_len is 0. For set_context, we check if nv.stack_len is 0.
// However, for get_context, we can deduce lv.stack_len from nv.stack_len since the operation only pushes.
let diff = builder.add_const_extension(lv.opcode_bits[0], -F::ONE);
let stack_len = builder.add_extension(nv.stack_len, diff);
// Constrain stack_inv_aux. It's 0 if the relevant stack is empty, 1 otherwise.
{
let diff = builder.mul_sub_extension(
stack_len,
lv.general.stack().stack_inv,
lv.general.stack().stack_inv_aux,
);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
// Enable or disable the channel.
{
let diff = builder.sub_extension(lv.general.stack().stack_inv_aux, channel.used);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
let new_filter = builder.mul_extension(filter, lv.general.stack().stack_inv_aux);
// It's a write for get_context, a read for set_context.
{
let diff = builder.sub_extension(channel.is_read, lv.opcode_bits[0]);
let constr = builder.mul_extension(new_filter, diff);
yield_constr.constraint(builder, constr);
}
// In both cases, next row's context works.
{
let diff = builder.sub_extension(channel.addr_context, nv.context);
let constr = builder.mul_extension(new_filter, diff);
yield_constr.constraint(builder, constr);
}
// Same segment for both.
{
let diff = builder.add_const_extension(
channel.addr_segment,
-F::from_canonical_usize(Segment::Stack.unscale()),
);
let constr = builder.mul_extension(new_filter, diff);
yield_constr.constraint(builder, constr);
}
// The address is one less than stack_len.
{
let addr_virtual = builder.add_const_extension(stack_len, -F::ONE);
let diff = builder.sub_extension(channel.addr_virtual, addr_virtual);
let constr = builder.mul_extension(new_filter, diff);
yield_constr.constraint(builder, constr);
}
}

View File

@ -1,166 +0,0 @@
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::{CpuColumnsView, COL_MAP};
use crate::cpu::kernel::aggregator::KERNEL;
const NATIVE_INSTRUCTIONS: [usize; 12] = [
COL_MAP.op.binary_op,
COL_MAP.op.ternary_op,
COL_MAP.op.fp254_op,
COL_MAP.op.eq_iszero,
COL_MAP.op.logic_op,
COL_MAP.op.not_pop,
COL_MAP.op.shift,
COL_MAP.op.jumpdest_keccak_general,
// Not PROVER_INPUT: it is dealt with manually below.
// not JUMPS (they may need to jump)
COL_MAP.op.pc_push0,
// not PUSH (need to increment by more than 1)
COL_MAP.op.dup_swap,
COL_MAP.op.context_op,
// not EXIT_KERNEL (performs a jump)
COL_MAP.op.m_op_general,
// not SYSCALL (performs a jump)
// not exceptions (also jump)
];
/// Returns `halt`'s program counter.
pub(crate) fn get_halt_pc<F: Field>() -> F {
let halt_pc = KERNEL.global_labels["halt"];
F::from_canonical_usize(halt_pc)
}
/// Returns `main`'s program counter.
pub(crate) fn get_start_pc<F: Field>() -> F {
let start_pc = KERNEL.global_labels["main"];
F::from_canonical_usize(start_pc)
}
/// Evaluates the constraints related to the flow of instructions.
pub(crate) fn eval_packed_generic<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_cpu_cycle: P = COL_MAP.op.iter().map(|&col_i| lv[col_i]).sum();
let is_cpu_cycle_next: P = COL_MAP.op.iter().map(|&col_i| nv[col_i]).sum();
let next_halt_state = P::ONES - is_cpu_cycle_next;
// Once we start executing instructions, we continue until the end of the table
// or we reach dummy padding rows. This, along with the constraints on the first row,
// enforces that operation flags and the halt flag are mutually exclusive over the entire
// CPU trace.
yield_constr
.constraint_transition(is_cpu_cycle * (is_cpu_cycle_next + next_halt_state - P::ONES));
// If a row is a CPU cycle executing a native instruction (implemented as a table row; not
// microcoded) then the program counter is incremented by 1 to obtain the next row's program
// counter. Also, the next row has the same kernel flag.
let is_native_instruction: P = NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i]).sum();
yield_constr.constraint_transition(
is_native_instruction * (lv.program_counter - nv.program_counter + P::ONES),
);
yield_constr
.constraint_transition(is_native_instruction * (lv.is_kernel_mode - nv.is_kernel_mode));
// Apply the same checks as before, for PROVER_INPUT.
let is_prover_input: P = lv.op.push_prover_input * (lv.opcode_bits[5] - P::ONES);
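// (PUSH rows have opcode bit 5 set to 1, so this filter vanishes on them
// and only PROVER_INPUT rows are selected.)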
yield_constr.constraint_transition(
is_prover_input * (lv.program_counter - nv.program_counter + P::ONES),
);
yield_constr.constraint_transition(is_prover_input * (lv.is_kernel_mode - nv.is_kernel_mode));
// If a non-CPU cycle row is followed by a CPU cycle row, then:
// - the `program_counter` of the CPU cycle row is `main` (the entry point of our kernel),
// - execution is in kernel mode, and
// - the stack is empty.
let is_last_noncpu_cycle = (is_cpu_cycle - P::ONES) * is_cpu_cycle_next;
let pc_diff = nv.program_counter - get_start_pc::<P::Scalar>();
yield_constr.constraint_transition(is_last_noncpu_cycle * pc_diff);
yield_constr.constraint_transition(is_last_noncpu_cycle * (nv.is_kernel_mode - P::ONES));
yield_constr.constraint_transition(is_last_noncpu_cycle * nv.stack_len);
}
/// Circuit version of `eval_packed`.
/// Evaluates the constraints related to the flow of instructions.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let one = builder.one_extension();
let is_cpu_cycle = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| lv[col_i]));
let is_cpu_cycle_next = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| nv[col_i]));
let next_halt_state = builder.sub_extension(one, is_cpu_cycle_next);
// Once we start executing instructions, we continue until the end of the table
// or we reach dummy padding rows. This, along with the constraints on the first row,
// enforces that operation flags and the halt flag are mutually exclusive over the entire
// CPU trace.
{
let constr = builder.add_extension(is_cpu_cycle_next, next_halt_state);
let constr = builder.mul_sub_extension(is_cpu_cycle, constr, is_cpu_cycle);
yield_constr.constraint_transition(builder, constr);
}
// If a row is a CPU cycle executing a native instruction (implemented as a table row; not
// microcoded) then the program counter is incremented by 1 to obtain the next row's program
// counter. Also, the next row has the same kernel flag.
{
let filter = builder.add_many_extension(NATIVE_INSTRUCTIONS.iter().map(|&col_i| lv[col_i]));
let pc_diff = builder.sub_extension(lv.program_counter, nv.program_counter);
let pc_constr = builder.mul_add_extension(filter, pc_diff, filter);
yield_constr.constraint_transition(builder, pc_constr);
let kernel_diff = builder.sub_extension(lv.is_kernel_mode, nv.is_kernel_mode);
let kernel_constr = builder.mul_extension(filter, kernel_diff);
yield_constr.constraint_transition(builder, kernel_constr);
// Same constraints as before, for PROVER_INPUT.
let is_prover_input = builder.mul_sub_extension(
lv.op.push_prover_input,
lv.opcode_bits[5],
lv.op.push_prover_input,
);
let pc_constr = builder.mul_add_extension(is_prover_input, pc_diff, is_prover_input);
yield_constr.constraint_transition(builder, pc_constr);
let kernel_constr = builder.mul_extension(is_prover_input, kernel_diff);
yield_constr.constraint_transition(builder, kernel_constr);
}
// If a non-CPU cycle row is followed by a CPU cycle row, then:
// - the `program_counter` of the CPU cycle row is `main` (the entry point of our kernel),
// - execution is in kernel mode, and
// - the stack is empty.
{
let is_last_noncpu_cycle =
builder.mul_sub_extension(is_cpu_cycle, is_cpu_cycle_next, is_cpu_cycle_next);
// Start at `main`.
let main = builder.constant_extension(get_start_pc::<F>().into());
let pc_diff = builder.sub_extension(nv.program_counter, main);
let pc_constr = builder.mul_extension(is_last_noncpu_cycle, pc_diff);
yield_constr.constraint_transition(builder, pc_constr);
// Start in kernel mode
let kernel_constr = builder.mul_sub_extension(
is_last_noncpu_cycle,
nv.is_kernel_mode,
is_last_noncpu_cycle,
);
yield_constr.constraint_transition(builder, kernel_constr);
// Start with empty stack
let kernel_constr = builder.mul_extension(is_last_noncpu_cycle, nv.stack_len);
yield_constr.constraint_transition(builder, kernel_constr);
}
}

View File

@ -1,574 +0,0 @@
use core::borrow::Borrow;
use core::iter::repeat;
use core::marker::PhantomData;
use itertools::Itertools;
use plonky2::field::extension::{Extendable, FieldExtension};
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use starky::cross_table_lookup::TableWithColumns;
use starky::evaluation_frame::StarkEvaluationFrame;
use starky::lookup::{Column, Filter};
use starky::stark::Stark;
use super::columns::CpuColumnsView;
use super::halt;
use super::kernel::constants::context_metadata::ContextMetadata;
use super::membus::NUM_GP_CHANNELS;
use crate::all_stark::{EvmStarkFrame, Table};
use crate::cpu::columns::{COL_MAP, NUM_CPU_COLUMNS};
use crate::cpu::{
byte_unpacking, clock, contextops, control_flow, decode, dup_swap, gas, jumps, membus, memio,
modfp254, pc, push0, shift, simple_logic, stack, syscalls_exceptions,
};
use crate::memory::segments::Segment;
use crate::memory::{NUM_CHANNELS, VALUE_LIMBS};
/// Creates the vector of `Columns` corresponding to the General Purpose channels when calling the Keccak sponge:
/// the CPU reads the output of the sponge directly from the `KeccakSpongeStark` table.
pub(crate) fn ctl_data_keccak_sponge<F: Field>() -> Vec<Column<F>> {
// When executing KECCAK_GENERAL, the GP memory channels are used as follows:
// GP channel 0: stack[-1] = addr (context, segment, virt)
// GP channel 1: stack[-2] = len
// Next GP channel 0: pushed = outputs
let (context, segment, virt) = get_addr(&COL_MAP, 0);
let context = Column::single(context);
let segment = Column::single(segment);
let virt = Column::single(virt);
let len = Column::single(COL_MAP.mem_channels[1].value[0]);
let num_channels = F::from_canonical_usize(NUM_CHANNELS);
let timestamp = Column::linear_combination([(COL_MAP.clock, num_channels)]);
let mut cols = vec![context, segment, virt, len, timestamp];
cols.extend(Column::singles_next_row(COL_MAP.mem_channels[0].value));
cols
}
/// CTL filter for a call to the Keccak sponge.
// KECCAK_GENERAL is differentiated from JUMPDEST by its second bit being set to 0.
pub(crate) fn ctl_filter_keccak_sponge<F: Field>() -> Filter<F> {
Filter::new(
vec![(
Column::single(COL_MAP.op.jumpdest_keccak_general),
Column::linear_combination_with_constant([(COL_MAP.opcode_bits[1], -F::ONE)], F::ONE),
)],
vec![],
)
}
/// Creates the vector of `Columns` corresponding to the two inputs and
/// one output of a binary operation.
fn ctl_data_binops<F: Field>() -> Vec<Column<F>> {
let mut res = Column::singles(COL_MAP.mem_channels[0].value).collect_vec();
res.extend(Column::singles(COL_MAP.mem_channels[1].value));
res.extend(Column::singles_next_row(COL_MAP.mem_channels[0].value));
res
}
/// Creates the vector of `Columns` corresponding to the three inputs and
/// one output of a ternary operation. By default, ternary operations use
/// the first three memory channels, and the next row's top of the stack for
/// the result (binary operations do not use the third input).
fn ctl_data_ternops<F: Field>() -> Vec<Column<F>> {
let mut res = Column::singles(COL_MAP.mem_channels[0].value).collect_vec();
res.extend(Column::singles(COL_MAP.mem_channels[1].value));
res.extend(Column::singles(COL_MAP.mem_channels[2].value));
res.extend(Column::singles_next_row(COL_MAP.mem_channels[0].value));
res
}
/// Creates the vector of columns corresponding to the opcode, the two inputs and the output of the logic operation.
pub(crate) fn ctl_data_logic<F: Field>() -> Vec<Column<F>> {
// Instead of taking single columns, we reconstruct the entire opcode value directly.
let mut res = vec![Column::le_bits(COL_MAP.opcode_bits)];
res.extend(ctl_data_binops());
res
}
/// CTL filter for logic operations.
pub(crate) fn ctl_filter_logic<F: Field>() -> Filter<F> {
Filter::new_simple(Column::single(COL_MAP.op.logic_op))
}
/// Returns the `TableWithColumns` for the CPU rows calling arithmetic operations.
pub(crate) fn ctl_arithmetic_base_rows<F: Field>() -> TableWithColumns<F> {
// Instead of taking single columns, we reconstruct the entire opcode value directly.
let mut columns = vec![Column::le_bits(COL_MAP.opcode_bits)];
columns.extend(ctl_data_ternops());
// Create the CPU Table whose columns are those with the three
// inputs and one output of the ternary operations listed in `ops`
// (also `ops` is used as the operation filter). The list of
// operations includes binary operations which will simply ignore
// the third input.
let col_bit = Column::linear_combination_with_constant(
vec![(COL_MAP.opcode_bits[5], F::NEG_ONE)],
F::ONE,
);
TableWithColumns::new(
*Table::Cpu,
columns,
Some(Filter::new(
vec![(Column::single(COL_MAP.op.push_prover_input), col_bit)],
vec![Column::sum([
COL_MAP.op.binary_op,
COL_MAP.op.fp254_op,
COL_MAP.op.ternary_op,
COL_MAP.op.shift,
COL_MAP.op.syscall,
COL_MAP.op.exception,
])],
)),
)
}
/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels when calling byte packing.
/// We use `ctl_data_keccak_sponge` because the `Columns` are the same as the ones computed for `KeccakSpongeStark`.
pub(crate) fn ctl_data_byte_packing<F: Field>() -> Vec<Column<F>> {
let mut res = vec![Column::constant(F::ONE)]; // is_read
res.extend(ctl_data_keccak_sponge());
res
}
/// CTL filter for the `MLOAD_32BYTES` operation.
/// MLOAD_32BYTES is differentiated from MSTORE_32BYTES by its fifth bit being set to 1.
pub(crate) fn ctl_filter_byte_packing<F: Field>() -> Filter<F> {
Filter::new(
vec![(
Column::single(COL_MAP.op.m_op_32bytes),
Column::single(COL_MAP.opcode_bits[5]),
)],
vec![],
)
}
/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels when calling byte unpacking.
pub(crate) fn ctl_data_byte_unpacking<F: Field>() -> Vec<Column<F>> {
let is_read = Column::constant(F::ZERO);
// When executing MSTORE_32BYTES, the GP memory channels are used as follows:
// GP channel 0: stack[-1] = addr (context, segment, virt)
// GP channel 1: stack[-2] = val
// Next GP channel 0: pushed = new_offset (virt + len)
let (context, segment, virt) = get_addr(&COL_MAP, 0);
let mut res = vec![
is_read,
Column::single(context),
Column::single(segment),
Column::single(virt),
];
// len can be reconstructed as new_offset - virt.
let len = Column::linear_combination_and_next_row_with_constant(
[(COL_MAP.mem_channels[0].value[0], -F::ONE)],
[(COL_MAP.mem_channels[0].value[0], F::ONE)],
F::ZERO,
);
res.push(len);
let num_channels = F::from_canonical_usize(NUM_CHANNELS);
let timestamp = Column::linear_combination([(COL_MAP.clock, num_channels)]);
res.push(timestamp);
let val = Column::singles(COL_MAP.mem_channels[1].value);
res.extend(val);
res
}
/// CTL filter for the `MSTORE_32BYTES` operation.
/// MSTORE_32BYTES is differentiated from MLOAD_32BYTES by its fifth bit being set to 0.
pub(crate) fn ctl_filter_byte_unpacking<F: Field>() -> Filter<F> {
Filter::new(
vec![(
Column::single(COL_MAP.op.m_op_32bytes),
Column::linear_combination_with_constant([(COL_MAP.opcode_bits[5], -F::ONE)], F::ONE),
)],
vec![],
)
}
/// Creates the vector of `Columns` corresponding to three consecutive (byte) reads in memory.
/// It's used by syscalls and exceptions to read an address in a jumptable.
pub(crate) fn ctl_data_jumptable_read<F: Field>() -> Vec<Column<F>> {
let is_read = Column::constant(F::ONE);
let mut res = vec![is_read];
// When reading the jumptable, the address to start reading from is in
// GP channel 1; the result is in GP channel 1's values.
let channel_map = COL_MAP.mem_channels[1];
res.extend(Column::singles([
channel_map.addr_context,
channel_map.addr_segment,
channel_map.addr_virtual,
]));
let val = Column::singles(channel_map.value);
// len is always 3.
let len = Column::constant(F::from_canonical_usize(3));
res.push(len);
let num_channels = F::from_canonical_usize(NUM_CHANNELS);
let timestamp = Column::linear_combination([(COL_MAP.clock, num_channels)]);
res.push(timestamp);
res.extend(val);
res
}
/// CTL filter for syscalls and exceptions.
pub(crate) fn ctl_filter_syscall_exceptions<F: Field>() -> Filter<F> {
Filter::new_simple(Column::sum([COL_MAP.op.syscall, COL_MAP.op.exception]))
}
/// Creates the vector of `Columns` corresponding to the contents of the CPU registers when performing a `PUSH`.
/// `PUSH` internal reads are done by calling `BytePackingStark`.
pub(crate) fn ctl_data_byte_packing_push<F: Field>() -> Vec<Column<F>> {
let is_read = Column::constant(F::ONE);
let context = Column::single(COL_MAP.code_context);
let segment = Column::constant(F::from_canonical_usize(Segment::Code as usize));
// The initial offset is `pc + 1`.
let virt =
Column::linear_combination_with_constant([(COL_MAP.program_counter, F::ONE)], F::ONE);
let val = Column::singles_next_row(COL_MAP.mem_channels[0].value);
// We fetch the length from the `PUSH` opcode's lower bits, which encode `len - 1`.
let len = Column::le_bits_with_constant(&COL_MAP.opcode_bits[0..5], F::ONE);
let num_channels = F::from_canonical_usize(NUM_CHANNELS);
let timestamp = Column::linear_combination([(COL_MAP.clock, num_channels)]);
let mut res = vec![is_read, context, segment, virt, len, timestamp];
res.extend(val);
res
}
/// CTL filter for the `PUSH` operation.
pub(crate) fn ctl_filter_byte_packing_push<F: Field>() -> Filter<F> {
let bit_col = Column::single(COL_MAP.opcode_bits[5]);
Filter::new(
vec![(Column::single(COL_MAP.op.push_prover_input), bit_col)],
vec![],
)
}
/// Index of the memory channel storing code.
pub(crate) const MEM_CODE_CHANNEL_IDX: usize = 0;
/// Index of the first general purpose memory channel.
pub(crate) const MEM_GP_CHANNELS_IDX_START: usize = MEM_CODE_CHANNEL_IDX + 1;
/// Recover the three components of an address, given a CPU row and
/// a provided memory channel index.
/// The components are recovered as follows:
///
/// - `context`, shifted by 2^64 (i.e. at index 2)
/// - `segment`, shifted by 2^32 (i.e. at index 1)
/// - `virtual`, not shifted (i.e. at index 0)
pub(crate) const fn get_addr<T: Copy>(lv: &CpuColumnsView<T>, mem_channel: usize) -> (T, T, T) {
let addr_context = lv.mem_channels[mem_channel].value[2];
let addr_segment = lv.mem_channels[mem_channel].value[1];
let addr_virtual = lv.mem_channels[mem_channel].value[0];
(addr_context, addr_segment, addr_virtual)
}
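// A hypothetical check (not in the original file) that the limb convention
// documented above round-trips: addr = virt + segment * 2^32 + context * 2^64.
#[cfg(test)]
mod addr_sketch {
#[test]
fn address_components_round_trip() {
let (context, segment, virt) = (3u128, 2u128, 1u128);
let packed = (context << 64) | (segment << 32) | virt;
assert_eq!(packed & 0xffff_ffff, virt);
assert_eq!((packed >> 32) & 0xffff_ffff, segment);
assert_eq!(packed >> 64, context);
}
}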
/// Make the time/channel column for memory lookups.
fn mem_time_and_channel<F: Field>(channel: usize) -> Column<F> {
let scalar = F::from_canonical_usize(NUM_CHANNELS);
let addend = F::from_canonical_usize(channel);
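// E.g. cycle `c` and channel `i` get the unique timestamp c * NUM_CHANNELS + i,
// so no two memory operations in the same cycle share a timestamp.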
Column::linear_combination_with_constant([(COL_MAP.clock, scalar)], addend)
}
/// Creates the vector of `Columns` corresponding to the contents of the code channel when reading code values.
pub(crate) fn ctl_data_code_memory<F: Field>() -> Vec<Column<F>> {
let mut cols = vec![
Column::constant(F::ONE), // is_read
Column::single(COL_MAP.code_context), // addr_context
Column::constant(F::from_canonical_usize(Segment::Code.unscale())), // addr_segment
Column::single(COL_MAP.program_counter), // addr_virtual
];
// Low limb of the value matches the opcode bits
cols.push(Column::le_bits(COL_MAP.opcode_bits));
// High limbs of the value are all zero.
cols.extend(repeat(Column::constant(F::ZERO)).take(VALUE_LIMBS - 1));
cols.push(mem_time_and_channel(MEM_CODE_CHANNEL_IDX));
cols
}
/// Creates the vector of `Columns` corresponding to the contents of General Purpose channels.
pub(crate) fn ctl_data_gp_memory<F: Field>(channel: usize) -> Vec<Column<F>> {
let channel_map = COL_MAP.mem_channels[channel];
let mut cols: Vec<_> = Column::singles([
channel_map.is_read,
channel_map.addr_context,
channel_map.addr_segment,
channel_map.addr_virtual,
])
.collect();
cols.extend(Column::singles(channel_map.value));
cols.push(mem_time_and_channel(MEM_GP_CHANNELS_IDX_START + channel));
cols
}
pub(crate) fn ctl_data_partial_memory<F: Field>() -> Vec<Column<F>> {
let channel_map = COL_MAP.partial_channel;
let values = COL_MAP.mem_channels[0].value;
let mut cols: Vec<_> = Column::singles([
channel_map.is_read,
channel_map.addr_context,
channel_map.addr_segment,
channel_map.addr_virtual,
])
.collect();
cols.extend(Column::singles(values));
cols.push(mem_time_and_channel(
MEM_GP_CHANNELS_IDX_START + NUM_GP_CHANNELS,
));
cols
}
/// Old stack pointer write for SET_CONTEXT.
pub(crate) fn ctl_data_memory_old_sp_write_set_context<F: Field>() -> Vec<Column<F>> {
let mut cols = vec![
Column::constant(F::ZERO), // is_read
Column::single(COL_MAP.context), // addr_context
Column::constant(F::from_canonical_usize(Segment::ContextMetadata.unscale())), // addr_segment
Column::constant(F::from_canonical_usize(
ContextMetadata::StackSize.unscale(),
)), // addr_virtual
];
// Low limb is current stack length minus one.
cols.push(Column::linear_combination_with_constant(
[(COL_MAP.stack_len, F::ONE)],
-F::ONE,
));
// High limbs of the value are all zero.
cols.extend(repeat(Column::constant(F::ZERO)).take(VALUE_LIMBS - 1));
cols.push(mem_time_and_channel(MEM_GP_CHANNELS_IDX_START + 1));
cols
}
/// New stack pointer read for SET_CONTEXT.
pub(crate) fn ctl_data_memory_new_sp_read_set_context<F: Field>() -> Vec<Column<F>> {
let mut cols = vec![
Column::constant(F::ONE), // is_read
Column::single(COL_MAP.mem_channels[0].value[2]), // addr_context (in the top of the stack)
Column::constant(F::from_canonical_usize(Segment::ContextMetadata.unscale())), // addr_segment
Column::constant(F::from_canonical_u64(
ContextMetadata::StackSize as u64 - Segment::ContextMetadata as u64,
)), // addr_virtual
];
// Low limb is new stack length.
cols.push(Column::single_next_row(COL_MAP.stack_len));
// High limbs of the value are all zero.
cols.extend(repeat(Column::constant(F::ZERO)).take(VALUE_LIMBS - 1));
cols.push(mem_time_and_channel(MEM_GP_CHANNELS_IDX_START + 2));
cols
}
/// CTL filter for code read and write operations.
pub(crate) fn ctl_filter_code_memory<F: Field>() -> Filter<F> {
Filter::new_simple(Column::sum(COL_MAP.op.iter()))
}
/// CTL filter for General Purpose memory read and write operations.
pub(crate) fn ctl_filter_gp_memory<F: Field>(channel: usize) -> Filter<F> {
Filter::new_simple(Column::single(COL_MAP.mem_channels[channel].used))
}
pub(crate) fn ctl_filter_partial_memory<F: Field>() -> Filter<F> {
Filter::new_simple(Column::single(COL_MAP.partial_channel.used))
}
/// CTL filter for the `SET_CONTEXT` operation.
/// SET_CONTEXT is differentiated from GET_CONTEXT by its zeroth bit being set to 1.
pub(crate) fn ctl_filter_set_context<F: Field>() -> Filter<F> {
Filter::new(
vec![(
Column::single(COL_MAP.op.context_op),
Column::single(COL_MAP.opcode_bits[0]),
)],
vec![],
)
}
/// Disable the specified memory channels.
/// Since channel 0 contains the top of the stack and is handled specially,
/// channels to disable are 1, 2 or both. All cases can be expressed as a vec.
pub(crate) fn disable_unused_channels<P: PackedField>(
lv: &CpuColumnsView<P>,
filter: P,
channels: Vec<usize>,
yield_constr: &mut ConstraintConsumer<P>,
) {
for i in channels {
yield_constr.constraint(filter * lv.mem_channels[i].used);
}
}
/// Circuit version of `disable_unused_channels`.
/// Disable the specified memory channels.
/// Since channel 0 contains the top of the stack and is handled specially,
/// channels to disable are 1, 2 or both. All cases can be expressed as a vec.
pub(crate) fn disable_unused_channels_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
filter: ExtensionTarget<D>,
channels: Vec<usize>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
for i in channels {
let constr = builder.mul_extension(filter, lv.mem_channels[i].used);
yield_constr.constraint(builder, constr);
}
}
/// Structure representing the CPU Stark.
#[derive(Copy, Clone, Default)]
pub(crate) struct CpuStark<F, const D: usize> {
pub f: PhantomData<F>,
}
impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for CpuStark<F, D> {
type EvaluationFrame<FE, P, const D2: usize> = EvmStarkFrame<P, FE, NUM_CPU_COLUMNS>
where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>;
type EvaluationFrameTarget =
EvmStarkFrame<ExtensionTarget<D>, ExtensionTarget<D>, NUM_CPU_COLUMNS>;
/// Evaluates all CPU constraints.
fn eval_packed_generic<FE, P, const D2: usize>(
&self,
vars: &Self::EvaluationFrame<FE, P, D2>,
yield_constr: &mut ConstraintConsumer<P>,
) where
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>,
{
let local_values: &[P; NUM_CPU_COLUMNS] = vars.get_local_values().try_into().unwrap();
let local_values: &CpuColumnsView<P> = local_values.borrow();
let next_values: &[P; NUM_CPU_COLUMNS] = vars.get_next_values().try_into().unwrap();
let next_values: &CpuColumnsView<P> = next_values.borrow();
byte_unpacking::eval_packed(local_values, next_values, yield_constr);
clock::eval_packed(local_values, next_values, yield_constr);
contextops::eval_packed(local_values, next_values, yield_constr);
control_flow::eval_packed_generic(local_values, next_values, yield_constr);
decode::eval_packed_generic(local_values, yield_constr);
dup_swap::eval_packed(local_values, next_values, yield_constr);
gas::eval_packed(local_values, next_values, yield_constr);
halt::eval_packed(local_values, next_values, yield_constr);
jumps::eval_packed(local_values, next_values, yield_constr);
membus::eval_packed(local_values, yield_constr);
memio::eval_packed(local_values, next_values, yield_constr);
modfp254::eval_packed(local_values, yield_constr);
pc::eval_packed(local_values, next_values, yield_constr);
push0::eval_packed(local_values, next_values, yield_constr);
shift::eval_packed(local_values, yield_constr);
simple_logic::eval_packed(local_values, next_values, yield_constr);
stack::eval_packed(local_values, next_values, yield_constr);
syscalls_exceptions::eval_packed(local_values, next_values, yield_constr);
}
/// Circuit version of `eval_packed_generic`.
/// Evaluates all CPU constraints.
fn eval_ext_circuit(
&self,
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
vars: &Self::EvaluationFrameTarget,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let local_values: &[ExtensionTarget<D>; NUM_CPU_COLUMNS] =
vars.get_local_values().try_into().unwrap();
let local_values: &CpuColumnsView<ExtensionTarget<D>> = local_values.borrow();
let next_values: &[ExtensionTarget<D>; NUM_CPU_COLUMNS] =
vars.get_next_values().try_into().unwrap();
let next_values: &CpuColumnsView<ExtensionTarget<D>> = next_values.borrow();
byte_unpacking::eval_ext_circuit(builder, local_values, next_values, yield_constr);
clock::eval_ext_circuit(builder, local_values, next_values, yield_constr);
contextops::eval_ext_circuit(builder, local_values, next_values, yield_constr);
control_flow::eval_ext_circuit(builder, local_values, next_values, yield_constr);
decode::eval_ext_circuit(builder, local_values, yield_constr);
dup_swap::eval_ext_circuit(builder, local_values, next_values, yield_constr);
gas::eval_ext_circuit(builder, local_values, next_values, yield_constr);
halt::eval_ext_circuit(builder, local_values, next_values, yield_constr);
jumps::eval_ext_circuit(builder, local_values, next_values, yield_constr);
membus::eval_ext_circuit(builder, local_values, yield_constr);
memio::eval_ext_circuit(builder, local_values, next_values, yield_constr);
modfp254::eval_ext_circuit(builder, local_values, yield_constr);
pc::eval_ext_circuit(builder, local_values, next_values, yield_constr);
push0::eval_ext_circuit(builder, local_values, next_values, yield_constr);
shift::eval_ext_circuit(builder, local_values, yield_constr);
simple_logic::eval_ext_circuit(builder, local_values, next_values, yield_constr);
stack::eval_ext_circuit(builder, local_values, next_values, yield_constr);
syscalls_exceptions::eval_ext_circuit(builder, local_values, next_values, yield_constr);
}
fn constraint_degree(&self) -> usize {
3
}
fn requires_ctls(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use plonky2::plonk::config::{GenericConfig, PoseidonGoldilocksConfig};
use starky::stark_testing::{test_stark_circuit_constraints, test_stark_low_degree};
use crate::cpu::cpu_stark::CpuStark;
#[test]
fn test_stark_degree() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = CpuStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_low_degree(stark)
}
#[test]
fn test_stark_circuit() -> Result<()> {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = CpuStark<F, D>;
let stark = S {
f: Default::default(),
};
test_stark_circuit_constraints::<F, C, S, D>(stark)
}
}


@ -1,405 +0,0 @@
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::{CpuColumnsView, COL_MAP};
/// List of opcode blocks.
/// Each block corresponds to exactly one flag, and each flag corresponds to exactly one block.
/// Each block of opcodes:
/// - is contiguous,
/// - has a length that is a power of 2, and
/// - starts at an index that is a multiple of its length (it is aligned).
/// These properties permit us to check whether an opcode belongs to a block of length 2^n by
/// checking its top 8-n bits (a standalone sketch of this check follows the `OPCODES` table
/// below).
/// Additionally, each block can be made available only to the user, only to the kernel, or to
/// both. This is mainly useful for making some instructions kernel-only, while still decoding to
/// invalid for the user. We do this by making one kernel-only block and another user-only block.
/// The exception is the PANIC instruction, which is user-only without a corresponding kernel
/// block. This makes the proof unverifiable when PANIC is executed in kernel mode, which is the
/// intended behavior.
/// Note: invalid opcodes are not represented here. _Any_ opcode is permitted to decode to
/// `is_invalid`. The kernel then verifies that the opcode was _actually_ invalid.
const OPCODES: [(u8, usize, bool, usize); 5] = [
    // (start index of block, log2 of the block length, kernel-only, flag column)
// ADD, MUL, SUB, DIV, MOD, LT, GT and BYTE flags are handled partly manually here, and partly through the Arithmetic table CTL.
// ADDMOD, MULMOD and SUBMOD flags are handled partly manually here, and partly through the Arithmetic table CTL.
// FP254 operation flags are handled partly manually here, and partly through the Arithmetic table CTL.
(0x14, 1, false, COL_MAP.op.eq_iszero),
// AND, OR and XOR flags are handled partly manually here, and partly through the Logic table CTL.
// NOT and POP are handled manually here.
// SHL and SHR flags are handled partly manually here, and partly through the Logic table CTL.
// JUMPDEST and KECCAK_GENERAL are handled manually here.
(0x56, 1, false, COL_MAP.op.jumps), // 0x56-0x57
(0x80, 5, false, COL_MAP.op.dup_swap), // 0x80-0x9f
    (0xf6, 1, true, COL_MAP.op.context_op), // 0xf6-0xf7
(0xf9, 0, true, COL_MAP.op.exit_kernel),
// MLOAD_GENERAL and MSTORE_GENERAL flags are handled manually here.
];
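// Exposition-only sketch (added for illustration; not part of the original file):
// membership in an aligned, power-of-two-sized block amounts to comparing the
// opcode's top `8 - n` bits against those of the block start, exactly as the
// `OPCODES` table above encodes it.
#[cfg(test)]
mod block_membership_sketch {
    /// Returns true iff `opcode` lies in the aligned block of `2^log_len`
    /// opcodes starting at `start`.
    fn in_block(opcode: u8, start: u8, log_len: u32) -> bool {
        debug_assert_eq!(u32::from(start) % (1u32 << log_len), 0, "block starts must be aligned");
        opcode >> log_len == start >> log_len
    }

    #[test]
    fn dup_swap_block() {
        // The DUP/SWAP block starts at 0x80 and spans 2^5 = 32 opcodes (0x80..=0x9f).
        assert!(in_block(0x80, 0x80, 5));
        assert!(in_block(0x9f, 0x80, 5));
        assert!(!in_block(0xa0, 0x80, 5));
        // A block of log-length 0 (e.g. EXIT_KERNEL at 0xf9) matches a single opcode.
        assert!(in_block(0xf9, 0xf9, 0));
        assert!(!in_block(0xf8, 0xf9, 0));
    }
}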
/// List of combined opcodes requiring a special handling.
/// Each index in the list corresponds to an arbitrary combination
/// of opcodes defined in evm/src/cpu/columns/ops.rs.
const COMBINED_OPCODES: [usize; 11] = [
COL_MAP.op.logic_op,
COL_MAP.op.fp254_op,
COL_MAP.op.binary_op,
COL_MAP.op.ternary_op,
COL_MAP.op.shift,
COL_MAP.op.m_op_general,
COL_MAP.op.jumpdest_keccak_general,
COL_MAP.op.not_pop,
COL_MAP.op.pc_push0,
COL_MAP.op.m_op_32bytes,
COL_MAP.op.push_prover_input,
];
/// Break up an opcode (which is 8 bits long) into its eight bits.
const fn bits_from_opcode(opcode: u8) -> [bool; 8] {
[
opcode & (1 << 0) != 0,
opcode & (1 << 1) != 0,
opcode & (1 << 2) != 0,
opcode & (1 << 3) != 0,
opcode & (1 << 4) != 0,
opcode & (1 << 5) != 0,
opcode & (1 << 6) != 0,
opcode & (1 << 7) != 0,
]
}
/// Evaluates the constraints for opcode decoding.
pub(crate) fn eval_packed_generic<P: PackedField>(
lv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
// Ensure that the kernel flag is valid (either 0 or 1).
let kernel_mode = lv.is_kernel_mode;
yield_constr.constraint(kernel_mode * (kernel_mode - P::ONES));
// Ensure that the opcode bits are valid: each has to be either 0 or 1.
for bit in lv.opcode_bits {
yield_constr.constraint(bit * (bit - P::ONES));
}
// Check that the instruction flags are valid.
// First, check that they are all either 0 or 1.
for (_, _, _, flag_col) in OPCODES {
let flag = lv[flag_col];
yield_constr.constraint(flag * (flag - P::ONES));
}
// Also check that the combined instruction flags are valid.
for flag_idx in COMBINED_OPCODES {
yield_constr.constraint(lv[flag_idx] * (lv[flag_idx] - P::ONES));
}
// Now check that they sum to 0 or 1, including the combined flags.
let flag_sum: P = OPCODES
.into_iter()
.map(|(_, _, _, flag_col)| lv[flag_col])
.chain(COMBINED_OPCODES.map(|op| lv[op]))
.sum::<P>();
yield_constr.constraint(flag_sum * (flag_sum - P::ONES));
// Finally, classify all opcodes, together with the kernel flag, into blocks
for (oc, block_length, kernel_only, col) in OPCODES {
        // 0 if the block/flag is available to us (it is always available, or we are in kernel
        // mode), and 1 otherwise.
let unavailable = match kernel_only {
false => P::ZEROS,
true => P::ONES - kernel_mode,
};
        // 0 if all the opcode bits match, and something in {1, ..., 8} otherwise.
let opcode_mismatch: P = lv
.opcode_bits
.into_iter()
.zip(bits_from_opcode(oc))
.rev()
.take(8 - block_length)
.map(|(row_bit, flag_bit)| match flag_bit {
// 1 if the bit does not match, and 0 otherwise
false => row_bit,
true => P::ONES - row_bit,
})
.sum();
// If unavailable + opcode_mismatch is 0, then the opcode bits all match and we are in the
// correct mode.
yield_constr.constraint(lv[col] * (unavailable + opcode_mismatch));
}
let opcode_high_bits = |num_high_bits| -> P {
lv.opcode_bits
.into_iter()
.enumerate()
.rev()
.take(num_high_bits)
.map(|(i, bit)| bit * P::Scalar::from_canonical_u64(1 << i))
.sum()
};
    // Manually check lv.op.m_op_general: MLOAD_GENERAL and MSTORE_GENERAL are kernel-only.
let opcode = opcode_high_bits(8);
yield_constr.constraint((P::ONES - kernel_mode) * lv.op.m_op_general);
let m_op_constr = (opcode - P::Scalar::from_canonical_usize(0xfb_usize))
* (opcode - P::Scalar::from_canonical_usize(0xfc_usize))
* lv.op.m_op_general;
yield_constr.constraint(m_op_constr);
// Manually check lv.op.jumpdest_keccak_general.
// KECCAK_GENERAL is a kernel-only instruction, but not JUMPDEST.
// JUMPDEST is differentiated from KECCAK_GENERAL by its second bit set to 1.
yield_constr.constraint(
(P::ONES - kernel_mode) * lv.op.jumpdest_keccak_general * (P::ONES - lv.opcode_bits[1]),
);
    // Check the JUMPDEST and KECCAK_GENERAL opcodes.
let jumpdest_opcode = P::Scalar::from_canonical_usize(0x5b);
let keccak_general_opcode = P::Scalar::from_canonical_usize(0x21);
let jumpdest_keccak_general_constr = (opcode - keccak_general_opcode)
* (opcode - jumpdest_opcode)
* lv.op.jumpdest_keccak_general;
yield_constr.constraint(jumpdest_keccak_general_constr);
// Manually check lv.op.pc_push0.
// Both PC and PUSH0 can be called outside of the kernel mode:
// there is no need to constrain them in that regard.
let pc_push0_constr = (opcode - P::Scalar::from_canonical_usize(0x58_usize))
* (opcode - P::Scalar::from_canonical_usize(0x5f_usize))
* lv.op.pc_push0;
yield_constr.constraint(pc_push0_constr);
// Manually check lv.op.not_pop.
// Both NOT and POP can be called outside of the kernel mode:
// there is no need to constrain them in that regard.
let not_pop_op = (opcode - P::Scalar::from_canonical_usize(0x19_usize))
* (opcode - P::Scalar::from_canonical_usize(0x50_usize))
* lv.op.not_pop;
yield_constr.constraint(not_pop_op);
// Manually check lv.op.m_op_32bytes.
// Both are kernel-only.
yield_constr.constraint((P::ONES - kernel_mode) * lv.op.m_op_32bytes);
    // Check the MSTORE_32BYTES and MLOAD_32BYTES opcodes.
let opcode_high_three = opcode_high_bits(3);
let op_32bytes = (opcode_high_three - P::Scalar::from_canonical_usize(0xc0_usize))
* (opcode - P::Scalar::from_canonical_usize(0xf8_usize))
* lv.op.m_op_32bytes;
yield_constr.constraint(op_32bytes);
// Manually check PUSH and PROVER_INPUT.
// PROVER_INPUT is a kernel-only instruction, but not PUSH.
let push_prover_input_constr = (opcode - P::Scalar::from_canonical_usize(0x49_usize))
* (opcode_high_three - P::Scalar::from_canonical_usize(0x60_usize))
* lv.op.push_prover_input;
yield_constr.constraint(push_prover_input_constr);
let prover_input_constr =
lv.op.push_prover_input * (lv.opcode_bits[5] - P::ONES) * (P::ONES - kernel_mode);
yield_constr.constraint(prover_input_constr);
}
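// Exposition-only sketch (added for illustration; not part of the original file):
// `opcode_high_bits(k)` above recombines the top `k` opcode bits into their
// weighted value; with k = 8 it reconstructs the opcode itself, with k = 3 it
// keeps only the top three bits.
#[cfg(test)]
mod opcode_high_bits_sketch {
    fn high_bits_value(bits: [bool; 8], k: usize) -> u64 {
        // Bits are stored least-significant first, mirroring `lv.opcode_bits`.
        bits.iter()
            .enumerate()
            .rev()
            .take(k)
            .map(|(i, &bit)| (bit as u64) << i)
            .sum()
    }

    #[test]
    fn recombine() {
        // 0xad = 0b1010_1101, least-significant bit first.
        let bits = [true, false, true, true, false, true, false, true];
        assert_eq!(high_bits_value(bits, 8), 0xad);
        assert_eq!(high_bits_value(bits, 3), 0xa0); // top three bits only
    }
}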
fn opcode_high_bits_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
num_high_bits: usize,
) -> ExtensionTarget<D> {
lv.opcode_bits
.into_iter()
.enumerate()
.rev()
.take(num_high_bits)
.fold(builder.zero_extension(), |cumul, (i, bit)| {
builder.mul_const_add_extension(F::from_canonical_usize(1 << i), bit, cumul)
})
}
/// Circuit version of `eval_packed_generic`.
/// Evaluates the constraints for opcode decoding.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let one = builder.one_extension();
// Note: The constraints below do not need to be restricted to CPU cycles.
// Ensure that the kernel flag is valid (either 0 or 1).
let kernel_mode = lv.is_kernel_mode;
{
let constr = builder.mul_sub_extension(kernel_mode, kernel_mode, kernel_mode);
yield_constr.constraint(builder, constr);
}
// Ensure that the opcode bits are valid: each has to be either 0 or 1.
for bit in lv.opcode_bits {
let constr = builder.mul_sub_extension(bit, bit, bit);
yield_constr.constraint(builder, constr);
}
// Check that the instruction flags are valid.
// First, check that they are all either 0 or 1.
for (_, _, _, flag_col) in OPCODES {
let flag = lv[flag_col];
let constr = builder.mul_sub_extension(flag, flag, flag);
yield_constr.constraint(builder, constr);
}
// Also check that the combined instruction flags are valid.
for flag_idx in COMBINED_OPCODES {
let constr = builder.mul_sub_extension(lv[flag_idx], lv[flag_idx], lv[flag_idx]);
yield_constr.constraint(builder, constr);
}
// Now check that they sum to 0 or 1, including the combined flags.
{
let mut flag_sum =
builder.add_many_extension(COMBINED_OPCODES.into_iter().map(|idx| lv[idx]));
for (_, _, _, flag_col) in OPCODES {
let flag = lv[flag_col];
flag_sum = builder.add_extension(flag_sum, flag);
}
let constr = builder.mul_sub_extension(flag_sum, flag_sum, flag_sum);
yield_constr.constraint(builder, constr);
}
// Finally, classify all opcodes, together with the kernel flag, into blocks
for (oc, block_length, kernel_only, col) in OPCODES {
        // 0 if the block/flag is available to us (it is always available, or we are in kernel
        // mode), and 1 otherwise.
let unavailable = match kernel_only {
false => builder.zero_extension(),
true => builder.sub_extension(one, kernel_mode),
};
        // 0 if all the opcode bits match, and something in {1, ..., 8} otherwise.
let opcode_mismatch = lv
.opcode_bits
.into_iter()
.zip(bits_from_opcode(oc))
.rev()
.take(8 - block_length)
.fold(builder.zero_extension(), |cumul, (row_bit, flag_bit)| {
let to_add = match flag_bit {
false => row_bit,
true => builder.sub_extension(one, row_bit),
};
builder.add_extension(cumul, to_add)
});
// If unavailable + opcode_mismatch is 0, then the opcode bits all match and we are in the
// correct mode.
let constr = builder.add_extension(unavailable, opcode_mismatch);
let constr = builder.mul_extension(lv[col], constr);
yield_constr.constraint(builder, constr);
}
    // Manually check lv.op.m_op_general: MLOAD_GENERAL and MSTORE_GENERAL are kernel-only.
let opcode = opcode_high_bits_circuit(builder, lv, 8);
let mload_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0xfb_usize));
let mstore_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0xfc_usize));
let one_extension = builder.constant_extension(F::Extension::ONE);
let is_not_kernel_mode = builder.sub_extension(one_extension, kernel_mode);
let constr = builder.mul_extension(is_not_kernel_mode, lv.op.m_op_general);
yield_constr.constraint(builder, constr);
let mload_constr = builder.sub_extension(opcode, mload_opcode);
let mstore_constr = builder.sub_extension(opcode, mstore_opcode);
let mut m_op_constr = builder.mul_extension(mload_constr, mstore_constr);
m_op_constr = builder.mul_extension(m_op_constr, lv.op.m_op_general);
yield_constr.constraint(builder, m_op_constr);
// Manually check lv.op.jumpdest_keccak_general.
// KECCAK_GENERAL is a kernel-only instruction, but not JUMPDEST.
// JUMPDEST is differentiated from KECCAK_GENERAL by its second bit set to 1.
let jumpdest_opcode =
builder.constant_extension(F::Extension::from_canonical_usize(0x5b_usize));
let keccak_general_opcode =
builder.constant_extension(F::Extension::from_canonical_usize(0x21_usize));
// Check that KECCAK_GENERAL is kernel-only.
let mut kernel_general_filter = builder.sub_extension(one, lv.opcode_bits[1]);
kernel_general_filter =
builder.mul_extension(lv.op.jumpdest_keccak_general, kernel_general_filter);
let constr = builder.mul_extension(is_not_kernel_mode, kernel_general_filter);
yield_constr.constraint(builder, constr);
    // Check the JUMPDEST and KECCAK_GENERAL opcodes.
let jumpdest_constr = builder.sub_extension(opcode, jumpdest_opcode);
let keccak_general_constr = builder.sub_extension(opcode, keccak_general_opcode);
let mut jumpdest_keccak_general_constr =
builder.mul_extension(jumpdest_constr, keccak_general_constr);
jumpdest_keccak_general_constr = builder.mul_extension(
jumpdest_keccak_general_constr,
lv.op.jumpdest_keccak_general,
);
yield_constr.constraint(builder, jumpdest_keccak_general_constr);
// Manually check lv.op.pc_push0.
// Both PC and PUSH0 can be called outside of the kernel mode:
// there is no need to constrain them in that regard.
let pc_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0x58_usize));
let push0_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0x5f_usize));
let pc_constr = builder.sub_extension(opcode, pc_opcode);
let push0_constr = builder.sub_extension(opcode, push0_opcode);
let mut pc_push0_constr = builder.mul_extension(pc_constr, push0_constr);
pc_push0_constr = builder.mul_extension(pc_push0_constr, lv.op.pc_push0);
yield_constr.constraint(builder, pc_push0_constr);
// Manually check lv.op.not_pop.
// Both NOT and POP can be called outside of the kernel mode:
// there is no need to constrain them in that regard.
let not_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0x19_usize));
let pop_opcode = builder.constant_extension(F::Extension::from_canonical_usize(0x50_usize));
let not_constr = builder.sub_extension(opcode, not_opcode);
let pop_constr = builder.sub_extension(opcode, pop_opcode);
let mut not_pop_constr = builder.mul_extension(not_constr, pop_constr);
not_pop_constr = builder.mul_extension(lv.op.not_pop, not_pop_constr);
yield_constr.constraint(builder, not_pop_constr);
// Manually check lv.op.m_op_32bytes.
// Both are kernel-only.
let constr = builder.mul_extension(is_not_kernel_mode, lv.op.m_op_32bytes);
yield_constr.constraint(builder, constr);
    // Check the MSTORE_32BYTES and MLOAD_32BYTES opcodes.
let opcode_high_three = opcode_high_bits_circuit(builder, lv, 3);
let mstore_32bytes_opcode =
builder.constant_extension(F::Extension::from_canonical_usize(0xc0_usize));
let mload_32bytes_opcode =
builder.constant_extension(F::Extension::from_canonical_usize(0xf8_usize));
let mstore_32bytes_constr = builder.sub_extension(opcode_high_three, mstore_32bytes_opcode);
let mload_32bytes_constr = builder.sub_extension(opcode, mload_32bytes_opcode);
let constr = builder.mul_extension(mstore_32bytes_constr, mload_32bytes_constr);
let constr = builder.mul_extension(constr, lv.op.m_op_32bytes);
yield_constr.constraint(builder, constr);
// Manually check PUSH and PROVER_INPUT.
// PROVER_INPUT is a kernel-only instruction, but not PUSH.
let prover_input_opcode =
builder.constant_extension(F::Extension::from_canonical_usize(0x49usize));
let push_opcodes = builder.constant_extension(F::Extension::from_canonical_usize(0x60usize));
let push_constr = builder.sub_extension(opcode_high_three, push_opcodes);
let prover_input_constr = builder.sub_extension(opcode, prover_input_opcode);
let push_prover_input_constr =
builder.mul_many_extension([lv.op.push_prover_input, prover_input_constr, push_constr]);
yield_constr.constraint(builder, push_prover_input_constr);
let prover_input_filter = builder.mul_sub_extension(
lv.op.push_prover_input,
lv.opcode_bits[5],
lv.op.push_prover_input,
);
let constr = builder.mul_extension(prover_input_filter, is_not_kernel_mode);
yield_constr.constraint(builder, constr);
}


@ -1,343 +0,0 @@
use itertools::izip;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::{CpuColumnsView, MemoryChannelView};
use crate::memory::segments::Segment;
/// Constrain two channels to have equal values.
fn channels_equal_packed<P: PackedField>(
filter: P,
ch_a: &MemoryChannelView<P>,
ch_b: &MemoryChannelView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
for (limb_a, limb_b) in izip!(ch_a.value, ch_b.value) {
yield_constr.constraint(filter * (limb_a - limb_b));
}
}
/// Constrain two channels to have equal values.
fn channels_equal_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
filter: ExtensionTarget<D>,
ch_a: &MemoryChannelView<ExtensionTarget<D>>,
ch_b: &MemoryChannelView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
for (limb_a, limb_b) in izip!(ch_a.value, ch_b.value) {
let diff = builder.sub_extension(limb_a, limb_b);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
}
/// Set `used`, `is_read`, and address for channel.
///
/// `offset` is the stack index before this instruction is executed, e.g. `0` for the top of the
/// stack.
fn constrain_channel_packed<P: PackedField>(
is_read: bool,
filter: P,
offset: P,
channel: &MemoryChannelView<P>,
lv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
yield_constr.constraint(filter * (channel.used - P::ONES));
yield_constr.constraint(filter * (channel.is_read - P::Scalar::from_bool(is_read)));
yield_constr.constraint(filter * (channel.addr_context - lv.context));
yield_constr.constraint(
filter * (channel.addr_segment - P::Scalar::from_canonical_usize(Segment::Stack.unscale())),
);
// Top of the stack is at `addr = lv.stack_len - 1`.
let addr_virtual = lv.stack_len - P::ONES - offset;
yield_constr.constraint(filter * (channel.addr_virtual - addr_virtual));
}
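// Exposition-only sketch (added for illustration; not part of the original file):
// with the stack stored bottom-up in memory, depth `offset` (0 = top) lives at
// virtual address `stack_len - 1 - offset`, which is the address the constraint
// above fixes for the channel.
#[cfg(test)]
mod stack_address_sketch {
    fn stack_addr(stack_len: u64, offset: u64) -> u64 {
        stack_len - 1 - offset
    }

    #[test]
    fn addresses() {
        assert_eq!(stack_addr(5, 0), 4); // top of a 5-element stack
        assert_eq!(stack_addr(5, 4), 0); // bottom element
    }
}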
/// Set `used`, `is_read`, and address for channel.
///
/// `offset` is the stack index before this instruction is executed, e.g. `0` for the top of the
/// stack.
fn constrain_channel_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
is_read: bool,
filter: ExtensionTarget<D>,
offset: ExtensionTarget<D>,
channel: &MemoryChannelView<ExtensionTarget<D>>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
{
let constr = builder.mul_sub_extension(filter, channel.used, filter);
yield_constr.constraint(builder, constr);
}
{
let constr = if is_read {
builder.mul_sub_extension(filter, channel.is_read, filter)
} else {
builder.mul_extension(filter, channel.is_read)
};
yield_constr.constraint(builder, constr);
}
{
let diff = builder.sub_extension(channel.addr_context, lv.context);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.arithmetic_extension(
F::ONE,
-F::from_canonical_usize(Segment::Stack.unscale()),
filter,
channel.addr_segment,
filter,
);
yield_constr.constraint(builder, constr);
}
// Top of the stack is at `addr = lv.stack_len - 1`.
{
let constr = builder.add_extension(channel.addr_virtual, offset);
let constr = builder.sub_extension(constr, lv.stack_len);
let constr = builder.mul_add_extension(filter, constr, filter);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates constraints for DUP.
fn eval_packed_dup<P: PackedField>(
n: P,
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
    // DUP opcodes have 0 as their fifth bit (`opcode_bits[4]`), while SWAP opcodes have 1.
let filter = lv.op.dup_swap * (P::ONES - lv.opcode_bits[4]);
let write_channel = &lv.mem_channels[1];
let read_channel = &lv.mem_channels[2];
    // The write channel must carry the value currently at the top of the stack (channel 0).
    channels_equal_packed(filter, write_channel, &lv.mem_channels[0], yield_constr);
    // Constrain the write channel's address, `is_read` and `used` fields: the old top is
    // written back to its own stack slot (depth 0).
    constrain_channel_packed(false, filter, P::ZEROS, write_channel, lv, yield_constr);
    // The read channel must carry the value that becomes the new top of the stack.
    channels_equal_packed(filter, read_channel, &nv.mem_channels[0], yield_constr);
    // Constrain the read channel's address, `is_read` and `used` fields: the duplicated
    // value is read from depth `n`.
    constrain_channel_packed(true, filter, n, read_channel, lv, yield_constr);
// Constrain nv.stack_len.
yield_constr.constraint_transition(filter * (nv.stack_len - lv.stack_len - P::ONES));
// Disable next top.
yield_constr.constraint(filter * nv.mem_channels[0].used);
}
/// Circuit version of `eval_packed_dup`.
/// Evaluates constraints for DUP.
fn eval_ext_circuit_dup<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
n: ExtensionTarget<D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let zero = builder.zero_extension();
let one = builder.one_extension();
    // DUP opcodes have 0 as their fifth bit (`opcode_bits[4]`), while SWAP opcodes have 1.
let mut filter = builder.sub_extension(one, lv.opcode_bits[4]);
filter = builder.mul_extension(lv.op.dup_swap, filter);
let write_channel = &lv.mem_channels[1];
let read_channel = &lv.mem_channels[2];
    // The write channel must carry the value currently at the top of the stack (channel 0).
channels_equal_ext_circuit(
builder,
filter,
write_channel,
&lv.mem_channels[0],
yield_constr,
);
    // Constrain the write channel's address, `is_read` and `used` fields: the old top is written back to its own stack slot (depth 0).
constrain_channel_ext_circuit(
builder,
false,
filter,
zero,
write_channel,
lv,
yield_constr,
);
    // The read channel must carry the value that becomes the new top of the stack.
channels_equal_ext_circuit(
builder,
filter,
read_channel,
&nv.mem_channels[0],
yield_constr,
);
    // Constrain the read channel's address, `is_read` and `used` fields: the duplicated value is read from depth `n`.
constrain_channel_ext_circuit(builder, true, filter, n, read_channel, lv, yield_constr);
// Constrain nv.stack_len.
{
let diff = builder.sub_extension(nv.stack_len, lv.stack_len);
let constr = builder.mul_sub_extension(filter, diff, filter);
yield_constr.constraint_transition(builder, constr);
}
// Disable next top.
{
let constr = builder.mul_extension(filter, nv.mem_channels[0].used);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates constraints for SWAP.
fn eval_packed_swap<P: PackedField>(
n: P,
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let n_plus_one = n + P::ONES;
    // DUP opcodes have 0 as their fifth bit (`opcode_bits[4]`), while SWAP opcodes have 1.
let filter = lv.op.dup_swap * lv.opcode_bits[4];
let in1_channel = &lv.mem_channels[0];
let in2_channel = &lv.mem_channels[1];
let out_channel = &lv.mem_channels[2];
// Constrain the first input channel value to be equal to the output channel value.
channels_equal_packed(filter, in1_channel, out_channel, yield_constr);
    // Constrain the output channel's address, `is_read` and `used` fields. The first input
    // comes from the top-of-the-stack register (channel 0), so its value is written, not
    // read, to the slot at depth `n + 1`.
constrain_channel_packed(false, filter, n_plus_one, out_channel, lv, yield_constr);
// Constrain the second input channel value to be equal to the new top of the stack.
channels_equal_packed(filter, in2_channel, &nv.mem_channels[0], yield_constr);
// We set `is_read`, `used` and the address for the second input.
constrain_channel_packed(true, filter, n_plus_one, in2_channel, lv, yield_constr);
// Constrain nv.stack_len.
yield_constr.constraint(filter * (nv.stack_len - lv.stack_len));
// Disable next top.
yield_constr.constraint(filter * nv.mem_channels[0].used);
}
/// Circuit version of `eval_packed_swap`.
/// Evaluates constraints for SWAP.
fn eval_ext_circuit_swap<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
n: ExtensionTarget<D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let one = builder.one_extension();
let n_plus_one = builder.add_extension(n, one);
    // DUP opcodes have 0 as their fifth bit (`opcode_bits[4]`), while SWAP opcodes have 1.
let filter = builder.mul_extension(lv.op.dup_swap, lv.opcode_bits[4]);
let in1_channel = &lv.mem_channels[0];
let in2_channel = &lv.mem_channels[1];
let out_channel = &lv.mem_channels[2];
// Constrain the first input channel value to be equal to the output channel value.
channels_equal_ext_circuit(builder, filter, in1_channel, out_channel, yield_constr);
    // Constrain the output channel's address, `is_read` and `used` fields. The first input
    // comes from the top-of-the-stack register (channel 0), so its value is written, not
    // read, to the slot at depth `n + 1`.
constrain_channel_ext_circuit(
builder,
false,
filter,
n_plus_one,
out_channel,
lv,
yield_constr,
);
// Constrain the second input channel value to be equal to the new top of the stack.
channels_equal_ext_circuit(
builder,
filter,
in2_channel,
&nv.mem_channels[0],
yield_constr,
);
// We set `is_read`, `used` and the address for the second input.
constrain_channel_ext_circuit(
builder,
true,
filter,
n_plus_one,
in2_channel,
lv,
yield_constr,
);
// Constrain nv.stack_len.
let diff = builder.sub_extension(nv.stack_len, lv.stack_len);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
// Disable next top.
{
let constr = builder.mul_extension(filter, nv.mem_channels[0].used);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates the constraints for the DUP and SWAP opcodes.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let n = lv.opcode_bits[0]
+ lv.opcode_bits[1] * P::Scalar::from_canonical_u64(2)
+ lv.opcode_bits[2] * P::Scalar::from_canonical_u64(4)
+ lv.opcode_bits[3] * P::Scalar::from_canonical_u64(8);
eval_packed_dup(n, lv, nv, yield_constr);
eval_packed_swap(n, lv, nv, yield_constr);
// For both, disable the partial channel.
yield_constr.constraint(lv.op.dup_swap * lv.partial_channel.used);
}
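// Exposition-only sketch (added for illustration; not part of the original file):
// the low four opcode bits encode `n`, so DUP1 = 0x80 yields n = 0 (the top of
// the stack) and SWAP16 = 0x9f yields n = 15, matching the weighted sum
// computed above.
#[cfg(test)]
mod dup_swap_n_sketch {
    fn n_from_opcode(opcode: u8) -> u8 {
        opcode & 0x0f
    }

    #[test]
    fn decode_n() {
        assert_eq!(n_from_opcode(0x80), 0); // DUP1 duplicates the top
        assert_eq!(n_from_opcode(0x8f), 15); // DUP16
        assert_eq!(n_from_opcode(0x90), 0); // SWAP1 swaps depths 0 and 1
        assert_eq!(n_from_opcode(0x9f), 15); // SWAP16 swaps depths 0 and 16
    }
}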
/// Circuit version of `eval_packed`.
/// Evaluates the constraints for the DUP and SWAP opcodes.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let n = lv.opcode_bits[..4].iter().enumerate().fold(
builder.zero_extension(),
|cumul, (i, &bit)| {
builder.mul_const_add_extension(F::from_canonical_u64(1 << i), bit, cumul)
},
);
eval_ext_circuit_dup(builder, n, lv, nv, yield_constr);
eval_ext_circuit_swap(builder, n, lv, nv, yield_constr);
// For both, disable the partial channel.
{
let constr = builder.mul_extension(lv.op.dup_swap, lv.partial_channel.used);
yield_constr.constraint(builder, constr);
}
}


@ -1,324 +0,0 @@
use itertools::izip;
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use super::columns::COL_MAP;
use crate::cpu::columns::ops::OpsColumnsView;
use crate::cpu::columns::CpuColumnsView;
const KERNEL_ONLY_INSTR: Option<u32> = Some(0);
const G_JUMPDEST: Option<u32> = Some(1);
const G_BASE: Option<u32> = Some(2);
const G_VERYLOW: Option<u32> = Some(3);
const G_LOW: Option<u32> = Some(5);
const G_MID: Option<u32> = Some(8);
const G_HIGH: Option<u32> = Some(10);
const SIMPLE_OPCODES: OpsColumnsView<Option<u32>> = OpsColumnsView {
binary_op: None, // This is handled manually below
ternary_op: None, // This is handled manually below
fp254_op: KERNEL_ONLY_INSTR,
eq_iszero: G_VERYLOW,
logic_op: G_VERYLOW,
not_pop: None, // This is handled manually below
shift: G_VERYLOW,
jumpdest_keccak_general: None, // This is handled manually below.
push_prover_input: None, // This is handled manually below.
jumps: None, // Combined flag handled separately.
pc_push0: G_BASE,
dup_swap: G_VERYLOW,
context_op: KERNEL_ONLY_INSTR,
m_op_32bytes: KERNEL_ONLY_INSTR,
exit_kernel: None,
m_op_general: KERNEL_ONLY_INSTR,
syscall: None,
exception: None,
};
fn eval_packed_accumulate<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
// Is it an instruction that we constrain here?
// I.e., does it always cost a constant amount of gas?
let filter: P = SIMPLE_OPCODES
.into_iter()
.enumerate()
.filter_map(|(i, maybe_cost)| {
// Add flag `lv.op[i]` to the sum if `SIMPLE_OPCODES[i]` is `Some`.
maybe_cost.map(|_| lv.op[i])
})
.sum();
// How much gas did we use?
let gas_used: P = SIMPLE_OPCODES
.into_iter()
.enumerate()
.filter_map(|(i, maybe_cost)| {
maybe_cost.map(|cost| P::Scalar::from_canonical_u32(cost) * lv.op[i])
})
.sum();
let constr = nv.gas - (lv.gas + gas_used);
yield_constr.constraint_transition(filter * constr);
let gas_diff = nv.gas - lv.gas;
for (maybe_cost, op_flag) in izip!(SIMPLE_OPCODES.into_iter(), lv.op.into_iter()) {
if let Some(cost) = maybe_cost {
let cost = P::Scalar::from_canonical_u32(cost);
yield_constr.constraint_transition(op_flag * (gas_diff - cost));
}
}
// For jumps.
let jump_gas_cost = P::Scalar::from_canonical_u32(G_MID.unwrap())
+ lv.opcode_bits[0] * P::Scalar::from_canonical_u32(G_HIGH.unwrap() - G_MID.unwrap());
yield_constr.constraint_transition(lv.op.jumps * (gas_diff - jump_gas_cost));
// For binary_ops.
// MUL, DIV and MOD are differentiated from ADD, SUB, LT, GT and BYTE by their first and fifth bits set to 0.
let cost_filter = lv.opcode_bits[0] + lv.opcode_bits[4] - lv.opcode_bits[0] * lv.opcode_bits[4];
let binary_op_cost = P::Scalar::from_canonical_u32(G_LOW.unwrap())
+ cost_filter
* (P::Scalar::from_canonical_u32(G_VERYLOW.unwrap())
- P::Scalar::from_canonical_u32(G_LOW.unwrap()));
yield_constr.constraint_transition(lv.op.binary_op * (gas_diff - binary_op_cost));
// For ternary_ops.
// SUBMOD is differentiated by its second bit set to 1.
let ternary_op_cost = P::Scalar::from_canonical_u32(G_MID.unwrap())
- lv.opcode_bits[1] * P::Scalar::from_canonical_u32(G_MID.unwrap());
yield_constr.constraint_transition(lv.op.ternary_op * (gas_diff - ternary_op_cost));
// For NOT and POP.
// NOT is differentiated from POP by its first bit set to 1.
let not_pop_cost = (P::ONES - lv.opcode_bits[0])
* P::Scalar::from_canonical_u32(G_BASE.unwrap())
+ lv.opcode_bits[0] * P::Scalar::from_canonical_u32(G_VERYLOW.unwrap());
yield_constr.constraint_transition(lv.op.not_pop * (gas_diff - not_pop_cost));
// For JUMPDEST and KECCAK_GENERAL.
// JUMPDEST is differentiated from KECCAK_GENERAL by its second bit set to 1.
let jumpdest_keccak_general_gas_cost = lv.opcode_bits[1]
* P::Scalar::from_canonical_u32(G_JUMPDEST.unwrap())
+ (P::ONES - lv.opcode_bits[1]) * P::Scalar::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap());
yield_constr.constraint_transition(
lv.op.jumpdest_keccak_general * (gas_diff - jumpdest_keccak_general_gas_cost),
);
// For PROVER_INPUT and PUSH operations.
// PUSH operations are differentiated from PROVER_INPUT by their 6th bit set to 1.
let push_prover_input_gas_cost = lv.opcode_bits[5]
* P::Scalar::from_canonical_u32(G_VERYLOW.unwrap())
+ (P::ONES - lv.opcode_bits[5]) * P::Scalar::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap());
yield_constr
.constraint_transition(lv.op.push_prover_input * (gas_diff - push_prover_input_gas_cost));
}
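// Exposition-only sketch (added for illustration; not part of the original file)
// of the bit-selected gas costs used above, computed over plain integers:
// `b0 + b4 - b0*b4` is a Boolean OR of opcode bits 0 and 4, and jumps select
// between G_MID and G_HIGH on bit 0.
#[cfg(test)]
mod gas_cost_sketch {
    fn bit(opcode: u8, i: u8) -> i64 {
        i64::from((opcode >> i) & 1)
    }

    /// G_LOW + (b0 OR b4) * (G_VERYLOW - G_LOW)
    fn binary_op_cost(opcode: u8) -> i64 {
        let (b0, b4) = (bit(opcode, 0), bit(opcode, 4));
        let or = b0 + b4 - b0 * b4;
        5 + or * (3 - 5)
    }

    /// G_MID + b0 * (G_HIGH - G_MID)
    fn jump_cost(opcode: u8) -> i64 {
        8 + bit(opcode, 0) * (10 - 8)
    }

    #[test]
    fn costs() {
        assert_eq!(binary_op_cost(0x01), 3); // ADD: G_VERYLOW
        assert_eq!(binary_op_cost(0x02), 5); // MUL: G_LOW
        assert_eq!(binary_op_cost(0x10), 3); // LT: G_VERYLOW
        assert_eq!(jump_cost(0x56), 8); // JUMP: G_MID
        assert_eq!(jump_cost(0x57), 10); // JUMPI: G_HIGH
    }
}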
fn eval_packed_init<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_cpu_cycle: P = COL_MAP.op.iter().map(|&col_i| lv[col_i]).sum();
let is_cpu_cycle_next: P = COL_MAP.op.iter().map(|&col_i| nv[col_i]).sum();
// `nv` is the first row that executes an instruction.
let filter = (is_cpu_cycle - P::ONES) * is_cpu_cycle_next;
// Set initial gas to zero.
yield_constr.constraint_transition(filter * nv.gas);
}
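// Exposition-only sketch (added for illustration; not part of the original file):
// `(is_cycle - 1) * is_cycle_next` is nonzero exactly on the boundary row where
// the current row is not yet a CPU cycle but the next one is, which is where
// the initial gas is pinned to zero.
#[cfg(test)]
mod gas_init_filter_sketch {
    fn is_first_cycle_boundary(is_cycle: i64, is_cycle_next: i64) -> bool {
        (is_cycle - 1) * is_cycle_next != 0
    }

    #[test]
    fn boundary() {
        assert!(is_first_cycle_boundary(0, 1)); // execution starts on the next row
        assert!(!is_first_cycle_boundary(1, 1)); // already executing
        assert!(!is_first_cycle_boundary(0, 0)); // not executing yet
    }
}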
/// Evaluate the gas constraints for the opcodes that cost a constant gas.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
eval_packed_accumulate(lv, nv, yield_constr);
eval_packed_init(lv, nv, yield_constr);
}
fn eval_ext_circuit_accumulate<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// Is it an instruction that we constrain here?
// I.e., does it always cost a constant amount of gas?
let filter = SIMPLE_OPCODES.into_iter().enumerate().fold(
builder.zero_extension(),
|cumul, (i, maybe_cost)| {
// Add flag `lv.op[i]` to the sum if `SIMPLE_OPCODES[i]` is `Some`.
match maybe_cost {
None => cumul,
Some(_) => builder.add_extension(lv.op[i], cumul),
}
},
);
// How much gas did we use?
let gas_used = SIMPLE_OPCODES.into_iter().enumerate().fold(
builder.zero_extension(),
|cumul, (i, maybe_cost)| match maybe_cost {
None => cumul,
Some(cost) => {
let cost_ext = builder.constant_extension(F::from_canonical_u32(cost).into());
builder.mul_add_extension(lv.op[i], cost_ext, cumul)
}
},
);
let constr = {
let t = builder.add_extension(lv.gas, gas_used);
builder.sub_extension(nv.gas, t)
};
let filtered_constr = builder.mul_extension(filter, constr);
yield_constr.constraint_transition(builder, filtered_constr);
for (maybe_cost, op_flag) in izip!(SIMPLE_OPCODES.into_iter(), lv.op.into_iter()) {
if let Some(cost) = maybe_cost {
let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas);
let constr = builder.arithmetic_extension(
F::ONE,
-F::from_canonical_u32(cost),
op_flag,
nv_lv_diff,
op_flag,
);
yield_constr.constraint_transition(builder, constr);
}
}
// For jumps.
let filter = lv.op.jumps;
let jump_gas_cost = builder.mul_const_extension(
F::from_canonical_u32(G_HIGH.unwrap() - G_MID.unwrap()),
lv.opcode_bits[0],
);
let jump_gas_cost =
builder.add_const_extension(jump_gas_cost, F::from_canonical_u32(G_MID.unwrap()));
let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas);
let gas_diff = builder.sub_extension(nv_lv_diff, jump_gas_cost);
let constr = builder.mul_extension(filter, gas_diff);
yield_constr.constraint_transition(builder, constr);
// For binary_ops.
// MUL, DIV and MOD are differentiated from ADD, SUB, LT, GT and BYTE by their first and fifth bits set to 0.
let filter = lv.op.binary_op;
let cost_filter = {
let a = builder.add_extension(lv.opcode_bits[0], lv.opcode_bits[4]);
let b = builder.mul_extension(lv.opcode_bits[0], lv.opcode_bits[4]);
builder.sub_extension(a, b)
};
let binary_op_cost = builder.mul_const_extension(
F::from_canonical_u32(G_VERYLOW.unwrap()) - F::from_canonical_u32(G_LOW.unwrap()),
cost_filter,
);
let binary_op_cost =
builder.add_const_extension(binary_op_cost, F::from_canonical_u32(G_LOW.unwrap()));
let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas);
let gas_diff = builder.sub_extension(nv_lv_diff, binary_op_cost);
let constr = builder.mul_extension(filter, gas_diff);
yield_constr.constraint_transition(builder, constr);
// For ternary_ops.
// SUBMOD is differentiated by its second bit set to 1.
let filter = lv.op.ternary_op;
let ternary_op_cost = builder.mul_const_extension(
F::from_canonical_u32(G_MID.unwrap()).neg(),
lv.opcode_bits[1],
);
let ternary_op_cost =
builder.add_const_extension(ternary_op_cost, F::from_canonical_u32(G_MID.unwrap()));
let nv_lv_diff = builder.sub_extension(nv.gas, lv.gas);
let gas_diff = builder.sub_extension(nv_lv_diff, ternary_op_cost);
let constr = builder.mul_extension(filter, gas_diff);
yield_constr.constraint_transition(builder, constr);
// For NOT and POP.
// NOT is differentiated from POP by its first bit set to 1.
let filter = lv.op.not_pop;
let one = builder.one_extension();
let mut not_pop_cost =
builder.mul_const_extension(F::from_canonical_u32(G_VERYLOW.unwrap()), lv.opcode_bits[0]);
let mut pop_cost = builder.sub_extension(one, lv.opcode_bits[0]);
pop_cost = builder.mul_const_extension(F::from_canonical_u32(G_BASE.unwrap()), pop_cost);
not_pop_cost = builder.add_extension(not_pop_cost, pop_cost);
let not_pop_gas_diff = builder.sub_extension(nv_lv_diff, not_pop_cost);
let not_pop_constr = builder.mul_extension(filter, not_pop_gas_diff);
yield_constr.constraint_transition(builder, not_pop_constr);
// For JUMPDEST and KECCAK_GENERAL.
// JUMPDEST is differentiated from KECCAK_GENERAL by its second bit set to 1.
let one = builder.one_extension();
let filter = lv.op.jumpdest_keccak_general;
let jumpdest_keccak_general_gas_cost = builder.arithmetic_extension(
F::from_canonical_u32(G_JUMPDEST.unwrap())
- F::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap()),
F::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap()),
lv.opcode_bits[1],
one,
one,
);
let gas_diff = builder.sub_extension(nv_lv_diff, jumpdest_keccak_general_gas_cost);
let constr = builder.mul_extension(filter, gas_diff);
yield_constr.constraint_transition(builder, constr);
// For PROVER_INPUT and PUSH operations.
// PUSH operations are differentiated from PROVER_INPUT by their 6th bit set to 1.
let push_prover_input_gas_cost = builder.arithmetic_extension(
F::from_canonical_u32(G_VERYLOW.unwrap())
- F::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap()),
F::from_canonical_u32(KERNEL_ONLY_INSTR.unwrap()),
lv.opcode_bits[5],
one,
one,
);
let gas_diff = builder.sub_extension(nv_lv_diff, push_prover_input_gas_cost);
let constr = builder.mul_extension(lv.op.push_prover_input, gas_diff);
yield_constr.constraint_transition(builder, constr);
}
fn eval_ext_circuit_init<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// `nv` is the first row that executes an instruction.
let is_cpu_cycle = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| lv[col_i]));
let is_cpu_cycle_next = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| nv[col_i]));
let filter = builder.mul_sub_extension(is_cpu_cycle, is_cpu_cycle_next, is_cpu_cycle_next);
// Set initial gas to zero.
let constr = builder.mul_extension(filter, nv.gas);
yield_constr.constraint_transition(builder, constr);
}
/// Circuit version of `eval_packed`.
/// Evaluate the gas constraints for the opcodes that cost a constant gas.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
// Evaluates the transition gas constraints.
eval_ext_circuit_accumulate(builder, lv, nv, yield_constr);
// Evaluates the initial gas constraints.
eval_ext_circuit_init(builder, lv, nv, yield_constr);
}


@ -1,104 +0,0 @@
//! Once the CPU execution is over (i.e. reached the `halt` label in the kernel),
//! the CPU trace will be padded with special dummy rows, incurring no memory overhead.
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use super::control_flow::get_halt_pc;
use crate::cpu::columns::{CpuColumnsView, COL_MAP};
use crate::cpu::membus::NUM_GP_CHANNELS;
/// Evaluates constraints for the `halt` flag.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_cpu_cycle: P = COL_MAP.op.iter().map(|&col_i| lv[col_i]).sum();
let is_cpu_cycle_next: P = COL_MAP.op.iter().map(|&col_i| nv[col_i]).sum();
let halt_state = P::ONES - is_cpu_cycle;
let next_halt_state = P::ONES - is_cpu_cycle_next;
// The halt flag must be boolean.
yield_constr.constraint(halt_state * (halt_state - P::ONES));
// Once we reach a padding row, there must be only padding rows.
yield_constr.constraint_transition(halt_state * (next_halt_state - P::ONES));
// Check that we're in kernel mode.
yield_constr.constraint(halt_state * (lv.is_kernel_mode - P::ONES));
// Padding rows should have their memory channels disabled.
for i in 0..NUM_GP_CHANNELS {
let channel = lv.mem_channels[i];
yield_constr.constraint(halt_state * channel.used);
}
// The last row must be a dummy padding row.
yield_constr.constraint_last_row(halt_state - P::ONES);
// Also, a padding row's `program_counter` must be at the `halt` label.
    // In particular, this ensures that the first padding row may only be added
    // after we have jumped to the `halt` label. Subsequent padding rows may set
    // the `program_counter` to arbitrary values (there are no transition
    // constraints), so we can place this requirement on them too.
let halt_pc = get_halt_pc::<P::Scalar>();
yield_constr.constraint(halt_state * (lv.program_counter - halt_pc));
}
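// Exposition-only sketch (added for illustration; not part of the original file):
// the transition constraint `halt * (next_halt - 1) = 0` makes padding "sticky"
// over the integers just as it does in the field: once a padding row occurs,
// every later row must also be padding.
#[cfg(test)]
mod halt_stickiness_sketch {
    fn transition_ok(halt: i64, next_halt: i64) -> bool {
        halt * (next_halt - 1) == 0
    }

    #[test]
    fn sticky_padding() {
        assert!(transition_ok(0, 0)); // still executing
        assert!(transition_ok(0, 1)); // entering padding is allowed
        assert!(transition_ok(1, 1)); // staying in padding is allowed
        assert!(!transition_ok(1, 0)); // leaving padding is forbidden
    }
}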
/// Circuit version of `eval_packed`.
/// Evaluates constraints for the `halt` flag.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let one = builder.one_extension();
let is_cpu_cycle = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| lv[col_i]));
let is_cpu_cycle_next = builder.add_many_extension(COL_MAP.op.iter().map(|&col_i| nv[col_i]));
let halt_state = builder.sub_extension(one, is_cpu_cycle);
let next_halt_state = builder.sub_extension(one, is_cpu_cycle_next);
// The halt flag must be boolean.
let constr = builder.mul_sub_extension(halt_state, halt_state, halt_state);
yield_constr.constraint(builder, constr);
// Once we reach a padding row, there must be only padding rows.
let constr = builder.mul_sub_extension(halt_state, next_halt_state, halt_state);
yield_constr.constraint_transition(builder, constr);
// Check that we're in kernel mode.
let constr = builder.mul_sub_extension(halt_state, lv.is_kernel_mode, halt_state);
yield_constr.constraint(builder, constr);
// Padding rows should have their memory channels disabled.
for i in 0..NUM_GP_CHANNELS {
let channel = lv.mem_channels[i];
let constr = builder.mul_extension(halt_state, channel.used);
yield_constr.constraint(builder, constr);
}
// The last row must be a dummy padding row.
{
let one = builder.one_extension();
let constr = builder.sub_extension(halt_state, one);
yield_constr.constraint_last_row(builder, constr);
}
// Also, a padding row's `program_counter` must be at the `halt` label.
    // In particular, this ensures that the first padding row may only be added
    // after we have jumped to the `halt` label. Subsequent padding rows may set
    // the `program_counter` to arbitrary values (there are no transition
    // constraints), so we can place this requirement on them too.
{
let halt_pc = get_halt_pc();
let halt_pc_target = builder.constant_extension(halt_pc);
let constr = builder.sub_extension(lv.program_counter, halt_pc_target);
let constr = builder.mul_extension(halt_state, constr);
yield_constr.constraint(builder, constr);
}
}


@ -1,390 +0,0 @@
use plonky2::field::extension::Extendable;
use plonky2::field::packed::PackedField;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cpu::columns::CpuColumnsView;
use crate::cpu::membus::NUM_GP_CHANNELS;
use crate::memory::segments::Segment;
/// Evaluates constraints for EXIT_KERNEL.
pub(crate) fn eval_packed_exit_kernel<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let input = lv.mem_channels[0].value;
let filter = lv.op.exit_kernel;
// If we are executing `EXIT_KERNEL` then we simply restore the program counter, kernel mode
// flag, and gas counter. The middle 4 (32-bit) limbs are ignored (this is not part of the spec,
// but we trust the kernel to set them to zero).
yield_constr.constraint_transition(filter * (input[0] - nv.program_counter));
yield_constr.constraint_transition(filter * (input[1] - nv.is_kernel_mode));
yield_constr.constraint_transition(filter * (input[6] - nv.gas));
// High limb of gas must be 0 for convenient detection of overflow.
yield_constr.constraint(filter * input[7]);
}
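// Exposition-only sketch (added for illustration; not part of the original file)
// of the limb layout assumed above: the kernel packs the exit state into one
// 256-bit word of eight 32-bit limbs, with limb 0 the program counter, limb 1
// the kernel-mode flag, limb 6 the gas counter, and limb 7 zero so that gas
// overflow is easy to detect.
#[cfg(test)]
mod exit_kernel_word_sketch {
    fn pack(pc: u32, kernel_mode: bool, gas: u32) -> [u32; 8] {
        let mut limbs = [0u32; 8];
        limbs[0] = pc;
        limbs[1] = kernel_mode as u32;
        limbs[6] = gas;
        limbs
    }

    #[test]
    fn round_trip() {
        let limbs = pack(0xdead, true, 21_000);
        assert_eq!((limbs[0], limbs[1], limbs[6], limbs[7]), (0xdead, 1, 21_000, 0));
    }
}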
/// Circuit version of `eval_packed_exit_kernel`.
/// Evaluates constraints for EXIT_KERNEL.
pub(crate) fn eval_ext_circuit_exit_kernel<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let input = lv.mem_channels[0].value;
let filter = lv.op.exit_kernel;
    // If we are executing `EXIT_KERNEL` then we simply restore the program counter, kernel mode
    // flag, and gas counter. The middle 4 (32-bit) limbs are ignored (this is not part of the
    // spec, but we trust the kernel to set them to zero).
let pc_constr = builder.sub_extension(input[0], nv.program_counter);
let pc_constr = builder.mul_extension(filter, pc_constr);
yield_constr.constraint_transition(builder, pc_constr);
let kernel_constr = builder.sub_extension(input[1], nv.is_kernel_mode);
let kernel_constr = builder.mul_extension(filter, kernel_constr);
yield_constr.constraint_transition(builder, kernel_constr);
{
let diff = builder.sub_extension(input[6], nv.gas);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint_transition(builder, constr);
}
{
// High limb of gas must be 0 for convenient detection of overflow.
let constr = builder.mul_extension(filter, input[7]);
yield_constr.constraint(builder, constr);
}
}
/// Evaluates constraints for the jump operations: JUMP and JUMPI.
pub(crate) fn eval_packed_jump_jumpi<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let jumps_lv = lv.general.jumps();
let dst = lv.mem_channels[0].value;
let cond = lv.mem_channels[1].value;
let filter = lv.op.jumps; // `JUMP` or `JUMPI`
let jumpdest_flag_channel = lv.mem_channels[NUM_GP_CHANNELS - 1];
let is_jump = filter * (P::ONES - lv.opcode_bits[0]);
let is_jumpi = filter * lv.opcode_bits[0];
// Stack constraints.
// If (JUMP and stack_len != 1) or (JUMPI and stack_len != 2)...
let len_diff = lv.stack_len - P::ONES - lv.opcode_bits[0];
let new_filter = len_diff * filter;
// Read an extra element.
let channel = nv.mem_channels[0];
yield_constr.constraint_transition(new_filter * (channel.used - P::ONES));
yield_constr.constraint_transition(new_filter * (channel.is_read - P::ONES));
yield_constr.constraint_transition(new_filter * (channel.addr_context - nv.context));
yield_constr.constraint_transition(
new_filter
* (channel.addr_segment - P::Scalar::from_canonical_usize(Segment::Stack.unscale())),
);
let addr_virtual = nv.stack_len - P::ONES;
yield_constr.constraint_transition(new_filter * (channel.addr_virtual - addr_virtual));
// Constrain `stack_inv_aux`.
yield_constr.constraint(
filter * (len_diff * lv.general.stack().stack_inv - lv.general.stack().stack_inv_aux),
);
    // Disable the extra-read channel if the stack already has the expected length (len_diff == 0).
let empty_stack_filter = filter * (lv.general.stack().stack_inv_aux - P::ONES);
yield_constr.constraint_transition(empty_stack_filter * channel.used);
// If `JUMP`, re-use the `JUMPI` logic, but setting the second input (the predicate) to be 1.
// In other words, we implement `JUMP(dst)` as `JUMPI(dst, cond=1)`.
yield_constr.constraint(is_jump * (cond[0] - P::ONES));
for &limb in &cond[1..] {
// Set all limbs (other than the least-significant limb) to 0.
// NB: Technically, they don't have to be 0, as long as the sum
// `cond[0] + ... + cond[7]` cannot overflow.
yield_constr.constraint(is_jump * limb);
}
// Check `should_jump`:
yield_constr.constraint(filter * jumps_lv.should_jump * (jumps_lv.should_jump - P::ONES));
let cond_sum: P = cond.into_iter().sum();
yield_constr.constraint(filter * (jumps_lv.should_jump - P::ONES) * cond_sum);
yield_constr.constraint(filter * (jumps_lv.cond_sum_pinv * cond_sum - jumps_lv.should_jump));
// If we're jumping, then the high 7 limbs of the destination must be 0.
let dst_hi_sum: P = dst[1..].iter().copied().sum();
yield_constr.constraint(filter * jumps_lv.should_jump * dst_hi_sum);
// Check that the destination address holds a `JUMPDEST` instruction. Note that this constraint
// does not need to be conditioned on `should_jump` because no read takes place if we're not
// jumping, so we're free to set the channel to 1.
yield_constr.constraint(filter * (jumpdest_flag_channel.value[0] - P::ONES));
// Make sure that the JUMPDEST flag channel is constrained.
// Only need to read if we're about to jump and we're not in kernel mode.
yield_constr.constraint(
filter
* (jumpdest_flag_channel.used - jumps_lv.should_jump * (P::ONES - lv.is_kernel_mode)),
);
yield_constr.constraint(filter * (jumpdest_flag_channel.is_read - P::ONES));
yield_constr.constraint(filter * (jumpdest_flag_channel.addr_context - lv.context));
yield_constr.constraint(
filter
* (jumpdest_flag_channel.addr_segment
- P::Scalar::from_canonical_usize(Segment::JumpdestBits.unscale())),
);
yield_constr.constraint(filter * (jumpdest_flag_channel.addr_virtual - dst[0]));
// Disable unused memory channels
for &channel in &lv.mem_channels[2..NUM_GP_CHANNELS - 1] {
yield_constr.constraint(filter * channel.used);
}
yield_constr.constraint(filter * lv.partial_channel.used);
// Channel 1 is unused by the `JUMP` instruction.
yield_constr.constraint(is_jump * lv.mem_channels[1].used);
// Update stack length.
yield_constr.constraint_transition(is_jump * (nv.stack_len - lv.stack_len + P::ONES));
yield_constr.constraint_transition(
is_jumpi * (nv.stack_len - lv.stack_len + P::Scalar::from_canonical_u64(2)),
);
// Finally, set the next program counter.
let fallthrough_dst = lv.program_counter + P::ONES;
let jump_dest = dst[0];
yield_constr.constraint_transition(
filter * (jumps_lv.should_jump - P::ONES) * (nv.program_counter - fallthrough_dst),
);
yield_constr
.constraint_transition(filter * jumps_lv.should_jump * (nv.program_counter - jump_dest));
}
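// Exposition-only sketch (added for illustration; not part of the original file)
// of the pseudoinverse nonzero test behind `should_jump`, in a toy prime field
// F_7: with `s` boolean, `(s - 1) * x = 0` and `pinv * x = s` together force
// `s = 1` exactly when `x != 0` (the prover supplies `pinv = x^{-1}` whenever
// `x != 0`).
#[cfg(test)]
mod should_jump_sketch {
    const P: u64 = 7; // toy prime modulus for illustration

    /// Fermat inverse: x^(P-2) mod P.
    fn inv(x: u64) -> u64 {
        (0..P - 2).fold(1, |acc, _| acc * x % P)
    }

    fn constraints_hold(x: u64, s: u64, pinv: u64) -> bool {
        s * ((s + P - 1) % P) % P == 0          // s is boolean
            && ((s + P - 1) % P) * x % P == 0   // s = 0 forces x = 0
            && pinv * x % P == s % P            // pinv witnesses x != 0 when s = 1
    }

    #[test]
    fn nonzero_test() {
        assert!(constraints_hold(0, 0, 0)); // x = 0 gives s = 0
        assert!(constraints_hold(3, 1, inv(3))); // x != 0 gives s = 1
        assert!(!constraints_hold(3, 0, 0)); // cannot claim "no jump" when cond != 0
        assert!(!constraints_hold(0, 1, 0)); // cannot claim "jump" when cond = 0
    }
}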
/// Circuit version of `eval_packed_jump_jumpi`.
/// Evaluates constraints for the jump operations: JUMP and JUMPI.
pub(crate) fn eval_ext_circuit_jump_jumpi<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let jumps_lv = lv.general.jumps();
let dst = lv.mem_channels[0].value;
let cond = lv.mem_channels[1].value;
let filter = lv.op.jumps; // `JUMP` or `JUMPI`
let jumpdest_flag_channel = lv.mem_channels[NUM_GP_CHANNELS - 1];
let one_extension = builder.one_extension();
let is_jump = builder.sub_extension(one_extension, lv.opcode_bits[0]);
let is_jump = builder.mul_extension(filter, is_jump);
let is_jumpi = builder.mul_extension(filter, lv.opcode_bits[0]);
// Stack constraints.
// If (JUMP and stack_len != 1) or (JUMPI and stack_len != 2)...
let len_diff = builder.sub_extension(lv.stack_len, one_extension);
let len_diff = builder.sub_extension(len_diff, lv.opcode_bits[0]);
let new_filter = builder.mul_extension(len_diff, filter);
// Read an extra element.
let channel = nv.mem_channels[0];
{
let constr = builder.mul_sub_extension(new_filter, channel.used, new_filter);
yield_constr.constraint_transition(builder, constr);
}
{
let constr = builder.mul_sub_extension(new_filter, channel.is_read, new_filter);
yield_constr.constraint_transition(builder, constr);
}
{
let diff = builder.sub_extension(channel.addr_context, nv.context);
let constr = builder.mul_extension(new_filter, diff);
yield_constr.constraint_transition(builder, constr);
}
{
let constr = builder.arithmetic_extension(
F::ONE,
-F::from_canonical_usize(Segment::Stack.unscale()),
new_filter,
channel.addr_segment,
new_filter,
);
yield_constr.constraint_transition(builder, constr);
}
{
let diff = builder.sub_extension(channel.addr_virtual, nv.stack_len);
let constr = builder.arithmetic_extension(F::ONE, F::ONE, new_filter, diff, new_filter);
yield_constr.constraint_transition(builder, constr);
}
// Constrain `stack_inv_aux`.
{
let prod = builder.mul_extension(len_diff, lv.general.stack().stack_inv);
let diff = builder.sub_extension(prod, lv.general.stack().stack_inv_aux);
let constr = builder.mul_extension(filter, diff);
yield_constr.constraint(builder, constr);
}
    // Disable the extra-read channel if the stack already has the expected length (len_diff == 0).
{
let empty_stack_filter =
builder.mul_sub_extension(filter, lv.general.stack().stack_inv_aux, filter);
let constr = builder.mul_extension(empty_stack_filter, channel.used);
yield_constr.constraint_transition(builder, constr);
}
// If `JUMP`, re-use the `JUMPI` logic, but setting the second input (the predicate) to be 1.
// In other words, we implement `JUMP(dst)` as `JUMPI(dst, cond=1)`.
{
let constr = builder.mul_sub_extension(is_jump, cond[0], is_jump);
yield_constr.constraint(builder, constr);
}
for &limb in &cond[1..] {
// Set all limbs (other than the least-significant limb) to 0.
// NB: Technically, they don't have to be 0, as long as the sum
// `cond[0] + ... + cond[7]` cannot overflow.
let constr = builder.mul_extension(is_jump, limb);
yield_constr.constraint(builder, constr);
}
// Check `should_jump`:
{
let constr = builder.mul_sub_extension(
jumps_lv.should_jump,
jumps_lv.should_jump,
jumps_lv.should_jump,
);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
let cond_sum = builder.add_many_extension(cond);
{
let constr = builder.mul_sub_extension(cond_sum, jumps_lv.should_jump, cond_sum);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
{
let constr =
builder.mul_sub_extension(jumps_lv.cond_sum_pinv, cond_sum, jumps_lv.should_jump);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
// If we're jumping, then the high 7 limbs of the destination must be 0.
let dst_hi_sum = builder.add_many_extension(&dst[1..]);
{
let constr = builder.mul_extension(jumps_lv.should_jump, dst_hi_sum);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
// Check that the destination address holds a `JUMPDEST` instruction. Note that this constraint
// does not need to be conditioned on `should_jump` because no read takes place if we're not
// jumping, so we're free to set the channel's value to 1.
{
let constr = builder.mul_sub_extension(filter, jumpdest_flag_channel.value[0], filter);
yield_constr.constraint(builder, constr);
}
// Make sure that the JUMPDEST flag channel is constrained.
// Only need to read if we're about to jump and we're not in kernel mode.
{
let constr = builder.mul_sub_extension(
jumps_lv.should_jump,
lv.is_kernel_mode,
jumps_lv.should_jump,
);
let constr = builder.add_extension(jumpdest_flag_channel.used, constr);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.mul_sub_extension(filter, jumpdest_flag_channel.is_read, filter);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.sub_extension(jumpdest_flag_channel.addr_context, lv.context);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.arithmetic_extension(
F::ONE,
-F::from_canonical_usize(Segment::JumpdestBits.unscale()),
filter,
jumpdest_flag_channel.addr_segment,
filter,
);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.sub_extension(jumpdest_flag_channel.addr_virtual, dst[0]);
let constr = builder.mul_extension(filter, constr);
yield_constr.constraint(builder, constr);
}
// Disable unused memory channels
for &channel in &lv.mem_channels[2..NUM_GP_CHANNELS - 1] {
let constr = builder.mul_extension(filter, channel.used);
yield_constr.constraint(builder, constr);
}
{
let constr = builder.mul_extension(filter, lv.partial_channel.used);
yield_constr.constraint(builder, constr);
}
// Channel 1 is unused by the `JUMP` instruction.
{
let constr = builder.mul_extension(is_jump, lv.mem_channels[1].used);
yield_constr.constraint(builder, constr);
}
// Update stack length.
{
let diff = builder.sub_extension(nv.stack_len, lv.stack_len);
let constr = builder.mul_add_extension(is_jump, diff, is_jump);
yield_constr.constraint_transition(builder, constr);
}
{
let diff = builder.sub_extension(nv.stack_len, lv.stack_len);
let diff = builder.add_const_extension(diff, F::TWO);
let constr = builder.mul_extension(is_jumpi, diff);
yield_constr.constraint_transition(builder, constr);
}
// Finally, set the next program counter.
let fallthrough_dst = builder.add_const_extension(lv.program_counter, F::ONE);
let jump_dest = dst[0];
{
let constr_a = builder.mul_sub_extension(filter, jumps_lv.should_jump, filter);
let constr_b = builder.sub_extension(nv.program_counter, fallthrough_dst);
let constr = builder.mul_extension(constr_a, constr_b);
yield_constr.constraint_transition(builder, constr);
}
{
let constr_a = builder.mul_extension(filter, jumps_lv.should_jump);
let constr_b = builder.sub_extension(nv.program_counter, jump_dest);
let constr = builder.mul_extension(constr_a, constr_b);
yield_constr.constraint_transition(builder, constr);
}
}
/// Evaluates constraints for EXIT_KERNEL, JUMP and JUMPI.
pub(crate) fn eval_packed<P: PackedField>(
lv: &CpuColumnsView<P>,
nv: &CpuColumnsView<P>,
yield_constr: &mut ConstraintConsumer<P>,
) {
eval_packed_exit_kernel(lv, nv, yield_constr);
eval_packed_jump_jumpi(lv, nv, yield_constr);
}
/// Circuit version of `eval_packed`.
/// Evaluates constraints for EXIT_KERNEL, JUMP and JUMPI.
pub(crate) fn eval_ext_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut plonky2::plonk::circuit_builder::CircuitBuilder<F, D>,
lv: &CpuColumnsView<ExtensionTarget<D>>,
nv: &CpuColumnsView<ExtensionTarget<D>>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
eval_ext_circuit_exit_kernel(builder, lv, nv, yield_constr);
eval_ext_circuit_jump_jumpi(builder, lv, nv, yield_constr);
}
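
For orientation, the recursive `should_jump` gadget above mirrors three packed-form constraints from `eval_packed_jump_jumpi`. A minimal sketch; the helper name and import paths are illustrative, not the removed crate's API:

use plonky2::field::packed::PackedField;
use starky::constraint_consumer::ConstraintConsumer;

/// Booleanity plus the pseudo-inverse trick: together these force
/// `should_jump == (cond_sum != 0)` whenever `filter` is active.
fn eval_should_jump<P: PackedField>(
    filter: P,
    cond_sum: P,
    cond_sum_pinv: P,
    should_jump: P,
    yield_constr: &mut ConstraintConsumer<P>,
) {
    // (1) `should_jump` is 0 or 1.
    yield_constr.constraint(filter * should_jump * (should_jump - P::ONES));
    // (2) `cond_sum != 0` forces `should_jump == 1`.
    yield_constr.constraint(filter * cond_sum * (should_jump - P::ONES));
    // (3) `cond_sum == 0` forces `should_jump == 0`, since the product with
    //     the purported inverse `cond_sum_pinv` then vanishes.
    yield_constr.constraint(filter * (cond_sum * cond_sum_pinv - should_jump));
}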

View File

@ -1,184 +0,0 @@
//! Loads each kernel assembly file and concatenates them.
use itertools::Itertools;
use once_cell::sync::Lazy;
use super::assembler::{assemble, Kernel};
use crate::cpu::kernel::constants::evm_constants;
use crate::cpu::kernel::parser::parse;
pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
pub(crate) fn combined_kernel() -> Kernel {
let files = vec![
"global jumped_to_0: PANIC",
"global jumped_to_1: PANIC",
include_str!("asm/bignum/add.asm"),
include_str!("asm/bignum/addmul.asm"),
include_str!("asm/bignum/cmp.asm"),
include_str!("asm/bignum/isone.asm"),
include_str!("asm/bignum/iszero.asm"),
include_str!("asm/bignum/modexp.asm"),
include_str!("asm/bignum/modmul.asm"),
include_str!("asm/bignum/mul.asm"),
include_str!("asm/bignum/shr.asm"),
include_str!("asm/bignum/util.asm"),
include_str!("asm/core/call.asm"),
include_str!("asm/core/call_gas.asm"),
include_str!("asm/core/create.asm"),
include_str!("asm/core/create_addresses.asm"),
include_str!("asm/core/create_contract_account.asm"),
include_str!("asm/core/exception.asm"),
include_str!("asm/core/create_receipt.asm"),
include_str!("asm/core/gas.asm"),
include_str!("asm/core/intrinsic_gas.asm"),
include_str!("asm/core/jumpdest_analysis.asm"),
include_str!("asm/core/nonce.asm"),
include_str!("asm/core/process_txn.asm"),
include_str!("asm/core/syscall.asm"),
include_str!("asm/core/terminate.asm"),
include_str!("asm/core/transfer.asm"),
include_str!("asm/core/util.asm"),
include_str!("asm/core/access_lists.asm"),
include_str!("asm/core/log.asm"),
include_str!("asm/core/selfdestruct_list.asm"),
include_str!("asm/core/touched_addresses.asm"),
include_str!("asm/core/withdrawals.asm"),
include_str!("asm/core/precompiles/main.asm"),
include_str!("asm/core/precompiles/ecrec.asm"),
include_str!("asm/core/precompiles/sha256.asm"),
include_str!("asm/core/precompiles/rip160.asm"),
include_str!("asm/core/precompiles/id.asm"),
include_str!("asm/core/precompiles/expmod.asm"),
include_str!("asm/core/precompiles/bn_add.asm"),
include_str!("asm/core/precompiles/bn_mul.asm"),
include_str!("asm/core/precompiles/snarkv.asm"),
include_str!("asm/core/precompiles/blake2_f.asm"),
include_str!("asm/curve/bls381/util.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/final_exponent.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/pairing.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"),
include_str!("asm/curve/bn254/curve_arithmetic/twisted_curve.asm"),
include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"),
include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
include_str!("asm/curve/common.asm"),
include_str!("asm/curve/secp256k1/curve_add.asm"),
include_str!("asm/curve/secp256k1/ecrecover.asm"),
include_str!("asm/curve/secp256k1/inverse_scalar.asm"),
include_str!("asm/curve/secp256k1/lift_x.asm"),
include_str!("asm/curve/secp256k1/moddiv.asm"),
include_str!("asm/curve/secp256k1/glv.asm"),
include_str!("asm/curve/secp256k1/precomputation.asm"),
include_str!("asm/curve/wnaf.asm"),
include_str!("asm/exp.asm"),
include_str!("asm/halt.asm"),
include_str!("asm/hash/blake2/addresses.asm"),
include_str!("asm/hash/blake2/blake2_f.asm"),
// include_str!("asm/hash/blake2/blake2b.asm"),
// include_str!("asm/hash/blake2/compression.asm"),
include_str!("asm/hash/blake2/g_functions.asm"),
include_str!("asm/hash/blake2/hash.asm"),
include_str!("asm/hash/blake2/iv.asm"),
include_str!("asm/hash/blake2/ops.asm"),
include_str!("asm/hash/blake2/permutations.asm"),
include_str!("asm/hash/ripemd/box.asm"),
include_str!("asm/hash/ripemd/compression.asm"),
include_str!("asm/hash/ripemd/constants.asm"),
include_str!("asm/hash/ripemd/functions.asm"),
include_str!("asm/hash/ripemd/main.asm"),
include_str!("asm/hash/ripemd/update.asm"),
include_str!("asm/hash/sha2/compression.asm"),
include_str!("asm/hash/sha2/constants.asm"),
include_str!("asm/hash/sha2/main.asm"),
include_str!("asm/hash/sha2/message_schedule.asm"),
include_str!("asm/hash/sha2/ops.asm"),
include_str!("asm/hash/sha2/temp_words.asm"),
include_str!("asm/hash/sha2/write_length.asm"),
include_str!("asm/main.asm"),
include_str!("asm/memory/core.asm"),
include_str!("asm/memory/memcpy.asm"),
include_str!("asm/memory/memset.asm"),
include_str!("asm/memory/metadata.asm"),
include_str!("asm/memory/packing.asm"),
include_str!("asm/memory/syscalls.asm"),
include_str!("asm/memory/txn_fields.asm"),
include_str!("asm/mpt/accounts.asm"),
include_str!("asm/mpt/delete/delete.asm"),
include_str!("asm/mpt/delete/delete_branch.asm"),
include_str!("asm/mpt/delete/delete_extension.asm"),
include_str!("asm/mpt/hash/hash.asm"),
include_str!("asm/mpt/hash/hash_trie_specific.asm"),
include_str!("asm/mpt/hex_prefix.asm"),
include_str!("asm/mpt/insert/insert.asm"),
include_str!("asm/mpt/insert/insert_extension.asm"),
include_str!("asm/mpt/insert/insert_leaf.asm"),
include_str!("asm/mpt/insert/insert_trie_specific.asm"),
include_str!("asm/mpt/read.asm"),
include_str!("asm/mpt/storage/storage_read.asm"),
include_str!("asm/mpt/storage/storage_write.asm"),
include_str!("asm/mpt/util.asm"),
include_str!("asm/rlp/decode.asm"),
include_str!("asm/rlp/encode.asm"),
include_str!("asm/rlp/encode_rlp_scalar.asm"),
include_str!("asm/rlp/encode_rlp_string.asm"),
include_str!("asm/rlp/increment_bounded_rlp.asm"),
include_str!("asm/rlp/num_bytes.asm"),
include_str!("asm/rlp/read_to_memory.asm"),
include_str!("asm/shift.asm"),
include_str!("asm/signed.asm"),
include_str!("asm/journal/journal.asm"),
include_str!("asm/journal/account_loaded.asm"),
include_str!("asm/journal/account_destroyed.asm"),
include_str!("asm/journal/account_touched.asm"),
include_str!("asm/journal/balance_transfer.asm"),
include_str!("asm/journal/nonce_change.asm"),
include_str!("asm/journal/storage_change.asm"),
include_str!("asm/journal/storage_loaded.asm"),
include_str!("asm/journal/code_change.asm"),
include_str!("asm/journal/refund.asm"),
include_str!("asm/journal/account_created.asm"),
include_str!("asm/journal/revert.asm"),
include_str!("asm/journal/log.asm"),
include_str!("asm/transactions/common_decoding.asm"),
include_str!("asm/transactions/router.asm"),
include_str!("asm/transactions/type_0.asm"),
include_str!("asm/transactions/type_1.asm"),
include_str!("asm/transactions/type_2.asm"),
include_str!("asm/util/assertions.asm"),
include_str!("asm/util/basic_macros.asm"),
include_str!("asm/util/keccak.asm"),
include_str!("asm/util/math.asm"),
include_str!("asm/account_code.asm"),
include_str!("asm/balance.asm"),
include_str!("asm/bloom_filter.asm"),
];
let parsed_files = files.iter().map(|f| parse(f)).collect_vec();
assemble(parsed_files, evm_constants(), true)
}
#[cfg(test)]
mod tests {
use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV};
use log::debug;
use crate::cpu::kernel::aggregator::combined_kernel;
#[test]
fn make_kernel() {
let _ = try_init_from_env(Env::default().filter_or(DEFAULT_FILTER_ENV, "debug"));
// Make sure we can parse and assemble the entire kernel.
let kernel = combined_kernel();
debug!("Total kernel size: {} bytes", kernel.code.len());
}
}
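
Callers elsewhere in the crate reached the assembled kernel through the `KERNEL` lazy static, along the lines of the sketch below (`code` is exercised by the test above; the `global_labels` offset map is an assumption here):

use crate::cpu::kernel::aggregator::KERNEL;

fn kernel_summary() {
    // First access triggers parsing and assembly of every file listed above.
    println!("kernel bytecode: {} bytes", KERNEL.code.len());
    // Assumed field: offsets of `global` labels such as `main`.
    if let Some(offset) = KERNEL.global_labels.get("main") {
        println!("`main` entry point at offset {offset}");
    }
}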

View File

@ -1,136 +0,0 @@
global sys_extcodehash:
// stack: kexit_info, address
SWAP1 %u256_to_addr
// stack: address, kexit_info
SWAP1
DUP2 %insert_accessed_addresses
// stack: cold_access, kexit_info, address
PUSH @GAS_COLDACCOUNTACCESS_MINUS_WARMACCESS
MUL
PUSH @GAS_WARMACCESS
ADD
%charge_gas
// stack: kexit_info, address
SWAP1
DUP1 %is_dead %jumpi(extcodehash_dead)
%extcodehash
// stack: hash, kexit_info
SWAP1
EXIT_KERNEL
extcodehash_dead:
%stack (address, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
global extcodehash:
// stack: address, retdest
%mpt_read_state_trie
// stack: account_ptr, retdest
DUP1 ISZERO %jumpi(retzero)
%add_const(3)
// stack: codehash_ptr, retdest
%mload_trie_data
// stack: codehash, retdest
SWAP1 JUMP
retzero:
%stack (account_ptr, retdest) -> (retdest, 0)
JUMP
%macro extcodehash
%stack (address) -> (address, %%after)
%jump(extcodehash)
%%after:
%endmacro
%macro ext_code_empty
%extcodehash
%eq_const(@EMPTY_STRING_HASH)
%endmacro
%macro extcodesize
%stack (address) -> (address, %%after)
%jump(extcodesize)
%%after:
%endmacro
global sys_extcodesize:
// stack: kexit_info, address
SWAP1 %u256_to_addr
// stack: address, kexit_info
SWAP1
DUP2 %insert_accessed_addresses
// stack: cold_access, kexit_info, address
PUSH @GAS_COLDACCOUNTACCESS_MINUS_WARMACCESS
MUL
PUSH @GAS_WARMACCESS
ADD
%charge_gas
// stack: kexit_info, address
SWAP1
// stack: address, kexit_info
%extcodesize
// stack: code_size, kexit_info
SWAP1
EXIT_KERNEL
global extcodesize:
// stack: address, retdest
%next_context_id
// stack: codesize_ctx, address, retdest
SWAP1
// stack: address, codesize_ctx, retdest
%jump(load_code)
// Loads the code at `address` into memory, in the code segment of the given context, starting at offset 0.
// Checks that the hash of the loaded code corresponds to the `codehash` in the state trie.
// Pre stack: address, ctx, retdest
// Post stack: code_size
//
// NOTE: The provided `dest` **MUST** have a virtual address of 0.
global load_code:
%stack (address, ctx, retdest) -> (extcodehash, address, load_code_ctd, ctx, retdest)
JUMP
load_code_ctd:
// stack: codehash, ctx, retdest
DUP1 ISZERO %jumpi(load_code_non_existent_account)
// Load the code non-deterministically in memory and return the length.
PROVER_INPUT(account_code)
%stack (code_size, codehash, ctx, retdest) -> (ctx, code_size, codehash, retdest, code_size)
// Check that the hash of the loaded code equals `codehash`.
// ctx == DST, as SEGMENT_CODE == offset == 0.
KECCAK_GENERAL
// stack: shouldbecodehash, codehash, retdest, code_size
%assert_eq
// stack: retdest, code_size
JUMP
load_code_non_existent_account:
// Write 0 at address 0 for soundness: SEGMENT_CODE == 0, hence ctx == addr.
// stack: codehash, addr, retdest
%stack (codehash, addr, retdest) -> (0, addr, retdest, 0)
MSTORE_GENERAL
// stack: retdest, 0
JUMP
// Identical to load_code, but adds 33 zeros after code_size for soundness reasons.
// If the code ends with an incomplete PUSH, we must make sure that every subsequent read is 0,
// according to the Ethereum specs.
// Pre stack: address, ctx, retdest
// Post stack: code_size
global load_code_padded:
%stack (address, ctx, retdest) -> (address, ctx, load_code_padded_ctd, ctx, retdest)
%jump(load_code)
load_code_padded_ctd:
// SEGMENT_CODE == 0.
// stack: code_size, ctx, retdest
%stack (code_size, ctx, retdest) -> (ctx, code_size, 0, retdest, code_size)
ADD
// stack: addr, 0, retdest, code_size
MSTORE_32BYTES_32
// stack: addr', retdest, code_size
PUSH 0
MSTORE_GENERAL
// stack: retdest, code_size
JUMP
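
The warm/cold gas charge at the top of `sys_extcodehash` and `sys_extcodesize` (and of `sys_balance` in the next file) follows the EIP-2929 access-list pattern. A hedged Rust rendering of that stack arithmetic, with plain parameters standing in for the kernel's `@GAS_*` constants:

/// `cold_access` is the 0/1 flag returned by `%insert_accessed_addresses`.
fn account_access_gas(cold_access: u64, gas_warm: u64, gas_cold_minus_warm: u64) -> u64 {
    // PUSH @GAS_COLDACCOUNTACCESS_MINUS_WARMACCESS; MUL; PUSH @GAS_WARMACCESS; ADD
    cold_access * gas_cold_minus_warm + gas_warm
}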

View File

@ -1,56 +0,0 @@
global sys_balance:
// stack: kexit_info, address
SWAP1 %u256_to_addr
// stack: address, kexit_info
SWAP1
DUP2 %insert_accessed_addresses
// stack: cold_access, kexit_info, address
PUSH @GAS_COLDACCOUNTACCESS_MINUS_WARMACCESS
MUL
PUSH @GAS_WARMACCESS
ADD
%charge_gas
// stack: kexit_info, address
SWAP1
// stack: address, kexit_info
%balance
// stack: balance, kexit_info
SWAP1
EXIT_KERNEL
%macro balance
%stack (address) -> (address, %%after)
%jump(balance)
%%after:
%endmacro
global balance:
// stack: address, retdest
%mpt_read_state_trie
// stack: account_ptr, retdest
DUP1 ISZERO %jumpi(retzero) // If the account pointer is null, return 0.
%add_const(1)
// stack: balance_ptr, retdest
%mload_trie_data
// stack: balance, retdest
SWAP1 JUMP
retzero:
%stack (account_ptr, retdest) -> (retdest, 0)
JUMP
global sys_selfbalance:
// stack: kexit_info
%charge_gas_const(@GAS_LOW)
%selfbalance
// stack: balance, kexit_info
SWAP1
EXIT_KERNEL
%macro selfbalance
PUSH %%after
%address
%jump(balance)
%%after:
%endmacro

View File

@ -1,73 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Adds two bignums of the same given length (len == 0 is a no-op returning carry 0).
// Replaces a with a + b, leaving b unchanged, and returns the final carry.
global add_bignum:
// stack: len, a_start_loc, b_start_loc, retdest
DUP1
ISZERO
%jumpi(len_zero)
// stack: len, a_start_loc, b_start_loc, retdest
%build_current_general_address_no_offset
PUSH 0
// stack: carry=0, base_addr, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, retdest
add_loop:
// stack: carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
DUP2
// stack: base_addr, carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
DUP6 ADD // base_addr + b_cur_loc
MLOAD_GENERAL
// stack: b[cur], carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
DUP3
DUP6 ADD // base_addr + a_cur_loc
MLOAD_GENERAL
// stack: a[cur], b[cur], carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
ADD
ADD
// stack: a[cur] + b[cur] + carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
DUP1
// stack: a[cur] + b[cur] + carry, a[cur] + b[cur] + carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
%shr_const(128)
// stack: (a[cur] + b[cur] + carry) // 2^128, a[cur] + b[cur] + carry, base_addr, i, a_cur_loc, b_cur_loc, retdest
SWAP1
// stack: a[cur] + b[cur] + carry, (a[cur] + b[cur] + carry) // 2^128, base_addr, i, a_cur_loc, b_cur_loc, retdest
%mod_const(0x100000000000000000000000000000000)
// stack: c[cur] = (a[cur] + b[cur] + carry) % 2^128, carry_new = (a[cur] + b[cur] + carry) // 2^128, base_addr, i, a_cur_loc, b_cur_loc, retdest
DUP3
DUP6
ADD // base_addr + a_cur_loc
// stack: a_cur_addr, c[cur], carry_new, base_addr, i, a_cur_loc, b_cur_loc, retdest
%swap_mstore
// stack: carry_new, base_addr, i, a_cur_loc, b_cur_loc, retdest
SWAP3
%increment
SWAP3
// stack: carry_new, base_addr, i, a_cur_loc + 1, b_cur_loc, retdest
SWAP4
%increment
SWAP4
// stack: carry_new, base_addr, i, a_cur_loc + 1, b_cur_loc + 1, retdest
SWAP2
%decrement
SWAP2
// stack: carry_new, base_addr, i - 1, a_cur_loc + 1, b_cur_loc + 1, retdest
DUP3
// stack: i - 1, carry_new, base_addr, i - 1, a_cur_loc + 1, b_cur_loc + 1, retdest
%jumpi(add_loop)
add_end:
// stack: carry_new, base_addr, i - 1, a_cur_loc + 1, b_cur_loc + 1, retdest
%stack (c, addr, i, a, b) -> (c)
// stack: carry_new, retdest
SWAP1
// stack: retdest, carry_new
JUMP
len_zero:
// stack: len, a_start_loc, b_start_loc, retdest
%pop3
// stack: retdest
PUSH 0
// stack: carry=0, retdest
SWAP1
JUMP
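
A hedged Rust model of this loop with the same little-endian 128-bit limbs. The kernel holds each limb in a 256-bit word, so its `a[cur] + b[cur] + carry` never wraps; in Rust we track the overflow bit explicitly:

/// a += b limb-wise; returns the final carry (0 or 1). Assumes equal lengths.
fn add_bignum(a: &mut [u128], b: &[u128]) -> u128 {
    let mut carry = 0u128;
    for (ai, &bi) in a.iter_mut().zip(b.iter()) {
        // c[cur] = (a[cur] + b[cur] + carry) % 2^128; carry = the overflow bit.
        let (s, c1) = ai.overflowing_add(bi);
        let (s, c2) = s.overflowing_add(carry);
        *ai = s;
        carry = (c1 | c2) as u128;
    }
    carry
}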

View File

@ -1,116 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Sets a[0:len] += b[0:len] * val, and returns the carry (a limb of up to 128 bits).
global addmul_bignum:
// stack: len, a_start_loc, b_start_loc, val, retdest
DUP1
// stack: len, len, a_start_loc, b_start_loc, val, retdest
ISZERO
%jumpi(len_zero)
%build_current_general_address_no_offset
PUSH 0
// stack: carry_limb=0, base_addr, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, val, retdest
addmul_loop:
// stack: carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP2
DUP6 ADD // base_addr + b_cur_loc
// stack: b_cur_addr, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
MLOAD_GENERAL
// stack: b[cur], carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP7
// stack: val, b[cur], carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
MUL
// stack: val * b[cur], carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP1
// stack: val * b[cur], val * b[cur], carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
%shr_const(128)
// stack: (val * b[cur]) // 2^128, val * b[cur], carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: val * b[cur], (val * b[cur]) // 2^128, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
%shl_const(128)
%shr_const(128)
// stack: prod_lo = val * b[cur] % 2^128, prod_hi = (val * b[cur]) // 2^128, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP4
DUP7 ADD // base_addr + a_cur_loc
// stack: a_cur_addr, prod_lo, prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
MLOAD_GENERAL
// stack: a[cur], prod_lo, prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP1
// stack: a[cur], a[cur], prod_lo, prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: prod_lo, a[cur], a[cur], prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
ADD
%shl_const(128)
%shr_const(128)
// stack: prod_lo' = (prod_lo + a[cur]) % 2^128, a[cur], prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP1
// stack: prod_lo', prod_lo', a[cur], prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: a[cur], prod_lo', prod_lo', prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
GT
// stack: prod_lo_carry_limb = a[cur] > prod_lo', prod_lo', prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: prod_lo', prod_lo_carry_limb, prod_hi, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: prod_hi, prod_lo_carry_limb, prod_lo', carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
ADD
// stack: prod_hi' = prod_hi + prod_lo_carry_limb, prod_lo', carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP3
// stack: carry_limb, prod_hi', prod_lo', carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP3
// stack: prod_lo', carry_limb, prod_hi', prod_lo', carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
ADD
%shl_const(128)
%shr_const(128)
// stack: to_write = (prod_lo' + carry_limb) % 2^128, prod_hi', prod_lo', carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: prod_lo', prod_hi', to_write, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP3
// stack: to_write, prod_lo', prod_hi', to_write, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
LT
// stack: carry_limb_new = to_write < prod_lo', prod_hi', to_write, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
%stack (vals: 3, c) -> (vals)
// stack: carry_limb_new, prod_hi', to_write, addr, i, a_cur_loc, b_cur_loc, val, retdest
ADD
// stack: carry_limb = carry_limb_new + prod_hi', to_write, addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: to_write, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
DUP3
DUP6 ADD // base_addr + a_cur_loc
// stack: a_cur_addr, to_write, carry_limb, addr, i, a_cur_loc, b_cur_loc, val, retdest
%swap_mstore
// stack: carry_limb, base_addr, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: i, base_addr, carry_limb, a_cur_loc, b_cur_loc, val, retdest
%decrement
// stack: i-1, base_addr, carry_limb, a_cur_loc, b_cur_loc, val, retdest
SWAP3
// stack: a_cur_loc, base_addr, carry_limb, i-1, b_cur_loc, val, retdest
%increment
// stack: a_cur_loc+1, base_addr, carry_limb, i-1, b_cur_loc, val, retdest
SWAP4
// stack: b_cur_loc, base_addr, carry_limb, i-1, a_cur_loc+1, val, retdest
%increment
// stack: b_cur_loc+1, base_addr, carry_limb, i-1, a_cur_loc+1, val, retdest
%stack (b, addr, c, i, a) -> (c, addr, i, a, b)
// stack: carry_limb, base_addr, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
DUP3
// stack: i-1, carry_limb, base_addr, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
%jumpi(addmul_loop)
addmul_end:
// stack: carry_limb_new, base_addr, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
%stack (c, addr, i, a, b, v) -> (c)
// stack: carry_limb_new, retdest
SWAP1
// stack: retdest, carry_limb_new
JUMP
len_zero:
// stack: len, a_start_loc, b_start_loc, val, retdest
%pop4
// stack: retdest
PUSH 0
// stack: carry_limb=0, retdest
SWAP1
JUMP
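
A hedged Rust model of the same computation. Rust has no 256-bit primitive, so this sketch shrinks limbs to 64 bits and uses `u128` for the wide product; the structure (split the product, fold in a[cur] and the running carry, track overflow bits) matches the stack manipulation above:

/// a[0:len] += b[0:len] * val over little-endian limbs; returns the carry limb.
fn addmul_bignum(a: &mut [u64], b: &[u64], val: u64) -> u64 {
    let mut carry = 0u64;
    for (ai, &bi) in a.iter_mut().zip(b.iter()) {
        let prod = (val as u128) * (bi as u128);
        let (prod_lo, prod_hi) = (prod as u64, (prod >> 64) as u64);
        let (lo, c1) = prod_lo.overflowing_add(*ai); // prod_lo' and its carry
        let (lo, c2) = lo.overflowing_add(carry);    // to_write and its carry
        *ai = lo;
        // Exact arithmetic: a + b*val + carry < 2^128, so this fits in u64.
        carry = prod_hi + c1 as u64 + c2 as u64;
    }
    carry
}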

View File

@ -1,93 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Compares two bignums of the same given length (len == 0 compares as equal).
// Returns 1 if a > b, 0 if a == b, and -1 (that is, 2^256 - 1) if a < b.
global cmp_bignum:
// stack: len, a_start_loc, b_start_loc, retdest
%build_current_general_address_no_offset
// stack: base_addr, len, a_start_loc, b_start_loc, retdest
DUP2
// stack: len, base_addr, len, a_start_loc, b_start_loc, retdest
ISZERO
%jumpi(equal) // len and base_addr are swapped, but they will be popped anyway
// stack: base_addr, len, a_start_loc, b_start_loc, retdest
SWAP2
// stack: a_start_loc, len, base_addr, b_start_loc, retdest
PUSH 1
DUP3
SUB
// stack: len-1, a_start_loc, len, base_addr, b_start_loc, retdest
ADD
// stack: a_end_loc, len, base_addr, b_start_loc, retdest
SWAP3
// stack: b_start_loc, len, base_addr, a_end_loc, retdest
PUSH 1
DUP3
SUB
// stack: len-1, b_start_loc, len, base_addr, a_end_loc, retdest
ADD
// stack: b_end_loc, len, base_addr, a_end_loc, retdest
%stack (b, l, addr, a) -> (l, addr, a, b)
// stack: len, base_addr, a_end_loc, b_end_loc, retdest
%decrement
ge_loop:
// stack: i, base_addr, a_i_loc, b_i_loc, retdest
DUP4
// stack: b_i_loc, i, base_addr, a_i_loc, b_i_loc, retdest
DUP3 ADD // b_i_addr
MLOAD_GENERAL
// stack: b[i], i, base_addr, a_i_loc, b_i_loc, retdest
DUP4
// stack: a_i_loc, b[i], i, base_addr, a_i_loc, b_i_loc, retdest
DUP4 ADD // a_i_addr
MLOAD_GENERAL
// stack: a[i], b[i], i, base_addr, a_i_loc, b_i_loc, retdest
%stack (vals: 2) -> (vals, vals)
GT
%jumpi(greater)
// stack: a[i], b[i], i, base_addr, a_i_loc, b_i_loc, retdest
LT
%jumpi(less)
// stack: i, base_addr, a_i_loc, b_i_loc, retdest
DUP1
ISZERO
%jumpi(equal)
%decrement
// stack: i-1, base_addr, a_i_loc, b_i_loc, retdest
SWAP2
// stack: a_i_loc, base_addr, i-1, b_i_loc, retdest
%decrement
// stack: a_i_loc_new, base_addr, i-1, b_i_loc, retdest
SWAP3
// stack: b_i_loc, base_addr, i-1, a_i_loc_new, retdest
%decrement
// stack: b_i_loc_new, base_addr, i-1, a_i_loc_new, retdest
%stack (b, addr, i, a) -> (i, addr, a, b)
// stack: i-1, base_addr, a_i_loc_new, b_i_loc_new, retdest
%jump(ge_loop)
equal:
// stack: i, base_addr, a_i_loc, b_i_loc, retdest
%pop4
// stack: retdest
PUSH 0
// stack: 0, retdest
SWAP1
JUMP
greater:
// stack: a[i], b[i], i, base_addr, a_i_loc, b_i_loc, retdest
%pop6
// stack: retdest
PUSH 1
// stack: 1, retdest
SWAP1
JUMP
less:
// stack: i, base_addr, a_i_loc, b_i_loc, retdest
%pop4
// stack: retdest
PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
// stack: -1, retdest
SWAP1
JUMP
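
A hedged Rust model of the comparison loop, scanning from the most significant limb down:

/// Returns 1 if a > b, 0 if a == b, -1 if a < b (the kernel encodes -1 as
/// 2^256 - 1). Assumes equal lengths.
fn cmp_bignum(a: &[u128], b: &[u128]) -> i32 {
    for (ai, bi) in a.iter().zip(b.iter()).rev() {
        if ai > bi { return 1; }
        if ai < bi { return -1; }
    }
    0
}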

View File

@ -1,35 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
global isone_bignum:
// stack: len, start_loc, retdest
DUP1
// stack: len, len, start_loc, retdest
ISZERO
%jumpi(eqzero)
// stack: len, start_loc, retdest
DUP2
// stack: start_loc, len, start_loc, retdest
%mload_current_general
// stack: start_val, len, start_loc, retdest
%eq_const(1)
%jumpi(starts_with_one)
// Does not start with one, so not equal to one.
// stack: len, start_loc, retdest
%stack (vals: 2, retdest) -> (retdest, 0)
JUMP
eqzero:
// Is zero, so not equal to one.
// stack: cur_loc, end_loc, retdest
%stack (vals: 2, retdest) -> (retdest, 0)
// stack: retdest, 0
JUMP
starts_with_one:
// Starts with one, so check that the remaining limbs are zero.
// stack: len, start_loc, retdest
%decrement
SWAP1
%increment
SWAP1
// stack: len-1, start_loc+1, retdest
%jump(iszero_bignum)

View File

@ -1,40 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
global iszero_bignum:
// stack: len, start_loc, retdest
DUP1
// stack: len, len, start_loc, retdest
ISZERO
%jumpi(eqzero)
DUP2
// stack: start_loc, len, start_loc, retdest
ADD
// stack: end_loc, start_loc, retdest
SWAP1
// stack: cur_loc=start_loc, end_loc, retdest
iszero_loop:
// stack: cur_loc, end_loc, retdest
DUP1
// stack: cur_loc, cur_loc, end_loc, retdest
%mload_current_general
// stack: cur_val, cur_loc, end_loc, retdest
%jumpi(neqzero)
// stack: cur_loc, end_loc, retdest
%increment
// stack: cur_loc + 1, end_loc, retdest
%stack (vals: 2) -> (vals, vals)
// stack: cur_loc + 1, end_loc, cur_loc + 1, end_loc, retdest
EQ
%jumpi(eqzero)
%jump(iszero_loop)
neqzero:
// stack: cur_loc, end_loc, retdest
%stack (vals: 2, retdest) -> (retdest, 0)
// stack: retdest, 0
JUMP
eqzero:
// stack: cur_loc, end_loc, retdest
%stack (vals: 2, retdest) -> (retdest, 1)
// stack: retdest, 1
JUMP

View File

@ -1,192 +0,0 @@
// Arithmetic on integers represented with 128-bit limbs.
// These integers are represented in LITTLE-ENDIAN form.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Stores b ^ e % m in output_loc, leaving b, e, and m unchanged.
// b, e, and m must have the same length.
// output_loc must have size length and be initialized with zeroes; scratch_1 must have size length.
// All of scratch_2..scratch_5 must have size 2 * length and be initialized with zeroes.
// Also, scratch_2..scratch_5 must be CONSECUTIVE in memory.
global modexp_bignum:
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// Special input cases:
// (1) Modulus is zero (also covers len=0 case).
PUSH modulus_zero_return
// stack: modulus_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP5
// stack: m_loc, modulus_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP3
// stack: len, m_loc, modulus_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(iszero_bignum)
modulus_zero_return:
// stack: m==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jumpi(modulus_zero_or_one)
// (2) Modulus is one.
PUSH modulus_one_return
// stack: modulus_one_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP5
// stack: m_loc, modulus_one_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP3
// stack: len, m_loc, modulus_one_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(isone_bignum)
modulus_one_return:
// stack: m==1, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jumpi(modulus_zero_or_one)
// (3) Both b and e are zero.
PUSH b_zero_return
// stack: b_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP3
// stack: b_loc, b_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP3
// stack: len, b_loc, b_zero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(iszero_bignum)
b_zero_return:
// stack: b==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
PUSH e_zero_return
// stack: e_zero_return, b==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP5
// stack: e_loc, e_zero_return, b==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
DUP4
// stack: len, e_loc, e_zero_return, b==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(iszero_bignum)
e_zero_return:
// stack: e==0, b==0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
MUL // logical AND
%jumpi(b_and_e_zero)
// End of special cases.
// We store the repeated-squares accumulator x_i in scratch_1, starting with x_0 := b.
DUP1
DUP3
DUP8
// stack: s1, b_loc, len, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%memcpy_current_general
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// We store the accumulated output value x_i in output_loc, starting with x_0=1.
PUSH 1
DUP6
// stack: out_loc, 1, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%mstore_current_general
modexp_loop:
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// y := e % 2
DUP3
// stack: e_loc, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%mload_current_general
// stack: e_first, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%mod_const(2)
// stack: y = e_first % 2 = e % 2, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
ISZERO
// stack: y == 0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jumpi(modexp_y_0)
// if y == 1, modular-multiply output_loc by scratch_1, using scratch_2..scratch_4 as scratch space, and store in scratch_5.
PUSH modexp_mul_return
DUP10
DUP10
DUP10
DUP14
DUP9
DUP12
DUP12
DUP9
// stack: len, out_loc, s1, m_loc, s5, s2, s3, s4, modexp_mul_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(modmul_bignum)
modexp_mul_return:
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// Copy scratch_5 to output_loc.
DUP1
DUP11
DUP7
// stack: out_loc, s5, len, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%memcpy_current_general
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// Zero out scratch_2..scratch_5.
DUP1
%mul_const(8)
DUP8
// stack: s2, 8 * len, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%clear_current_general
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
modexp_y_0:
// if y == 0, do nothing
// Modular-square repeated-squares accumulator x_i (in scratch_1), using scratch_2..scratch_4 as scratch space, and store in scratch_5.
PUSH modexp_square_return
DUP10
DUP10
DUP10
DUP14
DUP9
DUP12
DUP1
DUP9
// stack: len, s1, s1, m_loc, s5, s2, s3, s4, modexp_square_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(modmul_bignum)
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
modexp_square_return:
// Copy scratch_5 to scratch_1.
DUP1
DUP11
DUP8
// stack: s1, s5, len, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%memcpy_current_general
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// Zero out scratch_2..scratch_5.
DUP1
%mul_const(8)
DUP8
// stack: s2, 8 * len, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%clear_current_general
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// e //= 2 (with shr_bignum)
PUSH modexp_shr_return
DUP4
DUP3
// stack: len, e_loc, modexp_shr_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(shr_bignum)
modexp_shr_return:
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
// check if e == 0 (with iszero_bignum)
PUSH modexp_iszero_return
DUP4
DUP3
// stack: len, e_loc, modexp_iszero_return, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jump(iszero_bignum)
modexp_iszero_return:
// stack: e == 0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
ISZERO
// stack: e != 0, len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%jumpi(modexp_loop)
// end of modexp_loop
modulus_zero_or_one:
// If modulus is zero or one, return 0.
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
%pop10
// stack: retdest
JUMP
b_and_e_zero:
// If base and exponent are zero (and modulus > 1), return 1.
// stack: len, b_loc, e_loc, m_loc, out_loc, s1, s2, s3, s4, s5, retdest
PUSH 1
DUP6
%mstore_current_general
%pop10
// stack: retdest
JUMP
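
Stripped of scratch-buffer management, the routine is right-to-left binary exponentiation. A hedged model using the crate's `num` dependency in place of the limb arithmetic:

use num::{BigUint, Integer, One, Zero};

fn modexp(mut b: BigUint, mut e: BigUint, m: &BigUint) -> BigUint {
    if m.is_zero() || m.is_one() {
        return BigUint::zero(); // special cases (1) and (2) above
    }
    let mut out = BigUint::one(); // output accumulator, x_0 = 1
    while !e.is_zero() {
        if e.is_odd() {
            out = &out * &b % m; // the `y == 1` modular multiply
        }
        b = &b * &b % m; // square the repeated-squares accumulator
        e >>= 1u32;      // e //= 2, as `shr_bignum` does
    }
    out // b == e == 0 never enters the loop, returning 1 as in case (3)
}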

View File

@ -1,178 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Stores a * b % m in output_loc, leaving a, b, and m unchanged.
// a, b, and m must have the same length.
// output_loc must have size length; scratch_2 must have size 2*length.
// Both scratch_2 and scratch_3 must have size 2*length and be initialized with zeroes.
// The prover provides x := (a * b) % m, which is the output of this function.
// We first check that x < m.
// The prover also provides k := (a * b) / m, stored in scratch space.
// We then check that x + k * m = a * b, by computing both of those using
// bignum arithmetic, storing the results in scratch space.
// We assert equality between those two, limb by limb.
global modmul_bignum:
// stack: len, a_loc, b_loc, m_loc, out_loc, s1 (=scratch_1), s2, s3, retdest
DUP1
ISZERO
%jumpi(len_zero)
// STEP 1:
// The prover provides x := (a * b) % m, which we store in output_loc.
%build_current_general_address_no_offset
PUSH 0
// stack: i=0, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
modmul_remainder_loop:
// stack: i, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
PROVER_INPUT(bignum_modmul)
// stack: PI, i, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
DUP8
DUP3
ADD
// stack: out_loc[i], PI, i, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
DUP4 ADD // out_addr_i
%swap_mstore
// stack: i, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%increment
DUP3
DUP2
// stack: i+1, len, i+1, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
SUB // functions as NEQ
// stack: i+1!=len, i+1, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%jumpi(modmul_remainder_loop)
// end of modmul_remainder_loop
// stack: i, base_addr, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%pop2
// stack: len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
// STEP 2:
// We check that x < m.
PUSH modmul_return_1
DUP6
DUP6
DUP4
// stack: len, m_loc, out_loc, modmul_return_1, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
// Should return 1 iff the value at m_loc > the value at out_loc; in other words, if x < m.
%jump(cmp_bignum)
modmul_return_1:
// stack: cmp_result, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
PUSH 1
%assert_eq
// STEP 3:
// The prover provides k := (a * b) / m, which we store in scratch_1.
// stack: len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
DUP1
// stack: len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%mul_const(2)
// stack: 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%build_current_general_address_no_offset
PUSH 0
// stack: i=0, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
modmul_quotient_loop:
// stack: i, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
PROVER_INPUT(bignum_modmul)
// stack: PI, i, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
DUP10
DUP3
ADD
// stack: s1[i], PI, i, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
DUP4 ADD // s1_addr_i
%swap_mstore
// stack: i, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%increment
DUP3
DUP2
// stack: i+1, 2*len, i+1, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
SUB // functions as NEQ
// stack: i+1!=2*len, i+1, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%jumpi(modmul_quotient_loop)
// end of modmul_quotient_loop
// stack: i, base_addr, 2*len, len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%pop3
// stack: len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
// STEP 4:
// We calculate x + k * m.
// STEP 4.1:
// Multiply k with m and store k * m in scratch_2.
PUSH modmul_return_2
%stack (return, len, a, b, m, out, s1, s2) -> (len, s1, m, s2, return, len, a, b, out, s2)
// stack: len, s1, m_loc, s2, modmul_return_2, len, a_loc, b_loc, out_loc, s2, s3, retdest
%jump(mul_bignum)
modmul_return_2:
// stack: len, a_loc, b_loc, out_loc, s2, s3, retdest
// STEP 4.2:
// Add x into k * m (in scratch_2).
PUSH modmul_return_3
%stack (return, len, a, b, out, s2) -> (len, s2, out, return, len, a, b, s2)
// stack: len, s2, out_loc, modmul_return_3, len, a_loc, b_loc, s2, s3, retdest
%jump(add_bignum)
modmul_return_3:
// stack: carry, len, a_loc, b_loc, s2, s3, retdest
POP
// stack: len, a_loc, b_loc, s2, s3, retdest
// STEP 5:
// We calculate a * b.
// Multiply a with b and store a * b in scratch_3.
PUSH modmul_return_4
%stack (return, len, a, b, s2, s3) -> (len, a, b, s3, return, len, s2, s3)
// stack: len, a_loc, b_loc, s3, modmul_return_4, len, s2, s3, retdest
%jump(mul_bignum)
modmul_return_4:
// stack: len, s2, s3, retdest
// STEP 6:
// Check that x + k * m = a * b.
%build_current_general_address_no_offset
// stack: base_addr, n=len, i=s2, j=s3, retdest
modmul_check_loop:
// stack: base_addr, n, i, j, retdest
%stack (addr, l, i, j) -> (j, i, addr, addr, l, i, j)
// stack: j, i, base_addr, base_addr, n, i, j, retdest
DUP3 ADD // addr_j
MLOAD_GENERAL
// stack: mem[j], i, base_addr, base_addr, n, i, j, retdest
SWAP2
ADD // addr_i
MLOAD_GENERAL
// stack: mem[i], mem[j], base_addr, n, i, j, retdest
%assert_eq
// stack: base_addr, n, i, j, retdest
SWAP1
%decrement
// stack: n-1, base_addr, i, j, retdest
SWAP2
%increment
// stack: i+1, base_addr, n-1, j, retdest
SWAP3
%increment
// stack: j+1, base_addr, n-1, i+1, retdest
%stack (j, addr, n, i) -> (n, addr, n, i, j)
// stack: n-1, base_addr, n-1, i+1, j+1, retdest
%jumpi(modmul_check_loop)
// end of modmul_check_loop
// stack: base_addr, n-1, i+1, j+1, retdest
%pop4
// stack: retdest
JUMP
len_zero:
// stack: len, a_loc, b_loc, m_loc, out_loc, s1, s2, s3, retdest
%pop8
// stack: retdest
JUMP
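
In symbols, the prover supplies x = (a*b) % m and k = (a*b) / m nondeterministically, and the kernel only has to verify two cheap facts. A hedged model using the crate's `num` dependency:

use num::BigUint;

fn check_modmul(a: &BigUint, b: &BigUint, m: &BigUint, x: &BigUint, k: &BigUint) -> bool {
    // STEP 2: x < m, so x is a proper remainder.
    // STEPS 4-6: x + k*m == a*b, which the kernel checks limb by limb.
    x < m && x + k * m == a * b
}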

View File

@ -1,70 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Stores a * b in output_loc, leaving a and b unchanged.
// Both a and b have length len; a * b will have length 2 * len.
// output_loc must be initialized as 2 * len zeroes.
// TODO: possible optimization: allow output_loc to be uninitialized, and write over it with a[0:len] * b[0] (a multiplication
// with carry) in place of the first addmul.
global mul_bignum:
// stack: len, a_start_loc, b_start_loc, output_loc, retdest
DUP1
// stack: len, len, a_start_loc, b_start_loc, output_loc, retdest
ISZERO
%jumpi(len_zero)
%build_current_general_address_no_offset
DUP2
// stack: n=len, base_addr, len, a_start_loc, bi=b_start_loc, output_cur=output_loc, retdest
mul_loop:
// stack: n, base_addr, len, a_start_loc, bi, output_cur, retdest
PUSH mul_addmul_return
// stack: mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP6
// stack: bi, mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP4 ADD // bi_addr
MLOAD_GENERAL
// stack: b[i], mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP6
// stack: a_start_loc, b[i], mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP9
// stack: output_loc, a_start_loc, b[i], mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP7
// stack: len, output_loc, a_start_loc, b[i], mul_addmul_return, n, base_addr, len, a_start_loc, bi, output_cur, retdest
%jump(addmul_bignum)
mul_addmul_return:
// stack: carry_limb, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP7
// stack: output_cur, carry_limb, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP5
// stack: len, output_cur, carry_limb, n, base_addr, len, a_start_loc, bi, output_cur, retdest
ADD
// stack: output_cur + len, carry_limb, n, base_addr, len, a_start_loc, bi, output_cur, retdest
DUP4 ADD
%swap_mstore
// stack: n, base_addr, len, a_start_loc, bi, output_cur, retdest
%decrement
// stack: n-1, base_addr, len, a_start_loc, bi, output_cur, retdest
SWAP4
%increment
SWAP4
// stack: n-1, base_addr, len, a_start_loc, bi+1, output_cur, retdest
SWAP5
%increment
SWAP5
// stack: n-1, base_addr, len, a_start_loc, bi+1, output_cur+1, retdest
DUP1
// stack: n-1, n-1, base_addr, len, a_start_loc, bi+1, output_cur+1, retdest
%jumpi(mul_loop)
mul_end:
// stack: n-1, base_addr, len, a_start_loc, bi+1, output_cur+1, retdest
%pop6
// stack: retdest
JUMP
len_zero:
// stack: len, a_start_loc, b_start_loc, output_loc, retdest
%pop4
// stack: retdest
JUMP

View File

@ -1,74 +0,0 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Shifts a given bignum right by one bit (in place).
// len == 0 is handled as a no-op.
global shr_bignum:
// stack: len, start_loc, retdest
DUP1
// stack: len, len, start_loc, retdest
ISZERO
%jumpi(len_zero)
// stack: len, start_loc, retdest
DUP2
// stack: start_loc, len, start_loc, retdest
ADD
// stack: start_loc + len, start_loc, retdest
%decrement
// stack: end_loc, start_loc, retdest
%build_current_general_address_no_offset
// stack: base_addr, end_loc, start_loc, retdest
%stack (addr, e) -> (e, addr, 0)
// stack: i=end_loc, base_addr, carry=0, start_loc, retdest
shr_loop:
// stack: i, base_addr, carry, start_loc, retdest
DUP1
// stack: i, i, base_addr, carry, start_loc, retdest
DUP3 ADD // addr_i
MLOAD_GENERAL
// stack: a[i], i, base_addr, carry, start_loc, retdest
DUP1
// stack: a[i], a[i], i, base_addr, carry, start_loc, retdest
%shr_const(1)
// stack: a[i] >> 1, a[i], i, base_addr, carry, start_loc, retdest
SWAP1
// stack: a[i], a[i] >> 1, i, base_addr, carry, start_loc, retdest
%mod_const(2)
// stack: new_carry = a[i] % 2, a[i] >> 1, i, base_addr, carry, start_loc, retdest
SWAP4
// stack: carry, a[i] >> 1, i, base_addr, new_carry, start_loc, retdest
%shl_const(127)
// stack: carry << 127, a[i] >> 1, i, base_addr, new_carry, start_loc, retdest
ADD
// stack: carry << 127 | a[i] >> 1, i, base_addr, new_carry, start_loc, retdest
DUP2
// stack: i, carry << 127 | a[i] >> 1, i, base_addr, new_carry, start_loc, retdest
DUP4 ADD // addr_i
%swap_mstore
// stack: i, base_addr, new_carry, start_loc, retdest
PUSH 1
DUP2
SUB
// stack: i-1, i, base_addr, new_carry, start_loc, retdest
SWAP1
// stack: i, i-1, base_addr, new_carry, start_loc, retdest
DUP5
// stack: start_loc, i, i-1, base_addr, new_carry, start_loc, retdest
EQ
// stack: i == start_loc, i-1, base_addr, new_carry, start_loc, retdest
ISZERO
// stack: i != start_loc, i-1, base_addr, new_carry, start_loc, retdest
%jumpi(shr_loop)
shr_end:
// stack: i, base_addr, new_carry, start_loc, retdest
%pop4
// stack: retdest
JUMP
len_zero:
// stack: len, start_loc, retdest
%pop2
// stack: retdest
JUMP
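
A hedged Rust model of the shift loop, walking from the most significant limb down and carrying each dropped low bit into the next limb's high bit:

fn shr_bignum(a: &mut [u128]) {
    let mut carry = 0u128;
    for limb in a.iter_mut().rev() {
        let new_carry = *limb & 1;             // a[i] % 2
        *limb = (carry << 127) | (*limb >> 1); // carry << 127 | a[i] >> 1
        carry = new_carry;
    }
}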

View File

@ -1,21 +0,0 @@
%macro memcpy_current_general
// stack: dst, src, len
// DST and SRC are offsets, for the same memory segment
%build_current_general_address_no_offset
%stack (addr_no_offset, dst, src, len) -> (addr_no_offset, src, addr_no_offset, dst, len, %%after)
ADD
// stack: SRC, addr_no_offset, dst, len, %%after
SWAP2
ADD
// stack: DST, SRC, len, %%after
%jump(memcpy)
%%after:
%endmacro
%macro clear_current_general
// stack: dst, len
%build_current_general_address
%stack (DST, len) -> (DST, len, %%after)
%jump(memset)
%%after:
%endmacro

View File

@ -1,166 +0,0 @@
/// Implementation of Bloom filters for logs.
// Adds a Bloom entry to the transaction Bloom filter and the block Bloom filter.
//
// This is calculated by taking the least significant 11 bits of each of the
// first three 16-bit words of the keccak_256 hash of bloom_entry.
add_to_bloom:
// stack: is_topic, bloom_entry, retdest
%compute_entry_hash
// stack: hash, retdest
DUP1
// stack: hash, hash, retdest
%shr_const(240)
// stack: hash_shft_240, hash, retdest
%bloom_byte_indices
// stack: byte_index, byte_bit_index, hash, retdest
%bloom_write_bit
// stack: hash, retdest
// We shift the hash by 16 bits and repeat.
DUP1 %shr_const(224)
// stack: hash_shft_224, hash, retdest
%bloom_byte_indices
// stack: byte_index, byte_bit_index, hash, retdest
%bloom_write_bit
// stack: hash, retdest
// We shift the hash again by 16 bits and repeat.
%shr_const(208)
// stack: hash_shft_208, retdest
%bloom_byte_indices
// stack: byte_index, byte_bit_index, retdest
%bloom_write_bit
// stack: retdest
JUMP
// The LOGS segment is [log0_ptr, log1_ptr...]. logs_len is a global metadata for the number of logs.
// A log in the LOGS_DATA segment is [log_payload_len, address, num_topics, [topics], data_len, [data]].
global logs_bloom:
// stack: retdest
%mload_global_metadata(@GLOBAL_METADATA_LOGS_LEN)
// stack: logs_len, retdest
PUSH 0
logs_bloom_loop:
// stack: i, logs_len, retdest
DUP2 DUP2 EQ
// stack: i == logs_len, i, logs_len, retdest
%jumpi(logs_bloom_end)
// stack: i, logs_len, retdest
DUP1
%mload_kernel(@SEGMENT_LOGS)
// stack: log_payload_len_ptr, i, logs_len, retdest
// Add address to bloom filter.
%increment
// stack: addr_ptr, i, logs_len, retdest
PUSH @SEGMENT_LOGS_DATA %build_kernel_address
DUP1
MLOAD_GENERAL
// stack: addr, full_addr_ptr, i, logs_len, retdest
PUSH 0
// stack: is_topic, addr, full_addr_ptr, i, logs_len, retdest
%add_to_bloom
// stack: full_addr_ptr, i, logs_len, retdest
%increment
// stack: full_num_topics_ptr, i, logs_len, retdest
DUP1
MLOAD_GENERAL
// stack: num_topics, full_num_topics_ptr, i, logs_len, retdest
SWAP1 %increment
// stack: full_topics_ptr, num_topics, i, logs_len, retdest
PUSH 0
logs_bloom_topic_loop:
// stack: j, topics_ptr, num_topics, i, logs_len, retdest
DUP3 DUP2 EQ
// stack: j == num_topics, j, topics_ptr, num_topics, i, logs_len, retdest
%jumpi(logs_bloom_topic_end)
DUP2 DUP2 ADD
// stack: curr_topic_ptr, j, topics_ptr, num_topics, i, logs_len, retdest
MLOAD_GENERAL
// stack: topic, j, topics_ptr, num_topics, i, logs_len, retdest
PUSH 1
// stack: is_topic, topic, j, topics_ptr, num_topics, i, logs_len, retdest
%add_to_bloom
// stack: j, topics_ptr, num_topics, i, logs_len, retdest
%increment
%jump(logs_bloom_topic_loop)
logs_bloom_topic_end:
// stack: num_topics, topics_ptr, num_topics, i, logs_len, retdest
%pop3
%increment
%jump(logs_bloom_loop)
logs_bloom_end:
// stack: logs_len, logs_len, retdest
%pop2
JUMP
%macro compute_entry_hash
// stack: is_topic, bloom_entry
ISZERO
%jumpi(%%compute_entry_hash_address)
// stack: bloom_entry
%keccak256_word(32)
// stack: topic_hash
%jump(%%after)
%%compute_entry_hash_address:
// stack: bloom_entry
%keccak256_word(20)
// stack: address_hash
%%after:
%endmacro
%macro add_to_bloom
%stack (is_topic, bloom_entry) -> (is_topic, bloom_entry, %%after)
%jump(add_to_bloom)
%%after:
%endmacro
// Computes the byte index, and the bit index within that byte, at which to
// update the Bloom filter.
// The hash value must be properly shifted prior to calling this macro.
%macro bloom_byte_indices
// stack: hash
%and_const(0x07FF)
PUSH 0x07FF
SUB
// stack: bit_index
DUP1
%and_const(0x7)
SWAP1
%shr_const(0x3)
// stack: byte_index, byte_bit_index
%endmacro
// Updates the corresponding bloom filter byte with provided bit.
// Also updates the block bloom filter.
%macro bloom_write_bit
// stack: byte_index, byte_bit_index
PUSH @SEGMENT_TXN_BLOOM
%build_kernel_address
PUSH 1
DUP3
// stack: byte_bit_index, 1, byte_addr, byte_bit_index
PUSH 7 SUB
SHL
// Updates the current txn bloom filter.
SWAP2 POP DUP1
MLOAD_GENERAL
// stack: old_bloom_byte, byte_addr, one_shifted_by_index
DUP3 OR
// stack: new_bloom_byte, byte_addr, one_shifted_by_index
MSTORE_GENERAL
// stack: one_shifted_by_index
POP
// stack: empty
%endmacro
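
A hedged Rust rendering of `%bloom_byte_indices`: given one already-shifted 16-bit word of the entry hash, it derives which byte and which bit of the 256-byte Bloom filter to set.

fn bloom_byte_indices(hash_word: u16) -> (usize, usize) {
    // 0x07FF - (hash & 0x07FF): the bit position, counted from the end of
    // the 2048-bit filter.
    let bit_index = 0x07FF - (hash_word as usize & 0x07FF);
    (bit_index >> 3, bit_index & 0x7) // (byte_index, byte_bit_index)
}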

View File

@ -1,203 +0,0 @@
/// Access lists for addresses and storage keys.
/// The access list is stored in an array. The length of the array is stored in the global metadata.
/// For storage keys, the address and key are stored as two consecutive elements.
/// The array is stored in the SEGMENT_ACCESSED_ADDRESSES segment for addresses and in the SEGMENT_ACCESSED_STORAGE_KEYS segment for storage keys.
/// Both arrays are stored in the kernel memory (context=0).
/// Searching and inserting is done by doing a linear search through the array.
/// If the address/storage key isn't found in the array, it is inserted at the end.
/// TODO: Look into using a more efficient data structure for the access lists.
%macro insert_accessed_addresses
%stack (addr) -> (addr, %%after)
%jump(insert_accessed_addresses)
%%after:
// stack: cold_access
%endmacro
%macro insert_accessed_addresses_no_return
%insert_accessed_addresses
POP
%endmacro
/// Inserts the address into the access list if it is not already present.
/// Return 1 if the address was inserted, 0 if it was already present.
global insert_accessed_addresses:
// stack: addr, retdest
%mload_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN)
// stack: len, addr, retdest
PUSH @SEGMENT_ACCESSED_ADDRESSES ADD
PUSH @SEGMENT_ACCESSED_ADDRESSES
insert_accessed_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_ACCESSED_ADDRESSES
%stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest)
EQ %jumpi(insert_address)
// stack: i, len, addr, retdest
DUP1
MLOAD_GENERAL
// stack: loaded_addr, i, len, addr, retdest
DUP4
// stack: addr, loaded_addr, i, len, addr, retdest
EQ %jumpi(insert_accessed_addresses_found)
// stack: i, len, addr, retdest
%increment
%jump(insert_accessed_addresses_loop)
insert_address:
%stack (i, len, addr, retdest) -> (i, addr, len, retdest)
DUP2 %journal_add_account_loaded // Add a journal entry for the loaded account.
%swap_mstore // Store new address at the end of the array.
// stack: len, retdest
%increment
%sub_const(@SEGMENT_ACCESSED_ADDRESSES) // unscale `len`
%mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // Store new length.
PUSH 1 // Return 1 to indicate that the address was inserted.
SWAP1 JUMP
insert_accessed_addresses_found:
%stack (i, len, addr, retdest) -> (retdest, 0) // Return 0 to indicate that the address was already present.
JUMP
/// Remove the address from the access list.
/// Panics if the address is not in the access list.
global remove_accessed_addresses:
// stack: addr, retdest
%mload_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN)
// stack: len, addr, retdest
PUSH @SEGMENT_ACCESSED_ADDRESSES ADD
PUSH @SEGMENT_ACCESSED_ADDRESSES
remove_accessed_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_ACCESSED_ADDRESSES
%stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest)
EQ %jumpi(panic)
// stack: i, len, addr, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, addr, retdest
DUP4
// stack: addr, loaded_addr, i, len, addr, retdest
EQ %jumpi(remove_accessed_addresses_found)
// stack: i, len, addr, retdest
%increment
%jump(remove_accessed_addresses_loop)
remove_accessed_addresses_found:
%stack (i, len, addr, retdest) -> (len, 1, i, retdest)
SUB // len -= 1
PUSH @SEGMENT_ACCESSED_ADDRESSES
DUP2 SUB // unscale `len`
%mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_ADDRESSES_LEN) // Decrement the access list length.
// stack: len-1, i, retdest
MLOAD_GENERAL // Load the last address in the access list.
// stack: last_addr, i, retdest
MSTORE_GENERAL
// Store the last address at the position of the removed address.
JUMP
%macro insert_accessed_storage_keys
%stack (addr, key, value) -> (addr, key, value, %%after)
%jump(insert_accessed_storage_keys)
%%after:
// stack: cold_access, original_value
%endmacro
/// Inserts the storage key and value into the access list if it is not already present.
/// `value` should be the current storage value at the slot `(addr, key)`.
/// Return `1, original_value` if the storage key was inserted, `0, original_value` if it was already present.
global insert_accessed_storage_keys:
// stack: addr, key, value, retdest
%mload_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN)
// stack: len, addr, key, value, retdest
PUSH @SEGMENT_ACCESSED_STORAGE_KEYS ADD
PUSH @SEGMENT_ACCESSED_STORAGE_KEYS
insert_accessed_storage_keys_loop:
// `i` and `len` are both scaled by SEGMENT_ACCESSED_STORAGE_KEYS
%stack (i, len, addr, key, value, retdest) -> (i, len, i, len, addr, key, value, retdest)
EQ %jumpi(insert_storage_key)
// stack: i, len, addr, key, value, retdest
DUP1 %increment MLOAD_GENERAL
// stack: loaded_key, i, len, addr, key, value, retdest
DUP2 MLOAD_GENERAL
// stack: loaded_addr, loaded_key, i, len, addr, key, value, retdest
DUP5 EQ
// stack: loaded_addr==addr, loaded_key, i, len, addr, key, value, retdest
SWAP1 DUP6 EQ
// stack: loaded_key==key, loaded_addr==addr, i, len, addr, key, value, retdest
MUL // AND
%jumpi(insert_accessed_storage_keys_found)
// stack: i, len, addr, key, value, retdest
%add_const(3)
%jump(insert_accessed_storage_keys_loop)
insert_storage_key:
// stack: i, len, addr, key, value, retdest
DUP4 DUP4 %journal_add_storage_loaded // Add a journal entry for the loaded storage key.
// stack: i, len, addr, key, value, retdest
%stack(dst, len, addr, key, value) -> (addr, dst, dst, key, dst, value, dst, @SEGMENT_ACCESSED_STORAGE_KEYS, value)
MSTORE_GENERAL // Store new address at the end of the array.
// stack: dst, key, dst, value, dst, segment, value, retdest
%increment SWAP1
MSTORE_GENERAL // Store new key after that
// stack: dst, value, dst, segment, value, retdest
%add_const(2) SWAP1
MSTORE_GENERAL // Store new value after that
// stack: dst, segment, value, retdest
%add_const(3)
SUB // unscale dst
%mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // Store new length.
%stack (value, retdest) -> (retdest, 1, value) // Return 1 to indicate that the storage key was inserted.
JUMP
insert_accessed_storage_keys_found:
// stack: i, len, addr, key, value, retdest
%add_const(2)
MLOAD_GENERAL
%stack (original_value, len, addr, key, value, retdest) -> (retdest, 0, original_value) // Return 0 to indicate that the storage key was already present.
JUMP
/// Remove the storage key and its value from the access list.
/// Panics if the key is not in the list.
global remove_accessed_storage_keys:
// stack: addr, key, retdest
%mload_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN)
// stack: len, addr, key, retdest
PUSH @SEGMENT_ACCESSED_STORAGE_KEYS ADD
PUSH @SEGMENT_ACCESSED_STORAGE_KEYS
remove_accessed_storage_keys_loop:
// `i` and `len` are both scaled by SEGMENT_ACCESSED_STORAGE_KEYS
%stack (i, len, addr, key, retdest) -> (i, len, i, len, addr, key, retdest)
EQ %jumpi(panic)
// stack: i, len, addr, key, retdest
DUP1 %increment MLOAD_GENERAL
// stack: loaded_key, i, len, addr, key, retdest
DUP2 MLOAD_GENERAL
// stack: loaded_addr, loaded_key, i, len, addr, key, retdest
DUP5 EQ
// stack: loaded_addr==addr, loaded_key, i, len, addr, key, retdest
SWAP1 DUP6 EQ
// stack: loaded_key==key, loaded_addr==addr, i, len, addr, key, retdest
MUL // AND
%jumpi(remove_accessed_storage_keys_found)
// stack: i, len, addr, key, retdest
%add_const(3)
%jump(remove_accessed_storage_keys_loop)
remove_accessed_storage_keys_found:
%stack (i, len, addr, key, retdest) -> (len, 3, i, retdest)
SUB
PUSH @SEGMENT_ACCESSED_STORAGE_KEYS
DUP2 SUB // unscale
%mstore_global_metadata(@GLOBAL_METADATA_ACCESSED_STORAGE_KEYS_LEN) // Decrease the access list length.
// stack: len-3, i, retdest
DUP1 %add_const(2) MLOAD_GENERAL
// stack: last_value, len-3, i, retdest
DUP2 %add_const(1) MLOAD_GENERAL
// stack: last_key, last_value, len-3, i, retdest
DUP3 MLOAD_GENERAL
// stack: last_addr, last_key, last_value, len-3, i, retdest
DUP5 %swap_mstore // Move the last tuple to the position of the removed tuple.
// stack: last_key, last_value, len-3, i, retdest
DUP4 %add_const(1) %swap_mstore
// stack: last_value, len-3, i, retdest
DUP3 %add_const(2) %swap_mstore
// stack: len-3, i, retdest
%pop2 JUMP
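
The storage-key list works the same way, but over (addr, key, value) triples with stride 3; a companion sketch under the same toy assumptions:

// Toy model of the accessed-storage-keys segment: (addr, key, value) triples,
// removed by moving the last triple into the freed slot.
struct AccessedStorageKeys(Vec<(u64, u64, u64)>);

impl AccessedStorageKeys {
    /// Returns (cold_access, original_value), matching the kernel's
    /// `1, original_value` / `0, original_value` convention.
    fn insert(&mut self, addr: u64, key: u64, value: u64) -> (bool, u64) {
        if let Some(&(_, _, orig)) = self.0.iter().find(|&&(a, k, _)| a == addr && k == key) {
            return (false, orig); // already present: warm access
        }
        self.0.push((addr, key, value));
        (true, value)
    }

    /// Mirrors `remove_accessed_storage_keys` (len -= 3 in the kernel).
    fn remove(&mut self, addr: u64, key: u64) {
        let i = self.0.iter()
            .position(|&(a, k, _)| a == addr && k == key)
            .expect("panic: not in access list");
        self.0.swap_remove(i);
    }
}
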


@ -1,447 +0,0 @@
// Handlers for call-like operations, namely CALL, CALLCODE, STATICCALL and DELEGATECALL.
// Reminder: All context metadata hardcoded offsets are already scaled by `Segment::ContextMetadata`.
// Creates a new sub context and executes the code of the given account.
global sys_call:
// Check that the value is zero if the context is static.
// stack: kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size
DUP4 ISZERO %not_bit
// stack: value!=0, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size
%mload_context_metadata(@CTX_METADATA_STATIC)
// stack: is_static, value!=0, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size
MUL // Cheaper than AND
%jumpi(fault_exception)
%stack (kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(args_size, args_offset, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
%stack (kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(ret_size, ret_offset, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
SWAP2
// stack: address, gas, kexit_info, value, args_offset, args_size, ret_offset, ret_size
%u256_to_addr // Truncate to 160 bits
DUP1 %insert_accessed_addresses
%call_charge_gas(1, 1)
%check_depth
%checkpoint // Checkpoint
DUP3 %insert_touched_addresses
%create_context
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(new_ctx, args_offset, args_size, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
%copy_mem_to_calldata
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP5 DUP5 %address %transfer_eth %jumpi(call_insufficient_balance)
DUP5 DUP5 %address %journal_add_balance_transfer
DUP3 %set_new_ctx_gas_limit
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP4
// stack: address, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%handle_precompiles
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%set_new_ctx_parent_pc(after_call_instruction)
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
// Each line in the block below does not change the stack.
%set_static
DUP4 %set_new_ctx_addr
%address %set_new_ctx_caller
DUP5 %set_new_ctx_value
DUP4 %set_new_ctx_code
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
-> (new_ctx, kexit_info, ret_offset, ret_size)
%enter_new_ctx
// Creates a new sub context as if calling itself, but with the code of the
// given account. In particular the storage remains the same.
global sys_callcode:
// stack: kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(args_size, args_offset, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
%stack (kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(ret_size, ret_offset, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
SWAP2
// stack: address, gas, kexit_info, value, args_offset, args_size, ret_offset, ret_size
%u256_to_addr // Truncate to 160 bits
DUP1 %insert_accessed_addresses
%call_charge_gas(1, 0)
%check_depth
%checkpoint // Checkpoint
%address %insert_touched_addresses
// stack: kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%create_context
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(new_ctx, args_offset, args_size, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
%copy_mem_to_calldata
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP5 %address %address %transfer_eth %jumpi(call_insufficient_balance)
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP3 %set_new_ctx_gas_limit
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP4
// stack: address, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%handle_precompiles
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%set_new_ctx_parent_pc(after_call_instruction)
// Each line in the block below does not change the stack.
%set_static
%address %set_new_ctx_addr
%address %set_new_ctx_caller
DUP5 %set_new_ctx_value
DUP4 %set_new_ctx_code
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
-> (new_ctx, kexit_info, ret_offset, ret_size)
%enter_new_ctx
// Creates a new sub context and executes the code of the given account.
// Equivalent to CALL, except that it does not allow any state modifying
// instructions or sending ETH in the sub context. The disallowed instructions
// are CREATE, CREATE2, LOG0, LOG1, LOG2, LOG3, LOG4, SSTORE, SELFDESTRUCT and
// CALL if the value sent is not 0.
global sys_staticcall:
// stack: kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size
%stack (kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size) ->
(args_size, args_offset, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
%stack (kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size) ->
(ret_size, ret_offset, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
SWAP2
// stack: address, gas, kexit_info, args_offset, args_size, ret_offset, ret_size
%u256_to_addr // Truncate to 160 bits
DUP1 %insert_accessed_addresses
// Add a value of 0 to the stack. Slightly inefficient but that way we can reuse %call_charge_gas.
%stack (cold_access, address, gas, kexit_info) -> (cold_access, address, gas, kexit_info, 0)
%call_charge_gas(0, 1)
%check_depth
%checkpoint // Checkpoint
DUP3 %insert_touched_addresses
// stack: kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%create_context
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(new_ctx, args_offset, args_size, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
%copy_mem_to_calldata
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP3 %set_new_ctx_gas_limit
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP4
// stack: address, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%handle_precompiles
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%set_new_ctx_parent_pc(after_call_instruction)
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
// Each line in the block below does not change the stack.
%set_static_true
DUP4 %set_new_ctx_addr
%address %set_new_ctx_caller
PUSH 0 %set_new_ctx_value
DUP4 %set_new_ctx_code
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
-> (new_ctx, kexit_info, ret_offset, ret_size)
%enter_new_ctx
// Creates a new sub context as if calling itself, but with the code of the
// given account. In particular the storage, the current sender and the current
// value remain the same.
global sys_delegatecall:
// stack: kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size
%stack (kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size) ->
(args_size, args_offset, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
%stack (kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size) ->
(ret_size, ret_offset, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size)
%checked_mem_expansion
SWAP2
// stack: address, gas, kexit_info, args_offset, args_size, ret_offset, ret_size
%u256_to_addr // Truncate to 160 bits
DUP1 %insert_accessed_addresses
// Add a value of 0 to the stack. Slightly inefficient but that way we can reuse %call_charge_gas.
%stack (cold_access, address, gas, kexit_info) -> (cold_access, address, gas, kexit_info, 0)
%call_charge_gas(0, 0)
%check_depth
%checkpoint // Checkpoint
%address %insert_touched_addresses
// stack: kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%create_context
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(new_ctx, args_offset, args_size, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
%copy_mem_to_calldata
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP3 %set_new_ctx_gas_limit
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
DUP4
// stack: address, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%handle_precompiles
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
%set_new_ctx_parent_pc(after_call_instruction)
// stack: new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size
// Each line in the block below does not change the stack.
%set_static
%address %set_new_ctx_addr
%caller %set_new_ctx_caller
%callvalue %set_new_ctx_value
DUP4 %set_new_ctx_code
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size)
-> (new_ctx, kexit_info, ret_offset, ret_size)
%enter_new_ctx
// We go here after any CALL type instruction (but not after the special call by the transaction originator).
global after_call_instruction:
// stack: success, leftover_gas, new_ctx, kexit_info, ret_offset, ret_size
DUP1 ISZERO %jumpi(after_call_instruction_failed)
%pop_checkpoint
after_call_instruction_contd:
SWAP3
// stack: kexit_info, leftover_gas, new_ctx, success, ret_offset, ret_size
// Add the leftover gas into the appropriate bits of kexit_info.
SWAP1 %shl_const(192) SWAP1 SUB
// stack: kexit_info, new_ctx, success, ret_offset, ret_size
// The callee's terminal instruction will have populated RETURNDATA.
%copy_returndata_to_mem
EXIT_KERNEL
after_call_instruction_failed:
// stack: success, leftover_gas, new_ctx, kexit_info, ret_offset, ret_size
%revert_checkpoint
%jump(after_call_instruction_contd)
call_insufficient_balance:
%stack (new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(callgas, kexit_info, 0)
%shl_const(192) SWAP1 SUB
// stack: kexit_info', 0
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
EXIT_KERNEL
%macro check_depth
%call_depth
%gt_const(@CALL_STACK_LIMIT)
%jumpi(call_too_deep)
%endmacro
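
For reference, a one-function Rust sketch of what `%check_depth` enforces (the standard 1024-frame EVM limit; names are illustrative):

// Sketch of `%check_depth`: the EVM call stack is capped at 1024 frames.
const CALL_STACK_LIMIT: u64 = 1024;

fn check_depth(call_depth: u64) -> Result<(), &'static str> {
    if call_depth > CALL_STACK_LIMIT { Err("call_too_deep") } else { Ok(()) }
}
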
call_too_deep:
%stack (kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(callgas, kexit_info, 0)
%shl_const(192) SWAP1 SUB
// stack: kexit_info', 0
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
EXIT_KERNEL
// Set @CTX_METADATA_STATIC to 1. Note that there is no corresponding set_static_false routine
// because it will already be 0 by default.
%macro set_static_true
// stack: new_ctx
DUP1
%build_address_with_ctx_no_segment(@CTX_METADATA_STATIC)
PUSH 1
// stack: 1, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
// Set @CTX_METADATA_STATIC of the next context to the current value.
%macro set_static
// stack: new_ctx
DUP1
%build_address_with_ctx_no_segment(@CTX_METADATA_STATIC)
%mload_context_metadata(@CTX_METADATA_STATIC)
// stack: is_static, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_addr
// stack: called_addr, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_ADDRESS)
SWAP1
// stack: called_addr, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_caller
// stack: sender, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_CALLER)
SWAP1
// stack: sender, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_value
// stack: value, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_CALL_VALUE)
SWAP1
// stack: value, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_code_size
// stack: code_size, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_CODE_SIZE)
SWAP1
// stack: code_size, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_calldata_size
// stack: calldata_size, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_CALLDATA_SIZE)
SWAP1
// stack: calldata_size, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_gas_limit
// stack: gas_limit, new_ctx
DUP2
%build_address_with_ctx_no_segment(@CTX_METADATA_GAS_LIMIT)
SWAP1
// stack: gas_limit, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_parent_ctx
// stack: new_ctx
DUP1
%build_address_with_ctx_no_segment(@CTX_METADATA_PARENT_CONTEXT)
GET_CONTEXT
// stack: ctx, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_parent_pc(label)
// stack: new_ctx
DUP1
%build_address_with_ctx_no_segment(@CTX_METADATA_PARENT_PC)
PUSH $label
// stack: label, addr, new_ctx
MSTORE_GENERAL
// stack: new_ctx
%endmacro
%macro set_new_ctx_code
%stack (address, new_ctx) -> (address, new_ctx, %%after, new_ctx)
%jump(load_code_padded)
%%after:
%set_new_ctx_code_size
// stack: new_ctx
%endmacro
%macro enter_new_ctx
// stack: new_ctx
// Switch to the new context and go to usermode with PC=0.
DUP1 // new_ctx
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// Perform jumpdest analysis
%mload_context_metadata(@CTX_METADATA_CODE_SIZE)
GET_CONTEXT
// stack: ctx, code_size, retdest
%jumpdest_analysis
PUSH 0 // jump dest
EXIT_KERNEL
// (Old context) stack: new_ctx
%endmacro
%macro copy_mem_to_calldata
// stack: new_ctx, args_offset, args_size
GET_CONTEXT
%stack(ctx, new_ctx, args_offset, args_size) -> (ctx, @SEGMENT_MAIN_MEMORY, args_offset, args_size, %%after, new_ctx, args_size)
%build_address
// stack: SRC, args_size, %%after, new_ctx, args_size
DUP4
%build_address_with_ctx_no_offset(@SEGMENT_CALLDATA)
// stack: DST, SRC, args_size, %%after, new_ctx, args_size
%jump(memcpy_bytes)
%%after:
// stack: new_ctx, args_size
%build_address_with_ctx_no_segment(@CTX_METADATA_CALLDATA_SIZE)
// stack: addr, args_size
SWAP1
MSTORE_GENERAL
// stack: (empty)
%endmacro
%macro copy_returndata_to_mem
// stack: kexit_info, new_ctx, success, ret_offset, ret_size
SWAP4
%returndatasize
// stack: returndata_size, ret_size, new_ctx, success, ret_offset, kexit_info
%min
GET_CONTEXT
%stack (ctx, n, new_ctx, success, ret_offset, kexit_info) -> (ctx, @SEGMENT_RETURNDATA, @SEGMENT_MAIN_MEMORY, ret_offset, ctx, n, %%after, kexit_info, success)
%build_address_no_offset
// stack: SRC, @SEGMENT_MAIN_MEMORY, ret_offset, ctx, n, %%after, kexit_info, success
SWAP3
%build_address
// stack: DST, SRC, n, %%after, kexit_info, success
%jump(memcpy_bytes)
%%after:
%endmacro
// Checked memory expansion.
%macro checked_mem_expansion
// stack: size, offset, kexit_info
DUP1 ISZERO %jumpi(%%zero)
%add_or_fault
// stack: expanded_num_bytes, kexit_info
DUP1 %ensure_reasonable_offset
%update_mem_bytes
%jump(%%after)
%%zero:
%pop2
%%after:
%endmacro
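
The gas side of this macro is the standard EVM memory cost rule; a hedged Rust sketch of that formula (Yellow Paper C_mem with G_memory = 3), rather than a line-by-line port of the macro:

// C_mem(a) = 3*a + floor(a^2 / 512), in 32-byte words.
fn mem_cost(words: u64) -> u64 {
    3 * words + words * words / 512
}

/// Gas to grow memory from `old_bytes` so it covers `offset + size`
/// (the size == 0 case is skipped, as in the %%zero branch above).
fn expansion_gas(old_bytes: u64, offset: u64, size: u64) -> Option<u64> {
    let end = offset.checked_add(size)?; // None models %add_or_fault
    let old_words = (old_bytes + 31) / 32;
    let new_words = old_words.max((end + 31) / 32);
    Some(mem_cost(new_words) - mem_cost(old_words))
}
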


@ -1,92 +0,0 @@
%macro call_charge_gas(is_call_or_callcode, is_call_or_staticcall)
%stack (cold_access, address, gas, kexit_info, value) ->
($is_call_or_callcode, $is_call_or_staticcall, cold_access, address, gas, kexit_info, value, %%after)
%jump(call_charge_gas)
%%after:
// stack: kexit_info, C_callgas, address, value
%endmacro
// Charge gas for *call opcodes and return the sub-context gas limit.
// Doesn't include memory expansion costs.
global call_charge_gas:
// Compute C_access
// stack: is_call_or_callcode, is_call_or_staticcall, cold_access, address, gas, kexit_info, value, retdest
SWAP2
// stack: cold_access, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%mul_const(@GAS_COLDACCOUNTACCESS_MINUS_WARMACCESS)
%add_const(@GAS_WARMACCESS)
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
DUP3
// stack: is_call_or_callcode, cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jumpi(xfer_cost)
after_xfer_cost:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
DUP2
%jumpi(new_cost)
after_new_cost:
%stack (Cextra, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest) ->
(Cextra, address, gas, kexit_info, value, retdest)
// Compute C_gascap
// stack: Cextra, address, gas, kexit_info, value, retdest
DUP4 %leftover_gas
// stack: leftover_gas, Cextra, address, gas, kexit_info, value, retdest
DUP2 DUP2 LT
// stack: leftover_gas<Cextra, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
DUP5 DUP2 MUL
// stack: (leftover_gas<Cextra)*gas, leftover_gas<Cextra, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
SWAP1 %not_bit
// stack: leftover_gas>=Cextra, (leftover_gas<Cextra)*gas, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
DUP4 DUP4 SUB
// stack: leftover_gas - Cextra, leftover_gas>=Cextra, (leftover_gas<Cextra)*gas, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
%all_but_one_64th
// stack: L(leftover_gas - Cextra), leftover_gas>=Cextra, (leftover_gas<Cextra)*gas, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
DUP7 %min MUL ADD
// stack: Cgascap, leftover_gas, Cextra, address, gas, kexit_info, value, retdest
// Compute C_call and charge for it.
%stack (Cgascap, leftover_gas, Cextra) -> (Cextra, Cgascap, Cgascap)
ADD
%stack (C_call, Cgascap, address, gas, kexit_info, value) ->
(C_call, kexit_info, Cgascap, address, gas, value)
%charge_gas
// Compute C_callgas
%stack (kexit_info, Cgascap, address, gas, value) ->
(Cgascap, address, gas, kexit_info, value)
DUP5 ISZERO %not_bit
// stack: value!=0, Cgascap, address, gas, kexit_info, value, retdest
%mul_const(@GAS_CALLSTIPEND) ADD
%stack (C_callgas, address, gas, kexit_info, value, retdest) ->
(retdest, kexit_info, C_callgas, address, value)
JUMP
global xfer_cost:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
DUP7
// stack: value, cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jumpi(xfer_cost_nonzero)
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jump(after_xfer_cost)
xfer_cost_nonzero:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%add_const(@GAS_CALLVALUE)
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jump(after_xfer_cost)
new_cost:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
DUP7
// stack: value, cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jumpi(new_cost_transfers_value)
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jump(after_new_cost)
new_cost_transfers_value:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
DUP4 %is_dead
%jumpi(new_cost_nonzero)
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%jump(after_new_cost)
new_cost_nonzero:
// stack: cost, is_call_or_staticcall, is_call_or_callcode, address, gas, kexit_info, value, retdest
%add_const(@GAS_NEWACCOUNT)
%jump(after_new_cost)
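
Putting the branches above together, a Rust sketch of the overall rule (the EIP-150 gas cap plus EIP-2929 access cost). The kernel passes two opcode flags; they are folded into booleans here, and the constants are the usual mainnet values:

const GAS_WARMACCESS: u64 = 100;
const GAS_COLDACCOUNTACCESS: u64 = 2_600;
const GAS_CALLVALUE: u64 = 9_000;
const GAS_NEWACCOUNT: u64 = 25_000;
const GAS_CALLSTIPEND: u64 = 2_300;

fn all_but_one_64th(gas: u64) -> u64 {
    gas - gas / 64
}

/// Returns (c_call, c_callgas): what the caller is charged and the gas limit
/// handed to the callee. `transfers_value` can only be true for CALL/CALLCODE,
/// and `callee_is_dead` only matters for CALL.
fn call_gas(cold_access: bool, transfers_value: bool, callee_is_dead: bool,
            gas: u64, leftover_gas: u64) -> (u64, u64) {
    let mut c_extra = if cold_access { GAS_COLDACCOUNTACCESS } else { GAS_WARMACCESS };
    if transfers_value {
        c_extra += GAS_CALLVALUE;
        if callee_is_dead {
            c_extra += GAS_NEWACCOUNT;
        }
    }
    // C_gascap: if the caller can afford C_extra, cap the requested gas at
    // all-but-one-64th of what remains; otherwise the request passes through.
    let c_gascap = if leftover_gas >= c_extra {
        gas.min(all_but_one_64th(leftover_gas - c_extra))
    } else {
        gas
    };
    let c_callgas = c_gascap + if transfers_value { GAS_CALLSTIPEND } else { 0 };
    (c_extra + c_gascap, c_callgas)
}
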

View File

@ -1,291 +0,0 @@
// The CREATE syscall. Address will be
// address = KEC(RLP(sender, nonce))[12:]
//
// Pre stack: kexit_info, value, code_offset, code_len
// Post stack: address
global sys_create:
%check_static
%stack (kexit_info, value, code_offset, code_len) -> (code_len, code_offset, kexit_info, value, code_offset, code_len)
%checked_mem_expansion
// stack: kexit_info, value, code_offset, code_len
%charge_gas_const(@GAS_CREATE)
// stack: kexit_info, value, code_offset, code_len
DUP4
// stack: code_len, kexit_info, value, code_offset, code_len
%check_initcode_size
%stack (kexit_info, value, code_offset, code_len)
-> (sys_create_got_address, value, code_offset, code_len, kexit_info)
%address
// stack: sender, sys_create_got_address, value, code_offset, code_len, kexit_info
DUP1 %nonce
// stack: nonce, sender, sys_create_got_address, value, code_offset, code_len, kexit_info
SWAP1
// stack: sender, nonce, sys_create_got_address, value, code_offset, code_len, kexit_info
%jump(get_create_address)
sys_create_got_address:
// stack: address, value, code_offset, code_len, kexit_info
%jump(create_common)
// The CREATE2 syscall; see EIP-1014. Address will be
// address = KEC(0xff || sender || salt || code_hash)[12:]
//
// Pre stack: kexit_info, value, code_offset, code_len, salt
// Post stack: address
global sys_create2:
%check_static
// stack: kexit_info, value, code_offset, code_len, salt
%stack (kexit_info, value, code_offset, code_len) -> (code_len, code_offset, kexit_info, value, code_offset, code_len)
%checked_mem_expansion
// stack: kexit_info, value, code_offset, code_len, salt
DUP4 %num_bytes_to_num_words
%mul_const(@GAS_KECCAK256WORD) %add_const(@GAS_CREATE) %charge_gas
// stack: kexit_info, value, code_offset, code_len, salt
DUP4
// stack: code_len, kexit_info, value, code_offset, code_len, salt
%check_initcode_size
SWAP4
%stack (salt) -> (salt, create_common)
// stack: salt, create_common, value, code_offset, code_len, kexit_info
// Hash the code.
DUP5 // code_len
DUP5 // code_offset
PUSH @SEGMENT_MAIN_MEMORY
GET_CONTEXT
%build_address
KECCAK_GENERAL
// stack: hash, salt, create_common, value, code_offset, code_len, kexit_info
%address
// stack: sender, hash, salt, create_common, value, code_offset, code_len, kexit_info
%jump(get_create2_address)
// Pre stack: address, value, code_offset, code_len, kexit_info
// Post stack: address
global create_common:
// stack: address, value, code_offset, code_len, kexit_info
DUP1 %insert_accessed_addresses_no_return
// Check call depth
%call_depth
%gt_const(@CALL_STACK_LIMIT)
%jumpi(create_too_deep)
// stack: address, value, code_offset, code_len, kexit_info
DUP2 %selfbalance LT %jumpi(create_insufficient_balance)
// Increment the sender's nonce.
%address
DUP1 %nonce %eq_const(@MAX_NONCE) %jumpi(nonce_overflow) // EIP-2681
%increment_nonce
// stack: address, value, code_offset, code_len, kexit_info
%checkpoint
// stack: address, value, code_offset, code_len, kexit_info
DUP2 DUP2 %address %transfer_eth %jumpi(panic) // We checked the balance above, so this should never happen.
DUP2 DUP2 %address %journal_add_balance_transfer // Add journal entry for the balance transfer.
%create_context
// stack: new_ctx, address, value, code_offset, code_len, kexit_info
GET_CONTEXT
// stack: src_ctx, new_ctx, address, value, code_offset, code_len, kexit_info
%stack (src_ctx, new_ctx, address, value, code_offset, code_len) ->
(code_len, new_ctx, src_ctx, new_ctx, address, value, code_offset, code_len)
%set_new_ctx_code_size POP
// Copy the code from memory to the new context's code segment.
%stack (src_ctx, new_ctx, address, value, code_offset, code_len)
-> (src_ctx, @SEGMENT_MAIN_MEMORY, code_offset, // SRC
new_ctx, // DST (SEGMENT_CODE == virt == 0)
code_len,
run_constructor,
new_ctx, value, address)
%build_address
// stack: SRC, DST, code_len, run_constructor, new_ctx, value, address
SWAP1
// stack: DST, SRC, code_len, run_constructor, new_ctx, value, address
%jump(memcpy_bytes)
run_constructor:
// stack: new_ctx, value, address, kexit_info
SWAP1 %set_new_ctx_value
// stack: new_ctx, address, kexit_info
// Each line in the block below does not change the stack.
DUP2 %set_new_ctx_addr
%address %set_new_ctx_caller
%set_new_ctx_parent_pc(after_constructor)
// stack: new_ctx, address, kexit_info
// All but 1/64 of the sender's remaining gas goes to the constructor.
SWAP2
// stack: kexit_info, address, new_ctx
%drain_all_but_one_64th_gas
%stack (kexit_info, drained_gas, address, new_ctx) -> (drained_gas, new_ctx, address, kexit_info)
%set_new_ctx_gas_limit
// stack: new_ctx, address, kexit_info
// Create the new contract account in the state trie.
DUP2
%create_contract_account
// stack: status, new_ctx, address, kexit_info
%jumpi(create_collision)
%enter_new_ctx
// (Old context) stack: new_ctx, address, kexit_info
after_constructor:
// stack: success, leftover_gas, new_ctx, address, kexit_info
DUP1 ISZERO %jumpi(after_constructor_failed)
// stack: success, leftover_gas, new_ctx, address, kexit_info
SWAP2
// stack: new_ctx, leftover_gas, success, address, kexit_info
POP
// EIP-3541: Reject new contract code starting with the 0xEF byte
PUSH @SEGMENT_RETURNDATA
GET_CONTEXT
%build_address_no_offset
MLOAD_GENERAL
%eq_const(0xEF) %jumpi(create_first_byte_ef)
// Charge gas for the code size.
// stack: leftover_gas, success, address, kexit_info
%returndatasize // Size of the code.
// stack: code_size, leftover_gas, success, address, kexit_info
DUP1 %gt_const(@MAX_CODE_SIZE) %jumpi(create_code_too_large)
// stack: code_size, leftover_gas, success, address, kexit_info
%mul_const(@GAS_CODEDEPOSIT)
// stack: code_size_cost, leftover_gas, success, address, kexit_info
DUP2 DUP2 GT %jumpi(create_oog)
SWAP1 SUB
// stack: leftover_gas, success, address, kexit_info
%pop_checkpoint
// Store the code hash of the new contract.
%returndatasize
PUSH @SEGMENT_RETURNDATA GET_CONTEXT %build_address_no_offset
// stack: addr, len
KECCAK_GENERAL
// stack: codehash, leftover_gas, success, address, kexit_info
%observe_new_contract
DUP4
// stack: address, codehash, leftover_gas, success, address, kexit_info
%set_codehash
// Set the return data size to 0.
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
after_constructor_contd:
// stack: leftover_gas, success, address, kexit_info
%shl_const(192)
// stack: leftover_gas << 192, success, address, kexit_info
SWAP2
// stack: address, success, leftover_gas << 192, kexit_info
MUL
// stack: address_if_success, leftover_gas << 192, kexit_info
SWAP2
// stack: kexit_info, leftover_gas << 192, address_if_success
SUB
// stack: kexit_info, address_if_success
EXIT_KERNEL
after_constructor_failed:
%revert_checkpoint
%stack (success, leftover_gas, new_ctx, address, kexit_info) -> (leftover_gas, success, address, kexit_info)
%jump(after_constructor_contd)
create_insufficient_balance:
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%stack (address, value, code_offset, code_len, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
nonce_overflow:
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%stack (sender, address, value, code_offset, code_len, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
create_collision:
%revert_checkpoint
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%stack (new_ctx, address, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
create_first_byte_ef:
%revert_checkpoint
%stack (leftover_gas, success, address, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
create_code_too_large:
%revert_checkpoint
%stack (code_size, leftover_gas, success, address, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
create_oog:
%revert_checkpoint
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%stack (code_size_cost, leftover_gas, success, address, kexit_info) -> (kexit_info, 0)
EXIT_KERNEL
create_too_deep:
%mstore_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%stack (address, value, code_offset, code_len, kexit_info) -> (kexit_info, 0)
// stack: kexit_info, 0
EXIT_KERNEL
%macro set_codehash
%stack (addr, codehash) -> (addr, codehash, %%after)
%jump(set_codehash)
%%after:
// stack: (empty)
%endmacro
// Pre stack: addr, codehash, retdest
// Post stack: (empty)
global set_codehash:
// stack: addr, codehash, retdest
DUP1 %insert_touched_addresses
DUP1 %mpt_read_state_trie
// stack: account_ptr, addr, codehash, retdest
%add_const(3)
// stack: codehash_ptr, addr, codehash, retdest
DUP1 %mload_trie_data
// stack: prev_codehash, codehash_ptr, addr, codehash, retdest
DUP3 %journal_add_code_change // Add the code change to the journal.
%stack (codehash_ptr, addr, codehash) -> (codehash_ptr, codehash)
%mstore_trie_data
// stack: retdest
JUMP
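
A toy Rust model of what `set_codehash` does, assuming the four-cell account layout [nonce, balance, storage_root_ptr, codehash] used elsewhere in this kernel (hence `%add_const(3)` above); u64 cells stand in for 256-bit words:

const CODEHASH_OFFSET: usize = 3;

fn set_codehash(trie_data: &mut [u64], account_ptr: usize, codehash: u64) -> u64 {
    let prev = trie_data[account_ptr + CODEHASH_OFFSET]; // journaled before overwrite
    trie_data[account_ptr + CODEHASH_OFFSET] = codehash;
    prev
}
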
// Check and charge gas cost for initcode size. See EIP-3860.
// Pre stack: code_size, kexit_info
// Post stack: kexit_info
%macro check_initcode_size
DUP1 %gt_const(@MAX_INITCODE_SIZE) %jumpi(fault_exception)
// stack: code_size, kexit_info
%num_bytes_to_num_words %mul_const(@INITCODE_WORD_COST)
%charge_gas
%endmacro
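
The same rule in sketch form, with the mainnet EIP-3860 constants:

// EIP-3860: initcode is capped at 49152 bytes and costs 2 gas per word.
const MAX_INITCODE_SIZE: u64 = 49_152;
const INITCODE_WORD_COST: u64 = 2;

fn initcode_gas(code_size: u64) -> Result<u64, &'static str> {
    if code_size > MAX_INITCODE_SIZE {
        return Err("fault_exception");
    }
    Ok(((code_size + 31) / 32) * INITCODE_WORD_COST) // num_bytes_to_num_words
}
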
// This should be called whenever a new contract is created.
// It does nothing, but just provides a single hook where code can react to newly created contracts.
// When called, the code corresponding to `codehash` should be stored in the return data.
// Pre stack: codehash, retdest
// Post stack: codehash
global observe_new_contract:
// stack: codehash, retdest
SWAP1 JUMP
%macro observe_new_contract
%stack (codehash) -> (codehash, %%after)
%jump(observe_new_contract)
%%after:
// stack: codehash
%endmacro


@ -1,76 +0,0 @@
// Computes the address of a contract based on the conventional scheme, i.e.
// address = KEC(RLP(sender, nonce))[12:]
//
// Pre stack: sender, nonce, retdest
// Post stack: address
global get_create_address:
// stack: sender, nonce, retdest
%alloc_rlp_block
// stack: rlp_start, sender, nonce, retdest
%stack (rlp_start, sender, nonce) -> (rlp_start, sender, nonce, rlp_start)
// stack: rlp_start, sender, nonce, rlp_start, retdest
%encode_rlp_160 // TODO: or encode_rlp_scalar?
// stack: rlp_pos, nonce, rlp_start, retdest
%encode_rlp_scalar
// stack: rlp_pos, rlp_start, retdest
%prepend_rlp_list_prefix
// stack: RLP_ADDR, rlp_len, retdest
KECCAK_GENERAL
// stack: hash, retdest
%u256_to_addr
// stack: address, retdest
%observe_new_address
SWAP1
JUMP
// Convenience macro to call get_create_address and return where we left off.
%macro get_create_address
%stack (sender, nonce) -> (sender, nonce, %%after)
%jump(get_create_address)
%%after:
%endmacro
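
A self-contained Rust sketch of the address rule, assuming the `tiny-keccak` crate for hashing; the inline RLP handles only this particular two-item list (a 20-byte string followed by a scalar):

use tiny_keccak::{Hasher, Keccak};

fn rlp_scalar(n: u64) -> Vec<u8> {
    if n == 0 {
        vec![0x80] // empty string encodes a zero scalar
    } else if n < 0x80 {
        vec![n as u8] // single byte encodes itself
    } else {
        let bytes: Vec<u8> = n.to_be_bytes().iter().copied().skip_while(|&b| b == 0).collect();
        let mut out = vec![0x80 + bytes.len() as u8];
        out.extend(bytes);
        out
    }
}

/// address = KEC(RLP([sender, nonce]))[12:]
fn create_address(sender: [u8; 20], nonce: u64) -> [u8; 20] {
    let mut payload = vec![0x80 + 20u8]; // 20-byte string prefix (0x94)
    payload.extend_from_slice(&sender);
    payload.extend(rlp_scalar(nonce));
    let mut rlp = vec![0xc0 + payload.len() as u8]; // short-list prefix
    rlp.extend(payload);
    let mut hash = [0u8; 32];
    let mut k = Keccak::v256();
    k.update(&rlp);
    k.finalize(&mut hash);
    let mut addr = [0u8; 20];
    addr.copy_from_slice(&hash[12..]); // keep the low 20 bytes
    addr
}
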
// Computes the address for a contract based on the CREATE2 rule, i.e.
// address = KEC(0xff || sender || salt || code_hash)[12:]
// Clobbers @SEGMENT_KERNEL_GENERAL.
// Pre stack: sender, code_hash, salt, retdest
// Post stack: address
global get_create2_address:
// stack: sender, code_hash, salt, retdest
PUSH @SEGMENT_KERNEL_GENERAL
DUP1
PUSH 0xff
MSTORE_GENERAL
// stack: addr, sender, code_hash, salt, retdest
%increment
%stack (addr, sender, code_hash, salt, retdest) -> (addr, sender, salt, code_hash, retdest)
MSTORE_32BYTES_20
// stack: addr, salt, code_hash, retdest
MSTORE_32BYTES_32
// stack: addr, code_hash, retdest
MSTORE_32BYTES_32
POP
%stack (retdest) -> (@SEGMENT_KERNEL_GENERAL, 85, retdest) // offset == context == 0
// stack: addr, len, retdest
KECCAK_GENERAL
// stack: hash, retdest
%u256_to_addr
// stack: address, retdest
%observe_new_address
SWAP1
JUMP
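
The CREATE2 companion sketch (same `tiny-keccak` assumption as the previous sketch); note that the 85 used above is exactly 1 + 20 + 32 + 32 bytes:

use tiny_keccak::{Hasher, Keccak};

/// address = KEC(0xff || sender || salt || code_hash)[12:]
fn create2_address(sender: [u8; 20], salt: [u8; 32], code_hash: [u8; 32]) -> [u8; 20] {
    let mut buf = Vec::with_capacity(85); // 1 + 20 + 32 + 32
    buf.push(0xff);
    buf.extend_from_slice(&sender);
    buf.extend_from_slice(&salt);
    buf.extend_from_slice(&code_hash);
    let mut hash = [0u8; 32];
    let mut k = Keccak::v256();
    k.update(&buf);
    k.finalize(&mut hash);
    let mut addr = [0u8; 20];
    addr.copy_from_slice(&hash[12..]);
    addr
}
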
// This should be called whenever a new address is created. This is only for debugging. It does
// nothing, but just provides a single hook where code can react to newly created addresses.
global observe_new_address:
// stack: address, retdest
SWAP1
// stack: retdest, address
JUMP
// Convenience macro to call observe_new_address and return where we left off.
%macro observe_new_address
%stack (address) -> (address, %%after)
%jump(observe_new_address)
%%after:
%endmacro


@ -1,62 +0,0 @@
// Create a smart contract account with the given address and the given endowment value.
// Pre stack: address
// Post stack: status
%macro create_contract_account
// stack: address
DUP1 %insert_touched_addresses
DUP1 %mpt_read_state_trie
// stack: existing_account_ptr, address
// If the account doesn't exist, there's no need to check its balance or nonce,
// so we can skip ahead, setting existing_balance = existing_account_ptr = 0.
DUP1 ISZERO %jumpi(%%add_account)
// Check that the nonce is 0.
// stack: existing_account_ptr, address
DUP1 %mload_trie_data // nonce = account[0]
// stack: nonce, existing_account_ptr, address
%jumpi(%%error_collision)
// stack: existing_account_ptr, address
// Check that the code is empty.
%add_const(3)
// stack: existing_codehash_ptr, address
DUP1 %mload_trie_data // codehash = account[3]
%eq_const(@EMPTY_STRING_HASH) ISZERO %jumpi(%%error_collision)
// stack: existing_codehash_ptr, address
%sub_const(2) %mload_trie_data // balance = account[1]
%jump(%%do_insert)
%%add_account:
// stack: existing_balance, address
DUP2 %journal_add_account_created
%%do_insert:
// stack: new_acct_value, address
// Write the new account's data to MPT data, and get a pointer to it.
%get_trie_data_size
// stack: account_ptr, new_acct_value, address
PUSH 0 DUP4 %journal_add_nonce_change
PUSH 1 %append_to_trie_data // nonce = 1
// stack: account_ptr, new_acct_value, address
SWAP1 %append_to_trie_data // balance = new_acct_value
// stack: account_ptr, address
PUSH 0 %append_to_trie_data // storage_root = nil
// stack: account_ptr, address
PUSH @EMPTY_STRING_HASH %append_to_trie_data // code_hash = keccak('')
// stack: account_ptr, address
SWAP1
// stack: address, account_ptr
%addr_to_state_key
// stack: state_key, account_ptr
%mpt_insert_state_trie
// stack: (empty)
PUSH 0 // success
%jump(%%end)
// If the nonce is nonzero or the code is non-empty, that means a contract has already been deployed to this address.
// (This should be impossible with contract creation transactions or CREATE, but possible with CREATE2.)
// So we return 1 to indicate an error.
%%error_collision:
%stack (existing_account_ptr, address) -> (1)
%%end:
// stack: status
%endmacro
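
A sketch of the collision rule this macro applies (EIP-684 semantics): creation fails if an existing account has a nonzero nonce or non-empty code. Note the macro also initializes the new account with nonce 1. EMPTY_STRING_HASH is keccak("") as in the kernel constants:

const EMPTY_STRING_HASH: [u8; 32] = [
    0xc5, 0xd2, 0x46, 0x01, 0x86, 0xf7, 0x23, 0x3c, 0x92, 0x7e, 0x7d, 0xb2,
    0xdc, 0xc7, 0x03, 0xc0, 0xe5, 0x00, 0xb6, 0x53, 0xca, 0x82, 0x27, 0x3b,
    0x7b, 0xfa, 0xd8, 0x04, 0x5d, 0x85, 0xa4, 0x70,
];

struct Account {
    nonce: u64,
    code_hash: [u8; 32],
}

fn creation_collides(existing: Option<&Account>) -> bool {
    match existing {
        None => false, // no account at this address: free to create
        Some(a) => a.nonce != 0 || a.code_hash != EMPTY_STRING_HASH,
    }
}
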


@ -1,249 +0,0 @@
// Pre stack: status, leftover_gas, prev_cum_gas, txn_nb, num_nibbles, retdest
// Post stack: new_cum_gas, txn_nb
// A receipt is stored in MPT_TRIE_DATA as:
// [payload_len, status, cum_gas_used, bloom, logs_payload_len, num_logs, [logs]]
//
// In this function, we:
// - compute cum_gas,
// - check if the transaction failed, and if so set the number of logs to 0,
// - compute the bloom filter,
// - write the receipt in MPT_TRIE_DATA,
// - insert a new node in receipt_trie,
// - set the bloom filter back to 0.
global process_receipt:
// stack: status, leftover_gas, prev_cum_gas, txn_nb, num_nibbles, retdest
DUP2 DUP4
// stack: prev_cum_gas, leftover_gas, status, leftover_gas, prev_cum_gas, txn_nb, num_nibbles, retdest
%compute_cumulative_gas
// stack: new_cum_gas, status, leftover_gas, prev_cum_gas, txn_nb, num_nibbles, retdest
SWAP3 POP
// stack: status, leftover_gas, new_cum_gas, txn_nb, num_nibbles, retdest
SWAP1 POP
// stack: status, new_cum_gas, txn_nb, num_nibbles, retdest
// Now, we need to check whether the transaction has failed.
DUP1 ISZERO %jumpi(failed_receipt)
process_receipt_after_status:
// stack: status, new_cum_gas, txn_nb, num_nibbles, retdest
PUSH process_receipt_after_bloom
%jump(logs_bloom)
process_receipt_after_bloom:
// stack: status, new_cum_gas, txn_nb, num_nibbles, retdest
DUP2 DUP4
// stack: txn_nb, new_cum_gas, status, new_cum_gas, txn_nb, num_nibbles, retdest
SWAP2
// stack: status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Compute the total RLP payload length of the receipt.
PUSH 1 // status is always 1 byte.
// stack: payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP3
%rlp_scalar_len // cum_gas is a simple scalar.
ADD
// stack: payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Next is the bloom_filter, which is a 256-byte array. Its RLP encoding is
// 1 + 2 + 256 bytes.
%add_const(259)
// stack: payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Last is the logs.
%mload_global_metadata(@GLOBAL_METADATA_LOGS_PAYLOAD_LEN)
%rlp_list_len
ADD
// stack: payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Now we can write the receipt in MPT_TRIE_DATA.
%get_trie_data_size
// stack: receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write transaction type if necessary. RLP_RAW contains, at index 0, the current transaction type.
PUSH @SEGMENT_RLP_RAW // ctx == virt == 0
MLOAD_GENERAL
// stack: first_txn_byte, receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1 %eq_const(1) %jumpi(receipt_nonzero_type)
DUP1 %eq_const(2) %jumpi(receipt_nonzero_type)
// If we are here, we are dealing with a legacy transaction, and we do not need to write the type.
POP
process_receipt_after_type:
// stack: receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write payload_len.
SWAP1
%append_to_trie_data
// stack: receipt_ptr, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write status.
SWAP1
%append_to_trie_data
// stack: receipt_ptr, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write cum_gas_used.
SWAP1
%append_to_trie_data
// stack: receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write Bloom filter.
PUSH 256 // Bloom length.
PUSH @SEGMENT_TXN_BLOOM // ctx == virt == 0
// stack: bloom_addr, 256, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%get_trie_data_size
PUSH @SEGMENT_TRIE_DATA ADD // MPT dest address.
// stack: DST, SRC, 256, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%memcpy_bytes
// stack: receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Update trie data size.
%get_trie_data_size
%add_const(256)
%set_trie_data_size
// Now we write logs.
// stack: receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// We start with the logs payload length.
%mload_global_metadata(@GLOBAL_METADATA_LOGS_PAYLOAD_LEN)
%append_to_trie_data
// stack: receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%mload_global_metadata(@GLOBAL_METADATA_LOGS_LEN)
// Then the number of logs.
// stack: num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1 %append_to_trie_data
PUSH 0
// Each log is written in MPT_TRIE_DATA as:
// [payload_len, address, num_topics, [topics], data_len, [data]].
process_receipt_logs_loop:
// stack: i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP2 DUP2
EQ
// stack: i == num_logs, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%jumpi(process_receipt_after_write)
// stack: i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1
%mload_kernel(@SEGMENT_LOGS)
// stack: log_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write payload_len.
PUSH @SEGMENT_LOGS_DATA %build_kernel_address
DUP1
MLOAD_GENERAL
%append_to_trie_data
// stack: log_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write address.
%increment
// stack: addr_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1
MLOAD_GENERAL
%append_to_trie_data
// stack: addr_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write num_topics.
%increment
// stack: num_topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1
MLOAD_GENERAL
// stack: num_topics, num_topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1
%append_to_trie_data
// stack: num_topics, num_topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
SWAP1 %increment SWAP1
// stack: num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
PUSH 0
process_receipt_topics_loop:
// stack: j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP2 DUP2
EQ
// stack: j == num_topics, j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%jumpi(process_receipt_topics_end)
// stack: j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write j-th topic.
DUP3 DUP2
ADD
// stack: cur_topic_ptr, j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
MLOAD_GENERAL
%append_to_trie_data
// stack: j, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%increment
%jump(process_receipt_topics_loop)
process_receipt_topics_end:
// stack: num_topics, num_topics, topics_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
POP
ADD
// stack: data_len_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write data_len
DUP1
MLOAD_GENERAL
// stack: data_len, data_len_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP1
%append_to_trie_data
// stack: data_len, data_len_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
SWAP1 %increment SWAP1
// stack: data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
PUSH 0
process_receipt_data_loop:
// stack: j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
DUP2 DUP2
EQ
// stack: j == data_len, j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%jumpi(process_receipt_data_end)
// stack: j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
// Write j-th data byte.
DUP3 DUP2
ADD
// stack: cur_data_ptr, j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
MLOAD_GENERAL
%append_to_trie_data
// stack: j, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%increment
%jump(process_receipt_data_loop)
process_receipt_data_end:
// stack: data_len, data_len, data_ptr, i, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%pop3
%increment
%jump(process_receipt_logs_loop)
process_receipt_after_write:
// stack: num_logs, num_logs, receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%pop2
// stack: receipt_ptr, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
SWAP1
// stack: txn_nb, receipt_ptr, new_cum_gas, txn_nb, num_nibbles, retdest
DUP5
%mpt_insert_receipt_trie
// stack: new_cum_gas, txn_nb, num_nibbles, retdest
// We don't need to reset the bloom filter segment as we only process a single transaction.
// TODO: Revert in case we add back support for multi-txn proofs.
%stack (new_cum_gas, txn_nb, num_nibbles, retdest) -> (retdest, new_cum_gas)
JUMP
receipt_nonzero_type:
// stack: txn_type, receipt_ptr, payload_len, status, new_cum_gas, txn_nb, new_cum_gas, txn_nb, num_nibbles, retdest
%append_to_trie_data
%jump(process_receipt_after_type)
failed_receipt:
// stack: status, new_cum_gas, txn_nb, num_nibbles, retdest
// It is the receipt of a failed transaction, so set num_logs to 0. This will also lead to Bloom filter = 0.
PUSH 0
%mstore_global_metadata(@GLOBAL_METADATA_LOGS_LEN)
PUSH 0 %mstore_global_metadata(@GLOBAL_METADATA_LOGS_PAYLOAD_LEN)
// stack: status, new_cum_gas, txn_nb, num_nibbles, retdest
%jump(process_receipt_after_status)
%macro process_receipt
// stack: success, leftover_gas, cur_cum_gas, txn_nb, num_nibbles
%stack (success, leftover_gas, cur_cum_gas, txn_nb, num_nibbles) -> (success, leftover_gas, cur_cum_gas, txn_nb, num_nibbles, %%after)
%jump(process_receipt)
%%after:
%endmacro
%macro compute_cumulative_gas
// stack: cur_cum_gas, leftover_gas
DUP2
// stack: leftover_gas, prev_cum_gas, leftover_gas
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
// stack: gas_limit, leftover_gas, prev_cum_gas, leftover_gas
DUP2 DUP2 LT %jumpi(panic)
// stack: gas_limit, leftover_gas, prev_cum_gas, leftover_gas
SUB
// stack: used_txn_gas, prev_cum_gas, leftover_gas
ADD SWAP1 POP
// stack: new_cum_gas
%endmacro
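
In sketch form (the panic mirrors the LT check above):

// The receipt's cumulative gas is the previous cumulative gas plus what
// this transaction actually used.
fn compute_cumulative_gas(prev_cum_gas: u64, gas_limit: u64, leftover_gas: u64) -> u64 {
    assert!(leftover_gas <= gas_limit, "panic: leftover gas exceeds the limit");
    prev_cum_gas + (gas_limit - leftover_gas)
}
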


@ -1,436 +0,0 @@
// These exception codes are arbitrary and assigned by us.
// Note that exceptions can only be triggered in user mode. Triggering an exception
// in kernel mode will fail the constraints.
global exception_jumptable:
// exception 0: out of gas
JUMPTABLE exc_out_of_gas
// exception 1: invalid opcode
JUMPTABLE exc_invalid_opcode
// exception 2: stack underflow
JUMPTABLE exc_stack_underflow
// exception 3: invalid jump destination
JUMPTABLE exc_invalid_jump_destination
// exception 4: invalid jumpi destination
JUMPTABLE exc_invalid_jumpi_destination
// exception 5: stack overflow
JUMPTABLE exc_stack_overflow
// exceptions 6 and 7: unused
JUMPTABLE panic
JUMPTABLE panic
global exc_out_of_gas:
// stack: trap_info
%ctx_gas_limit
// stack: gas_limit, trap_info
DUP2 %shr_const(192)
// stack: gas_used, gas_limit, trap_info
DUP2 DUP2
// stack: gas_used, gas_limit, gas_used, gas_limit, trap_info
// If gas_used is already over the limit, panic. The exception should have
// been raised earlier.
GT %jumpi(panic)
// stack: gas_used, gas_limit, trap_info
DUP3 %opcode_from_exp_trap_info
// stack: opcode, gas_used, gas_limit, trap_info
%add_const(gas_cost_for_opcode)
%mload_kernel_code
// stack: gas_cost, gas_used, gas_limit, trap_info
ADD
// stack: new_gas_used, gas_limit, trap_info
GT
// stack: is_oog, trap_info
SWAP1 POP
// stack: is_oog
%jumpi(fault_exception)
// If we didn't jump, we shouldn't have raised the exception.
PANIC
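
The re-check in sketch form: the trap is legitimate only if the faulting opcode's static cost pushes gas_used past the limit.

fn is_out_of_gas(gas_used: u64, gas_limit: u64, opcode_static_cost: u64) -> bool {
    // If gas was already over the limit, the exception came too late.
    assert!(gas_used <= gas_limit, "panic: exception raised too late");
    gas_used + opcode_static_cost > gas_limit
}
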
global exc_invalid_opcode:
// stack: trap_info
// check if the opcode that triggered this trap is _actually_ invalid
%opcode_from_exp_trap_info
PUSH @INVALID_OPCODES_USER
// stack: invalid_opcodes_user, opcode
SWAP1
// stack: opcode, invalid_opcodes_user
SHR
%mod_const(2)
// stack: opcode_is_invalid
// if the opcode is indeed invalid, then perform an exceptional exit
%jumpi(fault_exception)
// otherwise, panic because this trap should not have been entered
PANIC
global exc_stack_underflow:
// stack: trap_info
%opcode_from_exp_trap_info
// stack: opcode
%add_const(min_stack_len_for_opcode)
%mload_kernel_code
// stack: min_stack_length
%stack_length
// stack: user_stack_length + 1, min_stack_length
GT
// stack: user_stack_length >= min_stack_length
%jumpi(panic)
%jump(fault_exception)
// Debugging note: this will underflow if entered without at least one item on the stack (in
// addition to trap_info). This is expected; it means that the exc_stack_underflow handler should
// have been used instead.
global exc_invalid_jump_destination:
// stack: trap_info, jump_dest
// check that the triggering opcode is indeed JUMP
%opcode_from_exp_trap_info
// stack: opcode, jump_dest
%eq_const(0x56)
// if it's JUMP, then verify that we're actually jumping to an invalid address
%jumpi(invalid_jump_jumpi_destination_common)
// otherwise, panic
PANIC
// Debugging note: this will underflow if entered without at least two items on the stack (in
// addition to trap_info). This is expected; it means that the exc_stack_underflow handler should
// have been used instead.
global exc_invalid_jumpi_destination:
// stack: trap_info, jump_dest, condition
// check that the triggering opcode is indeed JUMPI
%opcode_from_exp_trap_info
// stack: opcode, jump_dest, condition
%sub_const(0x57)
// if it's not JUMPI, then panic
%jumpi(panic)
// otherwise, verify that the condition is nonzero
// stack: jump_dest, condition
SWAP1
// if it's nonzero, then verify that we're actually jumping to an invalid address
%jumpi(invalid_jump_jumpi_destination_common)
// otherwise, panic
PANIC
global invalid_jump_jumpi_destination_common:
// We have a jump destination on the stack. We want to `PANIC` if it is valid, and jump to
// `fault_exception` if it is not. An address is a valid jump destination if it points to a
// `JUMPDEST` instruction. In practice, since in this implementation memory addresses are
// limited to 32 bits, we check two things:
// 1. the address is no more than 32 bits long, and
// 2. it points to a `JUMPDEST` instruction.
// stack: jump_dest
DUP1
%shr_const(32)
%jumpi(fault_exception) // This keeps one copy of jump_dest on the stack, but that's fine.
// jump_dest is a valid address; check if it points to a `JUMPDEST`.
%mload_current(@SEGMENT_JUMPDEST_BITS)
// stack: is_valid_jumpdest
%jumpi(panic) // Trap should never have been entered.
%jump(fault_exception)
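
The validity test in sketch form (the 32-bit bound reflects this implementation's address-size limit):

// A destination is valid iff it fits in 32 bits and its JUMPDEST bit
// (set during jumpdest analysis) is 1.
fn is_valid_jump_dest(jump_dest: u64, jumpdest_bits: &[bool]) -> bool {
    jump_dest >> 32 == 0 && jumpdest_bits.get(jump_dest as usize).copied().unwrap_or(false)
}
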
global exc_stack_overflow:
// stack: trap_info
// check that the triggering opcode _can_ overflow (i.e., it increases the stack size by 1)
%opcode_from_exp_trap_info
PUSH @STACK_LENGTH_INCREASING_OPCODES_USER
// stack: stack_length_increasing_opcodes_user, opcode
SWAP1
// stack: opcode, stack_length_increasing_opcodes_user
SHR
%mod_const(2)
// stack: opcode_increases_stack_length
// if the opcode indeed increases the stack length, then check whether the stack size is at its
// maximum value
%jumpi(exc_stack_overflow_check_stack_length)
// otherwise, panic because this trap should not have been entered
PANIC
global exc_stack_overflow_check_stack_length:
// stack: (empty)
%stack_length
%eq_const(1024)
// if true, stack length is at its maximum allowed value, so the instruction would indeed cause
// an overflow.
%jumpi(fault_exception)
PANIC
// Given the exception trap info, load the opcode that caused the exception
%macro opcode_from_exp_trap_info
%mod_const(0x100000000) // get program counter from low 32 bits of trap_info
%mload_current_code
%endmacro
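
In sketch form, with the code bytes passed in explicitly (a u64 stands in for the kernel's 256-bit trap_info word):

fn opcode_from_trap_info(trap_info: u64, code: &[u8]) -> u8 {
    let pc = (trap_info & 0xffff_ffff) as usize; // low 32 bits hold the PC
    code[pc]
}
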
min_stack_len_for_opcode:
BYTES 0 // 0x00, STOP
BYTES 2 // 0x01, ADD
BYTES 2 // 0x02, MUL
BYTES 2 // 0x03, SUB
BYTES 2 // 0x04, DIV
BYTES 2 // 0x05, SDIV
BYTES 2 // 0x06, MOD
BYTES 2 // 0x07, SMOD
BYTES 3 // 0x08, ADDMOD
BYTES 3 // 0x09, MULMOD
BYTES 2 // 0x0a, EXP
BYTES 2 // 0x0b, SIGNEXTEND
%rep 4 // 0x0c-0x0f, invalid
BYTES 0
%endrep
BYTES 2 // 0x10, LT
BYTES 2 // 0x11, GT
BYTES 2 // 0x12, SLT
BYTES 2 // 0x13, SGT
BYTES 2 // 0x14, EQ
BYTES 1 // 0x15, ISZERO
BYTES 2 // 0x16, AND
BYTES 2 // 0x17, OR
BYTES 2 // 0x18, XOR
BYTES 1 // 0x19, NOT
BYTES 2 // 0x1a, BYTE
BYTES 2 // 0x1b, SHL
BYTES 2 // 0x1c, SHR
BYTES 2 // 0x1d, SAR
BYTES 0 // 0x1e, invalid
BYTES 0 // 0x1f, invalid
BYTES 2 // 0x20, KECCAK256
%rep 15 // 0x21-0x2f, invalid
BYTES 0
%endrep
BYTES 0 // 0x30, ADDRESS
BYTES 1 // 0x31, BALANCE
BYTES 0 // 0x32, ORIGIN
BYTES 0 // 0x33, CALLER
BYTES 0 // 0x34, CALLVALUE
BYTES 1 // 0x35, CALLDATALOAD
BYTES 0 // 0x36, CALLDATASIZE
BYTES 3 // 0x37, CALLDATACOPY
BYTES 0 // 0x38, CODESIZE
BYTES 3 // 0x39, CODECOPY
BYTES 0 // 0x3a, GASPRICE
BYTES 1 // 0x3b, EXTCODESIZE
BYTES 4 // 0x3c, EXTCODECOPY
BYTES 0 // 0x3d, RETURNDATASIZE
BYTES 3 // 0x3e, RETURNDATACOPY
BYTES 1 // 0x3f, EXTCODEHASH
BYTES 1 // 0x40, BLOCKHASH
BYTES 0 // 0x41, COINBASE
BYTES 0 // 0x42, TIMESTAMP
BYTES 0 // 0x43, NUMBER
BYTES 0 // 0x44, DIFFICULTY
BYTES 0 // 0x45, GASLIMIT
BYTES 0 // 0x46, CHAINID
BYTES 0 // 0x47, SELFBALANCE
BYTES 0 // 0x48, BASEFEE
%rep 7 // 0x49-0x4f, invalid
BYTES 0
%endrep
BYTES 1 // 0x50, POP
BYTES 1 // 0x51, MLOAD
BYTES 2 // 0x52, MSTORE
BYTES 2 // 0x53, MSTORE8
BYTES 1 // 0x54, SLOAD
BYTES 2 // 0x55, SSTORE
BYTES 1 // 0x56, JUMP
BYTES 2 // 0x57, JUMPI
BYTES 0 // 0x58, PC
BYTES 0 // 0x59, MSIZE
BYTES 0 // 0x5a, GAS
BYTES 0 // 0x5b, JUMPDEST
%rep 3 // 0x5c-0x5e, invalid
BYTES 0
%endrep
%rep 33 // 0x5f-0x7f, PUSH0-PUSH32
BYTES 0
%endrep
BYTES 1 // 0x80, DUP1
BYTES 2 // 0x81, DUP2
BYTES 3 // 0x82, DUP3
BYTES 4 // 0x83, DUP4
BYTES 5 // 0x84, DUP5
BYTES 6 // 0x85, DUP6
BYTES 7 // 0x86, DUP7
BYTES 8 // 0x87, DUP8
BYTES 9 // 0x88, DUP9
BYTES 10 // 0x89, DUP10
BYTES 11 // 0x8a, DUP11
BYTES 12 // 0x8b, DUP12
BYTES 13 // 0x8c, DUP13
BYTES 14 // 0x8d, DUP14
BYTES 15 // 0x8e, DUP15
BYTES 16 // 0x8f, DUP16
BYTES 2 // 0x90, SWAP1
BYTES 3 // 0x91, SWAP2
BYTES 4 // 0x92, SWAP3
BYTES 5 // 0x93, SWAP4
BYTES 6 // 0x94, SWAP5
BYTES 7 // 0x95, SWAP6
BYTES 8 // 0x96, SWAP7
BYTES 9 // 0x97, SWAP8
BYTES 10 // 0x98, SWAP9
BYTES 11 // 0x99, SWAP10
BYTES 12 // 0x9a, SWAP11
BYTES 13 // 0x9b, SWAP12
BYTES 14 // 0x9c, SWAP13
BYTES 15 // 0x9d, SWAP14
BYTES 16 // 0x9e, SWAP15
BYTES 17 // 0x9f, SWAP16
BYTES 2 // 0xa0, LOG0
BYTES 3 // 0xa1, LOG1
BYTES 4 // 0xa2, LOG2
BYTES 5 // 0xa3, LOG3
BYTES 6 // 0xa4, LOG4
%rep 27 // 0xa5-0xbf, invalid
BYTES 0
%endrep
%rep 32 // 0xc0-0xdf, MSTORE_32BYTES
BYTES 4
%endrep
%rep 16 // 0xe0-0xef, invalid
BYTES 0
%endrep
BYTES 3 // 0xf0, CREATE
BYTES 7 // 0xf1, CALL
BYTES 7 // 0xf2, CALLCODE
BYTES 2 // 0xf3, RETURN
BYTES 6 // 0xf4, DELEGATECALL
BYTES 4 // 0xf5, CREATE2
%rep 4 // 0xf6-0xf9, invalid
BYTES 0
%endrep
BYTES 6 // 0xfa, STATICCALL
BYTES 0 // 0xfb, invalid
BYTES 0 // 0xfc, invalid
BYTES 2 // 0xfd, REVERT
BYTES 0 // 0xfe, invalid
BYTES 1 // 0xff, SELFDESTRUCT
// A zero indicates either that the opcode is kernel-only,
// or that it's handled with a syscall.
gas_cost_for_opcode:
BYTES 0 // 0x00, STOP
BYTES @GAS_VERYLOW // 0x01, ADD
BYTES @GAS_LOW // 0x02, MUL
BYTES @GAS_VERYLOW // 0x03, SUB
BYTES @GAS_LOW // 0x04, DIV
BYTES @GAS_LOW // 0x05, SDIV
BYTES @GAS_LOW // 0x06, MOD
BYTES @GAS_LOW // 0x07, SMOD
BYTES @GAS_MID // 0x08, ADDMOD
BYTES @GAS_MID // 0x09, MULMOD
BYTES 0 // 0x0a, EXP
BYTES 0 // 0x0b, SIGNEXTEND
%rep 4 // 0x0c-0x0f, invalid
BYTES 0
%endrep
BYTES @GAS_VERYLOW // 0x10, LT
BYTES @GAS_VERYLOW // 0x11, GT
BYTES @GAS_VERYLOW // 0x12, SLT
BYTES @GAS_VERYLOW // 0x13, SGT
BYTES @GAS_VERYLOW // 0x14, EQ
BYTES @GAS_VERYLOW // 0x15, ISZERO
BYTES @GAS_VERYLOW // 0x16, AND
BYTES @GAS_VERYLOW // 0x17, OR
BYTES @GAS_VERYLOW // 0x18, XOR
BYTES @GAS_VERYLOW // 0x19, NOT
BYTES @GAS_VERYLOW // 0x1a, BYTE
BYTES @GAS_VERYLOW // 0x1b, SHL
BYTES @GAS_VERYLOW // 0x1c, SHR
BYTES @GAS_VERYLOW // 0x1d, SAR
BYTES 0 // 0x1e, invalid
BYTES 0 // 0x1f, invalid
BYTES 0 // 0x20, KECCAK256
%rep 15 // 0x21-0x2f, invalid
BYTES 0
%endrep
%rep 25 // 0x30-0x48, only syscalls
BYTES 0
%endrep
%rep 7 // 0x49-0x4f, invalid
BYTES 0
%endrep
BYTES @GAS_BASE // 0x50, POP
BYTES 0 // 0x51, MLOAD
BYTES 0 // 0x52, MSTORE
BYTES 0 // 0x53, MSTORE8
BYTES 0 // 0x54, SLOAD
BYTES 0 // 0x55, SSTORE
BYTES @GAS_MID // 0x56, JUMP
BYTES @GAS_HIGH // 0x57, JUMPI
BYTES @GAS_BASE // 0x58, PC
BYTES 0 // 0x59, MSIZE
BYTES 0 // 0x5a, GAS
BYTES @GAS_JUMPDEST // 0x5b, JUMPDEST
%rep 3 // 0x5c-0x5e, invalid
BYTES 0
%endrep
BYTES @GAS_BASE // 0x5f, PUSH0
%rep 32 // 0x60-0x7f, PUSH1-PUSH32
BYTES @GAS_VERYLOW
%endrep
%rep 16 // 0x80-0x8f, DUP1-DUP16
BYTES @GAS_VERYLOW
%endrep
%rep 16 // 0x90-0x9f, SWAP1-SWAP16
BYTES @GAS_VERYLOW
%endrep
BYTES 0 // 0xa0, LOG0
BYTES 0 // 0xa1, LOG1
BYTES 0 // 0xa2, LOG2
BYTES 0 // 0xa3, LOG3
BYTES 0 // 0xa4, LOG4
%rep 11 // 0xa5-0xaf, invalid
BYTES 0
%endrep
%rep 64 // 0xb0-0xef, invalid
BYTES 0
%endrep
BYTES 0 // 0xf0, CREATE
BYTES 0 // 0xf1, CALL
BYTES 0 // 0xf2, CALLCODE
BYTES 0 // 0xf3, RETURN
BYTES 0 // 0xf4, DELEGATECALL
BYTES 0 // 0xf5, CREATE2
%rep 4 // 0xf6-0xf9, invalid
BYTES 0
%endrep
BYTES 0 // 0xfa, STATICCALL
BYTES 0 // 0xfb, invalid
BYTES 0 // 0xfc, invalid
BYTES 0 // 0xfd, REVERT
BYTES 0 // 0xfe, invalid
BYTES 0 // 0xff, SELFDESTRUCT

View File

@ -1,129 +0,0 @@
global sys_gas:
// stack: kexit_info
%charge_gas_const(@GAS_BASE)
// stack: kexit_info
DUP1 %shr_const(192)
// stack: gas_used, kexit_info
%ctx_gas_limit
// stack: gas_limit, gas_used, kexit_info
SUB
// stack: gas_remaining, kexit_info
SWAP1
EXIT_KERNEL
%macro ctx_gas_limit
%mload_context_metadata(@CTX_METADATA_GAS_LIMIT)
%endmacro
// TODO: `%refund_gas` and `refund_gas_hook` are hooks used for debugging. They should be removed at some point and `refund_gas_original` renamed to `refund_gas`.
%macro refund_gas
PUSH %%after %jump(refund_gas_hook)
%%after:
%refund_gas_original
%endmacro
global refund_gas_hook:
JUMP
%macro refund_gas_original
// stack: amount
DUP1 %journal_refund
%mload_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER)
ADD
%mstore_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER)
%endmacro
// TODO: `%charge_gas` and `charge_gas_hook` are hooks used for debugging. They should be removed at some point and `charge_gas_original` renamed to `charge_gas`.
%macro charge_gas
PUSH %%after %jump(charge_gas_hook)
%%after:
%charge_gas_original
%endmacro
global charge_gas_hook:
JUMP
// Charge gas. Faults if we exceed the limit for the current context.
%macro charge_gas_original
// stack: gas, kexit_info
%shl_const(192)
ADD
// stack: kexit_info'
%ctx_gas_limit
// stack: gas_limit, kexit_info'
DUP2 %shr_const(192)
// stack: gas_used, gas_limit, kexit_info'
GT
// stack: out_of_gas, kexit_info'
%jumpi(fault_exception)
// stack: kexit_info'
%endmacro
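For reference, a minimal Rust sketch of the accounting that `%charge_gas_original` performs, assuming (as the shifts above indicate) that `kexit_info` carries the accumulated gas in its top 64 bits; the 256-bit kernel word is modeled here as a `u128` purely for brevity, so the gas field is the high 64 bits of that:

// Illustrative model, not kernel code: the gas field occupies the high half.
fn charge_gas(kexit_info: u128, gas: u128, gas_limit: u128) -> Result<u128, ()> {
    let kexit_info = kexit_info + (gas << 64); // %shl_const(192) then ADD
    let gas_used = kexit_info >> 64;           // DUP2 %shr_const(192)
    if gas_used > gas_limit {
        return Err(()); // %jumpi(fault_exception): out of gas in this context
    }
    Ok(kexit_info)
}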
// Charge a constant amount of gas.
%macro charge_gas_const(gas)
// stack: kexit_info
PUSH $gas
// stack: gas, kexit_info
%charge_gas
// stack: kexit_info'
%endmacro
// Charge gas and exit kernel code.
%macro charge_gas_and_exit
// stack: gas, kexit_info
%charge_gas
// stack: kexit_info'
EXIT_KERNEL
%endmacro
global sys_gasprice:
// stack: kexit_info
%charge_gas_const(@GAS_BASE)
// stack: kexit_info
%mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS)
// stack: gas_price, kexit_info
SWAP1
EXIT_KERNEL
// Checks how much gas is remaining in this context, given the current kexit_info.
%macro leftover_gas
// stack: kexit_info
%shr_const(192)
// stack: gas_used
%mload_context_metadata(@CTX_METADATA_GAS_LIMIT)
// stack: gas_limit, gas_used
SWAP1
// stack: gas_used, gas_limit
DUP2 DUP2 LT
// stack: gas_used < gas_limit, gas_used, gas_limit
SWAP2
// stack: gas_limit, gas_used, gas_used < gas_limit
SUB
// stack: gas_limit - gas_used, gas_used < gas_limit
MUL
// stack: leftover_gas = (gas_limit - gas_used) * (gas_used < gas_limit)
%endmacro
// Given the current kexit_info, drains all but one 64th of its remaining gas.
// Returns how much gas was drained.
%macro drain_all_but_one_64th_gas
// stack: kexit_info
DUP1 %leftover_gas
// stack: leftover_gas, kexit_info
%all_but_one_64th
// stack: all_but_one_64th, kexit_info
%stack (all_but_one_64th, kexit_info) -> (all_but_one_64th, kexit_info, all_but_one_64th)
%charge_gas
// stack: kexit_info, drained_gas
%endmacro
// This is L(n), the "all but one 64th" function in the Yellow Paper, i.e.
// L(n) = n - floor(n / 64)
%macro all_but_one_64th
// stack: n
DUP1 %shr_const(6)
// stack: floor(n / 64), n
SWAP1 SUB
// stack: n - floor(n / 64)
%endmacro
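A compact Rust sketch of the two helpers above (illustrative only): `%leftover_gas` computes its result branchlessly by multiplying the difference with the comparison bit, and `%all_but_one_64th` uses a right shift for the floor division.

fn leftover_gas(gas_used: u64, gas_limit: u64) -> u64 {
    // (gas_limit - gas_used) * (gas_used < gas_limit): the wrapped
    // subtraction is zeroed out by the comparison whenever used >= limit.
    gas_limit.wrapping_sub(gas_used) * u64::from(gas_used < gas_limit)
}

fn all_but_one_64th(n: u64) -> u64 {
    n - (n >> 6) // L(n) = n - floor(n / 64)
}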

View File

@ -1,84 +0,0 @@
global intrinsic_gas:
// stack: retdest
// Calculate the number of zero and nonzero bytes in the txn data.
PUSH 0 // zeros = 0
PUSH 0 // i = 0
count_zeros_loop:
// stack: i, zeros, retdest
DUP1
%mload_txn_field(@TXN_FIELD_DATA_LEN)
EQ
// stack: i == data.len, i, zeros, retdest
%jumpi(count_zeros_finish)
// stack: i, zeros, retdest
DUP1
%mload_kernel(@SEGMENT_TXN_DATA)
ISZERO
// stack: data[i] == 0, i, zeros, retdest
%stack (data_i_is_zero, i, zeros) -> (data_i_is_zero, zeros, i)
ADD
// stack: zeros', i, retdest
SWAP1
// stack: i, zeros', retdest
%increment
// stack: i', zeros', retdest
%jump(count_zeros_loop)
count_zeros_finish:
// stack: i, zeros, retdest
POP
// stack: zeros, retdest
DUP1
// stack: zeros, zeros, retdest
%mload_txn_field(@TXN_FIELD_DATA_LEN)
// stack: data.len, zeros, zeros, retdest
SUB
// stack: nonzeros, zeros, retdest
%mul_const(@GAS_TXDATANONZERO)
// stack: gas_nonzeros, zeros, retdest
SWAP1
%mul_const(@GAS_TXDATAZERO)
// stack: gas_zeros, gas_nonzeros, retdest
ADD
// stack: gas_txndata, retdest
%is_contract_creation
DUP1
%mul_const(@GAS_TXCREATE)
// stack: gas_creation, is_creation, gas_txndata, retdest
SWAP1
// stack: is_creation, gas_creation, gas_txndata, retdest
DUP1
// stack: is_creation, is_creation, gas_creation, gas_txndata, retdest
%mload_txn_field(@TXN_FIELD_DATA_LEN) %gt_const(@MAX_INITCODE_SIZE)
// stack: initcode_size > max, is_creation, is_creation, gas_creation, gas_txndata, retdest
MUL // Cheaper than AND
%assert_zero
// stack: is_creation, gas_creation, gas_txndata, retdest
%mload_txn_field(@TXN_FIELD_DATA_LEN) %num_bytes_to_num_words
// stack: initcode_words, is_creation, gas_creation, gas_txndata, retdest
%mul_const(@INITCODE_WORD_COST) MUL ADD
// stack: gas_creation, gas_txndata, retdest
PUSH @GAS_TRANSACTION
// stack: gas_txn, gas_creation, gas_txndata, retdest
ADD
ADD
// stack: total_gas, retdest
%mload_global_metadata(@GLOBAL_METADATA_ACCESS_LIST_DATA_COST)
ADD
SWAP1
JUMP
// Convenience macro to call intrinsic_gas and return where we left off.
%macro intrinsic_gas
// stack: (empty)
PUSH %%after
%jump(intrinsic_gas)
%%after:
// stack: (empty)
%endmacro
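As a cross-check, the routine above amounts to the following Rust sketch, with the usual mainnet constants assumed (4 gas per zero byte, 16 per nonzero byte, 21000 base, 32000 per contract creation, 2 per 32-byte initcode word); `access_list_cost` stands in for the precomputed @GLOBAL_METADATA_ACCESS_LIST_DATA_COST value:

fn intrinsic_gas(data: &[u8], is_creation: bool, access_list_cost: u64) -> u64 {
    let zeros = data.iter().filter(|&&b| b == 0).count() as u64;
    let nonzeros = data.len() as u64 - zeros;
    // %num_bytes_to_num_words; only charged (and size-checked) for creations.
    let initcode_words = (data.len() as u64 + 31) / 32;
    let gas_creation = if is_creation { 32_000 + 2 * initcode_words } else { 0 };
    21_000 + 4 * zeros + 16 * nonzeros + gas_creation + access_list_cost
}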

View File

@ -1,344 +0,0 @@
// Set @SEGMENT_JUMPDEST_BITS to one between positions [init_pos, final_pos],
// for the given context's code.
// Pre stack: init_pos, ctx, final_pos, retdest
// Post stack: (empty)
global verify_path_and_write_jumpdest_table:
SWAP2
DUP2
ADD // final_addr
// stack: final_addr, ctx, i, retdest
SWAP2
ADD // init_addr
loop:
// stack: i, final_pos, retdest
DUP2 DUP2 EQ // i == final_pos
%jumpi(proof_ok)
DUP2 DUP2 GT // i > final_pos
%jumpi(proof_not_ok)
// stack: i, final_pos, retdest
DUP1
MLOAD_GENERAL // SEGMENT_CODE == 0
// stack: opcode, i, final_pos, retdest
DUP1
// Slightly more efficient than `%eq_const(0x5b) ISZERO`
PUSH 0x5b
SUB
// stack: opcode != JUMPDEST, opcode, i, final_pos, retdest
%jumpi(continue)
// stack: JUMPDEST, i, final_pos, retdest
%stack (JUMPDEST, i) -> (@SEGMENT_JUMPDEST_BITS, i, JUMPDEST, i)
ADD // address to write jumpdest bit, i already contains the context
PUSH 1
// stack: 1, addr, JUMPDEST, i
MSTORE_GENERAL
continue:
// stack: opcode, i, final_pos, retdest
%add_const(code_bytes_to_skip)
%mload_kernel_code
// stack: bytes_to_skip, i, final_pos, retdest
ADD
// stack: i, final_pos, retdest
%jump(loop)
proof_ok:
// stack: i, final_pos, retdest
// We already know final_pos is a jumpdest
%stack (i, final_pos) -> (@SEGMENT_JUMPDEST_BITS, final_pos)
ADD // final_pos already contains the context
PUSH 1
MSTORE_GENERAL
JUMP
proof_not_ok:
%pop2
JUMP
// Determines how many bytes away is the next opcode, based on the opcode we read.
// If we read a PUSH<n> opcode, next opcode is in n + 1 bytes, otherwise it's the next one.
//
// Note that the range of PUSH opcodes is [0x60, 0x80). I.e. PUSH1 is 0x60
// and PUSH32 is 0x7f.
code_bytes_to_skip:
%rep 96
BYTES 1 // 0x00-0x5f
%endrep
BYTES 2
BYTES 3
BYTES 4
BYTES 5
BYTES 6
BYTES 7
BYTES 8
BYTES 9
BYTES 10
BYTES 11
BYTES 12
BYTES 13
BYTES 14
BYTES 15
BYTES 16
BYTES 17
BYTES 18
BYTES 19
BYTES 20
BYTES 21
BYTES 22
BYTES 23
BYTES 24
BYTES 25
BYTES 26
BYTES 27
BYTES 28
BYTES 29
BYTES 30
BYTES 31
BYTES 32
BYTES 33
%rep 128
BYTES 1 // 0x80-0xff
%endrep
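Taken together, the loop and the table above implement the following walk (an illustrative Rust rendering; the skip amount for PUSHn is computed arithmetically here rather than table-driven):

fn bytes_to_skip(opcode: u8) -> usize {
    // PUSHn (opcode 0x60 + n - 1) carries n immediate bytes; others carry none.
    if (0x60..0x80).contains(&opcode) { (opcode - 0x5f) as usize + 1 } else { 1 }
}

/// True iff walking opcode boundaries from `init_pos` lands exactly on
/// `final_pos`; the caller has already ensured code[final_pos] == 0x5b.
fn verify_path(code: &[u8], init_pos: usize, final_pos: usize) -> bool {
    let mut i = init_pos;
    while i < final_pos {
        i += bytes_to_skip(code[i]);
    }
    i == final_pos
}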
// A proof attesting that jumpdest is a valid jump destination is
// either 0 or an index 0 < i <= jumpdest - 32.
// A proof is valid if:
// - i == 0 and we can go from the first opcode to jumpdest and code[jumpdest] = 0x5b
// - i > 0 and:
//       a) for j in {i+0,..., i+31} code[j] != PUSHk for all k >= 32 - (j - i),
//       b) we can go from opcode i+32 to jumpdest,
//       c) code[jumpdest] = 0x5b.
// To reduce the number of instructions, when i > 0 we load all the bytes code[i], ...,
// code[i + 31] in a single 32-byte word, and check a) directly on the packed bytes.
// We perform the "packed verification" computing a boolean formula evaluated on the bits of
// code[j],..., code[j+31] of the form p_1 AND p_2 AND p_3 AND p_4 AND p_5, where:
// - p_k is either TRUE, for one subset of the j's which depends on k (for example,
// for k = 1, it is TRUE for the first 15 positions), or has_prefix_k => bit_{k + 1}_is_0
// for the j's not in the subset.
// - has_prefix_k is a predicate that is TRUE if and only if code[j] has the same prefix of size k + 2
// as PUSH{32-(j-i)}.
// Pre stack: proof_prefix_addr, jumpdest, ctx, retdest
// Post stack: (empty)
global write_table_if_jumpdest:
// stack: proof_prefix_addr, jumpdest, ctx, retdest
%stack
(proof_prefix_addr, jumpdest, ctx) ->
(ctx, jumpdest, jumpdest, ctx, proof_prefix_addr)
ADD // combine context and offset to make an address (SEGMENT_CODE == 0)
MLOAD_GENERAL
// stack: opcode, jumpdest, ctx, proof_prefix_addr, retdest
%jump_neq_const(0x5b, return)
//stack: jumpdest, ctx, proof_prefix_addr, retdest
SWAP2 DUP1
// stack: proof_prefix_addr, proof_prefix_addr, ctx, jumpdest
ISZERO
%jumpi(verify_path_and_write_jumpdest_table)
// stack: proof_prefix_addr, ctx, jumpdest, retdest
// If we are here we need to check that each of the next 32 bytes is either
// less than PUSH(32 - i), i.e. opcode < 0x7f - i = 127 - i for 0 <= i < 32,
// or larger than 127.
%stack
(proof_prefix_addr, ctx) ->
(ctx, proof_prefix_addr, 32, proof_prefix_addr, ctx)
ADD // combine context and offset to make an address (SEGMENT_CODE == 0)
MLOAD_32BYTES
// packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP1 %shl_const(1)
DUP2 %shl_const(2)
AND
// stack: (is_1_at_pos_2_and_3|(X))³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// X denotes any value in {0,1} and Z^i is Z repeated i times
NOT
// stack: (is_0_at_2_or_3|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP2
OR
// stack: (is_1_at_1 or is_0_at_2_or_3|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// stack: (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Compute in_range and has_prefix' =
// - in_range = (0xFF|X)³² and ~has_prefix' = ~has_prefix OR is_0_at_4, for the first 15 bytes
// - in_range = (has_prefix => is_0_at_4 |X)³² and ~has_prefix' = ~has_prefix, for the next 15 bytes
// - in_range = (~has_prefix|X)³² and ~has_prefix' = ~has_prefix, for the last byte.
DUP2 %shl_const(3)
NOT
// stack: (is_0_at_4|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00
AND
// stack: (is_0_at_4|X)³¹|0, (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP1
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000000000000000000000000000000000
AND
// stack: (is_0_at_4|X)¹|(0)¹, (is_0_at_4|X)³¹|0, (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP3
OR
// (~has_prefix'|X)³², (is_0_at_4|X)³¹|0, (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
SWAP2
OR
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000000000000000000000000000000000
OR
// stack: (in_range|X)³², (~has_prefix'|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Compute in_range' and ~has_prefix as
// - in_range' = in_range and has_prefix' = ~has_prefix OR is_0_at_5, for bytes in positions 1-7 and 16-23
// - in_range' = in_range AND (has_prefix => is_0_at_5 |X⁷)³² and has_prefix' = ~has_prefix, for the rest.
DUP3 %shl_const(4)
NOT
// stack: (is_0_at_5|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP1
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFFFFFFFFFF0000000000000000FFFFFFFFFFFFFFFF000000000000000000
AND
// stack: (is_0_at_5|X)|(0)|(is_0_at_5|X)|(0), (is_0_at_5|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP4
OR
// stack: (~has_prefix'|X)³², (is_0_at_5|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
SWAP3
OR
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFFFFFFFFFF0000000000000000FFFFFFFFFFFFFFFF000000000000000000
OR
AND
// stack: (in_range'|X⁷)³², (~has_prefix'|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Compute in_range' and ~has_prefix' as
// - in_range' = in_range and ~has_prefix' = ~has_prefix OR is_0_at_6, for bytes in positions 1-3, 8-11, 16-19, and 24-27
// - in_range' = in_range AND (has_prefix => is_0_at_6 |X⁷)³² and ~has_prefix' = has_prefix, for the rest.
DUP3 %shl_const(5)
NOT
// stack: (is_0_at_6|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP1
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFF00000000FFFFFFFF00000000FFFFFFFF00000000FFFFFFFF0000000000
AND
// stack: (is_0_at_6|X)³|(0)|((is_0_at_6|X)|(0))³, (is_0_at_6|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP4
OR
// stack: (~has_prefix'|X)³², (is_0_at_6|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
SWAP3
OR
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFFFFFF00000000FFFFFFFF00000000FFFFFFFF00000000FFFFFFFF0000000000
OR
AND
// stack: (in_range'|X⁷)³², (~has_prefix'|X)³², (in_range|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Compute in_range' and ~has_prefix' as
// - in_range' = in_range and ~has_prefix' = has_prefix OR is_0_at_7, for bytes in 1, 4-5, 8-9, 12-13, 16-17, 20-21, 24-25, 28-29
// - in_range' = in_range AND (has_prefix => is_0_at_7 |X⁷)³² and ~has_prefix' = ~has_prefix, for the rest.
DUP3 %shl_const(6)
NOT
// stack: (is_0_at_7|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP1
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF000000
AND
// stack: is_0_at_7|X|(0)²|((is_0_at_7|X)²|(0)²), (is_0_at_7|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP4
OR
// (~has_prefix'|X)³², (is_0_at_7|X)³², (in_range|X)³², (~has_prefix|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
SWAP3
OR
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0xFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF0000FFFF000000
OR
AND
// stack: (in_range'|X⁷)³², (~has_prefix'|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Compute in_range' as
// - in_range' = in_range, for odd positions
// - in_range' = in_range AND (has_prefix => is_0_at_8 |X)³², for the rest
SWAP1
// stack: (~has_prefix|X)³², (in_range|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
DUP3 %shl_const(7)
NOT
// stack: (is_0_at_8|X)³², (~has_prefix|X)³², (in_range|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
OR
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0x00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF00FF
OR
AND
// stack: (in_range|X)³², packed_opcodes, proof_prefix_addr, ctx, jumpdest, retdest
// Get rid of the irrelevant bits
// pos 0102030405060708091011121314151617181920212223242526272829303132
PUSH 0x8080808080808080808080808080808080808080808080808080808080808080
AND
%jump_neq_const(0x8080808080808080808080808080808080808080808080808080808080808080, return_pop_opcode)
POP
%add_const(32)
// check the remaining path
%jump(verify_path_and_write_jumpdest_table)
return_pop_opcode:
POP
return:
// stack: proof_prefix_addr, ctx, jumpdest, retdest
// or
// stack: jumpdest, ctx, proof_prefix_addr, retdest
%pop3
JUMP
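The packed bit manipulations above are equivalent to the following byte-by-byte predicate (an unpacked Rust sketch, for readability): byte d of the 32-byte window at proof_prefix_addr must not be a PUSHk with k >= 32 - d, i.e. its opcode must fall outside [0x7f - d, 0x7f].

fn window_is_push_safe(window: &[u8; 32]) -> bool {
    window.iter().enumerate().all(|(d, &opcode)| {
        // A PUSH{32-d} .. PUSH32 here could extend past the end of the
        // window, which would invalidate starting the walk at i + 32.
        opcode < 0x7f - d as u8 || opcode > 0x7f
    })
}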
%macro write_table_if_jumpdest
%stack (proof_prefix_addr, jumpdest, ctx) -> (proof_prefix_addr, jumpdest, ctx, %%after)
%jump(write_table_if_jumpdest)
%%after:
%endmacro
// Write the jumpdest table. This is done by
// non-deterministically guessing the sequence of jumpdest
// addresses used during program execution within the current context.
// For each jumpdest address we also non-deterministically guess
// a proof, which is another address in the code such that
// is_jumpdest doesn't abort when the proof is at the top of the stack
// and the jumpdest address is below it. If that's the case we set the
// corresponding bit in @SEGMENT_JUMPDEST_BITS to 1.
//
// Pre stack: ctx, code_len, retdest
// Post stack: (empty)
global jumpdest_analysis:
// If address > 0 then address is interpreted as address' + 1
// and the next prover input should contain a proof for address'.
PROVER_INPUT(jumpdest_table::next_address)
DUP1 %jumpi(check_proof)
// If address == 0 there are no more jump destinations to check
POP
// This is just a hook used to avoid verifying the jumpdest
// table in another context. It is useful during proof generation,
// where it lets us skip table verification when simulating user code.
global jumpdest_analysis_end:
%pop2
JUMP
check_proof:
// stack: address, ctx, code_len, retdest
DUP3 DUP2 %assert_le
%decrement
// stack: proof, ctx, code_len, retdest
DUP2 SWAP1
// stack: address, ctx, ctx, code_len, retdest
// We read the proof
PROVER_INPUT(jumpdest_table::next_proof)
// stack: proof, address, ctx, ctx, code_len, retdest
%write_table_if_jumpdest
// stack: ctx, code_len, retdest
%jump(jumpdest_analysis)
%macro jumpdest_analysis
%stack (ctx, code_len) -> (ctx, code_len, %%after)
%jump(jumpdest_analysis)
%%after:
%endmacro

View File

@ -1,272 +0,0 @@
global sys_log0:
%check_static
// stack: kexit_info, offset, size
DUP3 ISZERO %jumpi(log0_after_mem_gas)
DUP3 DUP3
%add_or_fault
// stack: offset+size, kexit_info, offset, size
DUP1 %ensure_reasonable_offset
%update_mem_bytes
log0_after_mem_gas:
// stack: kexit_info, offset, size
DUP3 %mul_const(@GAS_LOGDATA) %add_const(@GAS_LOG)
// stack: gas, kexit_info, offset, size
%charge_gas
%address
PUSH 0
%stack (zero, address, kexit_info, offset, size) -> (address, zero, size, offset, finish_sys_log, kexit_info)
%jump(log_n_entry)
global sys_log1:
%check_static
// stack: kexit_info, offset, size, topic
DUP3 ISZERO %jumpi(log1_after_mem_gas)
DUP3 DUP3
%add_or_fault
// stack: offset+size, kexit_info, offset, size, topic
DUP1 %ensure_reasonable_offset
%update_mem_bytes
log1_after_mem_gas:
// stack: kexit_info, offset, size, topic
DUP3 %mul_const(@GAS_LOGDATA) %add_const(@GAS_LOG) %add_const(@GAS_LOGTOPIC)
// stack: gas, kexit_info, offset, size, topic
%charge_gas
%address
PUSH 1
%stack (one, address, kexit_info, offset, size, topic) -> (address, one, topic, size, offset, finish_sys_log, kexit_info)
%jump(log_n_entry)
global sys_log2:
%check_static
// stack: kexit_info, offset, size, topic1, topic2
DUP3 ISZERO %jumpi(log2_after_mem_gas)
DUP3 DUP3
%add_or_fault
// stack: offset+size, kexit_info, offset, size, topic1, topic2
DUP1 %ensure_reasonable_offset
%update_mem_bytes
log2_after_mem_gas:
// stack: kexit_info, offset, size, topic1, topic2
DUP3 %mul_const(@GAS_LOGDATA) %add_const(@GAS_LOG) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC)
// stack: gas, kexit_info, offset, size, topic1, topic2
%charge_gas
%address
PUSH 2
%stack (two, address, kexit_info, offset, size, topic1, topic2) -> (address, two, topic1, topic2, size, offset, finish_sys_log, kexit_info)
%jump(log_n_entry)
global sys_log3:
%check_static
// stack: kexit_info, offset, size, topic1, topic2, topic3
DUP3 ISZERO %jumpi(log3_after_mem_gas)
DUP3 DUP3
%add_or_fault
// stack: offset+size, kexit_info, offset, size, topic1, topic2, topic3
DUP1 %ensure_reasonable_offset
%update_mem_bytes
log3_after_mem_gas:
// stack: kexit_info, offset, size, topic1, topic2, topic3
DUP3 %mul_const(@GAS_LOGDATA) %add_const(@GAS_LOG) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC)
// stack: gas, kexit_info, offset, size, topic1, topic2, topic3
%charge_gas
%address
PUSH 3
%stack (three, address, kexit_info, offset, size, topic1, topic2, topic3) -> (address, three, topic1, topic2, topic3, size, offset, finish_sys_log, kexit_info)
%jump(log_n_entry)
global sys_log4:
%check_static
// stack: kexit_info, offset, size, topic1, topic2, topic3, topic4
DUP3 ISZERO %jumpi(log4_after_mem_gas)
DUP3 DUP3
%add_or_fault
// stack: offset+size, kexit_info, offset, size, topic1, topic2, topic3, topic4
DUP1 %ensure_reasonable_offset
%update_mem_bytes
log4_after_mem_gas:
// stack: kexit_info, offset, size, topic1, topic2, topic3, topic4
DUP3 %mul_const(@GAS_LOGDATA) %add_const(@GAS_LOG) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC) %add_const(@GAS_LOGTOPIC)
// stack: gas, kexit_info, offset, size, topic1, topic2, topic3, topic4
%charge_gas
%address
PUSH 4
%stack (four, address, kexit_info, offset, size, topic1, topic2, topic3, topic4) -> (address, four, topic1, topic2, topic3, topic4, size, offset, finish_sys_log, kexit_info)
%jump(log_n_entry)
finish_sys_log:
// stack: kexit_info
EXIT_KERNEL
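All five entry points charge the same formula before delegating to log_n_entry below; with the usual constants assumed (GAS_LOG = 375, GAS_LOGDATA = 8, GAS_LOGTOPIC = 375), the static part is, as a sketch:

fn log_gas(num_topics: u64, data_size: u64) -> u64 {
    // Memory expansion is charged separately, via %update_mem_bytes above.
    375 + 8 * data_size + 375 * num_topics
}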
global log_n_entry:
// stack: address, num_topics, topics, data_len, data_offset, retdest
%mload_global_metadata(@GLOBAL_METADATA_LOGS_LEN)
%mload_global_metadata(@GLOBAL_METADATA_LOGS_DATA_LEN)
// stack: log_ptr, logs_len, address, num_topics, topics, data_len, data_offset, retdest
DUP1 DUP3
// stack: log_ptr, logs_len, log_ptr, logs_len, address, num_topics, topics, data_len, data_offset, retdest
%mstore_kernel(@SEGMENT_LOGS)
// stack: log_ptr, logs_len, address, num_topics, topics, data_len, data_offset, retdest
SWAP1 %increment
%mstore_global_metadata(@GLOBAL_METADATA_LOGS_LEN)
// stack: log_ptr, address, num_topics, topics, data_len, data_offset, retdest
%increment
// stack: addr_ptr, address, num_topics, topics, data_len, data_offset, retdest
// Store the address.
DUP2 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
// stack: num_topics_ptr, address, num_topics, topics, data_len, data_offset, retdest
SWAP1 POP
// stack: num_topics_ptr, num_topics, topics, data_len, data_offset, retdest
// Store num_topics.
DUP2 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
// stack: topics_ptr, num_topics, topics, data_len, data_offset, retdest
DUP2
// stack: num_topics, topics_ptr, num_topics, topics, data_len, data_offset, retdest
ISZERO
%jumpi(log_after_topics)
// stack: topics_ptr, num_topics, topics, data_len, data_offset, retdest
// Store the first topic.
DUP3 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
%stack (curr_topic_ptr, num_topics, topic1) -> (curr_topic_ptr, num_topics)
DUP2 %eq_const(1)
%jumpi(log_after_topics)
// stack: curr_topic_ptr, num_topics, remaining_topics, data_len, data_offset, retdest
// Store the second topic.
DUP3 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
%stack (curr_topic_ptr, num_topics, topic2) -> (curr_topic_ptr, num_topics)
DUP2 %eq_const(2)
%jumpi(log_after_topics)
// stack: curr_topic_ptr, num_topics, remaining_topics, data_len, data_offset, retdest
// Store the third topic.
DUP3 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
%stack (curr_topic_ptr, num_topics, topic3) -> (curr_topic_ptr, num_topics)
DUP2 %eq_const(3)
%jumpi(log_after_topics)
// stack: curr_topic_ptr, num_topics, remaining_topic, data_len, data_offset, retdest
// Store the fourth topic.
DUP3 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
%stack (data_len_ptr, num_topics, topic4) -> (data_len_ptr, num_topics)
DUP2 %eq_const(4)
%jumpi(log_after_topics)
// Invalid num_topics.
PANIC
log_after_topics:
// stack: data_len_ptr, num_topics, data_len, data_offset, retdest
// Compute RLP length of the log.
DUP3
// stack: data_len, data_len_ptr, num_topics, data_len, data_offset, retdest
DUP5 SWAP1
%rlp_data_len
// stack: rlp_data_len, data_len_ptr, num_topics, data_len, data_offset, retdest
DUP3
// stack: num_topics, rlp_data_len, data_len_ptr, num_topics, data_len, data_offset, retdest
// Each topic is encoded with 1+32 bytes.
%mul_const(33)
%rlp_list_len
// stack: rlp_topics_len, rlp_data_len, data_len_ptr, num_topics, data_len, data_offset, retdest
ADD
// The address is encoded with 1+20 bytes.
%add_const(21)
// stack: log_payload_len, data_len_ptr, num_topics, data_len, data_offset, retdest
%mload_global_metadata(@GLOBAL_METADATA_LOGS_DATA_LEN)
DUP2 SWAP1
// stack: log_ptr, log_payload_len, log_payload_len, data_len_ptr, num_topics, data_len, data_offset, retdest
%mstore_kernel(@SEGMENT_LOGS_DATA)
// stack: log_payload_len, data_len_ptr, num_topics, data_len, data_offset, retdest
%rlp_list_len
// stack: rlp_log_len, data_len_ptr, num_topics, data_len, data_offset, retdest
%mload_global_metadata(@GLOBAL_METADATA_LOGS_PAYLOAD_LEN)
// Add payload length and logs_data_len to journal.
DUP1 %mload_global_metadata(@GLOBAL_METADATA_LOGS_DATA_LEN) %journal_add_log
ADD
%mstore_global_metadata(@GLOBAL_METADATA_LOGS_PAYLOAD_LEN)
// stack: data_len_ptr, num_topics, data_len, data_offset, retdest
// Store data_len.
DUP3 DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
%increment
// stack: data_ptr, num_topics, data_len, data_offset, retdest
SWAP1 POP
// stack: data_ptr, data_len, data_offset, retdest
DUP1 SWAP2
// stack: data_len, data_ptr, data_ptr, data_offset, retdest
ADD
// stack: next_log_ptr, data_ptr, data_offset, retdest
SWAP1
// stack: data_ptr, next_log_ptr, data_offset, retdest
SWAP2
PUSH @SEGMENT_MAIN_MEMORY GET_CONTEXT %build_address
SWAP2
// stack: data_ptr, next_log_ptr, data_addr, retdest
store_log_data_loop:
// stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest
DUP2 DUP2 EQ
// stack: cur_data_ptr == next_log_ptr, cur_data_ptr, next_log_ptr, cur_data_addr, retdest
%jumpi(store_log_data_loop_end)
// stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest
DUP3
MLOAD_GENERAL
// stack: cur_data, cur_data_ptr, next_log_ptr, cur_data_addr, retdest
// Store current data byte.
DUP2
%mstore_kernel(@SEGMENT_LOGS_DATA)
// stack: cur_data_ptr, next_log_ptr, cur_data_addr, retdest
SWAP2 %increment SWAP2
// stack: cur_data_ptr, next_log_ptr, next_data_addr, retdest
%increment
%jump(store_log_data_loop)
store_log_data_loop_end:
// stack: cur_data_ptr, next_log_ptr, cur_data_offset, retdest
POP
%mstore_global_metadata(@GLOBAL_METADATA_LOGS_DATA_LEN)
POP
JUMP
rlp_data_len:
// stack: data_len, data_ptr, retdest
DUP1 ISZERO %jumpi(data_single_byte) // data will be encoded with a single byte
DUP1 PUSH 1 EQ %jumpi(one_byte_data) // data is encoded with either 1 or 2 bytes
// If we are here, data_len >= 2, and we can use rlp_list_len to determine the encoding length
%rlp_list_len
// stack: rlp_data_len, data_ptr, retdest
SWAP1 POP SWAP1
JUMP
data_single_byte:
// stack: data_len, data_ptr, retdest
%pop2
PUSH 1
SWAP1
JUMP
one_byte_data:
// stack: data_len, data_ptr, retdest
DUP2
%mload_current(@SEGMENT_MAIN_MEMORY)
// stack: data_byte, data_len, data_ptr, retdest
%lt_const(0x80) %jumpi(data_single_byte) // special byte that only requires one byte to be encoded
%pop2
PUSH 2 SWAP1
JUMP
%macro rlp_data_len
// stack: data_len, data_ptr
%stack (data_len, data_ptr) -> (data_len, data_ptr, %%after)
%jump(rlp_data_len)
%%after:
%endmacro
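An illustrative Rust equivalent of rlp_data_len, i.e. the byte length of the RLP encoding of the log's data; for data.len() >= 2 the string prefix has the same shape as a list prefix, which is why the assembly reuses %rlp_list_len for that case:

fn rlp_data_len(data: &[u8]) -> usize {
    match data.len() {
        0 => 1,                   // encoded as the single byte 0x80
        1 if data[0] < 0x80 => 1, // a small byte encodes as itself
        1 => 2,                   // 0x81 prefix plus the byte
        n if n < 56 => 1 + n,     // short-string prefix
        n => {
            // Long-string prefix: 1 byte, then the big-endian length itself.
            let len_of_len = ((usize::BITS - n.leading_zeros() + 7) / 8) as usize;
            1 + len_of_len + n
        }
    }
}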

View File

@ -1,49 +0,0 @@
// Get the nonce of the given account.
// Pre stack: address, retdest
// Post stack: (empty)
global nonce:
// stack: address, retdest
%mpt_read_state_trie
// stack: account_ptr, retdest
// The nonce is the first account field, so we deref the account pointer itself.
// Note: We don't need to handle account_ptr=0, as trie_data[0] = 0,
// so the deref will give 0 (the default nonce) as desired.
%mload_trie_data
// stack: nonce, retdest
SWAP1 JUMP
// Convenience macro to call nonce and return where we left off.
%macro nonce
%stack (address) -> (address, %%after)
%jump(nonce)
%%after:
%endmacro
// Increment the given account's nonce. Assumes the account already exists; panics otherwise.
global increment_nonce:
// stack: address, retdest
DUP1
%mpt_read_state_trie
// stack: account_ptr, address, retdest
DUP1 ISZERO %jumpi(increment_nonce_no_such_account)
// stack: nonce_ptr, address, retdest
DUP1 %mload_trie_data
// stack: nonce, nonce_ptr, address, retdest
DUP1 DUP4 %journal_add_nonce_change
// stack: nonce, nonce_ptr, address, retdest
%increment
SWAP1
// stack: nonce_ptr, nonce', address, retdest
%mstore_trie_data
// stack: address, retdest
POP
JUMP
global increment_nonce_no_such_account:
PANIC
// Convenience macro to call increment_nonce and return where we left off.
%macro increment_nonce
%stack (address) -> (address, %%after)
%jump(increment_nonce)
%%after:
%endmacro

View File

@ -1,139 +0,0 @@
global precompile_blake2_f:
// stack: retdest, new_ctx, (old stack)
POP
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
PUSH blake2_f_contd
// stack: blake2_f_contd, kexit_info
// Load inputs from calldata memory into stack.
%calldatasize
// stack: calldatasize, blake2_f_contd, kexit_info
DUP1
// stack: calldatasize, calldatasize, blake2_f_contd, kexit_info
%eq_const(213) ISZERO %jumpi(fault_exception)
// stack: calldatasize, blake2_f_contd, kexit_info
%decrement
// stack: flag_addr=212, blake2_f_contd, kexit_info
DUP1
// stack: flag_addr, flag_addr, blake2_f_contd, kexit_info
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
%build_address
// stack: addr, flag_addr, blake2_f_contd, kexit_info
MLOAD_GENERAL
// stack: flag, flag_addr, blake2_f_contd, kexit_info
DUP1
// stack: flag, flag, flag_addr, blake2_f_contd, kexit_info
%gt_const(1) %jumpi(fault_exception) // Check flag < 2 (flag = 0 or flag = 1)
// stack: flag, flag_addr, blake2_f_contd, kexit_info
SWAP1
// stack: flag_addr, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: t1_addr=flag_addr-8, flag, blake2_f_contd, kexit_info
%stack (t1_addr) -> (@SEGMENT_CALLDATA, t1_addr, t1_addr)
// stack: @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info
%build_address
%mload_packing_u64_LE
// stack: t_1, t1_addr, flag, blake2_f_contd, kexit_info
SWAP1
// stack: t1_addr, t_1, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: t0_addr=t1_addr-8, t_1, flag, blake2_f_contd, kexit_info
%stack (t0_addr) -> (@SEGMENT_CALLDATA, t0_addr, t0_addr)
// stack: @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info
%build_address
%mload_packing_u64_LE
// stack: t_0, t0_addr, t_1, flag, blake2_f_contd, kexit_info
SWAP1
// stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info
%rep 16
// stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP1
// stack: m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
PUSH @SEGMENT_CALLDATA
// stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address
%mload_packing_u64_LE
// stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP1
// stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%endrep
// stack: m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%rep 8
// stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%sub_const(8)
// stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP1
// stack: h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
PUSH @SEGMENT_CALLDATA
// stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address
%mload_packing_u64_LE
// stack: h_i, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
SWAP1
// stack: h0_addr + 8 * (8 - i), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%endrep
// stack: h0_addr + 8 * 8 = 68, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
POP
%stack () -> (@SEGMENT_CALLDATA, 4)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 4, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%build_address_no_offset
MLOAD_32BYTES
// stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
DUP1
// stack: rounds, rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%charge_gas
// stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
%jump(blake2_f)
blake2_f_contd:
// stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info
// Store the result hash to the parent's return data using `mstore_unpacking_u64_LE`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64)
// stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info
PUSH @SEGMENT_RETURNDATA
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
// stack: parent_ctx, segment, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info
%build_address_no_offset
// stack: addr0=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info
%rep 8
// stack: addri, h_i', ..., h_7', kexit_info
%stack (addr, h_i) -> (addr, h_i, addr)
%mstore_unpacking_u64_LE
// stack: addr_i, h_(i+1)', ..., h_7', kexit_info
%add_const(8)
// stack: addr_(i+1), h_(i+1)', ..., h_7', kexit_info
%endrep
// stack: kexit_info
%jump(pop_and_return_success)
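The loads above walk the 213-byte EIP-152 calldata layout backwards (flag, then t, m, h, and finally the big-endian rounds word). The same parse, forwards, as a Rust sketch:

fn parse_blake2_f_input(input: &[u8]) -> Option<(u32, [u64; 8], [u64; 16], [u64; 2], bool)> {
    if input.len() != 213 { return None; }             // %eq_const(213) check
    let rounds = u32::from_be_bytes(input[0..4].try_into().unwrap());
    let mut h = [0u64; 8];
    for i in 0..8 {
        h[i] = u64::from_le_bytes(input[4 + 8 * i..12 + 8 * i].try_into().unwrap());
    }
    let mut m = [0u64; 16];
    for i in 0..16 {
        m[i] = u64::from_le_bytes(input[68 + 8 * i..76 + 8 * i].try_into().unwrap());
    }
    let t = [
        u64::from_le_bytes(input[196..204].try_into().unwrap()), // t_0
        u64::from_le_bytes(input[204..212].try_into().unwrap()), // t_1
    ];
    match input[212] {
        0 => Some((rounds, h, m, t, false)),
        1 => Some((rounds, h, m, t, true)),
        _ => None, // the kernel faults unless flag < 2
    }
}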

View File

@ -1,63 +0,0 @@
global precompile_bn_add:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%charge_gas_const(@BN_ADD_GAS)
// Load x0, y0, x1, y1 from the call data using `MLOAD_32BYTES`.
PUSH bn_add_return
// stack: bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 96, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96, 32, bn_add_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, y1, bn_add_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: x1, y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, x1, y1, bn_add_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: y0, x1, y1, bn_add_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, y0, x1, y1, bn_add_return, kexit_info
%build_address_no_offset
MLOAD_32BYTES
// stack: x0, y0, x1, y1, bn_add_return, kexit_info
%jump(bn_add)
bn_add_return:
// stack: x, y, kexit_info
DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input.
DUP2 %eq_const(@U256_MAX) // bn_add returns (U256_MAX, U256_MAX) on bad input.
MUL // Cheaper than AND
%jumpi(fault_exception)
// stack: x, y, kexit_info
// Store the result (x, y) to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, x, y) -> (parent_ctx, @SEGMENT_RETURNDATA, x, parent_ctx, y)
%build_address_no_offset
MSTORE_32BYTES_32
POP
%stack (parent_ctx, y) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, y)
%build_address
MSTORE_32BYTES_32
%jump(pop_and_return_success)

View File

@ -1,58 +0,0 @@
global precompile_bn_mul:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%charge_gas_const(@BN_MUL_GAS)
// Load x, y, n from the call data using `MLOAD_32BYTES`.
PUSH bn_mul_return
// stack: bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, bn_mul_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: n, bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, n, bn_mul_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: y, n, bn_mul_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, y, n, bn_mul_return, kexit_info
%build_address_no_offset
MLOAD_32BYTES
// stack: x, y, n, bn_mul_return, kexit_info
%jump(bn_mul)
bn_mul_return:
// stack: Px, Py, kexit_info
DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input.
DUP2 %eq_const(@U256_MAX) // bn_mul returns (U256_MAX, U256_MAX) on bad input.
MUL // Cheaper than AND
%jumpi(fault_exception)
// stack: Px, Py, kexit_info
// Store the result (Px, Py) to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, Px, Py) -> (parent_ctx, @SEGMENT_RETURNDATA, Px, parent_ctx, Py)
%build_address_no_offset
MSTORE_32BYTES_32
bn_mul_contd6:
POP
%stack (parent_ctx, Py) -> (parent_ctx, @SEGMENT_RETURNDATA, 32, Py)
%build_address
MSTORE_32BYTES_32
%jump(pop_and_return_success)

View File

@ -1,60 +0,0 @@
global precompile_ecrec:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%charge_gas_const(@ECREC_GAS)
// Load hash, v, r, s from the call data using `MLOAD_32BYTES`.
PUSH ecrec_return
// stack: ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 96, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96, 32, ecrec_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 64, 32, s, ecrec_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: r, s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, 32, r, s, ecrec_return, kexit_info
%build_address
MLOAD_32BYTES
// stack: v, r, s, ecrec_return, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 32)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, v, r, s, ecrec_return, kexit_info
%build_address_no_offset
MLOAD_32BYTES
// stack: hash, v, r, s, ecrec_return, kexit_info
%jump(ecrecover)
ecrec_return:
// stack: address, kexit_info
DUP1 %eq_const(@U256_MAX) %jumpi(ecrec_bad_input) // ecrecover returns U256_MAX on bad input.
// Store the result address to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, address) -> (parent_ctx, @SEGMENT_RETURNDATA, address)
%build_address_no_offset
MSTORE_32BYTES_32
%jump(pop_and_return_success)
// On bad input, return empty return data but still return success.
ecrec_bad_input:
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%jump(pop_and_return_success)
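The four MLOAD_32BYTES above read hash, v, r and s as 32-byte big-endian words at calldata offsets 0, 32, 64 and 96 (loaded in reverse order so that hash ends up on top). A Rust sketch of the same reads, assuming the calldata segment reads as zero beyond its length:

fn parse_ecrec_input(calldata: &[u8]) -> ([u8; 32], [u8; 32], [u8; 32], [u8; 32]) {
    let word = |off: usize| {
        let mut w = [0u8; 32];
        for i in 0..32 {
            // Reads past the end of calldata see zero-initialized memory.
            w[i] = calldata.get(off + i).copied().unwrap_or(0);
        }
        w
    };
    (word(0), word(32), word(64), word(96)) // (hash, v, r, s)
}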

View File

@ -1,470 +0,0 @@
// Mod 16 to the range [1, 16].
%macro mod_16
// stack: x
%mod_const(16)
DUP1 %jumpi(%%after)
POP PUSH 16
%%after:
%endmacro
// Load bytes, packing 16 bytes into each limb, and store limbs on the stack.
// We pass around total_num_limbs and len for convenience, because we can't access them from the stack
// if they're hidden behind the variable number of limbs.
mload_bytes_as_limbs:
// stack: addr, num_bytes, retdest, total_num_limbs, len, ..limbs
DUP2
// stack: num_bytes, addr, num_bytes, retdest, total_num_limbs, len, ..limbs
%mod_16
// stack: min(16, num_bytes), addr, num_bytes, retdest, total_num_limbs, len, ..limbs
DUP2
// stack: addr, min(16, num_bytes), addr, num_bytes, retdest, total_num_limbs, len, ..limbs
MLOAD_32BYTES
// stack: new_limb, addr, num_bytes, retdest, total_num_limbs, len, ..limbs
%stack (new, addr, numb, ret, tot, len) -> (numb, addr, ret, tot, len, new)
// stack: num_bytes, addr, retdest, total_num_limbs, len, new_limb, ..limbs
DUP1
%mod_16
// stack: num_bytes%16, num_bytes, addr, retdest, total_num_limbs, len, new_limb, ..limbs
DUP1 SWAP2
SUB
// stack: num_bytes_new, num_bytes%16, addr, retdest, total_num_limbs, len, new_limb, ..limbs
DUP1
ISZERO
%jumpi(mload_bytes_return)
SWAP1
// stack: num_bytes%16, num_bytes_new, addr, retdest, total_num_limbs, len, new_limb, ..limbs
DUP3 // addr
ADD // increment offset
// stack: addr_new, num_bytes_new, addr, retdest, total_num_limbs, len, new_limb, ..limbs
SWAP2 POP
// stack: num_bytes_new, addr_new, retdest, total_num_limbs, len, new_limb, ..limbs
SWAP1
%jump(mload_bytes_as_limbs)
mload_bytes_return:
// stack: num_bytes_new, num_bytes%16, addr, retdest, total_num_limbs, len, new_limb, ..limbs
%pop3
// stack: retdest, total_num_limbs, len, ..limbs
JUMP
%macro mload_bytes_as_limbs
%stack (addr, num_bytes, total_num_limbs) -> (addr, num_bytes, %%after, total_num_limbs)
%jump(mload_bytes_as_limbs)
%%after:
%endmacro
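A sketch of the packing this performs, in Rust: the big-endian byte string is split into 16-byte (u128) limbs, with the leading chunk sized ((len - 1) % 16) + 1 (the [1, 16] range produced by %mod_16), so only the most significant limb can be short. store_limbs below then writes the stack from the top down, which puts the least significant limb at the lowest offset.

fn pack_be(chunk: &[u8]) -> u128 {
    chunk.iter().fold(0u128, |acc, &b| (acc << 8) | u128::from(b))
}

fn bytes_as_limbs(bytes: &[u8]) -> Vec<u128> {
    if bytes.is_empty() { return Vec::new(); }
    let first = (bytes.len() - 1) % 16 + 1; // %mod_16: in [1, 16]
    let mut limbs = vec![pack_be(&bytes[..first])]; // most significant limb
    for chunk in bytes[first..].chunks(16) {
        limbs.push(pack_be(chunk));
    }
    limbs // most significant first; the kernel stores them in reverse
}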
store_limbs:
// stack: offset, retdest, num_limbs, limb[num_limbs - 1], ..limb[0]
DUP3
// stack: num_limbs, offset, retdest, num_limbs, limb[num_limbs - 1], ..limb[0]
ISZERO
%jumpi(store_limbs_return)
// stack: offset, retdest, num_limbs, limb[num_limbs - 1], ..limb[0]
%stack (offset, ret, num, limb) -> (offset, limb, offset, ret, num)
// stack: offset, limb[num_limbs - 1], offset, retdest, num_limbs, limb[num_limbs - 2], ..limb[0]
%mstore_current_general
// stack: offset, retdest, num_limbs, limb[num_limbs - 2], ..limb[0]
%increment
SWAP2
%decrement
SWAP2
// stack: offset + 1, retdest, num_limbs - 1, limb[num_limbs - 2], ..limb[0]
%jump(store_limbs)
store_limbs_return:
// stack: offset, retdest, num_limbs=0
POP
SWAP1
POP
JUMP
%macro store_limbs
%stack (offset, num_limbs) -> (offset, %%after, num_limbs)
%jump(store_limbs)
%%after:
%endmacro
%macro expmod_gas_f
// stack: x
// Overflow check
DUP1 %ge_const(0x800000000000000000000000000000007) %jumpi(fault_exception)
// stack: x
%ceil_div_const(8)
// stack: ceil(x/8)
%square
// stack: ceil(x/8)^2
%endmacro
calculate_l_E_prime:
// stack: l_E, l_B, retdest
// Throw a fault early if the lengths are too large.
DUP2 %gt_const(0x100000000000000000000000000000000) %jumpi(fault_exception)
DUP1 %gt_const(0x100000000000000000000000000000000) %jumpi(fault_exception)
DUP1 ISZERO %jumpi(case_le_zero)
// stack: l_E, l_B, retdest
DUP1 %le_const(32)
// stack: l_E <= 32, l_E, l_B, retdest
%jumpi(case_le_32)
// stack: l_E, l_B, retdest
PUSH 32
// stack: 32, l_E, l_B, retdest
DUP3
// stack: l_B, 32, l_E, l_B, retdest
%add_const(96)
// stack: 96 + l_B, 32, l_E, l_B, retdest
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
%build_address
MLOAD_32BYTES
// stack: i[96 + l_B..128 + l_B], l_E, l_B, retdest
%log2_floor
// stack: log2(i[96 + l_B..128 + l_B]), l_E, l_B, retdest
SWAP1
// stack: l_E, log2(i[96 + l_B..128 + l_B]), l_B, retdest
%sub_const(32)
// Overflow check
DUP1 %ge_const(0x2000000000000000000000000000000000000000000000000000000000000000) %jumpi(fault_exception)
%mul_const(8)
// stack: 8 * (l_E - 32), log2(i[96 + l_B..128 + l_B]), l_B, retdest
ADD
// stack: 8 * (l_E - 32) + log2(i[96 + l_B..128 + l_B]), l_B, retdest
SWAP1
POP
// stack: 8 * (l_E - 32) + log2(i[96 + l_B..128 + l_B]), retdest
SWAP1
// stack: retdest, 8 * (l_E - 32) + log2(i[96 + l_B..128 + l_B])
JUMP
case_le_zero:
%stack (l_E, l_B, retdest) -> (retdest, 0)
JUMP
case_le_32:
// stack: l_E, l_B, retdest
SWAP1
// stack: l_B, l_E, retdest
%add_const(96)
// stack: 96 + l_B, l_E, retdest
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
%build_address
MLOAD_32BYTES
// stack: E, retdest
%log2_floor
// stack: log2(E), retdest
SWAP1
// stack: retdest, log2(E)
JUMP
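Together with %expmod_gas_f above, this yields the EIP-2565 price charged further below. As a Rust sketch (G_quaddivisor = 3 and the 200 gas floor are assumed; exp_head stands for the up-to-32 exponent bytes the kernel reads from calldata):

fn log2_floor(bytes: &[u8]) -> u64 {
    // Floor of log2 of the big-endian integer in `bytes`; 0 for the value 0.
    for (i, &b) in bytes.iter().enumerate() {
        if b != 0 {
            return (bytes.len() - 1 - i) as u64 * 8 + (7 - b.leading_zeros() as u64);
        }
    }
    0
}

fn expmod_gas(l_b: u64, l_e: u64, l_m: u64, exp_head: &[u8]) -> u64 {
    let f = |x: u64| ((x + 7) / 8).pow(2);           // %expmod_gas_f
    let l_e_prime = match l_e {
        0 => 0,                                      // case_le_zero
        1..=32 => log2_floor(exp_head),              // case_le_32
        _ => 8 * (l_e - 32) + log2_floor(exp_head),  // first 32 exponent bytes
    };
    (f(l_b.max(l_m)) * l_e_prime.max(1) / 3).max(200) // G_quaddivisor, floor
}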
global precompile_expmod:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
// Load l_B from i[0..32].
%stack () -> (@SEGMENT_CALLDATA, 32)
// stack: @SEGMENT_CALLDATA, 32, kexit_info
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 32, kexit_info
%build_address_no_offset
MLOAD_32BYTES
// stack: l_B, kexit_info
// Load l_E from i[32..64].
%stack () -> (@SEGMENT_CALLDATA, 32, 32)
GET_CONTEXT
%build_address
MLOAD_32BYTES
// stack: l_E, l_B, kexit_info
// Load l_M from i[64..96].
%stack () -> (@SEGMENT_CALLDATA, 64, 32)
GET_CONTEXT
%build_address
MLOAD_32BYTES
// stack: l_M, l_E, l_B, kexit_info
DUP3 ISZERO DUP2 ISZERO
MUL // AND
// stack: l_M==0 && l_B==0, l_M, l_E, l_B, kexit_info
%jumpi(zero_base_zero_mod)
%stack (l: 3) -> (l, l)
// stack: l_M, l_E, l_B, l_M, l_E, l_B, kexit_info
%max_3
// stack: max_len, l_M, l_E, l_B, kexit_info
%ceil_div_const(16)
// stack: len=ceil(max_len/16), l_M, l_E, l_B, kexit_info
// Calculate gas costs.
PUSH l_E_prime_return
// stack: l_E_prime_return, len, l_M, l_E, l_B, kexit_info
DUP5
DUP5
// stack: l_E, l_B, l_E_prime_return, len, l_M, l_E, l_B, kexit_info
%jump(calculate_l_E_prime)
l_E_prime_return:
// stack: l_E_prime, len, l_M, l_E, l_B, kexit_info
DUP5
// stack: l_B, l_E_prime, len, l_M, l_E, l_B, kexit_info
DUP4
// stack: l_M, l_B, l_E_prime, len, l_M, l_E, l_B, kexit_info
%max
// stack: max(l_M, l_B), l_E_prime, len, l_M, l_E, l_B, kexit_info
%expmod_gas_f
// stack: f(max(l_M, l_B)), l_E_prime, len, l_M, l_E, l_B, kexit_info
SWAP1
// stack: l_E_prime, f(max(l_M, l_B)), len, l_M, l_E, l_B, kexit_info
%max_const(1)
// stack: max(1, l_E_prime), f(max(l_M, l_B)), len, l_M, l_E, l_B, kexit_info
MUL
// stack: max(1, l_E_prime) * f(max(l_M, l_B)), len, l_M, l_E, l_B, kexit_info
%div_const(3) // G_quaddivisor
// stack: (max(1, l_E_prime) * f(max(l_M, l_B))) / G_quaddivisor, len, l_M, l_E, l_B, kexit_info
%max_const(200)
// stack: g_r, len, l_M, l_E, l_B, kexit_info
%stack (g_r, l: 4, kexit_info) -> (g_r, kexit_info, l)
// stack: g_r, kexit_info, len, l_M, l_E, l_B
%charge_gas
// stack: kexit_info, len, l_M, l_E, l_B
%stack (kexit_info, l: 4) -> (l, kexit_info)
// stack: len, l_M, l_E, l_B, kexit_info
// Copy B to memory.
// stack: len, l_M, l_E, l_B, kexit_info
DUP1
// stack: len, len, l_M, l_E, l_B, kexit_info
DUP5
// stack: num_bytes=l_B, len, len, l_M, l_E, l_B, kexit_info
DUP1
%ceil_div_const(16)
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
DUP2
ISZERO
%jumpi(copy_b_len_zero)
SWAP1
// stack: num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
%stack () -> (@SEGMENT_CALLDATA, 96)
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96, num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
%build_address
%mload_bytes_as_limbs
// stack: num_limbs, len, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
SWAP1
POP
// stack: num_limbs, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
PUSH 0
// stack: b_loc=0, num_limbs, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
%store_limbs
// stack: len, l_M, l_E, l_B, kexit_info
%jump(copy_b_end)
copy_b_len_zero:
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
%pop3
copy_b_end:
// Copy E to memory.
// stack: len, l_M, l_E, l_B, kexit_info
DUP1
// stack: len, len, l_M, l_E, l_B, kexit_info
DUP4
// stack: num_bytes=l_E, len, len, l_M, l_E, l_B, kexit_info
DUP1
%ceil_div_const(16)
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
DUP2
ISZERO
%jumpi(copy_e_len_zero)
SWAP1
// stack: num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
DUP7
%add_const(96)
// stack: 96 + l_B, num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96 + l_B, num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
%build_address
%mload_bytes_as_limbs
// stack: num_limbs, len, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
SWAP1
// stack: e_loc=len, num_limbs, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
%store_limbs
// stack: len, l_M, l_E, l_B, kexit_info
%jump(copy_e_end)
copy_e_len_zero:
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
%pop3
copy_e_end:
// Copy M to memory.
// stack: len, l_M, l_E, l_B, kexit_info
DUP1
// stack: len, len, l_M, l_E, l_B, kexit_info
DUP3
// stack: num_bytes=l_M, len, len, l_M, l_E, l_B, kexit_info
DUP1
%ceil_div_const(16)
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
DUP2
ISZERO
%jumpi(copy_m_len_zero)
SWAP1
// stack: num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
DUP7
DUP7
ADD
%add_const(96)
// stack: 96 + l_B + l_E, num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
// stack: ctx, @SEGMENT_CALLDATA, 96 + l_B + l_E, num_bytes, num_limbs, len, len, l_M, l_E, l_B, kexit_info
%build_address
%mload_bytes_as_limbs
// stack: num_limbs, len, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
SWAP1
%mul_const(2)
// stack: m_loc=2*len, num_limbs, limbs[num_limbs-1], .., limbs[0], len, l_M, l_E, l_B, kexit_info
%store_limbs
// stack: len, l_M, l_E, l_B, kexit_info
%jump(copy_m_end)
copy_m_len_zero:
// stack: num_limbs, num_bytes, len, len, l_M, l_E, l_B, kexit_info
%pop3
copy_m_end:
%stack (len, l_M, ls: 2) -> (len, l_M)
// stack: len, l_M, kexit_info
PUSH expmod_contd
// stack: expmod_contd, len, l_M, kexit_info
DUP2
// stack: len, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(11)
// stack: s5=11*len, len, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(9)
// stack: s4=9*len, len, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, s4, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(7)
// stack: s3=7*len, len, s4, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, s3, s4, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(5)
// stack: s2=5*len, len, s3, s4, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(4)
// stack: s1=4*len, len, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(3)
// stack: out=3*len, len, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, out, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
DUP1
%mul_const(2)
// stack: m_loc=2*len, len, out, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
SWAP1
// stack: len, m_loc, out, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
PUSH 0
// stack: b_loc=0, e_loc=len, m_loc, out, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
DUP2
// stack: len, b_loc, e_loc, m_loc, out, s1, s2, s3, s4, s5, expmod_contd, len, l_M, kexit_info
%jump(modexp_bignum)
expmod_contd:
// stack: len, l_M, kexit_info
// Copy the result value from memory to the parent's return data.
// Store return data size: l_M (number of bytes).
SWAP1
// stack: l_M, len, kexit_info
DUP1 %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE)
// stack: l_M, len, kexit_info
DUP1 ISZERO %jumpi(zero_modulus)
// stack: l_M, len, kexit_info
DUP1 %ceil_div_const(16)
// stack: l_M_128, l_M, len, kexit_info
SWAP1 %mod_16
// stack: l_M%16, l_M_128, len, kexit_info
SWAP2
// stack: len, l_M_128, l_M%16, kexit_info
%mul_const(3)
// stack: out=3*len, l_M_128, l_M%16, kexit_info
%decrement
DUP2
DUP2
ADD
// stack: cur_offset=out+l_M_128-1, end_offset=out-1, l_M_128, l_M%16, kexit_info
DUP1 %mload_current_general
%stack (cur_limb, cur_offset, end_offset, l_M_128, l_M_mod16, kexit_info) ->
(@SEGMENT_RETURNDATA, cur_limb, l_M_mod16, cur_offset, end_offset, l_M_128, kexit_info)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%build_address_no_offset
%mstore_unpacking
// stack: address, cur_offset, end_offset, l_M_128, kexit_info
SWAP1
%decrement
// stack: cur_offset, address, end_offset, l_M_128, kexit_info
// Store in big-endian format.
expmod_store_loop:
// stack: cur_offset, address, end_offset, l_M_128, kexit_info
DUP3 DUP2 EQ %jumpi(expmod_store_end)
// stack: cur_offset, address, end_offset, l_M_128, kexit_info
DUP1 %mload_current_general
%stack (cur_limb, cur_offset, address, end_offset, l_M_128, kexit_info) ->
(address, cur_limb, cur_offset, end_offset, l_M_128, kexit_info)
%stack (address, cur_limb) -> (address, cur_limb, 16)
%mstore_unpacking
// stack: address', cur_offset, end_offset, l_M_128, kexit_info
SWAP1 %decrement
// stack: cur_offset-1, address', end_offset, l_M_128, kexit_info
%jump(expmod_store_loop)
expmod_store_end:
// stack: cur_offset, address, end_offset, l_M_128, kexit_info
%pop4
the_end:
// stack: kexit_info
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
zero_modulus:
// stack: l_M, len, kexit_info
%pop2
%jump(the_end)
zero_base_zero_mod:
// stack: l_M, l_E, l_B, kexit_info
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE)
// stack: l_E, l_B, kexit_info
%pop2
// stack: kexit_info
PUSH 200
%charge_gas
// stack: kexit_info
%jump(the_end)

View File

@ -1,47 +0,0 @@
global precompile_id:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%calldatasize
%num_bytes_to_num_words
// stack: data_words_len, kexit_info
%mul_const(@ID_DYNAMIC_GAS)
PUSH @ID_STATIC_GAS
ADD
// stack: gas, kexit_info
%charge_gas
// Simply copy the call data to the parent's return data.
%calldatasize
DUP1 %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE)
PUSH id_contd SWAP1
PUSH @SEGMENT_CALLDATA
GET_CONTEXT
%build_address_no_offset
// stack: SRC, size, id_contd
PUSH @SEGMENT_RETURNDATA
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%build_address_no_offset
// stack: DST, SRC, size, id_contd
%jump(memcpy_bytes)
id_contd:
// stack: kexit_info
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
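The gas charge above reduces to the standard identity-precompile formula; a Rust sketch, assuming the mainnet constants @ID_STATIC_GAS = 15 and @ID_DYNAMIC_GAS = 3 per 32-byte word:

    fn id_gas(calldata_len: u64) -> u64 {
        let words = (calldata_len + 31) / 32; // %num_bytes_to_num_words
        15 + 3 * words
    }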

@ -1,69 +0,0 @@
%macro handle_precompiles
// stack: address, new_ctx, (old stack)
PUSH %%after
SWAP1
// stack: address, %%after, new_ctx, (old stack)
%jump(handle_precompiles)
%%after:
// stack: new_ctx, (old stack)
%endmacro
global handle_precompiles:
// stack: address, retdest, new_ctx, (old stack)
DUP1 %eq_const(@ECREC) %jumpi(precompile_ecrec)
DUP1 %eq_const(@SHA256) %jumpi(precompile_sha256)
DUP1 %eq_const(@RIP160) %jumpi(precompile_rip160)
DUP1 %eq_const(@ID) %jumpi(precompile_id)
DUP1 %eq_const(@EXPMOD) %jumpi(precompile_expmod)
DUP1 %eq_const(@BN_ADD) %jumpi(precompile_bn_add)
DUP1 %eq_const(@BN_MUL) %jumpi(precompile_bn_mul)
DUP1 %eq_const(@SNARKV) %jumpi(precompile_snarkv)
%eq_const(@BLAKE2_F) %jumpi(precompile_blake2_f)
// stack: retdest
JUMP
global pop_and_return_success:
// stack: _unused, kexit_info
POP
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
global after_precompile:
%mload_global_metadata(@GLOBAL_METADATA_IS_PRECOMPILE_FROM_EOA) %jumpi(process_message_txn_after_call)
%stack (success, leftover_gas, new_ctx, kexit_info, callgas, address, value, args_offset, args_size, ret_offset, ret_size) ->
(success, leftover_gas, new_ctx, kexit_info, ret_offset, ret_size)
%jump(after_call_instruction)
%macro handle_precompiles_from_eoa
// stack: retdest
%mload_txn_field(@TXN_FIELD_TO)
// stack: addr, retdest
DUP1 %is_precompile
%jumpi(handle_precompiles_from_eoa)
// stack: addr, retdest
POP
%endmacro
global handle_precompiles_from_eoa:
PUSH 1 %mstore_global_metadata(@GLOBAL_METADATA_IS_PRECOMPILE_FROM_EOA)
// stack: addr, retdest
%create_context
// stack: new_ctx, addr, retdest
%non_intrinsic_gas %set_new_ctx_gas_limit
// stack: new_ctx, addr, retdest
// Set calldatasize and copy txn data to calldata.
%mload_txn_field(@TXN_FIELD_DATA_LEN)
%stack (calldata_size, new_ctx) -> (calldata_size, new_ctx, calldata_size)
%set_new_ctx_calldata_size
%stack (new_ctx, calldata_size) -> (@SEGMENT_TXN_DATA, @SEGMENT_CALLDATA, new_ctx, calldata_size, handle_precompiles_from_eoa_finish, new_ctx)
SWAP2 %build_address_no_offset // DST
// stack: DST, SRC, calldata_size, handle_precompiles_from_eoa_finish, new_ctx
%jump(memcpy_bytes)
handle_precompiles_from_eoa_finish:
%stack (new_ctx, addr, retdest) -> (addr, new_ctx, retdest)
%handle_precompiles
PANIC // We already checked that a precompile is called, so this should be unreachable.
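The dispatch above is a straight-line comparison against the nine standard precompile addresses; as a Rust match (illustrative handler names, mainnet address assignments):

    fn dispatch_precompile(address: u64) -> Option<&'static str> {
        match address {
            0x1 => Some("ecrec"),
            0x2 => Some("sha256"),
            0x3 => Some("rip160"),
            0x4 => Some("id"),
            0x5 => Some("expmod"),
            0x6 => Some("bn_add"),
            0x7 => Some("bn_mul"),
            0x8 => Some("snarkv"),
            0x9 => Some("blake2_f"),
            _ => None, // not a precompile: fall through to the plain JUMP
        }
    }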

@ -1,50 +0,0 @@
global precompile_rip160:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint the journal so the call can be reverted on failure.
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%calldatasize
%num_bytes_to_num_words
// stack: data_words_len, kexit_info
%mul_const(@RIP160_DYNAMIC_GAS)
PUSH @RIP160_STATIC_GAS
ADD
// stack: gas, kexit_info
%charge_gas
// Copy the call data to the kernel general segment (ripemd expects it there) and call ripemd.
%calldatasize
GET_CONTEXT
%stack (ctx, size) ->
(
ctx, @SEGMENT_CALLDATA, // SRC
ctx,
size, ripemd, // count, retdest
200, size, rip160_contd // ripemd input: virt, num_bytes, retdest
)
%build_address_no_offset
%stack(addr, ctx) -> (ctx, @SEGMENT_KERNEL_GENERAL, 200, addr)
%build_address
// stack: DST, SRC, count, retdest, virt, num_bytes, retdest
%jump(memcpy_bytes)
rip160_contd:
// stack: hash, kexit_info
// Store the result hash to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, hash) -> (parent_ctx, @SEGMENT_RETURNDATA, hash)
%build_address_no_offset
MSTORE_32BYTES_32
%jump(pop_and_return_success)

@ -1,50 +0,0 @@
global precompile_sha256:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint the journal so the call can be reverted on failure.
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
%calldatasize
%num_bytes_to_num_words
// stack: data_words_len, kexit_info
%mul_const(@SHA256_DYNAMIC_GAS)
PUSH @SHA256_STATIC_GAS
ADD
// stack: gas, kexit_info
%charge_gas
// Copy the call data to the kernel general segment (sha2 expects it there) and call sha2.
%calldatasize
GET_CONTEXT
%stack (ctx, size) ->
(
ctx, @SEGMENT_CALLDATA, // SRC
ctx,
size, sha2, // count, retdest
0, size, sha256_contd // sha2 input: virt, num_bytes, retdest
)
%build_address_no_offset
%stack(addr, ctx) -> (ctx, @SEGMENT_KERNEL_GENERAL, 1, addr)
%build_address
// stack: DST, SRC, count, retdest, virt, num_bytes, retdest
%jump(memcpy_bytes)
sha256_contd:
// stack: hash, kexit_info
// Store the result hash to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, hash) -> (parent_ctx, @SEGMENT_RETURNDATA, hash)
%build_address_no_offset
MSTORE_32BYTES_32
%jump(pop_and_return_success)
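precompile_sha256 and precompile_rip160 share the same tail: the 32-byte digest is unpacked into the parent context's RETURNDATA segment and the parent's returndata size is pinned to 32. A toy Rust rendering (hypothetical ParentCtx type):

    struct ParentCtx {
        returndata: Vec<u8>,
        returndata_size: usize,
    }

    fn store_digest(parent: &mut ParentCtx, digest: [u8; 32]) {
        parent.returndata_size = 32; // CTX_METADATA_RETURNDATA_SIZE
        parent.returndata.clear();
        parent.returndata.extend_from_slice(&digest); // MSTORE_32BYTES_32
    }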

@ -1,130 +0,0 @@
global precompile_snarkv:
// stack: address, retdest, new_ctx, (old stack)
%pop2
// stack: new_ctx, (old stack)
%set_new_ctx_parent_pc(after_precompile)
// stack: new_ctx, (old stack)
DUP1
SET_CONTEXT
%checkpoint // Checkpoint the journal so the call can be reverted on failure.
%increment_call_depth
// stack: (empty)
PUSH 0x100000000 // = 2^32 (is_kernel = true)
// stack: kexit_info
PUSH 192 %calldatasize DUP2 DUP2
// stack: calldata_size, 192, calldata_size, 192, kexit_info
MOD %jumpi(fault_exception) // calldata_size should be a multiple of 192
DIV
// stack: k, kexit_info
DUP1 %mul_const(@SNARKV_DYNAMIC_GAS) %add_const(@SNARKV_STATIC_GAS)
%stack (gas, k, kexit_info) -> (gas, kexit_info, k)
%charge_gas
SWAP1
// stack: k, kexit_info
PUSH 0
loading_loop:
// stack: i, k, kexit_info
DUP2 DUP2 EQ %jumpi(loading_done)
// stack: i, k, kexit_info
DUP1 %mul_const(192)
// stack: px, i, k, kexit_info
GET_CONTEXT
%stack (ctx, px) -> (ctx, @SEGMENT_CALLDATA, px, 32, px)
%build_address
MLOAD_32BYTES
loading_loop_contd:
// stack: x, px, i, k, kexit_info
SWAP1 %add_const(32)
GET_CONTEXT
%stack (ctx, py) -> (ctx, @SEGMENT_CALLDATA, py, 32, py)
%build_address
MLOAD_32BYTES
loading_loop_contd2:
// stack: y, py, x, i, k, kexit_info
SWAP1 %add_const(32)
GET_CONTEXT
%stack (ctx, px_im) -> (ctx, @SEGMENT_CALLDATA, px_im, 32, px_im)
%build_address
MLOAD_32BYTES
loading_loop_contd3:
// stack: x_im, px_im, y, x, i, k, kexit_info
SWAP1 %add_const(32)
// stack: px_re, x_im, y, x, i, k, kexit_info
GET_CONTEXT
%stack (ctx, px_re) -> (ctx, @SEGMENT_CALLDATA, px_re, 32, px_re)
%build_address
MLOAD_32BYTES
loading_loop_contd4:
// stack: x_re, px_re, x_im, y, x, i, k, kexit_info
SWAP1 %add_const(32)
// stack: py_im, x_re, x_im, y, x, i, k, kexit_info
GET_CONTEXT
%stack (ctx, py_im) -> (ctx, @SEGMENT_CALLDATA, py_im, 32, py_im)
%build_address
MLOAD_32BYTES
loading_loop_contd5:
// stack: y_im, py_im, x_re, x_im, y, x, i, k, kexit_info
SWAP1 %add_const(32)
// stack: py_re, y_im, x_re, x_im, y, x, i, k, kexit_info
GET_CONTEXT
%stack (ctx, py_re) -> (ctx, @SEGMENT_CALLDATA, py_re, 32)
%build_address
MLOAD_32BYTES
loading_loop_contd6:
// stack: y_re, y_im, x_re, x_im, y, x, i, k, kexit_info
SWAP1 // the EVM serializes the imaginary part first
// stack: y_im, y_re, x_re, x_im, y, x, i, k, kexit_info
DUP7
// stack: i, y_im, y_re, x_re, x_im, y, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%add_const(5)
%mstore_bn254_pairing
// stack: y_re, x_re, x_im, y, x, i, k, kexit_info
DUP6
// stack: i, y_re, x_re, x_im, y, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%add_const(4)
%mstore_bn254_pairing
SWAP1 // the EVM serializes the imaginary part first
// stack: x_im, x_re, y, x, i, k, kexit_info
DUP5
// stack: i, x_im, x_re, y, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%add_const(3)
%mstore_bn254_pairing
// stack: x_re, y, x, i, k, kexit_info
DUP4
// stack: i, x_re, y, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%add_const(2)
%mstore_bn254_pairing
// stack: y, x, i, k, kexit_info
DUP3
// stack: i, y, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%add_const(1)
%mstore_bn254_pairing
// stack: x, i, k, kexit_info
DUP2
// stack: i, x, i, k, kexit_info
%mul_const(6) %add_const(@SNARKV_INP)
%mstore_bn254_pairing
// stack: i, k, kexit_info
%increment
%jump(loading_loop)
loading_done:
%stack (i, k) -> (k, @SNARKV_INP, @SNARKV_OUT, got_result)
%jump(bn254_pairing)
got_result:
// stack: result, kexit_info
DUP1 %eq_const(@U256_MAX) %jumpi(fault_exception)
// stack: result, kexit_info
// Store the result bool (repr. by a U256) to the parent's return data using `mstore_unpacking`.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
%stack (parent_ctx, address) -> (parent_ctx, @SEGMENT_RETURNDATA, address)
%build_address_no_offset
MSTORE_32BYTES_32
%jump(pop_and_return_success)
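The length and gas checks at the top of precompile_snarkv reduce to the following, assuming the EIP-1108 mainnet constants @SNARKV_STATIC_GAS = 45000 and @SNARKV_DYNAMIC_GAS = 34000:

    fn snarkv_gas(calldata_size: u64) -> Result<u64, ()> {
        if calldata_size % 192 != 0 {
            return Err(()); // fault_exception: input must be whole (G1, G2) pairs
        }
        let k = calldata_size / 192;
        Ok(45_000 + 34_000 * k)
    }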

@ -1,472 +0,0 @@
// After the transaction data has been parsed into a normalized set of fields
// (see NormalizedTxnField), this routine processes the transaction.
// TODO: Save checkpoints in @CTX_METADATA_STATE_TRIE_CHECKPOINT_PTR and @SEGMENT_STORAGE_TRIE_CHECKPOINT_PTRS.
// Pre stack: retdest
// Post stack: success, leftover_gas
global process_normalized_txn:
// stack: retdest
%compute_fees
// stack: retdest
// Compute this transaction's intrinsic gas and store it.
%intrinsic_gas
DUP1
%mstore_txn_field(@TXN_FIELD_INTRINSIC_GAS)
// stack: intrinsic_gas, retdest
// Assert gas_limit >= intrinsic_gas.
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
%assert_ge(invalid_txn)
// Assert block gas limit >= txn gas limit.
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
%mload_global_metadata(@GLOBAL_METADATA_BLOCK_GAS_LIMIT)
%assert_ge(invalid_txn)
%mload_txn_field(@TXN_FIELD_ORIGIN)
// stack: sender, retdest
// Check that txn nonce matches account nonce.
DUP1 %nonce
DUP1 %eq_const(@MAX_NONCE) %assert_zero(invalid_txn_2) // EIP-2681
// stack: sender_nonce, sender, retdest
%mload_txn_field(@TXN_FIELD_NONCE)
// stack: tx_nonce, sender_nonce, sender, retdest
%assert_eq(invalid_txn_1)
// stack: sender, retdest
// Assert sender has no code.
DUP1 %ext_code_empty %assert_nonzero(invalid_txn_1)
// stack: sender, retdest
// Assert sender balance >= gas_limit * gas_price + value.
%balance
// stack: sender_balance, retdest
%mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS)
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
MUL
%mload_txn_field(@TXN_FIELD_VALUE)
ADD
%assert_le(invalid_txn)
// stack: retdest
// Assert chain ID matches block metadata
%mload_txn_field(@TXN_FIELD_CHAIN_ID_PRESENT)
// stack: chain_id_present, retdest
DUP1
%mload_txn_field(@TXN_FIELD_CHAIN_ID)
// stack: tx_chain_id, chain_id_present, chain_id_present, retdest
MUL SWAP1
// stack: chain_id_present, filtered_tx_chain_id, retdest
%mload_global_metadata(@GLOBAL_METADATA_BLOCK_CHAIN_ID)
MUL
// stack: filtered_block_chain_id, filtered_tx_chain_id, retdest
%assert_eq(invalid_txn)
// stack: retdest
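The chain-ID check above multiplies both sides by chain_id_present so that legacy transactions, which carry no chain ID, compare 0 == 0; as a Rust sketch:

    fn chain_id_ok(chain_id_present: bool, tx_chain_id: u64, block_chain_id: u64) -> bool {
        let mask = chain_id_present as u64;
        // Legacy txns (mask == 0) always pass; typed txns must match the block.
        mask * tx_chain_id == mask * block_chain_id
    }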
global buy_gas:
%mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS)
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
MUL
// stack: gas_cost, retdest
%mload_txn_field(@TXN_FIELD_ORIGIN)
// stack: sender_addr, gas_cost, retdest
%deduct_eth
// stack: deduct_eth_status, retdest
%jumpi(panic)
// stack: retdest
global increment_sender_nonce:
%mload_txn_field(@TXN_FIELD_ORIGIN)
DUP1 %increment_nonce
global warm_origin:
// stack: origin, retdest
%insert_accessed_addresses_no_return
global warm_precompiles:
// Add precompiles to accessed addresses.
PUSH @ECREC %insert_accessed_addresses_no_return
PUSH @SHA256 %insert_accessed_addresses_no_return
PUSH @RIP160 %insert_accessed_addresses_no_return
PUSH @ID %insert_accessed_addresses_no_return
PUSH @EXPMOD %insert_accessed_addresses_no_return
PUSH @BN_ADD %insert_accessed_addresses_no_return
PUSH @BN_MUL %insert_accessed_addresses_no_return
PUSH @SNARKV %insert_accessed_addresses_no_return
PUSH @BLAKE2_F %insert_accessed_addresses_no_return
// EIP-3651
global warm_coinbase:
%mload_global_metadata(@GLOBAL_METADATA_BLOCK_BENEFICIARY)
%insert_accessed_addresses_no_return
global process_based_on_type:
%is_contract_creation
%jumpi(process_contract_creation_txn)
%jump(process_message_txn)
global process_contract_creation_txn:
// stack: retdest
%mload_txn_field(@TXN_FIELD_ORIGIN)
// stack: origin, retdest
DUP1 %nonce
// stack: origin_nonce, origin, retdest
%decrement // Need the non-incremented nonce
SWAP1
// stack: origin, origin_nonce, retdest
%get_create_address
// stack: address, retdest
DUP1 %insert_accessed_addresses_no_return
%checkpoint
// Create the new contract account in the state trie.
DUP1
// stack: address, address, retdest
%create_contract_account
// stack: status, address, retdest
%jumpi(create_contract_account_fault)
// stack: address, retdest
// Transfer value to new contract
DUP1 %mload_txn_field(@TXN_FIELD_VALUE)
SWAP1
%mload_txn_field(@TXN_FIELD_ORIGIN)
DUP3 DUP3 DUP3
%transfer_eth %jumpi(panic)
%journal_add_balance_transfer
// stack: address, retdest
%create_context
// stack: new_ctx, address, retdest
// Store constructor code length
PUSH @CTX_METADATA_CODE_SIZE
// stack: offset, new_ctx, address, retdest
DUP2 // new_ctx
ADD // CTX_METADATA_CODE_SIZE is already scaled by its segment
// stack: addr, new_ctx, address, retdest
%mload_txn_field(@TXN_FIELD_DATA_LEN)
// stack: data_len, addr, new_ctx, address, retdest
MSTORE_GENERAL
// stack: new_ctx, address, retdest
// Copy the code from txdata to the new context's code segment.
PUSH process_contract_creation_txn_after_code_loaded
%mload_txn_field(@TXN_FIELD_DATA_LEN)
PUSH @SEGMENT_TXN_DATA // SRC (context == offset == 0)
DUP4 // DST (segment == 0 (i.e. CODE), and offset == 0)
%jump(memcpy_bytes)
global process_contract_creation_txn_after_code_loaded:
// stack: new_ctx, address, retdest
// Each line in the block below does not change the stack.
DUP2 %set_new_ctx_addr
%mload_txn_field(@TXN_FIELD_ORIGIN) %set_new_ctx_caller
%mload_txn_field(@TXN_FIELD_VALUE) %set_new_ctx_value
%set_new_ctx_parent_ctx
%set_new_ctx_parent_pc(process_contract_creation_txn_after_constructor)
%non_intrinsic_gas %set_new_ctx_gas_limit
// stack: new_ctx, address, retdest
%enter_new_ctx
// (Old context) stack: new_ctx, address, retdest
global process_contract_creation_txn_after_constructor:
// stack: success, leftover_gas, new_ctx, address, retdest
// We eventually return leftover_gas and success.
%stack (success, leftover_gas, new_ctx, address, retdest) -> (success, leftover_gas, new_ctx, address, retdest, success)
ISZERO %jumpi(contract_creation_fault_3)
// EIP-3541: Reject new contract code starting with the 0xEF byte
PUSH 0 %mload_current(@SEGMENT_RETURNDATA) %eq_const(0xEF) %jumpi(contract_creation_fault_3_zero_leftover)
// stack: leftover_gas, new_ctx, address, retdest, success
%returndatasize // Size of the code.
// stack: code_size, leftover_gas, new_ctx, address, retdest, success
DUP1 %gt_const(@MAX_CODE_SIZE) %jumpi(contract_creation_fault_4)
// stack: code_size, leftover_gas, new_ctx, address, retdest, success
%mul_const(@GAS_CODEDEPOSIT) SWAP1
// stack: leftover_gas, codedeposit_cost, new_ctx, address, retdest, success
DUP2 DUP2 LT %jumpi(contract_creation_fault_4)
// stack: leftover_gas, codedeposit_cost, new_ctx, address, retdest, success
SUB
// Store the code hash of the new contract.
// stack: leftover_gas, new_ctx, address, retdest, success
%returndatasize
PUSH @SEGMENT_RETURNDATA
GET_CONTEXT
%build_address_no_offset
// stack: addr, len
KECCAK_GENERAL
// stack: codehash, leftover_gas, new_ctx, address, retdest, success
%observe_new_contract
DUP4
// stack: address, codehash, leftover_gas, new_ctx, address, retdest, success
%set_codehash
%stack (leftover_gas, new_ctx, address, retdest, success) -> (leftover_gas, new_ctx, address, retdest, success, leftover_gas)
%pay_coinbase_and_refund_sender
// stack: leftover_gas', new_ctx, address, retdest, success, leftover_gas
SWAP5 POP
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
// stack: new_ctx, address, retdest, success, leftover_gas
POP
POP
JUMP
global process_message_txn:
// stack: retdest
%mload_txn_field(@TXN_FIELD_VALUE)
%mload_txn_field(@TXN_FIELD_TO)
DUP1 %insert_accessed_addresses_no_return
%mload_txn_field(@TXN_FIELD_ORIGIN)
// stack: from, to, amount, retdest
%transfer_eth
// stack: transfer_eth_status, retdest
%jumpi(process_message_txn_insufficient_balance)
// stack: retdest
%handle_precompiles_from_eoa
// If to's code is empty, return.
%mload_txn_field(@TXN_FIELD_TO) %ext_code_empty
// stack: code_empty, retdest
%jumpi(process_message_txn_return)
// Otherwise, load to's code and execute it in a new context.
// stack: retdest
%create_context
// stack: new_ctx, retdest
PUSH process_message_txn_code_loaded
DUP2 // new_ctx
%mload_txn_field(@TXN_FIELD_TO)
// stack: address, new_ctx, process_message_txn_code_loaded, new_ctx, retdest
%jump(load_code_padded)
global process_message_txn_insufficient_balance:
// stack: retdest
PANIC // TODO
global process_message_txn_return:
// stack: retdest
// Since no code was executed, the leftover gas is the non-intrinsic gas.
%non_intrinsic_gas
DUP1
// stack: leftover_gas, leftover_gas, retdest
%pay_coinbase_and_refund_sender
// stack: leftover_gas', leftover_gas, retdest
SWAP1 POP
%delete_all_touched_addresses
// stack: leftover_gas', retdest
SWAP1
PUSH 1 // success
SWAP1
// stack: retdest, success, leftover_gas
JUMP
global process_message_txn_code_loaded:
// stack: code_size, new_ctx, retdest
%set_new_ctx_code_size
// stack: new_ctx, retdest
// Each line in the block below does not change the stack.
%mload_txn_field(@TXN_FIELD_TO) %set_new_ctx_addr
%mload_txn_field(@TXN_FIELD_ORIGIN) %set_new_ctx_caller
%mload_txn_field(@TXN_FIELD_VALUE) %set_new_ctx_value
%set_new_ctx_parent_ctx
%set_new_ctx_parent_pc(process_message_txn_after_call)
%non_intrinsic_gas %set_new_ctx_gas_limit
// stack: new_ctx, retdest
// Set calldatasize and copy txn data to calldata.
%mload_txn_field(@TXN_FIELD_DATA_LEN)
%stack (calldata_size, new_ctx, retdest) -> (calldata_size, new_ctx, calldata_size, retdest)
%set_new_ctx_calldata_size
%stack (new_ctx, calldata_size, retdest) -> (new_ctx, @SEGMENT_CALLDATA, @SEGMENT_TXN_DATA, calldata_size, process_message_txn_code_loaded_finish, new_ctx, retdest)
%build_address_no_offset // DST
%jump(memcpy_bytes)
process_message_txn_code_loaded_finish:
%enter_new_ctx
// (Old context) stack: new_ctx, retdest
global process_message_txn_after_call:
// stack: success, leftover_gas, new_ctx, retdest
// We will return leftover_gas and success.
%stack (success, leftover_gas, new_ctx, retdest) -> (success, leftover_gas, new_ctx, retdest, success, leftover_gas)
ISZERO %jumpi(process_message_txn_fail)
process_message_txn_after_call_contd:
// stack: leftover_gas, new_ctx, retdest, success, leftover_gas
%pay_coinbase_and_refund_sender
// stack: leftover_gas', new_ctx, retdest, success, leftover_gas
SWAP4 POP
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
// stack: new_ctx, retdest, success, leftover_gas
POP
JUMP
process_message_txn_fail:
// stack: leftover_gas, new_ctx, retdest, success, leftover_gas
// Transfer value back to the caller.
%mload_txn_field(@TXN_FIELD_VALUE) ISZERO %jumpi(process_message_txn_after_call_contd)
%mload_txn_field(@TXN_FIELD_VALUE)
%mload_txn_field(@TXN_FIELD_ORIGIN)
%mload_txn_field(@TXN_FIELD_TO)
%transfer_eth %jumpi(panic)
%jump(process_message_txn_after_call_contd)
%macro pay_coinbase_and_refund_sender
// stack: leftover_gas
DUP1
// stack: leftover_gas, leftover_gas
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
SUB
// stack: used_gas, leftover_gas
%mload_global_metadata(@GLOBAL_METADATA_REFUND_COUNTER)
// stack: refund, used_gas, leftover_gas
DUP2 %div_const(@MAX_REFUND_QUOTIENT) // max_refund = used_gas/5
// stack: max_refund, refund, used_gas, leftover_gas
%min
%stack (refund, used_gas, leftover_gas) -> (leftover_gas, refund, refund, used_gas)
ADD
// stack: leftover_gas', refund, used_gas
SWAP2
// stack: used_gas, refund, leftover_gas'
SUB
// stack: used_gas', leftover_gas'
// Pay the coinbase.
%mload_txn_field(@TXN_FIELD_COMPUTED_PRIORITY_FEE_PER_GAS)
MUL
// stack: used_gas_tip, leftover_gas'
%mload_global_metadata(@GLOBAL_METADATA_BLOCK_BENEFICIARY)
// stack: coinbase, used_gas_tip, leftover_gas'
%add_eth
// stack: leftover_gas'
DUP1
// Refund gas to the origin.
%mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS)
MUL
// stack: leftover_gas_cost, leftover_gas'
%mload_txn_field(@TXN_FIELD_ORIGIN)
// stack: origin, leftover_gas_cost, leftover_gas'
%add_eth
// stack: leftover_gas'
%endmacro
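%pay_coinbase_and_refund_sender implements the post-EIP-3529 refund rule, capping the refund at used_gas / @MAX_REFUND_QUOTIENT (assumed to be 5, the mainnet value); a Rust sketch:

    fn settle_gas(
        gas_limit: u64,
        leftover_gas: u64,
        refund_counter: u64,
        fee_per_gas: u64,
        priority_fee_per_gas: u64,
    ) -> (u64, u64) {
        let used_gas = gas_limit - leftover_gas;
        let refund = refund_counter.min(used_gas / 5); // EIP-3529 cap
        let leftover_gas = leftover_gas + refund;
        let used_gas = used_gas - refund;
        let coinbase_tip = used_gas * priority_fee_per_gas; // %add_eth to the beneficiary
        let origin_refund = leftover_gas * fee_per_gas;     // %add_eth to the origin
        (coinbase_tip, origin_refund)
    }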
// Sets @TXN_FIELD_COMPUTED_FEE_PER_GAS and @TXN_FIELD_COMPUTED_PRIORITY_FEE_PER_GAS.
%macro compute_fees
// stack: (empty)
%mload_global_metadata(@GLOBAL_METADATA_BLOCK_BASE_FEE)
%mload_txn_field(@TXN_FIELD_MAX_PRIORITY_FEE_PER_GAS)
%mload_txn_field(@TXN_FIELD_MAX_FEE_PER_GAS)
// stack: max_fee, max_priority_fee, base_fee
DUP3 DUP2 %assert_ge(invalid_txn_3) // Assert max_fee >= base_fee
// stack: max_fee, max_priority_fee, base_fee
DUP2 DUP2 %assert_ge(invalid_txn_3) // Assert max_fee >= max_priority_fee
%stack (max_fee, max_priority_fee, base_fee) -> (max_fee, base_fee, max_priority_fee, base_fee)
SUB
// stack: max_fee - base_fee, max_priority_fee, base_fee
%min
// stack: computed_priority_fee, base_fee
%stack (computed_priority_fee, base_fee) -> (computed_priority_fee, base_fee, computed_priority_fee)
ADD
// stack: computed_fee, computed_priority_fee
%mstore_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS)
%mstore_txn_field(@TXN_FIELD_COMPUTED_PRIORITY_FEE_PER_GAS)
// stack: (empty)
%endmacro
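%compute_fees is the EIP-1559 effective-price computation; a direct Rust transcription:

    fn compute_fees(base_fee: u64, max_fee: u64, max_priority_fee: u64) -> Option<(u64, u64)> {
        if max_fee < base_fee || max_fee < max_priority_fee {
            return None; // invalid_txn_3
        }
        let computed_priority_fee = max_priority_fee.min(max_fee - base_fee);
        let computed_fee = base_fee + computed_priority_fee;
        Some((computed_fee, computed_priority_fee))
    }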
%macro non_intrinsic_gas
// stack: (empty)
%mload_txn_field(@TXN_FIELD_INTRINSIC_GAS)
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
SUB
// stack: gas_limit - intrinsic_gas
%endmacro
create_contract_account_fault:
%revert_checkpoint
// stack: address, retdest
POP
PUSH 0 // leftover_gas
// stack: leftover_gas, retdest
%pay_coinbase_and_refund_sender
// stack: leftover_gas', retdest
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
// stack: leftover_gas', retdest
SWAP1 PUSH 0 // success
// stack: success, retdest, leftover_gas
SWAP1
JUMP
contract_creation_fault_3:
%revert_checkpoint
%stack (leftover_gas, new_ctx, address, retdest, success) -> (leftover_gas, retdest, success)
%pay_coinbase_and_refund_sender
// stack: leftover_gas', retdest, success
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
%stack (leftover_gas, retdest, success) -> (retdest, 0, leftover_gas)
JUMP
contract_creation_fault_3_zero_leftover:
%revert_checkpoint
// stack: leftover_gas, new_ctx, address, retdest, success
%pop3
PUSH 0 // leftover gas
// stack: leftover_gas, retdest, success
%pay_coinbase_and_refund_sender
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
%stack (leftover_gas, retdest, success) -> (retdest, 0, leftover_gas)
JUMP
contract_creation_fault_4:
%revert_checkpoint
// stack: code_size/leftover_gas, leftover_gas/codedeposit_cost, new_ctx, address, retdest, success
%pop4
PUSH 0 // leftover gas
// stack: leftover_gas, retdest, success
%pay_coinbase_and_refund_sender
%delete_all_touched_addresses
%delete_all_selfdestructed_addresses
%stack (leftover_gas, retdest, success) -> (retdest, 0, leftover_gas)
JUMP
global invalid_txn:
POP
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
PUSH 0
%jump(txn_after)
global invalid_txn_1:
%pop2
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
PUSH 0
%jump(txn_after)
global invalid_txn_2:
%pop3
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
PUSH 0
%jump(txn_after)
global invalid_txn_3:
%pop4
%mload_txn_field(@TXN_FIELD_GAS_LIMIT)
PUSH 0
%jump(txn_after)

@ -1,78 +0,0 @@
/// Self-destruct list.
/// Implemented as an array, with the length stored in the global metadata.
/// Note: This array allows duplicates.
%macro insert_selfdestruct_list
// stack: addr
%mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN)
DUP1 PUSH @SEGMENT_SELFDESTRUCT_LIST %build_kernel_address
%stack (write_addr, len, addr) -> (addr, write_addr, len)
MSTORE_GENERAL // Store new address at the end of the array.
// stack: len
%increment
%mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // Store new length.
%endmacro
/// Remove one occurrence of the address from the list.
/// Panics if the address is not in the list.
global remove_selfdestruct_list:
// stack: addr, retdest
%mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN)
// stack: len, addr, retdest
PUSH @SEGMENT_SELFDESTRUCT_LIST ADD
PUSH @SEGMENT_SELFDESTRUCT_LIST
remove_selfdestruct_list_loop:
// `i` and `len` are both scaled by SEGMENT_SELFDESTRUCT_LIST
%stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest)
EQ %jumpi(panic)
// stack: i, len, addr, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, addr, retdest
DUP4
// stack: addr, loaded_addr, i, len, addr, retdest
EQ %jumpi(remove_selfdestruct_list_found)
// stack: i, len, addr, retdest
%increment
%jump(remove_selfdestruct_list_loop)
remove_selfdestruct_list_found:
%stack (i, len, addr, retdest) -> (len, 1, i, retdest)
SUB
PUSH @SEGMENT_SELFDESTRUCT_LIST
DUP2 SUB // unscale
%mstore_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN) // Decrement the list length.
// stack: len-1, i, retdest
MLOAD_GENERAL // Load the last address in the list.
// stack: last_addr, i, retdest
MSTORE_GENERAL // Store the last address at the position of the removed address.
JUMP
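Removal is a swap-remove: find the entry, overwrite it with the last element, and shrink the list, so ordering is not preserved. Equivalent Rust:

    fn remove_one(list: &mut Vec<u64>, addr: u64) {
        let i = list.iter().position(|&a| a == addr).expect("panic: address not in list");
        let last = list.pop().unwrap();
        if i < list.len() {
            list[i] = last; // overwrite the removed slot with the former tail
        }
    }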
global delete_all_selfdestructed_addresses:
// stack: retdest
%mload_global_metadata(@GLOBAL_METADATA_SELFDESTRUCT_LIST_LEN)
// stack: len, retdest
PUSH @SEGMENT_SELFDESTRUCT_LIST ADD
PUSH @SEGMENT_SELFDESTRUCT_LIST
delete_all_selfdestructed_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_SELFDESTRUCT_LIST
// stack: i, len, retdest
DUP2 DUP2 EQ %jumpi(delete_all_selfdestructed_addresses_done)
// stack: i, len, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, retdest
DUP1 %is_non_existent ISZERO %jumpi(bingo)
// stack: loaded_addr, i, len, retdest
POP %increment %jump(delete_all_selfdestructed_addresses_loop)
bingo:
// stack: loaded_addr, i, len, retdest
%delete_account
%increment %jump(delete_all_selfdestructed_addresses_loop)
delete_all_selfdestructed_addresses_done:
// stack: i, len, retdest
%pop2 JUMP
%macro delete_all_selfdestructed_addresses
%stack () -> (%%after)
%jump(delete_all_selfdestructed_addresses)
%%after:
// stack: (empty)
%endmacro

@ -1,155 +0,0 @@
global syscall_jumptable:
// 0x00-0x0f
JUMPTABLE sys_stop
JUMPTABLE panic // add is implemented natively
JUMPTABLE panic // mul is implemented natively
JUMPTABLE panic // sub is implemented natively
JUMPTABLE panic // div is implemented natively
JUMPTABLE sys_sdiv
JUMPTABLE panic // mod is implemented natively
JUMPTABLE sys_smod
JUMPTABLE panic // addmod is implemented natively
JUMPTABLE panic // mulmod is implemented natively
JUMPTABLE sys_exp
JUMPTABLE sys_signextend
JUMPTABLE panic // 0x0c is an invalid opcode
JUMPTABLE panic // 0x0d is an invalid opcode
JUMPTABLE panic // 0x0e is an invalid opcode
JUMPTABLE panic // 0x0f is an invalid opcode
// 0x10-0x1f
JUMPTABLE panic // lt is implemented natively
JUMPTABLE panic // gt is implemented natively
JUMPTABLE sys_slt
JUMPTABLE sys_sgt
JUMPTABLE panic // eq is implemented natively
JUMPTABLE panic // iszero is implemented natively
JUMPTABLE panic // and is implemented natively
JUMPTABLE panic // or is implemented natively
JUMPTABLE panic // xor is implemented natively
JUMPTABLE panic // not is implemented natively
JUMPTABLE panic // byte is implemented natively
JUMPTABLE panic // shl is implemented natively
JUMPTABLE panic // shr is implemented natively
JUMPTABLE sys_sar
JUMPTABLE panic // 0x1e is an invalid opcode
JUMPTABLE panic // 0x1f is an invalid opcode
// 0x20-0x2f
JUMPTABLE sys_keccak256
%rep 15
JUMPTABLE panic // 0x21-0x2f are invalid opcodes
%endrep
// 0x30-0x3f
JUMPTABLE sys_address
JUMPTABLE sys_balance
JUMPTABLE sys_origin
JUMPTABLE sys_caller
JUMPTABLE sys_callvalue
JUMPTABLE sys_calldataload
JUMPTABLE sys_calldatasize
JUMPTABLE sys_calldatacopy
JUMPTABLE sys_codesize
JUMPTABLE sys_codecopy
JUMPTABLE sys_gasprice
JUMPTABLE sys_extcodesize
JUMPTABLE sys_extcodecopy
JUMPTABLE sys_returndatasize
JUMPTABLE sys_returndatacopy
JUMPTABLE sys_extcodehash
// 0x40-0x4f
JUMPTABLE sys_blockhash
JUMPTABLE sys_coinbase
JUMPTABLE sys_timestamp
JUMPTABLE sys_number
JUMPTABLE sys_prevrandao
JUMPTABLE sys_gaslimit
JUMPTABLE sys_chainid
JUMPTABLE sys_selfbalance
JUMPTABLE sys_basefee
%rep 7
JUMPTABLE panic // 0x49-0x4f are invalid opcodes
%endrep
// 0x50-0x5f
JUMPTABLE panic // pop is implemented natively
JUMPTABLE sys_mload
JUMPTABLE sys_mstore
JUMPTABLE sys_mstore8
JUMPTABLE sys_sload
JUMPTABLE sys_sstore
JUMPTABLE panic // jump is implemented natively
JUMPTABLE panic // jumpi is implemented natively
JUMPTABLE panic // pc is implemented natively
JUMPTABLE sys_msize
JUMPTABLE sys_gas
JUMPTABLE panic // jumpdest is implemented natively
JUMPTABLE panic // 0x5c is an invalid opcode
JUMPTABLE panic // 0x5d is an invalid opcode
JUMPTABLE panic // 0x5e is an invalid opcode
JUMPTABLE panic // 0x5f is an invalid opcode
// 0x60-0x6f
%rep 16
JUMPTABLE panic // push1-push16 are implemented natively
%endrep
// 0x70-0x7f
%rep 16
JUMPTABLE panic // push17-push32 are implemented natively
%endrep
// 0x80-0x8f
%rep 16
JUMPTABLE panic // dup1-dup16 are implemented natively
%endrep
// 0x90-0x9f
%rep 16
JUMPTABLE panic // swap1-swap16 are implemented natively
%endrep
// 0xa0-0xaf
JUMPTABLE sys_log0
JUMPTABLE sys_log1
JUMPTABLE sys_log2
JUMPTABLE sys_log3
JUMPTABLE sys_log4
%rep 11
JUMPTABLE panic // 0xa5-0xaf are invalid opcodes
%endrep
// 0xb0-0xbf
%rep 16
JUMPTABLE panic // 0xb0-0xbf are invalid opcodes
%endrep
// 0xc0-0xdf
%rep 32
JUMPTABLE panic // mstore_32bytes_1-32 are implemented natively
%endrep
// 0xe0-0xef
%rep 16
JUMPTABLE panic // 0xe0-0xef are invalid opcodes
%endrep
// 0xf0-0xff
JUMPTABLE sys_create
JUMPTABLE sys_call
JUMPTABLE sys_callcode
JUMPTABLE sys_return
JUMPTABLE sys_delegatecall
JUMPTABLE sys_create2
JUMPTABLE panic // 0xf6 is an invalid opcode
JUMPTABLE panic // 0xf7 is an invalid opcode
JUMPTABLE panic // 0xf8 is an invalid opcode
JUMPTABLE panic // 0xf9 is an invalid opcode
JUMPTABLE sys_staticcall
JUMPTABLE panic // 0xfb is an invalid opcode
JUMPTABLE panic // 0xfc is an invalid opcode
JUMPTABLE sys_revert
JUMPTABLE panic // 0xfe is an invalid opcode
JUMPTABLE sys_selfdestruct

@ -1,225 +0,0 @@
// Handlers for operations which terminate the current context, namely STOP,
// RETURN, SELFDESTRUCT, REVERT, and exceptions such as stack underflow.
global sys_stop:
// stack: kexit_info
// Set the parent context's return data size to 0.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
global sys_return:
// stack: kexit_info, offset, size
%stack (kexit_info, offset, size) -> (offset, size, kexit_info, offset, size)
%add_or_fault
// stack: offset+size, kexit_info, offset, size
DUP4 ISZERO %jumpi(return_zero_size)
// stack: offset+size, kexit_info, offset, size
DUP1 %ensure_reasonable_offset
%update_mem_bytes
%jump(return_after_gas)
return_zero_size:
POP
return_after_gas:
// Load the parent's context.
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
// Store the return data size in the parent context's metadata.
%stack (parent_ctx, kexit_info, offset, size) ->
(parent_ctx, @CTX_METADATA_RETURNDATA_SIZE, size, offset, size, parent_ctx, kexit_info)
ADD // addr (CTX offsets are already scaled by their segment)
SWAP1
// stack: size, addr, offset, size, parent_ctx, kexit_info
MSTORE_GENERAL
// stack: offset, size, parent_ctx, kexit_info
// Store the return data in the parent context's returndata segment.
PUSH @SEGMENT_MAIN_MEMORY
GET_CONTEXT
%build_address
%stack (addr, size, parent_ctx, kexit_info) ->
(
parent_ctx, @SEGMENT_RETURNDATA, // DST
addr, // SRC
size, sys_return_finish, kexit_info // count, retdest, ...
)
%build_address_no_offset
// stack: DST, SRC, size, sys_return_finish, kexit_info
%jump(memcpy_bytes)
sys_return_finish:
// stack: kexit_info
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
global sys_selfdestruct:
%check_static
// stack: kexit_info, recipient
SWAP1 %u256_to_addr
%address DUP1 %balance
// Insert recipient into the accessed addresses list.
// stack: balance, address, recipient, kexit_info
DUP3 %insert_accessed_addresses
// Set the parent context's return data size to 0.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
// Compute gas.
// stack: cold_access, balance, address, recipient, kexit_info
%mul_const(@GAS_COLDACCOUNTACCESS)
DUP2
// stack: balance, gas_coldaccess, balance, address, recipient, kexit_info
ISZERO %not_bit
// stack: balance!=0, gas_coldaccess, balance, address, recipient, kexit_info
DUP5 %is_dead MUL %mul_const(@GAS_NEWACCOUNT)
// stack: gas_newaccount, gas_coldaccess, balance, address, recipient, kexit_info
ADD %add_const(@GAS_SELFDESTRUCT)
%stack (gas, balance, address, recipient, kexit_info) -> (gas, kexit_info, balance, address, recipient)
%charge_gas
%stack (kexit_info, balance, address, recipient) -> (balance, address, recipient, kexit_info)
// Insert address into the selfdestruct set.
// stack: balance, address, recipient, kexit_info
DUP2 %insert_selfdestruct_list
// Set the balance of the address to 0.
// stack: balance, address, recipient, kexit_info
PUSH 0
// stack: 0, balance, address, recipient, kexit_info
DUP3 %mpt_read_state_trie
// stack: account_ptr, 0, balance, address, recipient, kexit_info
%add_const(1)
// stack: balance_ptr, 0, balance, address, recipient, kexit_info
%mstore_trie_data
%stack (balance, address, recipient, kexit_info) ->
(address, recipient, address, recipient, balance, kexit_info)
// If the recipient is the same as the address, then we're done.
// Otherwise, send the balance to the recipient.
// stack: address, recipient, address, recipient, balance, kexit_info
EQ %jumpi(sys_selfdestruct_journal_add)
%stack (address, recipient, balance, kexit_info) -> (recipient, balance, address, recipient, balance, kexit_info)
%add_eth
sys_selfdestruct_journal_add:
// stack: address, recipient, balance, kexit_info
%journal_add_account_destroyed
// stack: kexit_info
%leftover_gas
// stack: leftover_gas
PUSH 1 // success
%jump(terminate_common)
global sys_revert:
// stack: kexit_info, offset, size
%stack (kexit_info, offset, size) -> (offset, size, kexit_info, offset, size)
%add_or_fault
// stack: offset+size, kexit_info, offset, size
DUP4 ISZERO %jumpi(revert_zero_size)
// stack: offset+size, kexit_info, offset, size
DUP1 %ensure_reasonable_offset
%update_mem_bytes
%jump(revert_after_gas)
revert_zero_size:
POP
revert_after_gas:
// Load the parent's context.
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
// Store the return data size in the parent context's metadata.
%stack (parent_ctx, kexit_info, offset, size) ->
(parent_ctx, @CTX_METADATA_RETURNDATA_SIZE, size, offset, size, parent_ctx, kexit_info)
ADD // addr (CTX offsets are already scaled by their segment)
SWAP1
// stack: size, addr, offset, size, parent_ctx, kexit_info
MSTORE_GENERAL
// stack: offset, size, parent_ctx, kexit_info
// Store the return data in the parent context's returndata segment.
PUSH @SEGMENT_MAIN_MEMORY
GET_CONTEXT
%build_address
%stack (addr, size, parent_ctx, kexit_info) ->
(
parent_ctx, @SEGMENT_RETURNDATA, // DST
addr, // SRC
size, sys_revert_finish, kexit_info // count, retdest, ...
)
%build_address_no_offset
// stack: DST, SRC, size, sys_revert_finish, kexit_info
%jump(memcpy_bytes)
sys_revert_finish:
%leftover_gas
// stack: leftover_gas
%revert_checkpoint
PUSH 0 // success
%jump(terminate_common)
// The execution is in an exceptional halting state if
// - there is insufficient gas
// - the instruction is invalid
// - there are insufficient stack items
// - a JUMP/JUMPI destination is invalid
// - the new stack size would be larger than 1024, or
// - state modification is attempted during a static call
global fault_exception:
// stack: (empty)
%revert_checkpoint
PUSH 0 // leftover_gas
// Set the parent context's return data size to 0.
%mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 0)
PUSH 0 // success
%jump(terminate_common)
global terminate_common:
// stack: success, leftover_gas
// TODO: Panic if we exceeded our gas limit?
// We want to move the success flag from our (child) context's stack to the
// parent context's stack. We will write it to memory, specifically
// SEGMENT_KERNEL_GENERAL[0], then load it after the context switch.
PUSH 0
// stack: 0, success, leftover_gas
%mstore_kernel_general
// stack: leftover_gas
// Similarly, we write leftover_gas to SEGMENT_KERNEL_GENERAL[1] so that
// we can later read it after switching to the parent context.
PUSH 1
// stack: 1, leftover_gas
%mstore_kernel_general
// stack: (empty)
// Similarly, we write the parent PC to SEGMENT_KERNEL_GENERAL[2] so that
// we can later read it after switching to the parent context.
PUSH 2
PUSH @SEGMENT_KERNEL_GENERAL
%build_kernel_address
%mload_context_metadata(@CTX_METADATA_PARENT_PC)
MSTORE_GENERAL
// stack: (empty)
// Go back to the parent context.
%mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
SET_CONTEXT
%decrement_call_depth
// stack: (empty)
// Load the fields that we stored in SEGMENT_KERNEL_GENERAL.
PUSH 1 %mload_kernel_general // leftover_gas
PUSH 0 %mload_kernel_general // success
PUSH 2 %mload_kernel_general // parent_pc
// stack: parent_pc, success, leftover_gas
JUMP

View File

@ -1,112 +0,0 @@
%macro insert_touched_addresses
%stack (addr) -> (addr, %%after)
%jump(insert_touched_addresses)
%%after:
// stack: (empty)
%endmacro
%macro insert_touched_addresses_no_return
%insert_touched_addresses
POP
%endmacro
/// Inserts the address into the list if it is not already present.
global insert_touched_addresses:
// stack: addr, retdest
%mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN)
// stack: len, addr, retdest
PUSH @SEGMENT_TOUCHED_ADDRESSES ADD
PUSH @SEGMENT_TOUCHED_ADDRESSES
insert_touched_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES
%stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest)
EQ %jumpi(insert_address)
// stack: i, len, addr, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, addr, retdest
DUP4
// stack: addr, loaded_addr, i, len, addr, retdest
EQ %jumpi(insert_touched_addresses_found)
// stack: i, len, addr, retdest
%increment
%jump(insert_touched_addresses_loop)
insert_address:
%stack (i, len, addr, retdest) -> (i, addr, len, @SEGMENT_TOUCHED_ADDRESSES, retdest)
DUP2 %journal_add_account_touched // Add a journal entry for the touched account.
%swap_mstore // Store new address at the end of the array.
// stack: len, segment, retdest
SUB // unscale
%increment
%mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Store new length.
JUMP
insert_touched_addresses_found:
%stack (i, len, addr, retdest) -> (retdest)
JUMP
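Insertion is a linear scan with append-if-absent, journaling the touch so it can be undone on revert; in Rust:

    fn insert_touched(list: &mut Vec<u64>, journal: &mut Vec<u64>, addr: u64) {
        if list.contains(&addr) {
            return; // insert_touched_addresses_found
        }
        journal.push(addr); // %journal_add_account_touched
        list.push(addr);
    }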
/// Remove the address from the list.
/// Panics if the address is not in the list.
/// TODO: Unused?
global remove_touched_addresses:
// stack: addr, retdest
%mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN)
// stack: len, addr, retdest
PUSH @SEGMENT_TOUCHED_ADDRESSES ADD
PUSH @SEGMENT_TOUCHED_ADDRESSES
remove_touched_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES
%stack (i, len, addr, retdest) -> (i, len, i, len, addr, retdest)
EQ %jumpi(panic)
// stack: i, len, addr, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, addr, retdest
DUP4
// stack: addr, loaded_addr, i, len, addr, retdest
EQ %jumpi(remove_touched_addresses_found)
// stack: i, len, addr, retdest
%increment
%jump(remove_touched_addresses_loop)
remove_touched_addresses_found:
%stack (i, len, addr, retdest) -> (len, 1, i, retdest)
SUB
PUSH @SEGMENT_TOUCHED_ADDRESSES DUP2
SUB // unscale
%mstore_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN) // Decrement the list length.
// stack: len-1, i, retdest
MLOAD_GENERAL // Load the last address in the list.
// stack: last_addr, i, retdest
MSTORE_GENERAL // Store the last address at the position of the removed address.
JUMP
global delete_all_touched_addresses:
// stack: retdest
%mload_global_metadata(@GLOBAL_METADATA_TOUCHED_ADDRESSES_LEN)
// stack: len, retdest
PUSH @SEGMENT_TOUCHED_ADDRESSES ADD
PUSH @SEGMENT_TOUCHED_ADDRESSES
delete_all_touched_addresses_loop:
// `i` and `len` are both scaled by SEGMENT_TOUCHED_ADDRESSES
// stack: i, len, retdest
DUP2 DUP2 EQ %jumpi(delete_all_touched_addresses_done)
// stack: i, len, retdest
DUP1 MLOAD_GENERAL
// stack: loaded_addr, i, len, retdest
DUP1 %is_empty %jumpi(bingo)
// stack: loaded_addr, i, len, retdest
POP %increment %jump(delete_all_touched_addresses_loop)
bingo:
// stack: loaded_addr, i, len, retdest
%delete_account
%increment %jump(delete_all_touched_addresses_loop)
delete_all_touched_addresses_done:
// stack: i, len, retdest
%pop2 JUMP
%macro delete_all_touched_addresses
%stack () -> (%%after)
%jump(delete_all_touched_addresses)
%%after:
// stack: (empty)
%endmacro

@ -1,112 +0,0 @@
// Transfers some ETH from one address to another. The amount is given in wei.
// Pre stack: from, to, amount, retdest
// Post stack: status (0 indicates success)
global transfer_eth:
// stack: from, to, amount, retdest
%stack (from, to, amount, retdest)
-> (from, amount, to, amount, retdest)
%deduct_eth
// stack: deduct_eth_status, to, amount, retdest
%jumpi(transfer_eth_failure)
// stack: to, amount, retdest
%add_eth
%stack (retdest) -> (retdest, 0)
JUMP
global transfer_eth_failure:
%stack (to, amount, retdest) -> (retdest, 1)
JUMP
// Convenience macro to call transfer_eth and return where we left off.
%macro transfer_eth
%stack (from, to, amount) -> (from, to, amount, %%after)
%jump(transfer_eth)
%%after:
%endmacro
// Returns 0 on success, or 1 if addr has insufficient balance. Panics if addr isn't found in the trie.
// Pre stack: addr, amount, retdest
// Post stack: status (0 indicates success)
global deduct_eth:
// stack: addr, amount, retdest
DUP1 %insert_touched_addresses
%mpt_read_state_trie
// stack: account_ptr, amount, retdest
DUP1 ISZERO %jumpi(deduct_eth_no_such_account) // If the account pointer is null, return 1.
%add_const(1)
// stack: balance_ptr, amount, retdest
DUP1 %mload_trie_data
// stack: balance, balance_ptr, amount, retdest
DUP1 DUP4 GT
// stack: amount > balance, balance, balance_ptr, amount, retdest
%jumpi(deduct_eth_insufficient_balance)
%stack (balance, balance_ptr, amount, retdest) -> (balance, amount, balance_ptr, retdest, 0)
SUB
SWAP1
// stack: balance_ptr, balance - amount, retdest, 0
%mstore_trie_data
// stack: retdest, 0
JUMP
global deduct_eth_no_such_account:
%stack (account_ptr, amount, retdest) -> (retdest, 1)
JUMP
global deduct_eth_insufficient_balance:
%stack (balance, balance_ptr, amount, retdest) -> (retdest, 1)
JUMP
// Convenience macro to call deduct_eth and return where we left off.
%macro deduct_eth
%stack (addr, amount) -> (addr, amount, %%after)
%jump(deduct_eth)
%%after:
%endmacro
// Pre stack: addr, amount, retdest
// Post stack: (empty)
global add_eth:
// stack: addr, amount, retdest
DUP1 %insert_touched_addresses
DUP1 %mpt_read_state_trie
// stack: account_ptr, addr, amount, retdest
DUP1 ISZERO %jumpi(add_eth_new_account) // If the account pointer is null, we need to create the account.
%add_const(1)
// stack: balance_ptr, addr, amount, retdest
DUP1 %mload_trie_data
// stack: balance, balance_ptr, addr, amount, retdest
%stack (balance, balance_ptr, addr, amount) -> (amount, balance, balance_ptr)
ADD
// stack: new_balance, balance_ptr, retdest
SWAP1
// stack: balance_ptr, new_balance, retdest
%mstore_trie_data
// stack: retdest
JUMP
global add_eth_new_account:
// stack: null_account_ptr, addr, amount, retdest
POP
// stack: addr, amount, retdest
DUP2 ISZERO %jumpi(add_eth_new_account_zero)
DUP1 %journal_add_account_created
%get_trie_data_size // pointer to new account we're about to create
// stack: new_account_ptr, addr, amount, retdest
SWAP2
// stack: amount, addr, new_account_ptr, retdest
PUSH 0 %append_to_trie_data // nonce
%append_to_trie_data // balance
// stack: addr, new_account_ptr, retdest
PUSH 0 %append_to_trie_data // storage root pointer
PUSH @EMPTY_STRING_HASH %append_to_trie_data // code hash
// stack: addr, new_account_ptr, retdest
%addr_to_state_key
// stack: key, new_account_ptr, retdest
%jump(mpt_insert_state_trie)
add_eth_new_account_zero:
// stack: addr, amount, retdest
%pop2 JUMP
// Convenience macro to call add_eth and return where we left off.
%macro add_eth
%stack (addr, amount) -> (addr, amount, %%after)
%jump(add_eth)
%%after:
%endmacro
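The three routines compose into the usual checked transfer, with status 0 for success and 1 for failure, and with add_eth creating the recipient on demand (skipping creation for zero-value adds). A toy Rust model over a balance map:

    use std::collections::HashMap;

    fn transfer_eth(balances: &mut HashMap<u64, u128>, from: u64, to: u64, amount: u128) -> u8 {
        match balances.get_mut(&from) {
            Some(balance) if *balance >= amount => *balance -= amount, // deduct_eth
            _ => return 1, // missing account or insufficient balance
        }
        if amount != 0 {
            *balances.entry(to).or_insert(0) += amount; // add_eth creates `to` on demand
        }
        0
    }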

@ -1,88 +0,0 @@
// Return the next context ID, and record the old context ID in the new one's
// @CTX_METADATA_PARENT_CONTEXT field. Does not actually enter the new context.
%macro create_context
// stack: (empty)
%next_context_id
%set_new_ctx_parent_ctx
// stack: new_ctx
%endmacro
// Get and increment @GLOBAL_METADATA_LARGEST_CONTEXT to determine the next context ID.
%macro next_context_id
// stack: (empty)
%mload_global_metadata(@GLOBAL_METADATA_LARGEST_CONTEXT)
%add_const(0x10000000000000000) // scale each context by 2^64
// stack: new_ctx
DUP1
%mstore_global_metadata(@GLOBAL_METADATA_LARGEST_CONTEXT)
// stack: new_ctx
%endmacro
// Returns whether the current transaction is a contract creation transaction.
%macro is_contract_creation
// stack: (empty)
%mload_global_metadata(@GLOBAL_METADATA_CONTRACT_CREATION)
%endmacro
%macro is_precompile
// stack: addr
DUP1 %ge_const(@ECREC) SWAP1 %le_const(@BLAKE2_F)
// stack: addr>=1, addr<=9
MUL // Cheaper than AND
%endmacro
// Returns 1 if the account is non-existent, 0 otherwise.
%macro is_non_existent
// stack: addr
%mpt_read_state_trie ISZERO
%endmacro
// Returns 1 if the account is empty, 0 otherwise.
%macro is_empty
// stack: addr
%mpt_read_state_trie
// stack: account_ptr
DUP1 ISZERO %jumpi(%%false)
// stack: account_ptr
DUP1 %mload_trie_data
// stack: nonce, account_ptr
ISZERO %not_bit %jumpi(%%false)
%increment DUP1 %mload_trie_data
// stack: balance, balance_ptr
ISZERO %not_bit %jumpi(%%false)
%add_const(2) %mload_trie_data
// stack: code_hash
PUSH @EMPTY_STRING_HASH
EQ
%jump(%%after)
%%false:
// stack: account_ptr
POP
PUSH 0
%%after:
%endmacro
// Returns 1 if the account is dead (i.e., empty or non-existent), 0 otherwise.
%macro is_dead
// stack: addr
DUP1 %is_non_existent
SWAP1 %is_empty
OR
%endmacro
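%is_empty is the EIP-161 emptiness predicate (zero nonce, zero balance, empty code), and %is_dead extends it to non-existent accounts; in Rust, with keccak256("") spelled out:

    struct Account {
        nonce: u64,
        balance: u128,
        code_hash: [u8; 32],
    }

    // keccak256("") == @EMPTY_STRING_HASH
    const EMPTY_STRING_HASH: [u8; 32] = [
        0xc5, 0xd2, 0x46, 0x01, 0x86, 0xf7, 0x23, 0x3c, 0x92, 0x7e, 0x7d, 0xb2,
        0xdc, 0xc7, 0x03, 0xc0, 0xe5, 0x00, 0xb6, 0x53, 0xca, 0x82, 0x27, 0x3b,
        0x7b, 0xfa, 0xd8, 0x04, 0x5d, 0x85, 0xa4, 0x70,
    ];

    fn is_empty(account: &Account) -> bool {
        account.nonce == 0 && account.balance == 0 && account.code_hash == EMPTY_STRING_HASH
    }

    fn is_dead(account: Option<&Account>) -> bool {
        account.map_or(true, is_empty) // non-existent or empty
    }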
// Gets the size of the stack _before_ the macro is run
// WARNING: this macro is side-effecting. It writes the current stack length to offset
// `CTX_METADATA_STACK_SIZE`, segment `SEGMENT_CONTEXT_METADATA` in the current context. But I can't
// imagine it being an issue unless someone's doing something dumb.
%macro stack_length
// stack: (empty)
GET_CONTEXT
// stack: current_ctx
// It seems odd to switch to the context that we are already in. We do this because SET_CONTEXT
// saves the stack length of the context we are leaving in its metadata segment.
SET_CONTEXT
// stack: (empty)
// We can now read this stack length from memory.
%mload_context_metadata(@CTX_METADATA_STACK_SIZE)
// stack: stack_length
%endmacro

@ -1,25 +0,0 @@
%macro withdrawals
// stack: (empty)
PUSH %%after
%jump(withdrawals)
%%after:
// stack: (empty)
%endmacro
global withdrawals:
// stack: retdest
PROVER_INPUT(withdrawal)
// stack: address, retdest
PROVER_INPUT(withdrawal)
// stack: amount, address, retdest
DUP2 %eq_const(@U256_MAX) %jumpi(withdrawals_end)
SWAP1
// stack: address, amount, retdest
%add_eth
// stack: retdest
%jump(withdrawals)
withdrawals_end:
// stack: amount, address, retdest
%pop2
JUMP
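The loop drains prover-supplied (address, amount) pairs until the prover signals the end with an address of @U256_MAX; in Rust, with the input source abstracted as a closure:

    fn apply_withdrawals(
        mut prover_input: impl FnMut() -> (u128, u128), // yields (address, amount)
        mut add_eth: impl FnMut(u128, u128),
    ) {
        loop {
            let (address, amount) = prover_input();
            if address == u128::MAX {
                break; // stands in for the @U256_MAX terminator
            }
            add_eth(address, amount);
        }
    }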

@ -1,101 +0,0 @@
%macro add_fp381
// stack: x0, x1, y0, y1
PROVER_INPUT(sf::bls381_base::add_hi)
// stack: z1, x0, x1, y0, y1
SWAP4
// stack: y1, x0, x1, y0, z1
PROVER_INPUT(sf::bls381_base::add_lo)
// stack: z0, y1, x0, x1, y0, z1
SWAP4
// stack: y0, y1, x0, x1, z0, z1
%pop4
// stack: z0, z1
%endmacro
%macro sub_fp381
// stack: x0, x1, y0, y1
PROVER_INPUT(sf::bls381_base::sub_hi)
// stack: z1, x0, x1, y0, y1
SWAP4
// stack: y1, x0, x1, y0, z1
PROVER_INPUT(sf::bls381_base::sub_lo)
// stack: z0, y1, x0, x1, y0, z1
SWAP4
// stack: y0, y1, x0, x1, z0, z1
%pop4
// stack: z0, z1
%endmacro
%macro mul_fp381
// stack: x0, x1, y0, y1
PROVER_INPUT(sf::bls381_base::mul_hi)
// stack: z1, x0, x1, y0, y1
SWAP4
// stack: y1, x0, x1, y0, z1
PROVER_INPUT(sf::bls381_base::mul_lo)
// stack: z0, y1, x0, x1, y0, z1
SWAP4
// stack: y0, y1, x0, x1, z0, z1
%pop4
// stack: z0, z1
%endmacro
%macro add_fp381_2
// stack: x_re, x_im, y_re, y_im
%stack (x_re: 2, x_im: 2, y_re: 2, y_im: 2) -> (y_im, x_im, y_re, x_re)
// stack: y_im, x_im, y_re, x_re
%add_fp381
// stack: z_im, y_re, x_re
%stack (z_im: 2, y_re: 2, x_re: 2) -> (x_re, y_re, z_im)
// stack: x_re, y_re, z_im
%add_fp381
// stack: z_re, z_im
%endmacro
%macro sub_fp381_2
// stack: x_re, x_im, y_re, y_im
%stack (x_re: 2, x_im: 2, y_re: 2, y_im: 2) -> (x_im, y_im, y_re, x_re)
// stack: x_im, y_im, y_re, x_re
%sub_fp381
// stack: z_im, y_re, x_re
%stack (z_im: 2, y_re: 2, x_re: 2) -> (x_re, y_re, z_im)
// stack: x_re, y_re, z_im
%sub_fp381
// stack: z_re, z_im
%endmacro
// note that {x,y}_{re,im} all take up two stack terms
global mul_fp381_2:
// stack: x_re, x_im, y_re, y_im, jumpdest
DUP4
DUP4
// stack: x_im, x_re, x_im, y_re, y_im, jumpdest
DUP8
DUP8
// stack: y_re, x_im, x_re, x_im, y_re, y_im, jumpdest
DUP12
DUP12
// stack: y_im, y_re, x_im, x_re, x_im, y_re, y_im, jumpdest
DUP8
DUP8
// stack: x_re , y_im, y_re, x_im, x_re, x_im, y_re, y_im, jumpdest
%mul_fp381
// stack: x_re * y_im, y_re, x_im, x_re, x_im, y_re, y_im, jumpdest
%stack (v: 2, y_re: 2, x_im: 2) -> (x_im, y_re, v)
// stack: x_im , y_re, x_re*y_im, x_re, x_im, y_re, y_im, jumpdest
%mul_fp381
// stack: x_im * y_re, x_re*y_im, x_re, x_im, y_re, y_im, jumpdest
%add_fp381
// stack: z_im, x_re, x_im, y_re, y_im, jumpdest
%stack (z_im: 2, x_re: 2, x_im: 2, y_re: 2, y_im: 2) -> (x_im, y_im, y_re, x_re, z_im)
// stack: x_im , y_im, y_re, x_re, z_im, jumpdest
%mul_fp381
// stack: x_im * y_im, y_re, x_re, z_im, jumpdest
%stack (v: 2, y_re: 2, x_re: 2) -> (x_re, y_re, v)
// stack: x_re , y_re, x_im*y_im, z_im, jumpdest
%mul_fp381
// stack: x_re * y_re, x_im*y_im, z_im, jumpdest
%sub_fp381
// stack: z_re, z_im, jumpdest
%stack (z_re: 2, z_im: 2, jumpdest) -> (jumpdest, z_re, z_im)
JUMP
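mul_fp381_2 is schoolbook complex multiplication, (a + bi)(c + di) = (ac - bd) + (ad + bc)i, built from the prover-assisted base-field macros. A Rust sketch over a toy word-sized modulus (the kernel's operands live in the 381-bit BLS base field, split across two stack words):

    fn mul_fp2(x: (u64, u64), y: (u64, u64), p: u64) -> (u64, u64) {
        // Assumes p < 2^63 so the sums below cannot overflow u64.
        let (x_re, x_im) = x;
        let (y_re, y_im) = y;
        let mul = |a: u64, b: u64| ((a as u128 * b as u128) % p as u128) as u64;
        let z_im = (mul(x_re, y_im) + mul(x_im, y_re)) % p;     // ad + bc
        let z_re = (mul(x_re, y_re) + p - mul(x_im, y_im)) % p; // ac - bd (mod p)
        (z_re, z_im)
    }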

Some files were not shown because too many files have changed in this diff.