From b34b3875f74db3cd3fcc928f483bca8a9e7bfe0a Mon Sep 17 00:00:00 2001 From: BGluth Date: Mon, 12 Dec 2022 18:23:22 -0700 Subject: [PATCH 01/44] Removed unused deps uncovered by `cargo-udeps` Some deps were moved to `[dev-dependencies]`. --- evm/Cargo.toml | 6 +++--- system_zero/Cargo.toml | 2 +- util/Cargo.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 03850f7a..e942a9f9 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] anyhow = "1.0.40" -env_logger = "0.9.0" eth_trie_utils = "0.4.0" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } @@ -23,11 +22,9 @@ plonky2 = { path = "../plonky2", default-features = false, features = ["timing"] plonky2_util = { path = "../util" } rand = "0.8.5" rand_chacha = "0.3.1" -ripemd = "0.1.3" rlp = "0.5.1" rlp-derive = "0.1.0" serde = { version = "1.0.144", features = ["derive"] } -sha2 = "0.10.2" static_assertions = "1.1.0" tiny-keccak = "2.0.2" @@ -36,7 +33,10 @@ jemallocator = "0.5.0" [dev-dependencies] criterion = "0.4.0" +env_logger = "0.10.0" hex = "0.4.3" +ripemd = "0.1.3" +sha2 = "0.10.6" [features] default = ["parallel"] diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml index 58a5e489..03aaea20 100644 --- a/system_zero/Cargo.toml +++ b/system_zero/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" [dependencies] anyhow = "1.0.40" -env_logger = "0.9.0" itertools = "0.10.0" log = "0.4.14" plonky2 = { path = "../plonky2" } @@ -17,6 +16,7 @@ starky = { path = "../starky" } [dev-dependencies] criterion = "0.4.0" +env_logger = "0.10.0" [[bench]] name = "lookup_permuted_cols" diff --git a/util/Cargo.toml b/util/Cargo.toml index 4e0b4b15..4419db2a 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -4,5 +4,5 @@ description = "Utilities used by Plonky2" version = "0.1.0" edition = "2021" -[dependencies] +[dev-dependencies] rand = { version = "0.8.5", default-features = false, features = ["getrandom"] 
} From efa80edaa9b63e6d60c274c53878c7097c2989b6 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Dec 2022 09:34:12 -0800 Subject: [PATCH 02/44] blake initial --- evm/src/cpu/kernel/aggregator.rs | 22 +- evm/src/cpu/kernel/asm/ripemd/box.asm | 96 ------ evm/src/cpu/kernel/asm/ripemd/compression.asm | 160 ---------- evm/src/cpu/kernel/asm/ripemd/constants.asm | 117 ------- evm/src/cpu/kernel/asm/ripemd/functions.asm | 150 --------- evm/src/cpu/kernel/asm/ripemd/main.asm | 107 ------- evm/src/cpu/kernel/asm/ripemd/memory.asm | 137 --------- evm/src/cpu/kernel/asm/ripemd/update.asm | 108 ------- evm/src/cpu/kernel/asm/sha2/compression.asm | 285 ------------------ evm/src/cpu/kernel/asm/sha2/constants.asm | 65 ---- .../cpu/kernel/asm/sha2/message_schedule.asm | 240 --------------- evm/src/cpu/kernel/asm/sha2/ops.asm | 130 -------- evm/src/cpu/kernel/asm/sha2/store_pad.asm | 89 ------ evm/src/cpu/kernel/asm/sha2/temp_words.asm | 32 -- evm/src/cpu/kernel/asm/sha2/write_length.asm | 119 -------- 15 files changed, 15 insertions(+), 1842 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/ripemd/box.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/compression.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/constants.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/functions.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/main.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/memory.asm delete mode 100644 evm/src/cpu/kernel/asm/ripemd/update.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/compression.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/constants.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/message_schedule.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/ops.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/store_pad.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/temp_words.asm delete mode 100644 evm/src/cpu/kernel/asm/sha2/write_length.asm diff --git a/evm/src/cpu/kernel/aggregator.rs 
b/evm/src/cpu/kernel/aggregator.rs index 09af921a..48130237 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,6 +39,20 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/fields/fp6_mul.asm"), include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), + include_str!("asm/hash/ripemd/box.asm"), + include_str!("asm/hash/ripemd/compression.asm"), + include_str!("asm/hash/ripemd/constants.asm"), + include_str!("asm/hash/ripemd/functions.asm"), + include_str!("asm/hash/ripemd/main.asm"), + include_str!("asm/hash/ripemd/memory.asm"), + include_str!("asm/hash/ripemd/update.asm"), + include_str!("asm/hash/sha2/compression.asm"), + include_str!("asm/hash/sha2/constants.asm"), + include_str!("asm/hash/sha2/message_schedule.asm"), + include_str!("asm/hash/sha2/ops.asm"), + include_str!("asm/hash/sha2/store_pad.asm"), + include_str!("asm/hash/sha2/temp_words.asm"), + include_str!("asm/hash/sha2/write_length.asm"), include_str!("asm/main.asm"), include_str!("asm/memory/core.asm"), include_str!("asm/memory/memcpy.asm"), @@ -68,18 +82,12 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/ripemd/main.asm"), include_str!("asm/ripemd/memory.asm"), include_str!("asm/ripemd/update.asm"), + include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/encode_rlp_string.asm"), include_str!("asm/rlp/num_bytes.asm"), include_str!("asm/rlp/read_to_memory.asm"), - include_str!("asm/sha2/compression.asm"), - include_str!("asm/sha2/constants.asm"), - include_str!("asm/sha2/message_schedule.asm"), - include_str!("asm/sha2/ops.asm"), - include_str!("asm/sha2/store_pad.asm"), - include_str!("asm/sha2/temp_words.asm"), - include_str!("asm/sha2/write_length.asm"), include_str!("asm/shift.asm"), include_str!("asm/transactions/router.asm"), include_str!("asm/transactions/type_0.asm"), diff --git a/evm/src/cpu/kernel/asm/ripemd/box.asm 
b/evm/src/cpu/kernel/asm/ripemd/box.asm deleted file mode 100644 index d60d9b8c..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/box.asm +++ /dev/null @@ -1,96 +0,0 @@ -/// Note that we unpack STATE: 5 to a, b, c, d, e -/// All additions are u32 -/// -/// def box(a, b, c, d, e, F, K): -/// -/// box = get_box(sides, rounds, boxes) -/// a += F(b, c, d) -/// r = load(r)(box) -/// x = load_offset(r) -/// a += x + K -/// s = load(s)(box) -/// a = rol(s, a) -/// a += e -/// c = rol(10, c) -/// -/// return e, a, b, c, d, F, K - -global box: - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - PUSH pre_rol - DUP5 - DUP5 - DUP5 - DUP10 - // stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides, virt - JUMP -pre_rol: - // stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - %get_box - // stack: box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - DUP12 - DUP2 - %mload_kernel_code(r_data) - ADD - // stack: virt + r, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - %mload_kernel_general_u32_LE - // stack: x, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - SWAP1 - SWAP2 - // stack: a, x, box, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - DUP8 - ADD - %as_u32 - // stack: a, box, b, c, d, e, F, K, boxes, rounds, sides, virt - PUSH mid_rol - SWAP2 - // stack: box, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt - %mload_kernel_code(s_data) - // stack: s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt - %jump(rol) -mid_rol: - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - DUP5 - // stack: e, a, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - %as_u32 - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - %stack (a, b, c) -> (10, c, post_rol, a, b) - // stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides, virt - %jump(rol) -post_rol: - // stack: c, a, b, d, e, F, K, boxes , rounds, sides, virt - %stack (c, 
a, b, d, e, F, K, boxes) -> (boxes, 1, a, b, c, d, F, K, e) - // stack: boxes, 1, a, b, c, d, F, K, e, rounds, sides, virt - SUB - SWAP7 - // stack: e, a, b, c, d, F, K, boxes-1, rounds, sides, virt - %jump(round) - - -%macro get_round - // stack: sides, rounds - %mul_const(5) - PUSH 10 - SUB - SUB - // stack: 10 - 5*sides - rounds -%endmacro - -%macro get_box - // stack: ARGS: 7, boxes, rounds, sides - DUP10 - %mul_const(80) - DUP10 - %mul_const(16) - DUP10 - // stack: boxes , 16*rounds , 80*sides, ARGS: 7, boxes, rounds, sides - PUSH 176 - SUB - SUB - SUB - // stack: 176 - boxes - 16*rounds - 80*sides, ARGS: 7, boxes, rounds, sides -%endmacro diff --git a/evm/src/cpu/kernel/asm/ripemd/compression.asm b/evm/src/cpu/kernel/asm/ripemd/compression.asm deleted file mode 100644 index a83bf832..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/compression.asm +++ /dev/null @@ -1,160 +0,0 @@ -/// _block is stored in memory: its address virt stays on the stack -/// def compress(STATE: 5, _block): -/// -/// STATEL = STATE -/// STATEL = loop(STATEL) -/// -/// STATER = state -/// STATER = loop(STATER) -/// -/// return mix(STATER, STATEL, STATE) -/// -/// -/// def mix(STATER, STATEL, STATE): -/// return -/// u32(s1 + l2 + r3), -/// u32(s2 + l3 + r4), -/// u32(s3 + l4 + r0), -/// u32(s4 + l0 + r1), -/// u32(s0 + l1 + r2) -/// -/// where si, li, ri, oi, VR, RD respectively denote -/// STATE[i], STATEL[i], STATER[i], OUTPUT[i], virt, retdest - -global compress: - // stack: STATE, virt, retdest - PUSH switch - DUP7 - %stack () -> (0, 0, 16, 5, 1) - // stack: 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest - DUP12 - DUP12 - DUP12 - DUP12 - DUP12 - // stack: STATE, 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest - %jump(loop) -switch: - // stack: STATEL, STATE, virt, retdest - PUSH mix - DUP12 - %stack () -> (16, 5, 0) - // stack: 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest - DUP15 - DUP15 - DUP15 - DUP15 - DUP15 - // stack: STATE, 16, 5, 0, virt, mix, STATEL, STATE, virt, 
retdest - %stack (STATE: 5) -> (STATE, 0, 0) - // stack: STATE, 0, 0, 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest - %jump(loop) -mix: - // stack: r0, r1, r2, r3, r4, l0, l1, l2, l3, l4, s0, s1, s2, s3, s4, VR, RD - SWAP10 - // stack: s0, r1, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD - SWAP1 - // stack: r1, s0, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD - SWAP6 - // stack: l1, s0, r2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD - %add3_u32 - // stack: o4, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD - SWAP14 - // stack: RD, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, o4 - SWAP11 - // stack: s3, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, RD, s4, VR, o4 - SWAP10 - // stack: s2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4 - SWAP1 - // stack: r3, s2, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4 - SWAP6 - // stack: l3, s2, r4, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4 - %add3_u32 - // stack: o1, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4 - SWAP9 - // stack: RD, l0, r1, l2, r3, l4, r0, s1, s3, o1, s4, VR, o4 - SWAP10 - // stack: s4, l0, r1, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4 - %add3_u32 - // stack: o3, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4 - SWAP9 - // stack: VR, l2, r3, l4, r0, s1, s3, o1, RD, o3, o4 - SWAP5 - // stack: s1, l2, r3, l4, r0, VR, s3, o1, RD, o3, o4 - %add3_u32 - // stack: o0, l4, r0, VR, s3, o1, RD, o3, o4 - SWAP4 - // stack: s3, l4, r0, VR, o0, o1, RD, o3, o4 - %add3_u32 - // stack: o2, VR, o0, o1, RD, o3, o4 - SWAP4 - // stack: RD, VR, o0, o1, o2, o3, o4 - SWAP1 - // stack: VR, RD, o0, o1, o2, o3, o4 - POP - // stack: RD, o0, o1, o2, o3, o4 - JUMP - - -/// def loop(STATE: 5): -/// while rounds: -/// update_round_vars() -/// round(STATE: 5, F, K, rounds, sides) -/// -/// def update_round_vars(): -/// F = load(F)(sides, rounds) -/// K = load(K)(sides, rounds) -/// -/// def round(STATE, rounds, sides): -/// while boxes: -/// box(STATE, F, K) -/// boxes -= 1 -/// 
boxes = 16 -/// rounds -= 1 - -loop: - // stack: STATE, F, K, 16, rounds, sides, virt, retdest - DUP9 - // stack: round, STATE, F, K, 16, rounds, sides, virt, retdest - %jumpi(update_round_vars) - // stack: STATE, F, K, 16, 0, sides, virt, retdest - %stack (STATE: 5, F, K, boxes, rounds, sides, virt, retdest) -> (retdest, STATE) - // stack: retdest, STATE - JUMP -update_round_vars: - // stack: STATE, F , K , 16, rounds, sides, virt, retdest - DUP9 - DUP11 - %get_round - DUP1 - // stack: rnd, rnd, STATE, F , K , 16, rounds, sides, virt, retdest - SWAP7 - POP - %push_f - SWAP7 - // stack: rnd, rnd, STATE, F', K , 16, rounds, sides, virt, retdest - SWAP8 - POP - %mload_kernel_code_u32(k_data) - SWAP7 - POP - // stack: STATE, F', K', 16, rounds, sides, virt, retdest - %jump(round) -global round: - // stack: STATE, F, K, boxes, rounds , sides, virt, retdest - DUP8 - // stack: boxes, STATE, F, K, boxes, rounds , sides, virt, retdest - %jumpi(box) - // stack: STATE, F, K, 0, rounds , sides, virt, retdest - SWAP7 - POP - PUSH 16 - SWAP7 - // stack: STATE, F, K, 16, rounds , sides, virt, retdest - PUSH 1 - DUP10 - SUB - SWAP9 - POP - // stack: STATE, F, K, 16, rounds-1, sides, virt, retdest - %jump(loop) diff --git a/evm/src/cpu/kernel/asm/ripemd/constants.asm b/evm/src/cpu/kernel/asm/ripemd/constants.asm deleted file mode 100644 index 7a8959fe..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/constants.asm +++ /dev/null @@ -1,117 +0,0 @@ -global k_data: - // Left - BYTES 0x00, 0x00, 0x00, 0x00 - BYTES 0x5A, 0x82, 0x79, 0x99 - BYTES 0x6E, 0xD9, 0xEB, 0xA1 - BYTES 0x8F, 0x1B, 0xBC, 0xDC - BYTES 0xA9, 0x53, 0xFD, 0x4E - // Right - BYTES 0x50, 0xA2, 0x8B, 0xE6 - BYTES 0x5C, 0x4D, 0xD1, 0x24 - BYTES 0x6D, 0x70, 0x3E, 0xF3 - BYTES 0x7A, 0x6D, 0x76, 0xE9 - BYTES 0x00, 0x00, 0x00, 0x00 - -global s_data: - // Left Round 0 - BYTES 11, 14, 15, 12 - BYTES 05, 08, 07, 09 - BYTES 11, 13, 14, 15 - BYTES 06, 07, 09, 08 - // Left Round 1 - BYTES 07, 06, 08, 13 - BYTES 11, 09, 07, 15 - BYTES 
07, 12, 15, 09 - BYTES 11, 07, 13, 12 - // Left Round 2 - BYTES 11, 13, 06, 07 - BYTES 14, 09, 13, 15 - BYTES 14, 08, 13, 06 - BYTES 05, 12, 07, 05 - // Left Round 3 - BYTES 11, 12, 14, 15 - BYTES 14, 15, 09, 08 - BYTES 09, 14, 05, 06 - BYTES 08, 06, 05, 12 - // Left Round 4 - BYTES 09, 15, 05, 11 - BYTES 06, 08, 13, 12 - BYTES 05, 12, 13, 14 - BYTES 11, 08, 05, 06 - // Right Round 0 - BYTES 08, 09, 09, 11 - BYTES 13, 15, 15, 05 - BYTES 07, 07, 08, 11 - BYTES 14, 14, 12, 06 - // Right Round 1 - BYTES 09, 13, 15, 07 - BYTES 12, 08, 09, 11 - BYTES 07, 07, 12, 07 - BYTES 06, 15, 13, 11 - // Right Round 2 - BYTES 09, 07, 15, 11 - BYTES 08, 06, 06, 14 - BYTES 12, 13, 05, 14 - BYTES 13, 13, 07, 05 - // Right Round 3 - BYTES 15, 05, 08, 11 - BYTES 14, 14, 06, 14 - BYTES 06, 09, 12, 09 - BYTES 12, 05, 15, 08 - // Right Round 4 - BYTES 08, 05, 12, 09 - BYTES 12, 05, 14, 06 - BYTES 08, 13, 06, 05 - BYTES 15, 13, 11, 11 - -global r_data: - // Left Round 0 - BYTES 00, 04, 08, 12 - BYTES 16, 20, 24, 28 - BYTES 32, 36, 40, 44 - BYTES 48, 52, 56, 60 - // Left Round 1 - BYTES 28, 16, 52, 04 - BYTES 40, 24, 60, 12 - BYTES 48, 00, 36, 20 - BYTES 08, 56, 44, 32 - // Left Round 2 - BYTES 12, 40, 56, 16 - BYTES 36, 60, 32, 04 - BYTES 08, 28, 00, 24 - BYTES 52, 44, 20, 48 - // Left Round 3 - BYTES 04, 36, 44, 40 - BYTES 00, 32, 48, 16 - BYTES 52, 12, 28, 60 - BYTES 56, 20, 24, 08 - // Left Round 4 - BYTES 16, 00, 20, 36 - BYTES 28, 48, 08, 40 - BYTES 56, 04, 12, 32 - BYTES 44, 24, 60, 52 - // Right Round 0 - BYTES 20, 56, 28, 00 - BYTES 36, 08, 44, 16 - BYTES 52, 24, 60, 32 - BYTES 04, 40, 12, 48 - // Right Round 1 - BYTES 24, 44, 12, 28 - BYTES 00, 52, 20, 40 - BYTES 56, 60, 32, 48 - BYTES 16, 36, 04, 08 - // Right Round 2 - BYTES 60, 20, 04, 12 - BYTES 28, 56, 24, 36 - BYTES 44, 32, 48, 08 - BYTES 40, 00, 16, 52 - // Right Round 3 - BYTES 32, 24, 16, 04 - BYTES 12, 44, 60, 00 - BYTES 20, 48, 08, 52 - BYTES 36, 28, 40, 56 - // Right Round 4 - BYTES 48, 60, 40, 16 - BYTES 04, 20, 32, 28 
- BYTES 24, 08, 52, 56 - BYTES 00, 12, 36, 44 diff --git a/evm/src/cpu/kernel/asm/ripemd/functions.asm b/evm/src/cpu/kernel/asm/ripemd/functions.asm deleted file mode 100644 index ac111215..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/functions.asm +++ /dev/null @@ -1,150 +0,0 @@ -/// def rol(n, x): -/// return (u32(x << n)) | (x >> (32 - n)) - -global rol: - // stack: n, x, retdest - SWAP1 - DUP1 - DUP3 - // stack: n, x, x, n, retdest - PUSH 32 - SUB - // stack: 32-n, x, x, n, retdest - SHR - // stack: x >> (32-n), x, n, retdest - SWAP2 - // stack: n, x, x >> (32-n), retdest - SHL - // stack: x << n, x >> (32-n), retdest - %as_u32 - // stack: u32(x << n), x >> (32-n), retdest - OR - // stack: u32(x << n) | (x >> (32-n)), retdest - SWAP1 - JUMP - -// def push_f(rnd): -// Fs = [F0, F1, F2, F3, F4, F4, F3, F2, F1, F0] -// acc = 0 -// for i, F in enumerate(Fs): -// acc += (i==rnd)*F -// return acc, rnd -// -// %this_f(i,F) enacts -// acc += (i==rnd)*F - -%macro push_f - // stack: rnd - PUSH 0 - %this_f(0,F0) - %this_f(1,F1) - %this_f(2,F2) - %this_f(3,F3) - %this_f(4,F4) - %this_f(5,F4) - %this_f(6,F3) - %this_f(7,F2) - %this_f(8,F1) - %this_f(9,F0) - // stack: F, rnd -%endmacro - -%macro this_f(i, F) - // stack: acc, rnd - DUP2 - // stack: rnd , acc, rnd - %eq_const($i) - // stack: rnd==i , acc, rnd - %mul_const($F) - // stack: (rnd==i)*F , acc, rnd - ADD - // stack: (rnd==j)*F + acc, rnd -%endmacro - -/// def F0(x, y, z): -/// return x ^ y ^ z - -global F0: - // stack: x , y , z, retdest - XOR - // stack: x ^ y , z, retdest - XOR - // stack: x ^ y ^ z, retdest - SWAP1 - JUMP - -/// def F1(x, y, z): -/// return (x & y) | (u32(~x) & z) - -global F1: - // stack: x, y, z, retdest - DUP1 - // stack: x, x, y, z, retdest - SWAP2 - // stack: y, x, x, z, retdest - AND - // stack: y & x, x, z, retdest - SWAP2 - // stack: z, x, y & x , retdest - SWAP1 - // stack: x, z, y & x , retdest - %not_u32 - // stack: ~x, z, y & x , retdest - AND - // stack: ~x & z , y & x , retdest - OR 
- // stack: (~x & z) | (y & x), retdest - SWAP1 - JUMP - -/// def F2(x, y, z): -/// return (x | u32(~y)) ^ z - -global F2: - // stack: x , y, z, retdest - SWAP1 - // stack: y , x, z, retdest - %not_u32 - // stack: ~y , x , z, retdest - OR - // stack: ~y | x , z, retdest - XOR - // stack: (~y | x) ^ z, retdest - SWAP1 - JUMP - -/// def F3(x, y, z): -/// return (x & z) | (u32(~z) & y) - -global F3: - // stack: x, y , z , retdest - DUP3 - // stack: z , x, y , z , retdest - AND - // stack: z & x, y , z , retdest - SWAP2 - // stack: z, y, z & x , retdest - %not_u32 - // stack: ~z , y, z & x , retdest - AND - // stack: ~z & y, z & x , retdest - OR - // stack: (~z & y) | (z & x), retdest - SWAP1 - JUMP - -/// def F4(x, y, z): -/// return x ^ (y | u32(~z)) - -global F4: - // stack: x, y, z, retdest - SWAP2 - // stack: z, y, x, retdest - %not_u32 - // stack: ~z, y, x, retdest - OR - // stack: ~z | y, x, retdest - XOR - // stack: (~z | y) ^ x, retdest - SWAP1 - JUMP diff --git a/evm/src/cpu/kernel/asm/ripemd/main.asm b/evm/src/cpu/kernel/asm/ripemd/main.asm deleted file mode 100644 index bbcb4068..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/main.asm +++ /dev/null @@ -1,107 +0,0 @@ -/// Variables beginning with _ are in memory -/// -/// def ripemd160(_input): -/// STATE, count, _buffer = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0], 0, [0]*64 -/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, len(input) , bytes = _input ) -/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, padlength(len(input)), bytes = [0x80]+[0]*63) -/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, 8, bytes = size(len(_input))) -/// return process(STATE) -/// -/// ripemd is called on a stack with ADDR and length -/// ripemd_stack is called on a stack with length, followed by the input bytes -/// -/// ripemd_update receives and return the stack in the form: -/// stack: STATE, count, length, virt -/// where virt is the virtual address of the bytes 
argument - -global ripemd_stack: - // stack: length, INPUT - %stack (length) -> (64, length, 0x80, 63, length, length) - // stack: 64, length, 0x80, 63, length, length, INPUT - %jump(ripemd_storage) // stores the following into memory - // init _buffer at virt 0 [consumes 64] - // store _size at virt 64 [consumes length] - // store _padding at virt 72 [consumes 0x80, 63] - // store _input at virt 136 [consumes length] - -global ripemd: - // stack: ADDR, length - %stack (ADDR: 3, length) -> (64, length, 0x80, 63, length, ADDR, length) - // stack: 64, length, 0x80, 63, length, ADDR, length - %jump(ripemd_storage) // stores the following into memory - // init _buffer at virt 0 [consumes 64] - // store _size at virt 64 [consumes length] - // store _padding at virt 72 [consumes 0x80, 63] - // store _input at virt 136 [consumes ADDR, length] - -global ripemd_init: - // stack: length - %stack (length) -> ( 0, length, 136, ripemd_1, ripemd_2, process) - // stack: count = 0, length, virt = 136, ripemd_1, ripemd_2, process - %stack () -> (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0) - // stack: 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0, count, length, virt, LABELS - %jump(ripemd_update) -ripemd_1: - // stack: STATE, count, length , virt , LABELS - DUP7 - // stack: length, STATE, count, length , virt , LABELS - %padlength - // stack: padlength, STATE, count, length , virt , LABELS - SWAP7 - POP - // stack: STATE, count, length = padlength, virt , LABELS - %stack (STATE: 5, count, length, virt) -> (STATE, count, length, 72) - // STATE, count, length , virt = 72, LABELS - %jump(ripemd_update) -ripemd_2: - // stack: STATE, count, length , virt , LABELS - %stack (STATE: 5, count, length, virt) -> (STATE, count, 8, 64) - // stack: STATE, count, length = 8, virt = 64, LABELS - %jump(ripemd_update) -global process: - // stack: a , b, c, d, e, count, length, virt - %reverse_bytes_u32 - %shl_const(128) - // stack: a', b, c, d, e, VARS - SWAP1 - 
%reverse_bytes_u32 - %shl_const(96) - OR - // stack: b' a', c, d, e, VARS - SWAP1 - %reverse_bytes_u32 - %shl_const(64) - OR - // stack: c' b' a', d, e, VARS - SWAP1 - %reverse_bytes_u32 - %shl_const(32) - OR - // stack: d' c' b' a', e, VARS - SWAP1 - %reverse_bytes_u32 - OR - // stack: e' d' c' b' a', VARS - %stack (result, VARS: 3, retdest) -> (retdest, result) - // stack: 0xdeadbeef, result - JUMP - - -/// def padlength(length): -/// t = length % 64 -/// return 56 + 64*(t > 55) - t - -%macro padlength - // stack: count - %mod_const(64) - // stack: t = count % 64 - PUSH 55 - DUP2 - // stack: t , 55 , t - GT - // stack: t > 55 , t - %mul_const(64) - %add_const(56) - // stack: 56 + 64*(t > 55), t - SUB -%endmacro diff --git a/evm/src/cpu/kernel/asm/ripemd/memory.asm b/evm/src/cpu/kernel/asm/ripemd/memory.asm deleted file mode 100644 index e3b7cbe6..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/memory.asm +++ /dev/null @@ -1,137 +0,0 @@ -global ripemd_storage: // starts by initializing buffer - // stack: i [init: 64] - %store_zeros(64, ripemd_storage) - // stack: (empty) - %jump(store_size) - -store_size: - // stack: length - %shl_const(3) - // stack: abcdefgh - %extract_and_store_byte(64) - // stack: abcdefg - %extract_and_store_byte(65) - // stack: abcdef - %extract_and_store_byte(66) - // stack: abcde - %extract_and_store_byte(67) - // stack: abcd - %extract_and_store_byte(68) - // stack: abc - %extract_and_store_byte(69) - // stack: ab - %extract_and_store_byte(70) - // stack: a - %mstore_kernel_general(71) - // stack: 0x80 // padding has 0x80 in first position and zeros elsewhere - %mstore_kernel_general(72) // store first padding term here so as to avoid extra label - %jump(store_padding) - -store_padding: - // stack: i [init 63], length - %store_zeros(136, store_padding) - // stack: length - DUP1 - %jumpi(store_input_stack) - POP - %jump(ripemd_init) - -store_input_stack: - // stack: rem, length, REM_INP - %stack (rem, length, head) -> (length, rem, 136, head, 
rem, length) - SUB - ADD - // stack: offset, byte, rem, length, REM_INP - %mstore_kernel_general - // stack: rem, length, REM_INP - %decrement - DUP1 - // stack: rem - 1, rem - 1, length, REM_INP - %jumpi(store_input_stack) - // stack: 0, length - POP - %jump(ripemd_init) - -store_input: - // stack: rem , ADDR , length - DUP4 - DUP4 - DUP4 - MLOAD_GENERAL - // stack: byte, rem , ADDR , length - DUP2 - DUP7 - SUB - %add_const(136) - // stack: offset, byte, rem , ADDR , length - %mstore_kernel_general - // stack: rem , ADDR , length - %decrement - // stack: rem-1, ADDR , length - SWAP3 - %increment - SWAP3 - // stack: rem-1, ADDR+1, length - DUP1 - %jumpi(store_input) - // stack: 0 , ADDR , length - %pop4 - // stack: length - %jump(ripemd_init) - -/// def buffer_update(get, set, times): -/// for i in range(times): -/// buffer[set+i] = bytestring[get+i] - -global buffer_update: - // stack: get , set , times , retdest - DUP2 - DUP2 - // stack: get, set, get , set , times , retdest - %mupdate_kernel_general - // stack: get , set , times , retdest - %increment - SWAP1 - %increment - SWAP1 - SWAP2 - %decrement - SWAP2 - // stack: get+1, set+1, times-1, retdest - DUP3 - %jumpi(buffer_update) - // stack: get , set , 0 , retdest - %pop3 - JUMP - - -%macro store_zeros(N, label) - // stack: i - %stack (i) -> ($N, i, 0, i) - SUB - // stack: offset = N-i, 0, i - %mstore_kernel_general - // stack: i - %decrement - DUP1 - // stack: i-1, i-1 - %jumpi($label) - // stack: 0 - POP -%endmacro - -%macro extract_and_store_byte(offset) - // stack: xsy - PUSH 0x100 - DUP2 - MOD - // stack: y, xsy - %stack (y, xsy) -> (xsy, y, 0x100, y) - // stack: xsy, y, 0x100, y - SUB - DIV - SWAP1 - // stack: y, xs - %mstore_kernel_general($offset) - // stack: xs -%endmacro diff --git a/evm/src/cpu/kernel/asm/ripemd/update.asm b/evm/src/cpu/kernel/asm/ripemd/update.asm deleted file mode 100644 index a0c3ef68..00000000 --- a/evm/src/cpu/kernel/asm/ripemd/update.asm +++ /dev/null @@ -1,108 +0,0 @@ -/// 
ripemd_update will receive and return the stack in the form: -/// stack: STATE, count, length, virt -/// -/// def ripemd_update(state, count, buffer, length, bytestring): -/// have = (count // 8) % 64 -/// need = 64 - have -/// shift = 0 -/// P = length >= need and have -/// Q = length >= need -/// if P: -/// update_1() -/// if Q: -/// update_2() -/// R = length > shift -/// if R: -/// buffer_update(virt + shift, have, length - shift) -/// -/// return state, count + 8*length, buffer - -global ripemd_update: - // stack: STATE, count, length, virt, retdest - %stack (STATE: 5, count, length, virt) -> (count, 8, 64, STATE, count, length, virt) - DIV - MOD - // stack: have, STATE, count, length, virt, retdest - DUP1 - PUSH 64 - SUB - PUSH 0 - // stack: shift, need, have, STATE, count, length, virt, retdest - %stack (shift, need, have, STATE: 5, count, length) -> (length, need, STATE, shift, need, have, count, length) - // stack: length, need, STATE, shift, need, have, count, length, virt, retdest - LT - ISZERO - // stack: Q, STATE, shift, need, have, count, length, virt, retdest - %stack (Q, STATE: 5, shift, need, have) -> (have, Q, Q, STATE, shift, need, have) - %gt_const(0) - AND - // stack: P, Q, STATE, shift, need, have, count, length, virt, retdest - %jumpi(update_1) - // stack: Q, STATE, shift, need, have, count, length, virt, retdest - %jumpi(update_2) -final_update: - // stack: STATE, shift, need, have, count, length, virt, retdest - %stack (STATE: 5, shift, need, have, count, length) -> (length, shift, return_step, STATE, shift, need, have, count, length) - SUB - // stack: ARGS: 2, STATE, shift, need, have, count, length, virt, retdest - %stack (ARGS: 2, STATE: 5, shift, need, have, count, length, virt) -> (shift, virt, have, ARGS, STATE, shift, need, have, count, length, virt) - ADD - // stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest - %stack (ARGS: 4, STATE: 5, shift, need, have, count, length) -> (length, shift, ARGS, STATE, shift, 
need, have, count, length) - GT - // stack: R, ARGS: 4, STATE, shift, need, have, count, length, virt, retdest - %jumpi(buffer_update) - // stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest - %pop3 - JUMP -return_step: - // stack: STATE, shift, need, have, count, length, virt, retdest - SWAP8 - DUP10 - %mul_const(8) - ADD - SWAP8 - // stack: STATE, shift, need, have, count, length, virt, retdest - %stack (STATE: 5, shift, need, have, count, length, virt, retdest) -> (retdest, STATE, count, length, virt) - JUMP - - -/// def update_1(): -/// buffer_update(virt, have, need) -/// shift = need -/// have = 0 -/// state = compress(state, buffer) - -update_1: - // stack: Q, STATE, shift, need, have, count, length, virt, retdest - %stack (Q, STATE: 5, shift, need, have, count, length, virt) -> (virt, have, need, update_1a, STATE, shift, need, have, count, length, virt) - %jump(buffer_update) -update_1a: - // stack: STATE, shift, need, have, count, length, virt, retdest - %stack (STATE: 5, shift, need, have) -> (STATE, 0, update_2, need, need, 0) - // stack: STATE, 0, update_2, shift = need, need, have = 0, count, length, virt, retdest - %jump(compress) - -/// def update_2(): -/// while length >= shift + 64: -/// shift += 64 -/// state = compress(state, bytestring[shift-64:]) - -update_2: - // stack: STATE, shift, need, have, count, length, virt, retdest - %stack (STATE: 5, shift, need, have, count, length) -> (64, shift, length, STATE, shift, need, have, count, length) - ADD - GT - // stack: cond, STATE, shift, need, have, count, length, virt, retdest - %jumpi(final_update) - SWAP5 - %add_const(64) - SWAP5 - %stack (STATE: 5, shift) -> (shift, 64, STATE, shift) - DUP13 - ADD - SUB - // stack: offset, STATE, shift, need, have, count, length, virt, retdest - %stack (offset, STATE: 5) -> (STATE, offset, update_2) - // stack: STATE, offset, update_2, shift, need, have, count, length, virt, retdest - %jump(compress) diff --git 
a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm deleted file mode 100644 index 8850c1c8..00000000 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ /dev/null @@ -1,285 +0,0 @@ -// We use memory starting at 320 * num_blocks + 2 (after the message schedule -// space) as scratch space to store stack values. -%macro scratch_space_addr_from_num_blocks - // stack: num_blocks - %mul_const(320) - %add_const(2) -%endmacro - -global sha2_compression: - // stack: message_schedule_addr, retdest - PUSH 0 - // stack: i=0, message_schedule_addr, retdest - SWAP1 - // stack: message_schedule_addr, i=0, retdest - PUSH 0 - // stack: 0, message_schedule_addr, i=0, retdest - %mload_kernel_general - // stack: num_blocks, message_schedule_addr, i=0, retdest - DUP1 - // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest - %scratch_space_addr_from_num_blocks - // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - SWAP1 - // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - // Push the initial hash values; these constants are called H^(0) in the spec. - PUSH 0x5be0cd19 // H^(0)_7 - PUSH 0x1f83d9ab // H^(0)_6 - PUSH 0x9b05688c // H^(0)_5 - PUSH 0x510e527f // H^(0)_4 - PUSH 0xa54ff53a // H^(0)_3 - PUSH 0x3c6ef372 // H^(0)_2 - PUSH 0xbb67ae85 // H^(0)_1 - PUSH 0x6a09e667 // H^(0)_0 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -compression_start_block: - // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. 
- DUP10 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP2 - DUP2 - // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP3 - DUP2 - // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP4 - DUP2 - // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP5 - DUP2 - // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // 
stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP6 - DUP2 - // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP7 - DUP2 - // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP8 - DUP2 - // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP9 - DUP2 - // stack: scratch_space_addr+28, c[0], 
scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - POP - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest -compression_loop: - // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP11 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP13 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - ADD - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - PUSH sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP14 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest - ADD - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (t, a, b, c) -> (a, b, c, t, a, b, c) - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, 
b, c, d, e, f, g, h) - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %eq_const(64) - // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SUB - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP13 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - SWAP1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - PUSH 256 - MUL - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - ADD - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], 
g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - SWAP12 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest - SWAP10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - POP - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest - %and_const(63) - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - SWAP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - POP - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - DUP12 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - ISZERO - %jumpi(compression_end_block) - %jump(compression_loop) -compression_end_block: - // Add the initial values of the eight working variables (from the start of this block's compression) back into them. 
- // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(4) - %mload_kernel_general_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(8) - %mload_kernel_general_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(12) - %mload_kernel_general_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - 
%add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(16) - %mload_kernel_general_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(20) - %mload_kernel_general_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(24) - %mload_kernel_general_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest - DUP10 - %add_const(28) - %mload_kernel_general_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - DUP1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - ISZERO - // In this case, we've finished all the blocks. - %jumpi(compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %stack (num_blocks, working: 8) -> (working, num_blocks) - %jump(compression_start_block) -compression_end: - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - POP - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %shl_const(32) - OR - %shl_const(32) - OR - %shl_const(32) - OR - %shl_const(32) - OR - %shl_const(32) - OR - %shl_const(32) - OR - %shl_const(32) - OR - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], 
f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop3 - // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - SWAP1 - JUMP diff --git a/evm/src/cpu/kernel/asm/sha2/constants.asm b/evm/src/cpu/kernel/asm/sha2/constants.asm deleted file mode 100644 index 6ce4d907..00000000 --- a/evm/src/cpu/kernel/asm/sha2/constants.asm +++ /dev/null @@ -1,65 +0,0 @@ -global sha2_constants_k: - BYTES 66, 138, 47, 152 - BYTES 113, 55, 68, 145 - BYTES 181, 192, 251, 207 - BYTES 233, 181, 219, 165 - BYTES 57, 86, 194, 91 - BYTES 89, 241, 17, 241 - BYTES 146, 63, 130, 164 - BYTES 171, 28, 94, 213 - BYTES 216, 7, 170, 152 - BYTES 18, 131, 91, 1 - BYTES 36, 49, 133, 190 - BYTES 85, 12, 125, 195 - BYTES 114, 190, 93, 116 - BYTES 128, 222, 177, 254 - BYTES 155, 220, 6, 167 - BYTES 193, 155, 241, 116 - BYTES 228, 155, 105, 193 - BYTES 239, 190, 71, 134 - BYTES 15, 193, 157, 198 - BYTES 36, 12, 161, 204 - BYTES 45, 233, 44, 111 - BYTES 74, 116, 132, 170 - BYTES 92, 176, 169, 220 - BYTES 118, 249, 136, 218 - BYTES 152, 62, 81, 82 - BYTES 168, 49, 198, 109 - BYTES 176, 3, 39, 200 - BYTES 191, 89, 127, 199 - BYTES 198, 224, 11, 243 - BYTES 213, 167, 145, 71 - BYTES 6, 202, 99, 81 - BYTES 20, 41, 41, 103 - BYTES 39, 183, 10, 133 - BYTES 46, 27, 33, 56 - BYTES 77, 44, 109, 252 - BYTES 83, 56, 13, 19 - BYTES 101, 10, 115, 84 - BYTES 118, 106, 10, 187 - BYTES 129, 194, 201, 46 - BYTES 146, 114, 44, 133 - BYTES 162, 191, 232, 161 - BYTES 168, 26, 102, 75 - BYTES 194, 75, 139, 112 - BYTES 199, 108, 81, 163 - BYTES 209, 146, 232, 25 - BYTES 214, 153, 6, 36 - BYTES 244, 14, 53, 133 - BYTES 16, 106, 160, 112 - BYTES 25, 164, 193, 22 - BYTES 30, 55, 108, 8 - BYTES 39, 72, 119, 76 - BYTES 52, 176, 188, 181 - BYTES 57, 28, 12, 179 - BYTES 78, 216, 170, 74 - BYTES 91, 156, 202, 79 - BYTES 104, 46, 111, 243 - BYTES 116, 143, 130, 238 - BYTES 120, 165, 99, 111 - BYTES 132, 200, 120, 20 - BYTES 140, 199, 2, 8 - BYTES 144, 190, 
255, 250 - BYTES 164, 80, 108, 235 - BYTES 190, 249, 163, 247 - BYTES 198, 113, 120, 242 diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm deleted file mode 100644 index 78d98634..00000000 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ /dev/null @@ -1,240 +0,0 @@ -// We put the message schedule in memory starting at 64 * num_blocks + 2. -%macro message_schedule_addr_from_num_blocks - // stack: num_blocks - %mul_const(64) - %add_const(2) -%endmacro - -// Precodition: stack contains address of one message block, followed by output address -// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks -// of message schedule (in four-byte increments) -gen_message_schedule_from_block: - // stack: block_addr, output_addr, retdest - DUP1 - // stack: block_addr, block_addr, output_addr, retdest - %add_const(32) - // stack: block_addr + 32, block_addr, output_addr, retdest - SWAP1 - // stack: block_addr, block_addr + 32, output_addr, retdest - %mload_kernel_general_u256 - // stack: block[0], block_addr + 32, output_addr, retdest - SWAP1 - // stack: block_addr + 32, block[0], output_addr, retdest - %mload_kernel_general_u256 - // stack: block[1], block[0], output_addr, retdest - SWAP2 - // stack: output_addr, block[0], block[1], retdest - %add_const(28) - PUSH 8 - // stack: counter=8, output_addr + 28, block[0], block[1], retdest - %jump(gen_message_schedule_from_block_0_loop) -gen_message_schedule_from_block_0_loop: - // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message sdchedule. 
- // stack: counter, output_addr, block[0], block[1], retdest - SWAP2 - // stack: block[0], output_addr, counter, block[1], retdest - DUP1 - // stack: block[0], block[0], output_addr, counter, block[1], retdest - %shr_const(32) - // stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest - SWAP1 - // stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest - %as_u32 - // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - DUP3 - // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - %mstore_kernel_general_u32 - // stack: block[0] >> 32, output_addr, counter, block[1], retdest - SWAP1 - // stack: output_addr, block[0] >> 32, counter, block[1], retdest - %sub_const(4) - // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - SWAP1 - // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest - SWAP2 - // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest - %decrement - DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_0_end) - %jump(gen_message_schedule_from_block_0_loop) -gen_message_schedule_from_block_0_end: - // stack: old counter=0, output_addr, block[0], block[1], retdest - POP - PUSH 8 - // stack: counter=8, output_addr, block[0], block[1], retdest - %stack (counter, out, b0, b1) -> (out, counter, b1, b0) - // stack: output_addr, counter, block[1], block[0], retdest - %add_const(64) - // stack: output_addr + 64, counter, block[1], block[0], retdest - SWAP1 - // stack: counter, output_addr + 64, block[1], block[0], retdest -gen_message_schedule_from_block_1_loop: - // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. 
- // stack: counter, output_addr, block[1], block[0], retdest - SWAP2 - // stack: block[1], output_addr, counter, block[0], retdest - DUP1 - // stack: block[1], block[1], output_addr, counter, block[0], retdest - %shr_const(32) - // stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest - SWAP1 - // stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest - %as_u32 - // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - DUP3 - // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - %mstore_kernel_general_u32 - // stack: block[1] >> 32, output_addr, counter, block[0], retdest - SWAP1 - // stack: output_addr, block[1] >> 32, counter, block[0], retdest - %sub_const(4) - // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - SWAP1 - // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest - SWAP2 - // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest - %decrement - DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_1_end) - %jump(gen_message_schedule_from_block_1_loop) -gen_message_schedule_from_block_1_end: - // stack: old counter=0, output_addr, block[1], block[0], retdest - POP - // stack: output_addr, block[0], block[1], retdest - PUSH 48 - // stack: counter=48, output_addr, block[0], block[1], retdest - SWAP1 - // stack: output_addr, counter, block[0], block[1], retdest - %add_const(36) - // stack: output_addr + 36, counter, block[0], block[1], retdest - SWAP1 - // stack: counter, output_addr + 36, block[0], block[1], retdest -gen_message_schedule_remaining_loop: - // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
- // stack: counter, output_addr, block[0], block[1], retdest - SWAP1 - // stack: output_addr, counter, block[0], block[1], retdest - DUP1 - // stack: output_addr, output_addr, counter, block[0], block[1], retdest - PUSH 2 - PUSH 4 - MUL - SWAP1 - SUB - // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest - %sha2_sigma_1 - // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest - SWAP1 - // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - DUP1 - // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 7 - PUSH 4 - MUL - SWAP1 - SUB - // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - SWAP1 - // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - DUP1 - // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 15 - PUSH 4 - MUL - SWAP1 - SUB - // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %sha2_sigma_0 - // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - SWAP1 - // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - DUP1 - // stack: 
output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 16 - PUSH 4 - MUL - SWAP1 - SUB - // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - %mload_kernel_general_u32 - // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - SWAP1 - // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - SWAP4 - // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %add_u32 - %add_u32 - %add_u32 - // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - DUP2 - // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %mstore_kernel_general_u32 - // stack: output_addr, counter, block[0], block[1], retdest - %add_const(4) - // stack: output_addr + 4, counter, block[0], block[1], retdest - SWAP1 - // stack: counter, output_addr + 4, block[0], block[1], retdest - %decrement - // stack: counter - 1, output_addr + 4, block[0], block[1], retdest - DUP1 - ISZERO - %jumpi(gen_message_schedule_remaining_end) - %jump(gen_message_schedule_remaining_loop) -gen_message_schedule_remaining_end: - // stack: counter=0, output_addr, block[0], block[1], retdest - %pop4 - JUMP - -// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -// stack contains 
output_addr -// Postcondition: starting at output_addr, set of 256 bytes per block -// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) -global sha2_gen_all_message_schedules: - // stack: output_addr, retdest - DUP1 - // stack: output_addr, output_addr, retdest - PUSH 0 - // stack: 0, output_addr, output_addr, retdest - %mload_kernel_general - // stack: num_blocks, output_addr, output_addr, retdest - PUSH 1 - // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest -gen_all_message_schedules_loop: - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - PUSH gen_all_message_schedules_loop_end - // stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest - DUP4 - // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - DUP3 - // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - %jump(gen_message_schedule_from_block) -gen_all_message_schedules_loop_end: - // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - %add_const(64) - // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest - SWAP1 - %decrement - SWAP1 - // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest - SWAP2 - %add_const(256) - SWAP2 - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - DUP2 - // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - ISZERO - %jumpi(gen_all_message_schedules_end) - %jump(gen_all_message_schedules_loop) -gen_all_message_schedules_end: - // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - %pop3 - // stack: output_addr, retdest - %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm deleted file mode 100644 index 
7d8054ca..00000000 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ /dev/null @@ -1,130 +0,0 @@ -// 32-bit right rotation -%macro rotr(rot) - // stack: value - PUSH $rot - // stack: rot, value - DUP2 - DUP2 - // stack: rot, value, rot, value - SHR - // stack: value >> rot, rot, value - %stack (shifted, rot, value) -> (rot, value, shifted) - // stack: rot, value, value >> rot - PUSH 32 - SUB - // stack: 32 - rot, value, value >> rot - SHL - // stack: value << (32 - rot), value >> rot - %as_u32 - // stack: (value << (32 - rot)) % (1 << 32), value >> rot - ADD -%endmacro - -%macro sha2_sigma_0 - // stack: x - DUP1 - // stack: x, x - %rotr(7) - // stack: rotr(x, 7), x - %stack (rotated, x) -> (x, x, rotated) - // stack: x, x, rotr(x, 7) - %rotr(18) - // stack: rotr(x, 18), x, rotr(x, 7) - SWAP1 - // stack: x, rotr(x, 18), rotr(x, 7) - PUSH 3 - SHR - // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) - XOR - XOR -%endmacro - -%macro sha2_sigma_1 - // stack: x - DUP1 - // stack: x, x - %rotr(17) - // stack: rotr(x, 17), x - %stack (rotated, x) -> (x, x, rotated) - // stack: x, x, rotr(x, 17) - %rotr(19) - // stack: rotr(x, 19), x, rotr(x, 17) - SWAP1 - // stack: x, rotr(x, 19), rotr(x, 17) - PUSH 10 - SHR - // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) - XOR - XOR -%endmacro - -%macro sha2_bigsigma_0 - // stack: x - DUP1 - // stack: x, x - %rotr(2) - // stack: rotr(x, 2), x - %stack (rotated, x) -> (x, x, rotated) - // stack: x, x, rotr(x, 2) - %rotr(13) - // stack: rotr(x, 13), x, rotr(x, 2) - SWAP1 - // stack: x, rotr(x, 13), rotr(x, 2) - %rotr(22) - // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) - XOR - XOR -%endmacro - -%macro sha2_bigsigma_1 - // stack: x - DUP1 - // stack: x, x - %rotr(6) - // stack: rotr(x, 6), x - %stack (rotated, x) -> (x, x, rotated) - // stack: x, x, rotr(x, 6) - %rotr(11) - // stack: rotr(x, 11), x, rotr(x, 6) - SWAP1 - // stack: x, rotr(x, 11), rotr(x, 6) - %rotr(25) - // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) - XOR - XOR -%endmacro - -%macro 
sha2_choice - // stack: x, y, z - DUP1 - // stack: x, x, y, z - NOT - // stack: not x, x, y, z - %stack (notx, x, y, z) -> (notx, z, x, y) - // stack: not x, z, x, y - AND - // stack: (not x) and z, x, y - %stack (nxz, x, y) -> (x, y, nxz) - // stack: x, y, (not x) and z - AND - // stack: x and y, (not x) and z - OR -%endmacro - -%macro sha2_majority - // stack: x, y, z - %stack (xyz: 3) -> (xyz, xyz) - // stack: x, y, z, x, y, z - AND - // stack: x and y, z, x, y, z - SWAP2 - // stack: x, z, x and y, y, z - AND - // stack: x and z, x and y, y, z - %stack (a: 2, b: 2) -> (b, a) - // stack: y, z, x and z, x and y - AND - // stack: y and z, x and z, x and y - OR - OR -%endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm deleted file mode 100644 index 7594eb81..00000000 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ /dev/null @@ -1,89 +0,0 @@ -global sha2: - %jump(sha2_store) - -global sha2_store: - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - DUP1 - // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - PUSH 0 - // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - %mstore_kernel_general - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - PUSH 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest -store_loop: - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - DUP2 - // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - ISZERO - %jumpi(store_end) - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - %stack (addr, counter, val) -> (addr, val, counter, addr) - // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest - %mstore_kernel_general - // stack: counter, addr, ... , x[num_bytes-1], retdest - %decrement - // stack: counter-1, addr, ... 
, x[num_bytes-1], retdest - SWAP1 - // stack: addr, counter-1, ... , x[num_bytes-1], retdest - %increment - // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest - %jump(store_loop) -store_end: - // stack: addr, counter, retdest - %pop2 - // stack: retdest - %jump(sha2_pad) - -// Precodition: input is in memory, starting at 0 of kernel general segment, of the form -// num_bytes, x[0], x[1], ..., x[num_bytes - 1] -// Postcodition: output is in memory, starting at 0, of the form -// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -global sha2_pad: - // stack: retdest - PUSH 0 - %mload_kernel_general - // stack: num_bytes, retdest - // STEP 1: append 1 - // insert 128 (= 1 << 7) at x[num_bytes+1] - // stack: num_bytes, retdest - PUSH 1 - PUSH 7 - SHL - // stack: 128, num_bytes, retdest - DUP2 - // stack: num_bytes, 128, num_bytes, retdest - %increment - // stack: num_bytes+1, 128, num_bytes, retdest - %mstore_kernel_general - // stack: num_bytes, retdest - // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - DUP1 - // stack: num_bytes, num_bytes, retdest - %add_const(8) - %div_const(64) - - %increment - // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest - // STEP 3: calculate length := num_bytes*8 - SWAP1 - // stack: num_bytes, num_blocks, retdest - PUSH 8 - MUL - // stack: length = num_bytes*8, num_blocks, retdest - // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - DUP2 - // stack: num_blocks, length, num_blocks, retdest - PUSH 64 - MUL - // stack: last_addr = num_blocks*64, length, num_blocks, retdest - %sha2_write_length - // stack: num_blocks, retdest - DUP1 - // stack: num_blocks, num_blocks, retdest - // STEP 5: write num_blocks to x[0] - PUSH 0 - %mstore_kernel_general - // stack: num_blocks, retdest - %message_schedule_addr_from_num_blocks - %jump(sha2_gen_all_message_schedules) diff --git a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm deleted file mode 
100644 index ed610947..00000000 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ /dev/null @@ -1,32 +0,0 @@ -// "T_1" in the SHA-256 spec -%macro sha2_temp_word1 - // stack: e, f, g, h, K[i], W[i] - DUP1 - // stack: e, e, f, g, h, K[i], W[i] - %sha2_bigsigma_1 - // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - %stack (sig, e, f, g) -> (e, f, g, sig) - // stack: e, f, g, Sigma_1(e), h, K[i], W[i] - %sha2_choice - // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] - %add_u32 - %add_u32 - %add_u32 - %add_u32 - // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] -%endmacro - -// "T_2" in the SHA-256 spec -%macro sha2_temp_word2 - // stack: a, b, c - DUP1 - // stack: a, a, b, c - %sha2_bigsigma_0 - // stack: Sigma_0(a), a, b, c - SWAP3 - // stack: c, a, b, Sigma_0(a) - %sha2_majority - // stack: Maj(c, a, b), Sigma_0(a) - %add_u32 - // stack: Maj(c, a, b) + Sigma_0(a) -%endmacro diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm deleted file mode 100644 index 5727498c..00000000 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ /dev/null @@ -1,119 +0,0 @@ -%macro sha2_write_length - // stack: last_addr, length - SWAP1 - // stack: length, last_addr - DUP1 - // stack: length, length, last_addr - %and_const(0xff) - // stack: length % (1 << 8), length, last_addr - DUP3 - // stack: last_addr, length % (1 << 8), length, last_addr - %mstore_kernel_general - - // stack: length, last_addr - SWAP1 - %decrement - SWAP1 - // stack: length, last_addr - 1 - %shr_const(8) - // stack: length >> 8, last_addr - 1 - DUP1 - // stack: length >> 8, length >> 8, last_addr - 1 - %and_const(0xff) - // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - DUP3 - // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - %mstore_kernel_general - - // stack: length >> 8, last_addr - 1 - SWAP1 - %decrement - SWAP1 - // stack: length >> 8, last_addr - 2 - %shr_const(8) - // stack: length >> 16, last_addr - 2 - 
DUP1 - // stack: length >> 16, length >> 16, last_addr - 2 - %and_const(0xff) - // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - DUP3 - // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - %mstore_kernel_general - - // stack: length >> 16, last_addr - 2 - SWAP1 - %decrement - SWAP1 - // stack: length >> 16, last_addr - 3 - %shr_const(8) - // stack: length >> 24, last_addr - 3 - DUP1 - // stack: length >> 24, length >> 24, last_addr - 3 - %and_const(0xff) - // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - DUP3 - // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - %mstore_kernel_general - - // stack: length >> 24, last_addr - 3 - SWAP1 - %decrement - SWAP1 - // stack: length >> 24, last_addr - 4 - %shr_const(8) - // stack: length >> 32, last_addr - 4 - DUP1 - // stack: length >> 32, length >> 32, last_addr - 4 - %and_const(0xff) - // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - DUP3 - // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - %mstore_kernel_general - - // stack: length >> 32, last_addr - 4 - SWAP1 - %decrement - SWAP1 - // stack: length >> 32, last_addr - 5 - %shr_const(8) - // stack: length >> 40, last_addr - 5 - DUP1 - // stack: length >> 40, length >> 40, last_addr - 5 - %and_const(0xff) - // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - DUP3 - // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - %mstore_kernel_general - - // stack: length >> 40, last_addr - 5 - SWAP1 - %decrement - SWAP1 - // stack: length >> 40, last_addr - 6 - %shr_const(8) - // stack: length >> 48, last_addr - 6 - DUP1 - // stack: length >> 48, length >> 48, last_addr - 6 - %and_const(0xff) - // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - DUP3 - // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - %mstore_kernel_general - - // stack: length >> 48, 
last_addr - 6 - SWAP1 - %decrement - SWAP1 - // stack: length >> 48, last_addr - 7 - %shr_const(8) - // stack: length >> 56, last_addr - 7 - DUP1 - // stack: length >> 56, length >> 56, last_addr - 7 - %and_const(0xff) - // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - DUP3 - // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - %mstore_kernel_general - %pop2 - // stack: (empty) -%endmacro From 1089bbf29e05286d5e65e84f0485eb7653c4398d Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 3 Nov 2022 16:01:08 -0700 Subject: [PATCH 03/44] blake initial --- .../cpu/kernel/asm/hash/blake/compression.asm | 2 + .../cpu/kernel/asm/hash/blake/constants.asm | 34 +++ .../cpu/kernel/asm/hash/blake/g_functions.asm | 3 + .../kernel/asm/hash/blake/permutations.asm | 172 +++++++++++ .../cpu/kernel/asm/hash/blake/store_pad.asm | 89 ++++++ evm/src/cpu/kernel/asm/hash/ripemd/box.asm | 96 ++++++ .../kernel/asm/hash/ripemd/compression.asm | 160 ++++++++++ .../cpu/kernel/asm/hash/ripemd/constants.asm | 117 +++++++ .../cpu/kernel/asm/hash/ripemd/functions.asm | 150 +++++++++ evm/src/cpu/kernel/asm/hash/ripemd/main.asm | 107 +++++++ evm/src/cpu/kernel/asm/hash/ripemd/memory.asm | 137 +++++++++ evm/src/cpu/kernel/asm/hash/ripemd/update.asm | 108 +++++++ .../cpu/kernel/asm/hash/sha2/compression.asm | 285 ++++++++++++++++++ .../cpu/kernel/asm/hash/sha2/constants.asm | 65 ++++ .../kernel/asm/hash/sha2/message_schedule.asm | 240 +++++++++++++++ evm/src/cpu/kernel/asm/hash/sha2/ops.asm | 130 ++++++++ .../cpu/kernel/asm/hash/sha2/store_pad.asm | 89 ++++++ .../cpu/kernel/asm/hash/sha2/temp_words.asm | 32 ++ .../cpu/kernel/asm/hash/sha2/write_length.asm | 119 ++++++++ 19 files changed, 2135 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/hash/blake/compression.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/constants.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/g_functions.asm create mode 100644 
evm/src/cpu/kernel/asm/hash/blake/permutations.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/store_pad.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/box.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/compression.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/constants.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/functions.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/main.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/memory.asm create mode 100644 evm/src/cpu/kernel/asm/hash/ripemd/update.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/compression.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/constants.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/ops.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/store_pad.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/temp_words.asm create mode 100644 evm/src/cpu/kernel/asm/hash/sha2/write_length.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm new file mode 100644 index 00000000..fceabeb1 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -0,0 +1,2 @@ +global blake_compression: + // stack: diff --git a/evm/src/cpu/kernel/asm/hash/blake/constants.asm b/evm/src/cpu/kernel/asm/hash/blake/constants.asm new file mode 100644 index 00000000..12f026e2 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/constants.asm @@ -0,0 +1,34 @@ +global blake_iv: + // IV constants (big-endian) + + // IV_0 + BYTES 106, 9, 230, 103 + BYTES 243, 188, 201, 8 + + // IV_1 + BYTES 187, 103, 174, 133 + BYTES 132, 202, 167, 59 + + // IV_2 + BYTES 60, 110, 243, 114 + BYTES 254, 148, 248, 43 + + // IV_3 + BYTES 165, 79, 245, 58 + BYTES 95, 29, 54, 241 + + // IV_4 + BYTES 81, 14, 82, 127 + BYTES 173, 230, 130, 209 + + // IV_5 + BYTES 155, 5, 104, 140 + BYTES 43, 62, 108, 
31 + + // IV_6 + BYTES 31, 131, 217, 171 + BYTES 251, 65, 189, 107 + + // IV_7 + BYTES 91, 224, 205, 25 + BYTES 19, 126, 33, 121 diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm new file mode 100644 index 00000000..e8b1dab2 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -0,0 +1,3 @@ +global blake_g_function: + // stack: i, a, b, c, d + \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm new file mode 100644 index 00000000..ba7ad291 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ -0,0 +1,172 @@ +permutation_1_constants: + BYTES 14 + BYTES 10 + BYTES 4 + BYTES 8 + BYTES 9 + BYTES 15 + BYTES 13 + BYTES 6 + BYTES 1 + BYTES 12 + BYTES 0 + BYTES 2 + BYTES 11 + BYTES 7 + BYTES 5 + BYTES 3 + +permutation_2_constants: + BYTES 11 + BYTES 8 + BYTES 12 + BYTES 0 + BYTES 5 + BYTES 2 + BYTES 15 + BYTES 13 + BYTES 10 + BYTES 14 + BYTES 3 + BYTES 6 + BYTES 7 + BYTES 1 + BYTES 9 + BYTES 4 + +permutation_3_constants: + BYTES 7 + BYTES 9 + BYTES 3 + BYTES 1 + BYTES 13 + BYTES 12 + BYTES 11 + BYTES 14 + BYTES 2 + BYTES 6 + BYTES 5 + BYTES 10 + BYTES 4 + BYTES 0 + BYTES 15 + BYTES 8 + +permutation_4_constants: + BYTES 9 + BYTES 0 + BYTES 5 + BYTES 7 + BYTES 2 + BYTES 4 + BYTES 10 + BYTES 15 + BYTES 14 + BYTES 1 + BYTES 11 + BYTES 12 + BYTES 6 + BYTES 8 + BYTES 3 + BYTES 13 + +permutation_5_constants: + BYTES 2 + BYTES 12 + BYTES 6 + BYTES 10 + BYTES 0 + BYTES 11 + BYTES 8 + BYTES 3 + BYTES 4 + BYTES 13 + BYTES 7 + BYTES 5 + BYTES 15 + BYTES 14 + BYTES 1 + BYTES 9 + +permutation_6_constants: + BYTES 12 + BYTES 5 + BYTES 1 + BYTES 15 + BYTES 14 + BYTES 13 + BYTES 4 + BYTES 10 + BYTES 0 + BYTES 7 + BYTES 6 + BYTES 3 + BYTES 9 + BYTES 2 + BYTES 8 + BYTES 11 + +permutation_7_constants: + BYTES 13 + BYTES 11 + BYTES 7 + BYTES 14 + BYTES 12 + BYTES 1 + BYTES 3 + BYTES 9 + BYTES 5 + 
BYTES 0 + BYTES 15 + BYTES 4 + BYTES 8 + BYTES 6 + BYTES 2 + BYTES 10 + +permutation_8_constants: + BYTES 6 + BYTES 15 + BYTES 14 + BYTES 9 + BYTES 11 + BYTES 3 + BYTES 0 + BYTES 8 + BYTES 12 + BYTES 2 + BYTES 13 + BYTES 7 + BYTES 1 + BYTES 4 + BYTES 10 + BYTES 5 + +permutation_9_constants: + BYTES 10 + BYTES 2 + BYTES 8 + BYTES 4 + BYTES 7 + BYTES 6 + BYTES 1 + BYTES 5 + BYTES 15 + BYTES 11 + BYTES 9 + BYTES 14 + BYTES 3 + BYTES 12 + BYTES 13 + BYTES 0 + +%macro blake_permutation(round, i) + PUSH permutation_1_constants + PUSH $round + %mod_const(10) + %mul_const(16) + ADD + %add_const($i) + %mload_kernel_code +%endmacro + \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm b/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm new file mode 100644 index 00000000..7594eb81 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm @@ -0,0 +1,89 @@ +global sha2: + %jump(sha2_store) + +global sha2_store: + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +store_loop: + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + DUP2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + ISZERO + %jumpi(store_end) + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + %stack (addr, counter, val) -> (addr, val, counter, addr) + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... 
, x[num_bytes-1], retdest + SWAP1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest + %jump(store_loop) +store_end: + // stack: addr, counter, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) + +// Precodition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcodition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +global sha2_pad: + // stack: retdest + PUSH 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + PUSH 1 + PUSH 7 + SHL + // stack: 128, num_bytes, retdest + DUP2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + DUP1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + SWAP1 + // stack: num_bytes, num_blocks, retdest + PUSH 8 + MUL + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + DUP2 + // stack: num_blocks, length, num_blocks, retdest + PUSH 64 + MUL + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + DUP1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + PUSH 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/box.asm b/evm/src/cpu/kernel/asm/hash/ripemd/box.asm new file mode 100644 
index 00000000..d60d9b8c --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/box.asm @@ -0,0 +1,96 @@ +/// Note that we unpack STATE: 5 to a, b, c, d, e +/// All additions are u32 +/// +/// def box(a, b, c, d, e, F, K): +/// +/// box = get_box(sides, rounds, boxes) +/// a += F(b, c, d) +/// r = load(r)(box) +/// x = load_offset(r) +/// a += x + K +/// s = load(s)(box) +/// a = rol(s, a) +/// a += e +/// c = rol(10, c) +/// +/// return e, a, b, c, d, F, K + +global box: + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + PUSH pre_rol + DUP5 + DUP5 + DUP5 + DUP10 + // stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides, virt + JUMP +pre_rol: + // stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + %get_box + // stack: box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + DUP12 + DUP2 + %mload_kernel_code(r_data) + ADD + // stack: virt + r, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + %mload_kernel_general_u32_LE + // stack: x, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + SWAP1 + SWAP2 + // stack: a, x, box, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + DUP8 + ADD + %as_u32 + // stack: a, box, b, c, d, e, F, K, boxes, rounds, sides, virt + PUSH mid_rol + SWAP2 + // stack: box, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt + %mload_kernel_code(s_data) + // stack: s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt + %jump(rol) +mid_rol: + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + DUP5 + // stack: e, a, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + %as_u32 + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + %stack (a, b, c) -> (10, c, post_rol, a, b) + // stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides, virt + %jump(rol) +post_rol: + // stack: c, a, b, d, e, F, K, boxes , rounds, sides, virt + %stack (c, a, b, d, e, F, K, boxes) -> (boxes, 1, a, b, c, d, F, K, e) 
+ // stack: boxes, 1, a, b, c, d, F, K, e, rounds, sides, virt + SUB + SWAP7 + // stack: e, a, b, c, d, F, K, boxes-1, rounds, sides, virt + %jump(round) + + +%macro get_round + // stack: sides, rounds + %mul_const(5) + PUSH 10 + SUB + SUB + // stack: 10 - 5*sides - rounds +%endmacro + +%macro get_box + // stack: ARGS: 7, boxes, rounds, sides + DUP10 + %mul_const(80) + DUP10 + %mul_const(16) + DUP10 + // stack: boxes , 16*rounds , 80*sides, ARGS: 7, boxes, rounds, sides + PUSH 176 + SUB + SUB + SUB + // stack: 176 - boxes - 16*rounds - 80*sides, ARGS: 7, boxes, rounds, sides +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/compression.asm b/evm/src/cpu/kernel/asm/hash/ripemd/compression.asm new file mode 100644 index 00000000..a83bf832 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/compression.asm @@ -0,0 +1,160 @@ +/// _block is stored in memory: its address virt stays on the stack +/// def compress(STATE: 5, _block): +/// +/// STATEL = STATE +/// STATEL = loop(STATEL) +/// +/// STATER = state +/// STATER = loop(STATER) +/// +/// return mix(STATER, STATEL, STATE) +/// +/// +/// def mix(STATER, STATEL, STATE): +/// return +/// u32(s1 + l2 + r3), +/// u32(s2 + l3 + r4), +/// u32(s3 + l4 + r0), +/// u32(s4 + l0 + r1), +/// u32(s0 + l1 + r2) +/// +/// where si, li, ri, oi, VR, RD respectively denote +/// STATE[i], STATEL[i], STATER[i], OUTPUT[i], virt, retdest + +global compress: + // stack: STATE, virt, retdest + PUSH switch + DUP7 + %stack () -> (0, 0, 16, 5, 1) + // stack: 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest + DUP12 + DUP12 + DUP12 + DUP12 + DUP12 + // stack: STATE, 0, 0, 16, 5, 1, virt, switch, STATE, virt, retdest + %jump(loop) +switch: + // stack: STATEL, STATE, virt, retdest + PUSH mix + DUP12 + %stack () -> (16, 5, 0) + // stack: 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest + DUP15 + DUP15 + DUP15 + DUP15 + DUP15 + // stack: STATE, 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest + %stack (STATE: 5) -> (STATE, 0, 0) + // 
stack: STATE, 0, 0, 16, 5, 0, virt, mix, STATEL, STATE, virt, retdest + %jump(loop) +mix: + // stack: r0, r1, r2, r3, r4, l0, l1, l2, l3, l4, s0, s1, s2, s3, s4, VR, RD + SWAP10 + // stack: s0, r1, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD + SWAP1 + // stack: r1, s0, r2, r3, r4, l0, l1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD + SWAP6 + // stack: l1, s0, r2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD + %add3_u32 + // stack: o4, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, RD + SWAP14 + // stack: RD, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, s3, s4, VR, o4 + SWAP11 + // stack: s3, r3, r4, l0, r1, l2, l3, l4, r0, s1, s2, RD, s4, VR, o4 + SWAP10 + // stack: s2, r3, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4 + SWAP1 + // stack: r3, s2, r4, l0, r1, l2, l3, l4, r0, s1, s3, RD, s4, VR, o4 + SWAP6 + // stack: l3, s2, r4, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4 + %add3_u32 + // stack: o1, l0, r1, l2, r3, l4, r0, s1, s3, RD, s4, VR, o4 + SWAP9 + // stack: RD, l0, r1, l2, r3, l4, r0, s1, s3, o1, s4, VR, o4 + SWAP10 + // stack: s4, l0, r1, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4 + %add3_u32 + // stack: o3, l2, r3, l4, r0, s1, s3, o1, RD, VR, o4 + SWAP9 + // stack: VR, l2, r3, l4, r0, s1, s3, o1, RD, o3, o4 + SWAP5 + // stack: s1, l2, r3, l4, r0, VR, s3, o1, RD, o3, o4 + %add3_u32 + // stack: o0, l4, r0, VR, s3, o1, RD, o3, o4 + SWAP4 + // stack: s3, l4, r0, VR, o0, o1, RD, o3, o4 + %add3_u32 + // stack: o2, VR, o0, o1, RD, o3, o4 + SWAP4 + // stack: RD, VR, o0, o1, o2, o3, o4 + SWAP1 + // stack: VR, RD, o0, o1, o2, o3, o4 + POP + // stack: RD, o0, o1, o2, o3, o4 + JUMP + + +/// def loop(STATE: 5): +/// while rounds: +/// update_round_vars() +/// round(STATE: 5, F, K, rounds, sides) +/// +/// def update_round_vars(): +/// F = load(F)(sides, rounds) +/// K = load(K)(sides, rounds) +/// +/// def round(STATE, rounds, sides): +/// while boxes: +/// box(STATE, F, K) +/// boxes -= 1 +/// boxes = 16 +/// rounds -= 1 + +loop: + // stack: 
STATE, F, K, 16, rounds, sides, virt, retdest + DUP9 + // stack: round, STATE, F, K, 16, rounds, sides, virt, retdest + %jumpi(update_round_vars) + // stack: STATE, F, K, 16, 0, sides, virt, retdest + %stack (STATE: 5, F, K, boxes, rounds, sides, virt, retdest) -> (retdest, STATE) + // stack: retdest, STATE + JUMP +update_round_vars: + // stack: STATE, F , K , 16, rounds, sides, virt, retdest + DUP9 + DUP11 + %get_round + DUP1 + // stack: rnd, rnd, STATE, F , K , 16, rounds, sides, virt, retdest + SWAP7 + POP + %push_f + SWAP7 + // stack: rnd, rnd, STATE, F', K , 16, rounds, sides, virt, retdest + SWAP8 + POP + %mload_kernel_code_u32(k_data) + SWAP7 + POP + // stack: STATE, F', K', 16, rounds, sides, virt, retdest + %jump(round) +global round: + // stack: STATE, F, K, boxes, rounds , sides, virt, retdest + DUP8 + // stack: boxes, STATE, F, K, boxes, rounds , sides, virt, retdest + %jumpi(box) + // stack: STATE, F, K, 0, rounds , sides, virt, retdest + SWAP7 + POP + PUSH 16 + SWAP7 + // stack: STATE, F, K, 16, rounds , sides, virt, retdest + PUSH 1 + DUP10 + SUB + SWAP9 + POP + // stack: STATE, F, K, 16, rounds-1, sides, virt, retdest + %jump(loop) diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/constants.asm b/evm/src/cpu/kernel/asm/hash/ripemd/constants.asm new file mode 100644 index 00000000..7a8959fe --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/constants.asm @@ -0,0 +1,117 @@ +global k_data: + // Left + BYTES 0x00, 0x00, 0x00, 0x00 + BYTES 0x5A, 0x82, 0x79, 0x99 + BYTES 0x6E, 0xD9, 0xEB, 0xA1 + BYTES 0x8F, 0x1B, 0xBC, 0xDC + BYTES 0xA9, 0x53, 0xFD, 0x4E + // Right + BYTES 0x50, 0xA2, 0x8B, 0xE6 + BYTES 0x5C, 0x4D, 0xD1, 0x24 + BYTES 0x6D, 0x70, 0x3E, 0xF3 + BYTES 0x7A, 0x6D, 0x76, 0xE9 + BYTES 0x00, 0x00, 0x00, 0x00 + +global s_data: + // Left Round 0 + BYTES 11, 14, 15, 12 + BYTES 05, 08, 07, 09 + BYTES 11, 13, 14, 15 + BYTES 06, 07, 09, 08 + // Left Round 1 + BYTES 07, 06, 08, 13 + BYTES 11, 09, 07, 15 + BYTES 07, 12, 15, 09 + BYTES 11, 07, 13, 12 + 
// Left Round 2 + BYTES 11, 13, 06, 07 + BYTES 14, 09, 13, 15 + BYTES 14, 08, 13, 06 + BYTES 05, 12, 07, 05 + // Left Round 3 + BYTES 11, 12, 14, 15 + BYTES 14, 15, 09, 08 + BYTES 09, 14, 05, 06 + BYTES 08, 06, 05, 12 + // Left Round 4 + BYTES 09, 15, 05, 11 + BYTES 06, 08, 13, 12 + BYTES 05, 12, 13, 14 + BYTES 11, 08, 05, 06 + // Right Round 0 + BYTES 08, 09, 09, 11 + BYTES 13, 15, 15, 05 + BYTES 07, 07, 08, 11 + BYTES 14, 14, 12, 06 + // Right Round 1 + BYTES 09, 13, 15, 07 + BYTES 12, 08, 09, 11 + BYTES 07, 07, 12, 07 + BYTES 06, 15, 13, 11 + // Right Round 2 + BYTES 09, 07, 15, 11 + BYTES 08, 06, 06, 14 + BYTES 12, 13, 05, 14 + BYTES 13, 13, 07, 05 + // Right Round 3 + BYTES 15, 05, 08, 11 + BYTES 14, 14, 06, 14 + BYTES 06, 09, 12, 09 + BYTES 12, 05, 15, 08 + // Right Round 4 + BYTES 08, 05, 12, 09 + BYTES 12, 05, 14, 06 + BYTES 08, 13, 06, 05 + BYTES 15, 13, 11, 11 + +global r_data: + // Left Round 0 + BYTES 00, 04, 08, 12 + BYTES 16, 20, 24, 28 + BYTES 32, 36, 40, 44 + BYTES 48, 52, 56, 60 + // Left Round 1 + BYTES 28, 16, 52, 04 + BYTES 40, 24, 60, 12 + BYTES 48, 00, 36, 20 + BYTES 08, 56, 44, 32 + // Left Round 2 + BYTES 12, 40, 56, 16 + BYTES 36, 60, 32, 04 + BYTES 08, 28, 00, 24 + BYTES 52, 44, 20, 48 + // Left Round 3 + BYTES 04, 36, 44, 40 + BYTES 00, 32, 48, 16 + BYTES 52, 12, 28, 60 + BYTES 56, 20, 24, 08 + // Left Round 4 + BYTES 16, 00, 20, 36 + BYTES 28, 48, 08, 40 + BYTES 56, 04, 12, 32 + BYTES 44, 24, 60, 52 + // Right Round 0 + BYTES 20, 56, 28, 00 + BYTES 36, 08, 44, 16 + BYTES 52, 24, 60, 32 + BYTES 04, 40, 12, 48 + // Right Round 1 + BYTES 24, 44, 12, 28 + BYTES 00, 52, 20, 40 + BYTES 56, 60, 32, 48 + BYTES 16, 36, 04, 08 + // Right Round 2 + BYTES 60, 20, 04, 12 + BYTES 28, 56, 24, 36 + BYTES 44, 32, 48, 08 + BYTES 40, 00, 16, 52 + // Right Round 3 + BYTES 32, 24, 16, 04 + BYTES 12, 44, 60, 00 + BYTES 20, 48, 08, 52 + BYTES 36, 28, 40, 56 + // Right Round 4 + BYTES 48, 60, 40, 16 + BYTES 04, 20, 32, 28 + BYTES 24, 08, 52, 56 + BYTES 00, 12, 
36, 44 diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm b/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm new file mode 100644 index 00000000..ac111215 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm @@ -0,0 +1,150 @@ +/// def rol(n, x): +/// return (u32(x << n)) | (x >> (32 - n)) + +global rol: + // stack: n, x, retdest + SWAP1 + DUP1 + DUP3 + // stack: n, x, x, n, retdest + PUSH 32 + SUB + // stack: 32-n, x, x, n, retdest + SHR + // stack: x >> (32-n), x, n, retdest + SWAP2 + // stack: n, x, x >> (32-n), retdest + SHL + // stack: x << n, x >> (32-n), retdest + %as_u32 + // stack: u32(x << n), x >> (32-n), retdest + OR + // stack: u32(x << n) | (x >> (32-n)), retdest + SWAP1 + JUMP + +// def push_f(rnd): +// Fs = [F0, F1, F2, F3, F4, F4, F3, F2, F1, F0] +// acc = 0 +// for i, F in enumerate(Fs): +// acc += (i==rnd)*F +// return acc, rnd +// +// %this_f(i,F) enacts +// acc += (i==rnd)*F + +%macro push_f + // stack: rnd + PUSH 0 + %this_f(0,F0) + %this_f(1,F1) + %this_f(2,F2) + %this_f(3,F3) + %this_f(4,F4) + %this_f(5,F4) + %this_f(6,F3) + %this_f(7,F2) + %this_f(8,F1) + %this_f(9,F0) + // stack: F, rnd +%endmacro + +%macro this_f(i, F) + // stack: acc, rnd + DUP2 + // stack: rnd , acc, rnd + %eq_const($i) + // stack: rnd==i , acc, rnd + %mul_const($F) + // stack: (rnd==i)*F , acc, rnd + ADD + // stack: (rnd==j)*F + acc, rnd +%endmacro + +/// def F0(x, y, z): +/// return x ^ y ^ z + +global F0: + // stack: x , y , z, retdest + XOR + // stack: x ^ y , z, retdest + XOR + // stack: x ^ y ^ z, retdest + SWAP1 + JUMP + +/// def F1(x, y, z): +/// return (x & y) | (u32(~x) & z) + +global F1: + // stack: x, y, z, retdest + DUP1 + // stack: x, x, y, z, retdest + SWAP2 + // stack: y, x, x, z, retdest + AND + // stack: y & x, x, z, retdest + SWAP2 + // stack: z, x, y & x , retdest + SWAP1 + // stack: x, z, y & x , retdest + %not_u32 + // stack: ~x, z, y & x , retdest + AND + // stack: ~x & z , y & x , retdest + OR + // stack: (~x & z) | (y & 
x), retdest + SWAP1 + JUMP + +/// def F2(x, y, z): +/// return (x | u32(~y)) ^ z + +global F2: + // stack: x , y, z, retdest + SWAP1 + // stack: y , x, z, retdest + %not_u32 + // stack: ~y , x , z, retdest + OR + // stack: ~y | x , z, retdest + XOR + // stack: (~y | x) ^ z, retdest + SWAP1 + JUMP + +/// def F3(x, y, z): +/// return (x & z) | (u32(~z) & y) + +global F3: + // stack: x, y , z , retdest + DUP3 + // stack: z , x, y , z , retdest + AND + // stack: z & x, y , z , retdest + SWAP2 + // stack: z, y, z & x , retdest + %not_u32 + // stack: ~z , y, z & x , retdest + AND + // stack: ~z & y, z & x , retdest + OR + // stack: (~z & y) | (z & x), retdest + SWAP1 + JUMP + +/// def F4(x, y, z): +/// return x ^ (y | u32(~z)) + +global F4: + // stack: x, y, z, retdest + SWAP2 + // stack: z, y, x, retdest + %not_u32 + // stack: ~z, y, x, retdest + OR + // stack: ~z | y, x, retdest + XOR + // stack: (~z | y) ^ x, retdest + SWAP1 + JUMP diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/main.asm b/evm/src/cpu/kernel/asm/hash/ripemd/main.asm new file mode 100644 index 00000000..bbcb4068 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/main.asm @@ -0,0 +1,107 @@ +/// Variables beginning with _ are in memory +/// +/// def ripemd160(_input): +/// STATE, count, _buffer = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0], 0, [0]*64 +/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, len(input) , bytes = _input ) +/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, padlength(len(input)), bytes = [0x80]+[0]*63) +/// STATE, count, _buffer = ripemd_update(STATE, count, _buffer, 8, bytes = size(len(_input))) +/// return process(STATE) +/// +/// ripemd is called on a stack with ADDR and length +/// ripemd_stack is called on a stack with length, followed by the input bytes +/// +/// ripemd_update receives and return the stack in the form: +/// stack: STATE, count, length, virt +/// where virt is the virtual address of the bytes argument + 
+global ripemd_stack: + // stack: length, INPUT + %stack (length) -> (64, length, 0x80, 63, length, length) + // stack: 64, length, 0x80, 63, length, length, INPUT + %jump(ripemd_storage) // stores the following into memory + // init _buffer at virt 0 [consumes 64] + // store _size at virt 64 [consumes length] + // store _padding at virt 72 [consumes 0x80, 63] + // store _input at virt 136 [consumes length] + +global ripemd: + // stack: ADDR, length + %stack (ADDR: 3, length) -> (64, length, 0x80, 63, length, ADDR, length) + // stack: 64, length, 0x80, 63, length, ADDR, length + %jump(ripemd_storage) // stores the following into memory + // init _buffer at virt 0 [consumes 64] + // store _size at virt 64 [consumes length] + // store _padding at virt 72 [consumes 0x80, 63] + // store _input at virt 136 [consumes ADDR, length] + +global ripemd_init: + // stack: length + %stack (length) -> ( 0, length, 136, ripemd_1, ripemd_2, process) + // stack: count = 0, length, virt = 136, ripemd_1, ripemd_2, process + %stack () -> (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0) + // stack: 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0, count, length, virt, LABELS + %jump(ripemd_update) +ripemd_1: + // stack: STATE, count, length , virt , LABELS + DUP7 + // stack: length, STATE, count, length , virt , LABELS + %padlength + // stack: padlength, STATE, count, length , virt , LABELS + SWAP7 + POP + // stack: STATE, count, length = padlength, virt , LABELS + %stack (STATE: 5, count, length, virt) -> (STATE, count, length, 72) + // STATE, count, length , virt = 72, LABELS + %jump(ripemd_update) +ripemd_2: + // stack: STATE, count, length , virt , LABELS + %stack (STATE: 5, count, length, virt) -> (STATE, count, 8, 64) + // stack: STATE, count, length = 8, virt = 64, LABELS + %jump(ripemd_update) +global process: + // stack: a , b, c, d, e, count, length, virt + %reverse_bytes_u32 + %shl_const(128) + // stack: a', b, c, d, e, VARS + SWAP1 + %reverse_bytes_u32 
+ %shl_const(96) + OR + // stack: b' a', c, d, e, VARS + SWAP1 + %reverse_bytes_u32 + %shl_const(64) + OR + // stack: c' b' a', d, e, VARS + SWAP1 + %reverse_bytes_u32 + %shl_const(32) + OR + // stack: d' c' b' a', e, VARS + SWAP1 + %reverse_bytes_u32 + OR + // stack: e' d' c' b' a', VARS + %stack (result, VARS: 3, retdest) -> (retdest, result) + // stack: 0xdeadbeef, result + JUMP + + +/// def padlength(length): +/// t = length % 64 +/// return 56 + 64*(t > 55) - t + +%macro padlength + // stack: count + %mod_const(64) + // stack: t = count % 64 + PUSH 55 + DUP2 + // stack: t , 55 , t + GT + // stack: t > 55 , t + %mul_const(64) + %add_const(56) + // stack: 56 + 64*(t > 55), t + SUB +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/memory.asm b/evm/src/cpu/kernel/asm/hash/ripemd/memory.asm new file mode 100644 index 00000000..e3b7cbe6 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/memory.asm @@ -0,0 +1,137 @@ +global ripemd_storage: // starts by initializing buffer + // stack: i [init: 64] + %store_zeros(64, ripemd_storage) + // stack: (empty) + %jump(store_size) + +store_size: + // stack: length + %shl_const(3) + // stack: abcdefgh + %extract_and_store_byte(64) + // stack: abcdefg + %extract_and_store_byte(65) + // stack: abcdef + %extract_and_store_byte(66) + // stack: abcde + %extract_and_store_byte(67) + // stack: abcd + %extract_and_store_byte(68) + // stack: abc + %extract_and_store_byte(69) + // stack: ab + %extract_and_store_byte(70) + // stack: a + %mstore_kernel_general(71) + // stack: 0x80 // padding has 0x80 in first position and zeros elsewhere + %mstore_kernel_general(72) // store first padding term here so as to avoid extra label + %jump(store_padding) + +store_padding: + // stack: i [init 63], length + %store_zeros(136, store_padding) + // stack: length + DUP1 + %jumpi(store_input_stack) + POP + %jump(ripemd_init) + +store_input_stack: + // stack: rem, length, REM_INP + %stack (rem, length, head) -> (length, rem, 136, head, rem, 
length) + SUB + ADD + // stack: offset, byte, rem, length, REM_INP + %mstore_kernel_general + // stack: rem, length, REM_INP + %decrement + DUP1 + // stack: rem - 1, rem - 1, length, REM_INP + %jumpi(store_input_stack) + // stack: 0, length + POP + %jump(ripemd_init) + +store_input: + // stack: rem , ADDR , length + DUP4 + DUP4 + DUP4 + MLOAD_GENERAL + // stack: byte, rem , ADDR , length + DUP2 + DUP7 + SUB + %add_const(136) + // stack: offset, byte, rem , ADDR , length + %mstore_kernel_general + // stack: rem , ADDR , length + %decrement + // stack: rem-1, ADDR , length + SWAP3 + %increment + SWAP3 + // stack: rem-1, ADDR+1, length + DUP1 + %jumpi(store_input) + // stack: 0 , ADDR , length + %pop4 + // stack: length + %jump(ripemd_init) + +/// def buffer_update(get, set, times): +/// for i in range(times): +/// buffer[set+i] = bytestring[get+i] + +global buffer_update: + // stack: get , set , times , retdest + DUP2 + DUP2 + // stack: get, set, get , set , times , retdest + %mupdate_kernel_general + // stack: get , set , times , retdest + %increment + SWAP1 + %increment + SWAP1 + SWAP2 + %decrement + SWAP2 + // stack: get+1, set+1, times-1, retdest + DUP3 + %jumpi(buffer_update) + // stack: get , set , 0 , retdest + %pop3 + JUMP + + +%macro store_zeros(N, label) + // stack: i + %stack (i) -> ($N, i, 0, i) + SUB + // stack: offset = N-i, 0, i + %mstore_kernel_general + // stack: i + %decrement + DUP1 + // stack: i-1, i-1 + %jumpi($label) + // stack: 0 + POP +%endmacro + +%macro extract_and_store_byte(offset) + // stack: xsy + PUSH 0x100 + DUP2 + MOD + // stack: y, xsy + %stack (y, xsy) -> (xsy, y, 0x100, y) + // stack: xsy, y, 0x100, y + SUB + DIV + SWAP1 + // stack: y, xs + %mstore_kernel_general($offset) + // stack: xs +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/update.asm b/evm/src/cpu/kernel/asm/hash/ripemd/update.asm new file mode 100644 index 00000000..a0c3ef68 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/ripemd/update.asm @@ -0,0 +1,108 @@ 
+/// ripemd_update will receive and return the stack in the form: +/// stack: STATE, count, length, virt +/// +/// def ripemd_update(state, count, buffer, length, bytestring): +/// have = (count // 8) % 64 +/// need = 64 - have +/// shift = 0 +/// P = length >= need and have +/// Q = length >= need +/// if P: +/// update_1() +/// if Q: +/// update_2() +/// R = length > shift +/// if R: +/// buffer_update(virt + shift, have, length - shift) +/// +/// return state, count + 8*length, buffer + +global ripemd_update: + // stack: STATE, count, length, virt, retdest + %stack (STATE: 5, count, length, virt) -> (count, 8, 64, STATE, count, length, virt) + DIV + MOD + // stack: have, STATE, count, length, virt, retdest + DUP1 + PUSH 64 + SUB + PUSH 0 + // stack: shift, need, have, STATE, count, length, virt, retdest + %stack (shift, need, have, STATE: 5, count, length) -> (length, need, STATE, shift, need, have, count, length) + // stack: length, need, STATE, shift, need, have, count, length, virt, retdest + LT + ISZERO + // stack: Q, STATE, shift, need, have, count, length, virt, retdest + %stack (Q, STATE: 5, shift, need, have) -> (have, Q, Q, STATE, shift, need, have) + %gt_const(0) + AND + // stack: P, Q, STATE, shift, need, have, count, length, virt, retdest + %jumpi(update_1) + // stack: Q, STATE, shift, need, have, count, length, virt, retdest + %jumpi(update_2) +final_update: + // stack: STATE, shift, need, have, count, length, virt, retdest + %stack (STATE: 5, shift, need, have, count, length) -> (length, shift, return_step, STATE, shift, need, have, count, length) + SUB + // stack: ARGS: 2, STATE, shift, need, have, count, length, virt, retdest + %stack (ARGS: 2, STATE: 5, shift, need, have, count, length, virt) -> (shift, virt, have, ARGS, STATE, shift, need, have, count, length, virt) + ADD + // stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest + %stack (ARGS: 4, STATE: 5, shift, need, have, count, length) -> (length, shift, ARGS, STATE, 
shift, need, have, count, length) + GT + // stack: R, ARGS: 4, STATE, shift, need, have, count, length, virt, retdest + %jumpi(buffer_update) + // stack: ARGS: 4, STATE, shift, need, have, count, length, virt, retdest + %pop3 + JUMP +return_step: + // stack: STATE, shift, need, have, count, length, virt, retdest + SWAP8 + DUP10 + %mul_const(8) + ADD + SWAP8 + // stack: STATE, shift, need, have, count, length, virt, retdest + %stack (STATE: 5, shift, need, have, count, length, virt, retdest) -> (retdest, STATE, count, length, virt) + JUMP + + +/// def update_1(): +/// buffer_update(virt, have, need) +/// shift = need +/// have = 0 +/// state = compress(state, buffer) + +update_1: + // stack: Q, STATE, shift, need, have, count, length, virt, retdest + %stack (Q, STATE: 5, shift, need, have, count, length, virt) -> (virt, have, need, update_1a, STATE, shift, need, have, count, length, virt) + %jump(buffer_update) +update_1a: + // stack: STATE, shift, need, have, count, length, virt, retdest + %stack (STATE: 5, shift, need, have) -> (STATE, 0, update_2, need, need, 0) + // stack: STATE, 0, update_2, shift = need, need, have = 0, count, length, virt, retdest + %jump(compress) + +/// def update_2(): +/// while length >= shift + 64: +/// shift += 64 +/// state = compress(state, bytestring[shift-64:]) + +update_2: + // stack: STATE, shift, need, have, count, length, virt, retdest + %stack (STATE: 5, shift, need, have, count, length) -> (64, shift, length, STATE, shift, need, have, count, length) + ADD + GT + // stack: cond, STATE, shift, need, have, count, length, virt, retdest + %jumpi(final_update) + SWAP5 + %add_const(64) + SWAP5 + %stack (STATE: 5, shift) -> (shift, 64, STATE, shift) + DUP13 + ADD + SUB + // stack: offset, STATE, shift, need, have, count, length, virt, retdest + %stack (offset, STATE: 5) -> (STATE, offset, update_2) + // stack: STATE, offset, update_2, shift, need, have, count, length, virt, retdest + %jump(compress) diff --git 
a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm new file mode 100644 index 00000000..8850c1c8 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm @@ -0,0 +1,285 @@ +// We use memory starting at 320 * num_blocks + 2 (after the message schedule +// space) as scratch space to store stack values. +%macro scratch_space_addr_from_num_blocks + // stack: num_blocks + %mul_const(320) + %add_const(2) +%endmacro + +global sha2_compression: + // stack: message_schedule_addr, retdest + PUSH 0 + // stack: i=0, message_schedule_addr, retdest + SWAP1 + // stack: message_schedule_addr, i=0, retdest + PUSH 0 + // stack: 0, message_schedule_addr, i=0, retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, retdest + DUP1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest + SWAP1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + // Push the initial hash values; these constants are called H^(0) in the spec. + PUSH 0x5be0cd19 // H^(0)_7 + PUSH 0x1f83d9ab // H^(0)_6 + PUSH 0x9b05688c // H^(0)_5 + PUSH 0x510e527f // H^(0)_4 + PUSH 0xa54ff53a // H^(0)_3 + PUSH 0x3c6ef372 // H^(0)_2 + PUSH 0xbb67ae85 // H^(0)_1 + PUSH 0x6a09e667 // H^(0)_0 + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +compression_start_block: + // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. 
+ DUP10 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP2 + DUP2 + // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP3 + DUP2 + // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP4 + DUP2 + // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP5 + DUP2 + // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // 
stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP6 + DUP2 + // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP7 + DUP2 + // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP8 + DUP2 + // stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %add_const(4) + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + + DUP9 + DUP2 + // stack: scratch_space_addr+28, h[0], 
scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + %mstore_kernel_general_u32 + // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + POP + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest +compression_loop: + // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP11 + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP13 + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + ADD + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + PUSH sha2_constants_k + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP14 + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mul_const(4) + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + ADD + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_code_u32 + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word1 + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (t, a, b, c) -> (a, b, c, t, a, b, c) + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %sha2_temp_word2 + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP6 + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP3 + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP2 + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, 
b, c, d, e, f, g, h) + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %increment + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %eq_const(64) + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP12 + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SUB + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP13 + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + SWAP1 + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + PUSH 256 + MUL + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + ADD + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], 
g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + SWAP12 + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + SWAP10 + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + POP + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + %and_const(63) + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + SWAP12 + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + POP + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + DUP12 + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + ISZERO + %jumpi(compression_end_block) + %jump(compression_loop) +compression_end_block: + // Add the initial values of the eight working variables (from the start of this block's compression) back into them. 
+ // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %mload_kernel_general_u32 + // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP1 + // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(4) + %mload_kernel_general_u32 + // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP2 + // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(8) + %mload_kernel_general_u32 + // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP3 + // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(12) + %mload_kernel_general_u32 + // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + 
%add_u32 + // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP4 + // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(16) + %mload_kernel_general_u32 + // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP5 + // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(20) + %mload_kernel_general_u32 + // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP6 + // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + DUP10 + %add_const(24) + %mload_kernel_general_u32 + // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP7 + // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest + DUP10 + %add_const(28) + %mload_kernel_general_u32 + // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + %add_u32 + // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + SWAP8 + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + DUP1 + // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + ISZERO + // In this case, we've finished all the blocks. + %jumpi(compression_end) + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %stack (num_blocks, working: 8) -> (working, num_blocks) + %jump(compression_start_block) +compression_end: + // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + POP + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest + SWAP3 + // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], 
f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + %pop3 + // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest + SWAP1 + JUMP diff --git a/evm/src/cpu/kernel/asm/hash/sha2/constants.asm b/evm/src/cpu/kernel/asm/hash/sha2/constants.asm new file mode 100644 index 00000000..6ce4d907 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/constants.asm @@ -0,0 +1,65 @@ +global sha2_constants_k: + BYTES 66, 138, 47, 152 + BYTES 113, 55, 68, 145 + BYTES 181, 192, 251, 207 + BYTES 233, 181, 219, 165 + BYTES 57, 86, 194, 91 + BYTES 89, 241, 17, 241 + BYTES 146, 63, 130, 164 + BYTES 171, 28, 94, 213 + BYTES 216, 7, 170, 152 + BYTES 18, 131, 91, 1 + BYTES 36, 49, 133, 190 + BYTES 85, 12, 125, 195 + BYTES 114, 190, 93, 116 + BYTES 128, 222, 177, 254 + BYTES 155, 220, 6, 167 + BYTES 193, 155, 241, 116 + BYTES 228, 155, 105, 193 + BYTES 239, 190, 71, 134 + BYTES 15, 193, 157, 198 + BYTES 36, 12, 161, 204 + BYTES 45, 233, 44, 111 + BYTES 74, 116, 132, 170 + BYTES 92, 176, 169, 220 + BYTES 118, 249, 136, 218 + BYTES 152, 62, 81, 82 + BYTES 168, 49, 198, 109 + BYTES 176, 3, 39, 200 + BYTES 191, 89, 127, 199 + BYTES 198, 224, 11, 243 + BYTES 213, 167, 145, 71 + BYTES 6, 202, 99, 81 + BYTES 20, 41, 41, 103 + BYTES 39, 183, 10, 133 + BYTES 46, 27, 33, 56 + BYTES 77, 44, 109, 252 + BYTES 83, 56, 13, 19 + BYTES 101, 10, 115, 84 + BYTES 118, 106, 10, 187 + BYTES 129, 194, 201, 46 + BYTES 146, 114, 44, 133 + BYTES 162, 191, 232, 161 + BYTES 168, 26, 102, 75 + BYTES 194, 75, 139, 112 + BYTES 199, 108, 81, 163 + BYTES 209, 146, 232, 25 + BYTES 214, 153, 6, 36 + BYTES 244, 14, 53, 133 + BYTES 16, 106, 160, 112 + BYTES 25, 164, 193, 22 + BYTES 30, 55, 108, 8 + BYTES 39, 72, 119, 76 + BYTES 52, 176, 188, 181 + BYTES 57, 28, 12, 179 + BYTES 78, 216, 170, 74 + BYTES 91, 156, 202, 79 + BYTES 104, 46, 111, 243 + BYTES 116, 143, 130, 238 + BYTES 120, 165, 99, 111 + BYTES 132, 200, 120, 20 + BYTES 140, 199, 2, 8 + BYTES 
144, 190, 255, 250 + BYTES 164, 80, 108, 235 + BYTES 190, 249, 163, 247 + BYTES 198, 113, 120, 242 diff --git a/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm new file mode 100644 index 00000000..78d98634 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm @@ -0,0 +1,240 @@ +// We put the message schedule in memory starting at 64 * num_blocks + 2. +%macro message_schedule_addr_from_num_blocks + // stack: num_blocks + %mul_const(64) + %add_const(2) +%endmacro + +// Precondition: stack contains address of one message block, followed by output address +// Postcondition: 256 bytes starting at given output address contain the 64 32-bit chunks +// of message schedule (in four-byte increments) +gen_message_schedule_from_block: + // stack: block_addr, output_addr, retdest + DUP1 + // stack: block_addr, block_addr, output_addr, retdest + %add_const(32) + // stack: block_addr + 32, block_addr, output_addr, retdest + SWAP1 + // stack: block_addr, block_addr + 32, output_addr, retdest + %mload_kernel_general_u256 + // stack: block[0], block_addr + 32, output_addr, retdest + SWAP1 + // stack: block_addr + 32, block[0], output_addr, retdest + %mload_kernel_general_u256 + // stack: block[1], block[0], output_addr, retdest + SWAP2 + // stack: output_addr, block[0], block[1], retdest + %add_const(28) + PUSH 8 + // stack: counter=8, output_addr + 28, block[0], block[1], retdest + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_loop: + // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule. 
+ // stack: counter, output_addr, block[0], block[1], retdest + SWAP2 + // stack: block[0], output_addr, counter, block[1], retdest + DUP1 + // stack: block[0], block[0], output_addr, counter, block[1], retdest + %shr_const(32) + // stack: block[0] >> 32, block[0], output_addr, counter, block[1], retdest + SWAP1 + // stack: block[0], block[0] >> 32, output_addr, counter, block[1], retdest + %as_u32 + // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + DUP3 + // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest + %mstore_kernel_general_u32 + // stack: block[0] >> 32, output_addr, counter, block[1], retdest + SWAP1 + // stack: output_addr, block[0] >> 32, counter, block[1], retdest + %sub_const(4) + // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest + SWAP1 + // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + SWAP2 + // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest + %decrement + DUP1 + ISZERO + %jumpi(gen_message_schedule_from_block_0_end) + %jump(gen_message_schedule_from_block_0_loop) +gen_message_schedule_from_block_0_end: + // stack: old counter=0, output_addr, block[0], block[1], retdest + POP + PUSH 8 + // stack: counter=8, output_addr, block[0], block[1], retdest + %stack (counter, out, b0, b1) -> (out, counter, b1, b0) + // stack: output_addr, counter, block[1], block[0], retdest + %add_const(64) + // stack: output_addr + 64, counter, block[1], block[0], retdest + SWAP1 + // stack: counter, output_addr + 64, block[1], block[0], retdest +gen_message_schedule_from_block_1_loop: + // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule. 
+ // stack: counter, output_addr, block[1], block[0], retdest + SWAP2 + // stack: block[1], output_addr, counter, block[0], retdest + DUP1 + // stack: block[1], block[1], output_addr, counter, block[0], retdest + %shr_const(32) + // stack: block[1] >> 32, block[1], output_addr, counter, block[0], retdest + SWAP1 + // stack: block[1], block[1] >> 32, output_addr, counter, block[0], retdest + %as_u32 + // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + DUP3 + // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest + %mstore_kernel_general_u32 + // stack: block[1] >> 32, output_addr, counter, block[0], retdest + SWAP1 + // stack: output_addr, block[1] >> 32, counter, block[0], retdest + %sub_const(4) + // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest + SWAP1 + // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + SWAP2 + // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest + %decrement + DUP1 + ISZERO + %jumpi(gen_message_schedule_from_block_1_end) + %jump(gen_message_schedule_from_block_1_loop) +gen_message_schedule_from_block_1_end: + // stack: old counter=0, output_addr, block[1], block[0], retdest + POP + // stack: output_addr, block[0], block[1], retdest + PUSH 48 + // stack: counter=48, output_addr, block[0], block[1], retdest + SWAP1 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(36) + // stack: output_addr + 36, counter, block[0], block[1], retdest + SWAP1 + // stack: counter, output_addr + 36, block[0], block[1], retdest +gen_message_schedule_remaining_loop: + // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. 
+ // stack: counter, output_addr, block[0], block[1], retdest + SWAP1 + // stack: output_addr, counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, counter, block[0], block[1], retdest + PUSH 2 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest + %sha2_sigma_1 + // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 7 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 15 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %sha2_sigma_0 + // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + DUP1 + // stack: 
output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + PUSH 16 + PUSH 4 + MUL + SWAP1 + SUB + // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + %mload_kernel_general_u32 + // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP1 + // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest + SWAP4 + // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %add_u32 + %add_u32 + %add_u32 + // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + DUP2 + // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest + %mstore_kernel_general_u32 + // stack: output_addr, counter, block[0], block[1], retdest + %add_const(4) + // stack: output_addr + 4, counter, block[0], block[1], retdest + SWAP1 + // stack: counter, output_addr + 4, block[0], block[1], retdest + %decrement + // stack: counter - 1, output_addr + 4, block[0], block[1], retdest + DUP1 + ISZERO + %jumpi(gen_message_schedule_remaining_end) + %jump(gen_message_schedule_remaining_loop) +gen_message_schedule_remaining_end: + // stack: counter=0, output_addr, block[0], block[1], retdest + %pop4 + JUMP + +// Precondition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// stack contains
output_addr +// Postcondition: starting at output_addr, set of 256 bytes per block +// each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) +global sha2_gen_all_message_schedules: + // stack: output_addr, retdest + DUP1 + // stack: output_addr, output_addr, retdest + PUSH 0 + // stack: 0, output_addr, output_addr, retdest + %mload_kernel_general + // stack: num_blocks, output_addr, output_addr, retdest + PUSH 1 + // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest +gen_all_message_schedules_loop: + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + PUSH gen_all_message_schedules_loop_end + // stack: new_retdest = gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest + DUP4 + // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + DUP3 + // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest + %jump(gen_message_schedule_from_block) +gen_all_message_schedules_loop_end: + // stack: cur_addr, counter, cur_output_addr, output_addr, retdest + %add_const(64) + // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest + SWAP1 + %decrement + SWAP1 + // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest + SWAP2 + %add_const(256) + SWAP2 + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + DUP2 + // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + ISZERO + %jumpi(gen_all_message_schedules_end) + %jump(gen_all_message_schedules_loop) +gen_all_message_schedules_end: + // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest + %pop3 + // stack: output_addr, retdest + %jump(sha2_compression) diff --git a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm new file mode 100644 index 
00000000..7d8054ca --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm @@ -0,0 +1,130 @@ +// 32-bit right rotation +%macro rotr(rot) + // stack: value + PUSH $rot + // stack: rot, value + DUP2 + DUP2 + // stack: rot, value, rot, value + SHR + // stack: value >> rot, rot, value + %stack (shifted, rot, value) -> (rot, value, shifted) + // stack: rot, value, value >> rot + PUSH 32 + SUB + // stack: 32 - rot, value, value >> rot + SHL + // stack: value << (32 - rot), value >> rot + %as_u32 + // stack: (value << (32 - rot)) % (1 << 32), value >> rot + ADD +%endmacro + +%macro sha2_sigma_0 + // stack: x + DUP1 + // stack: x, x + %rotr(7) + // stack: rotr(x, 7), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 7) + %rotr(18) + // stack: rotr(x, 18), x, rotr(x, 7) + SWAP1 + // stack: x, rotr(x, 18), rotr(x, 7) + PUSH 3 + SHR + // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) + XOR + XOR +%endmacro + +%macro sha2_sigma_1 + // stack: x + DUP1 + // stack: x, x + %rotr(17) + // stack: rotr(x, 17), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 17) + %rotr(19) + // stack: rotr(x, 19), x, rotr(x, 17) + SWAP1 + // stack: x, rotr(x, 19), rotr(x, 17) + PUSH 10 + SHR + // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) + XOR + XOR +%endmacro + +%macro sha2_bigsigma_0 + // stack: x + DUP1 + // stack: x, x + %rotr(2) + // stack: rotr(x, 2), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 2) + %rotr(13) + // stack: rotr(x, 13), x, rotr(x, 2) + SWAP1 + // stack: x, rotr(x, 13), rotr(x, 2) + %rotr(22) + // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) + XOR + XOR +%endmacro + +%macro sha2_bigsigma_1 + // stack: x + DUP1 + // stack: x, x + %rotr(6) + // stack: rotr(x, 6), x + %stack (rotated, x) -> (x, x, rotated) + // stack: x, x, rotr(x, 6) + %rotr(11) + // stack: rotr(x, 11), x, rotr(x, 6) + SWAP1 + // stack: x, rotr(x, 11), rotr(x, 6) + %rotr(25) + // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) + XOR + XOR +%endmacro + 
+%macro sha2_choice + // stack: x, y, z + DUP1 + // stack: x, x, y, z + NOT + // stack: not x, x, y, z + %stack (notx, x, y, z) -> (notx, z, x, y) + // stack: not x, z, x, y + AND + // stack: (not x) and z, x, y + %stack (nxz, x, y) -> (x, y, nxz) + // stack: x, y, (not x) and z + AND + // stack: x and y, (not x) and z + OR +%endmacro + +%macro sha2_majority + // stack: x, y, z + %stack (xyz: 3) -> (xyz, xyz) + // stack: x, y, z, x, y, z + AND + // stack: x and y, z, x, y, z + SWAP2 + // stack: x, z, x and y, y, z + AND + // stack: x and z, x and y, y, z + %stack (a: 2, b: 2) -> (b, a) + // stack: y, z, x and z, x and y + AND + // stack: y and z, x and z, x and y + OR + OR +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/hash/sha2/store_pad.asm new file mode 100644 index 00000000..7594eb81 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/store_pad.asm @@ -0,0 +1,89 @@ +global sha2: + %jump(sha2_store) + +global sha2_store: + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 0 + // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +store_loop: + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + DUP2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + ISZERO + %jumpi(store_end) + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + %stack (addr, counter, val) -> (addr, val, counter, addr) + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... 
, x[num_bytes-1], retdest + SWAP1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest + %jump(store_loop) +store_end: + // stack: addr, counter, retdest + %pop2 + // stack: retdest + %jump(sha2_pad) + +// Precondition: input is in memory, starting at 0 of kernel general segment, of the form +// num_bytes, x[0], x[1], ..., x[num_bytes - 1] +// Postcondition: output is in memory, starting at 0, of the form +// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +global sha2_pad: + // stack: retdest + PUSH 0 + %mload_kernel_general + // stack: num_bytes, retdest + // STEP 1: append 1 + // insert 128 (= 1 << 7) at x[num_bytes+1] + // stack: num_bytes, retdest + PUSH 1 + PUSH 7 + SHL + // stack: 128, num_bytes, retdest + DUP2 + // stack: num_bytes, 128, num_bytes, retdest + %increment + // stack: num_bytes+1, 128, num_bytes, retdest + %mstore_kernel_general + // stack: num_bytes, retdest + // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 + DUP1 + // stack: num_bytes, num_bytes, retdest + %add_const(8) + %div_const(64) + + %increment + // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest + // STEP 3: calculate length := num_bytes*8 + SWAP1 + // stack: num_bytes, num_blocks, retdest + PUSH 8 + MUL + // stack: length = num_bytes*8, num_blocks, retdest + // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] + DUP2 + // stack: num_blocks, length, num_blocks, retdest + PUSH 64 + MUL + // stack: last_addr = num_blocks*64, length, num_blocks, retdest + %sha2_write_length + // stack: num_blocks, retdest + DUP1 + // stack: num_blocks, num_blocks, retdest + // STEP 5: write num_blocks to x[0] + PUSH 0 + %mstore_kernel_general + // stack: num_blocks, retdest + %message_schedule_addr_from_num_blocks + %jump(sha2_gen_all_message_schedules) diff --git a/evm/src/cpu/kernel/asm/hash/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/hash/sha2/temp_words.asm new file mode
100644 index 00000000..ed610947 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/temp_words.asm @@ -0,0 +1,32 @@ +// "T_1" in the SHA-256 spec +%macro sha2_temp_word1 + // stack: e, f, g, h, K[i], W[i] + DUP1 + // stack: e, e, f, g, h, K[i], W[i] + %sha2_bigsigma_1 + // stack: Sigma_1(e), e, f, g, h, K[i], W[i] + %stack (sig, e, f, g) -> (e, f, g, sig) + // stack: e, f, g, Sigma_1(e), h, K[i], W[i] + %sha2_choice + // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] + %add_u32 + %add_u32 + %add_u32 + %add_u32 + // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] +%endmacro + +// "T_2" in the SHA-256 spec +%macro sha2_temp_word2 + // stack: a, b, c + DUP1 + // stack: a, a, b, c + %sha2_bigsigma_0 + // stack: Sigma_0(a), a, b, c + SWAP3 + // stack: c, a, b, Sigma_0(a) + %sha2_majority + // stack: Maj(c, a, b), Sigma_0(a) + %add_u32 + // stack: Maj(c, a, b) + Sigma_0(a) +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm new file mode 100644 index 00000000..5727498c --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm @@ -0,0 +1,119 @@ +%macro sha2_write_length + // stack: last_addr, length + SWAP1 + // stack: length, last_addr + DUP1 + // stack: length, length, last_addr + %and_const(0xff) + // stack: length % (1 << 8), length, last_addr + DUP3 + // stack: last_addr, length % (1 << 8), length, last_addr + %mstore_kernel_general + + // stack: length, last_addr + SWAP1 + %decrement + SWAP1 + // stack: length, last_addr - 1 + %shr_const(8) + // stack: length >> 8, last_addr - 1 + DUP1 + // stack: length >> 8, length >> 8, last_addr - 1 + %and_const(0xff) + // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + DUP3 + // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 + %mstore_kernel_general + + // stack: length >> 8, last_addr - 1 + SWAP1 + %decrement + SWAP1 + // stack: length >> 8, last_addr - 2 + %shr_const(8) + // stack: length >> 16, 
last_addr - 2 + DUP1 + // stack: length >> 16, length >> 16, last_addr - 2 + %and_const(0xff) + // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 + DUP3 + // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 + %mstore_kernel_general + + // stack: length >> 16, last_addr - 2 + SWAP1 + %decrement + SWAP1 + // stack: length >> 16, last_addr - 3 + %shr_const(8) + // stack: length >> 24, last_addr - 3 + DUP1 + // stack: length >> 24, length >> 24, last_addr - 3 + %and_const(0xff) + // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 + DUP3 + // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 + %mstore_kernel_general + + // stack: length >> 24, last_addr - 3 + SWAP1 + %decrement + SWAP1 + // stack: length >> 24, last_addr - 4 + %shr_const(8) + // stack: length >> 32, last_addr - 4 + DUP1 + // stack: length >> 32, length >> 32, last_addr - 4 + %and_const(0xff) + // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 + DUP3 + // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 + %mstore_kernel_general + + // stack: length >> 32, last_addr - 4 + SWAP1 + %decrement + SWAP1 + // stack: length >> 32, last_addr - 5 + %shr_const(8) + // stack: length >> 40, last_addr - 5 + DUP1 + // stack: length >> 40, length >> 40, last_addr - 5 + %and_const(0xff) + // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 + DUP3 + // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 + %mstore_kernel_general + + // stack: length >> 40, last_addr - 5 + SWAP1 + %decrement + SWAP1 + // stack: length >> 40, last_addr - 6 + %shr_const(8) + // stack: length >> 48, last_addr - 6 + DUP1 + // stack: length >> 48, length >> 48, last_addr - 6 + %and_const(0xff) + // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 + DUP3 + // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 + %mstore_kernel_general + + // 
stack: length >> 48, last_addr - 6 + SWAP1 + %decrement + SWAP1 + // stack: length >> 48, last_addr - 7 + %shr_const(8) + // stack: length >> 56, last_addr - 7 + DUP1 + // stack: length >> 56, length >> 56, last_addr - 7 + %and_const(0xff) + // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 + DUP3 + // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7 + %mstore_kernel_general + %pop2 + // stack: (empty) +%endmacro From a1ea7ff93056c7ac8cdf6c7e80c1e9deb6dec06f Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Dec 2022 09:34:42 -0800 Subject: [PATCH 04/44] progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 7 +- .../cpu/kernel/asm/hash/blake/g_functions.asm | 99 ++++++++++++++++++- evm/src/cpu/kernel/asm/hash/blake/ops.asm | 25 +++++ evm/src/cpu/kernel/constants/mod.rs | 22 +++-- 4 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/hash/blake/ops.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index fceabeb1..74cdaff3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,2 +1,7 @@ +%macro blake_compression_internal_state_addr + PUSH 0 +%endmacro + global blake_compression: - // stack: + // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 + \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index e8b1dab2..a4783648 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -1,3 +1,98 @@ global blake_g_function: - // stack: i, a, b, c, d - \ No newline at end of file + // Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which + // are in the range 0..16) in the internal state. 
+ // The internal state is stored in memory starting at the address start. + // stack: a, b, c, d, x, y, start + %stack (indices: 4) -> (indices, indices) + // stack: a, b, c, d, a, b, c, d, x, y, start + DUP11 + // stack: start, a, b, c, d, a, b, c, d, x, y, start + %stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start) + // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start + ADD + %mload_kernel_general + // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start + %stack (vd, remaining: 6) -> (remaining, vd) + // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start + ADD + %mload_kernel_general + %stack (vc, remaining: 4) -> (remaining, vc) + // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start + ADD + %mload_kernel_general + // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start + %stack (vb, remaining: 2) -> (remaining, vb) + // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start + ADD + %mload_kernel_general + // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + DUP2 + // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + DUP10 + // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + ADD + ADD + %as_u64 + // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start + %stack (a, b, c, d) -> (a, d, a, b, c, d) + // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start + XOR + %rotr_64(32) + // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start + %stack (top: 3, vd) -> (top) + // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start + %stack (d, a, b, c) -> (c, d, a, b, d) + // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start + ADD + %as_u64 + // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start + %stack (c, a, b, d) -> (b, c, a, c, d) + // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start + XOR + 
%rotr_64(24) + // stack: v[b]' = (v[b] ^ v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start + SWAP1 + // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + DUP2 + // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + DUP11 + // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + ADD + ADD + %as_u64 + // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start + SWAP3 + // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + DUP4 + // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + XOR + %rotr_64(16) + // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 16, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + SWAP2 + // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + DUP3 + // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + ADD + %as_u64 + // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + DUP1 + // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + SWAP2 + // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start + XOR + %rotr_64(63) + // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 63, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start + %stack (vb, vc, vd, va, a, b, c, d, x, y, start) -> (start, a, va, start, b, vb, start, c, vc, start, d, vd) + // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'' + ADD + %mstore_kernel_general + ADD + %mstore_kernel_general + ADD + %mstore_kernel_general + ADD + %mstore_kernel_general + + + + diff --git a/evm/src/cpu/kernel/asm/hash/blake/ops.asm b/evm/src/cpu/kernel/asm/hash/blake/ops.asm new file mode 100644 index 00000000..c83ace55 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/ops.asm @@ -0,0 +1,25 @@ +%macro as_u64 + %and_const(0xffffffffffffffff) +%endmacro + +// 64-bit right rotation +%macro rotr_64(rot) + // stack: value + PUSH $rot + // stack: rot, value + DUP2 + DUP2 + // stack: rot,
value, rot, value + SHR + // stack: value >> rot, rot, value + %stack (shifted, rot, value) -> (rot, value, shifted) + // stack: rot, value, value >> rot + PUSH 64 + SUB + // stack: 64 - rot, value, value >> rot + SHL + // stack: value << (64 - rot), value >> rot + %as_u64 + // stack: (value << (64 - rot)) % (1 << 64), value >> rot + ADD +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index e762a643..723daac8 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -18,15 +18,16 @@ pub(crate) mod txn_fields; /// Constants that are accessible to our kernel assembly code. pub fn evm_constants() -> HashMap { let mut c = HashMap::new(); - for (name, value) in EC_CONSTANTS { - c.insert(name.into(), U256::from_big_endian(&value)); - } - for (name, value) in HASH_CONSTANTS { - c.insert(name.into(), U256::from_big_endian(&value)); + + let hex_constants = MISC_CONSTANTS.iter().chain(EC_CONSTANTS.iter()).chain(HASH_CONSTANTS.iter()); + for (name, value) in hex_constants { + c.insert(name.clone().into(), U256::from_big_endian(value)); } + for (name, value) in GAS_CONSTANTS { c.insert(name.into(), U256::from(value)); } + for segment in Segment::all() { c.insert(segment.var_name().into(), (segment as u32).into()); } @@ -49,12 +50,15 @@ pub fn evm_constants() -> HashMap { c } -const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [ - // Hash of an empty string: keccak(b'').hex() +const MISC_CONSTANTS: [(&str, [u8; 32]); 1] = [ + // 2^64 ( - "EMPTY_STRING_HASH", - hex!("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"), + "BLAKE_WORD_SIZE", + hex!("0000000000000000000000000000000000000000000000010000000000000000"), ), +]; + +const HASH_CONSTANTS: [(&str, [u8; 32]); 1] = [ // Hash of an empty node: keccak(rlp.encode(b'')).hex() ( "EMPTY_NODE_HASH", From 9a5db4b8d29f5c32b62b2350e453b0e4793b23ae Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 4 Nov 2022 
15:27:34 -0700 Subject: [PATCH 05/44] progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 16 ++++++++++++- .../cpu/kernel/asm/hash/blake/constants.asm | 24 ++++++++++++++++++- .../cpu/kernel/asm/hash/blake/g_functions.asm | 4 ---- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 74cdaff3..cfa47f64 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -4,4 +4,18 @@ global blake_compression: // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 - \ No newline at end of file + %blake_compression_internal_state_addr + // stack: start, h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 + %rep 8 + SWAP1 + DUP2 + %mstore_kernel_general + %increment + %endrep + // stack: start + 8, t_0, t_1, f_0, f_1, m_0, ..., m_15 + PUSH 0 + // stack: 0, start + 8, t_0, t_1, f_0, f_1, m_0, ..., m_15 + %rep 4 + + %endrep + diff --git a/evm/src/cpu/kernel/asm/hash/blake/constants.asm b/evm/src/cpu/kernel/asm/hash/blake/constants.asm index 12f026e2..d647df37 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/constants.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/constants.asm @@ -1,4 +1,4 @@ -global blake_iv: +global blake_iv_const: // IV constants (big-endian) // IV_0 @@ -32,3 +32,25 @@ global blake_iv: // IV_7 BYTES 91, 224, 205, 25 BYTES 19, 126, 33, 121 + +%macro blake_iv(i) + PUSH blake_iv_const + // stack: blake_iv_const + PUSH $i + // stack: i, blake_iv_const + %mul_const(2) + ADD + // stack: blake_iv_const + 2 * i + DUP1 + // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i + %increment + // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i + %mload_kernel_code + SWAP1 + %increment + // stack: IV_i[32:], IV_i[:32] + %shl_const(32) + // stack: IV_i[32:] << 32, IV_i[:32] + ADD + // stack: IV_i +%endmacro \ No newline at end of file diff --git 
a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index a4783648..67cf6740 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -92,7 +92,3 @@ global blake_g_function: %mstore_kernel_general ADD %mstore_kernel_general - - - - From b40338ff2e4fca83bbc6321db4663fd3d47efd7e Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 7 Nov 2022 14:30:14 -0800 Subject: [PATCH 06/44] progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 56 +++++++++++++++++++ .../cpu/kernel/asm/hash/blake/constants.asm | 56 ------------------- evm/src/cpu/kernel/asm/hash/blake/ops.asm | 2 +- .../kernel/asm/hash/blake/permutations.asm | 8 ++- 4 files changed, 62 insertions(+), 60 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/hash/blake/constants.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index cfa47f64..e805f923 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -2,6 +2,10 @@ PUSH 0 %endmacro +%macro blake_compression_message_addr + PUSH 16 +%endmacro + global blake_compression: // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 %blake_compression_internal_state_addr @@ -16,6 +20,58 @@ global blake_compression: PUSH 0 // stack: 0, start + 8, t_0, t_1, f_0, f_1, m_0, ..., m_15 %rep 4 + // stack: i, loc, ... + DUP2 + DUP2 + // stack: i, loc, i, loc,... + %blake_iv + // stack: IV_i, loc, i, loc,... + SWAP1 + // stack: loc, IV_i, i, loc,... + %mstore_kernel_general + // stack: i, loc,... + %increment + SWAP1 + %increment + SWAP1 + // stack: i + 1, loc + 1,... + %endrep + %rep 4 + // stack: i, loc, val, next_val, next_val,... + %stack (i, loc, val) -> (i, val, loc, i, loc) + // stack: i, val, loc, i, loc, next_val,... + %blake_iv + // stack: IV_i, val, loc, i, loc, next_val,... 
+ XOR + // stack: val ^ IV_i, loc, i, loc, next_val,... + SWAP1 + // stack: loc, val ^ IV_i, i, loc, next_val,... + %mstore_kernel_general + // stack: i, loc, next_val,... + %increment + SWAP1 + %increment + SWAP1 + // stack: i + 1, loc + 1, next_val,... + %endrep + // stack: 8, loc + 16, m_0, ..., m_15 + POP + POP + // stack: m_0, ..., m_15 + %blake_compression_message_addr + // stack: addr, m_0, ..., m_15 + %rep 16 %endrep + PUSH 0 + // stack: round=0, m_0, ..., m_15 +compression_loop: + // stack: round, m_0, ..., m_15 + PUSH 0 + DUP2 + // stack: round, 0, round, m_0, ..., m_15 + %blake_permutation + // stack: s[0], round, m_0, ..., m_15 + + diff --git a/evm/src/cpu/kernel/asm/hash/blake/constants.asm b/evm/src/cpu/kernel/asm/hash/blake/constants.asm deleted file mode 100644 index d647df37..00000000 --- a/evm/src/cpu/kernel/asm/hash/blake/constants.asm +++ /dev/null @@ -1,56 +0,0 @@ -global blake_iv_const: - // IV constants (big-endian) - - // IV_0 - BYTES 106, 9, 230, 103 - BYTES 243, 188, 201, 8 - - // IV_1 - BYTES 187, 103, 174, 133 - BYTES 132, 202, 167, 59 - - // IV_2 - BYTES 60, 110, 243, 114 - BYTES 254, 148, 248, 43 - - // IV_3 - BYTES 165, 79, 245, 58 - BYTES 95, 29, 54, 241 - - // IV_4 - BYTES 81, 14, 82, 127 - BYTES 173, 230, 130, 209 - - // IV_5 - BYTES 155, 5, 104, 140 - BYTES 43, 62, 108, 31 - - // IV_6 - BYTES 31, 131, 217, 171 - BYTES 251, 65, 189, 107 - - // IV_7 - BYTES 91, 224, 205, 25 - BYTES 19, 126, 33, 121 - -%macro blake_iv(i) - PUSH blake_iv_const - // stack: blake_iv_const - PUSH $i - // stack: i, blake_iv_const - %mul_const(2) - ADD - // stack: blake_iv_const + 2 * i - DUP1 - // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i - %increment - // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i - %mload_kernel_code - SWAP1 - %increment - // stack: IV_i[32:], IV_i[:32] - %shl_const(32) - // stack: IV_i[32:] << 32, IV_i[:32] - ADD - // stack: IV_i -%endmacro \ No newline at end of file diff --git 
a/evm/src/cpu/kernel/asm/hash/blake/ops.asm b/evm/src/cpu/kernel/asm/hash/blake/ops.asm index c83ace55..e587abef 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/ops.asm @@ -22,4 +22,4 @@ %as_u64 // stack: (value << (64 - rot)) % (1 << 64), value >> rot ADD -%endmacro \ No newline at end of file +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm index ba7ad291..b1997e2f 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ -160,13 +160,15 @@ permutation_9_constants: BYTES 13 BYTES 0 -%macro blake_permutation(round, i) +%macro blake_permutation + // stack: round, i PUSH permutation_1_constants - PUSH $round + // stack: permutation_1_constants, round, i + SWAP1 + // stack: round, permutation_1_constants, i %mod_const(10) %mul_const(16) ADD %add_const($i) %mload_kernel_code %endmacro - \ No newline at end of file From 0c919443f991f9aa001d75489bdd36f14d7ad7d5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 7 Nov 2022 15:47:17 -0800 Subject: [PATCH 07/44] progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 48 +++++++--- .../cpu/kernel/asm/hash/blake/g_functions.asm | 26 +++++- .../cpu/kernel/asm/hash/blake/store_pad.asm | 89 ------------------- 3 files changed, 59 insertions(+), 104 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/hash/blake/store_pad.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index e805f923..c00c7554 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,9 +1,18 @@ -%macro blake_compression_internal_state_addr +// We put the message schedule in memory starting at 64 * num_blocks + 2. 
+%macro message_schedule_addr_from_num_blocks + +%macro blake_internal_state_addr PUSH 0 + // stack: 0 + %mload_kernel_general + // stack: num_blocks + %mul_const(128) + // stack: num_bytes %endmacro -%macro blake_compression_message_addr - PUSH 16 +%macro blake_message_addr + %blake_internal_state_addr + %add_const(16) %endmacro global blake_compression: @@ -61,17 +70,28 @@ global blake_compression: %blake_compression_message_addr // stack: addr, m_0, ..., m_15 %rep 16 - + SWAP1 + DUP2 + %mstore_kernel_general + %increment %endrep + // stack: (empty) + %blake_compression_internal_state_addr + // stack: start PUSH 0 - // stack: round=0, m_0, ..., m_15 -compression_loop: - // stack: round, m_0, ..., m_15 - PUSH 0 - DUP2 - // stack: round, 0, round, m_0, ..., m_15 - %blake_permutation - // stack: s[0], round, m_0, ..., m_15 + // stack: round=0, start + %rep 12 + // stack: round, start + %call_blake_g_function(0, 4, 8, 12, 0, 1) + %call_blake_g_function(1, 5, 9, 13, 2, 3) + %call_blake_g_function(2, 6, 10, 14, 4, 5) + %call_blake_g_function(3, 7, 11, 15, 6, 7) + %call_blake_g_function(0, 5, 10, 15, 8, 9) + %call_blake_g_function(1, 6, 11, 12, 10, 11) + %call_blake_g_function(2, 7, 8, 13, 12, 13) + %call_blake_g_function(3, 4, 9, 14, 14, 15) + // stack: round, start + %increment + // stack: round + 1, start + %endrep - - diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index 67cf6740..5b3943bb 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -1,4 +1,4 @@ -global blake_g_function: +%macro blake_g_function // Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. 
@@ -92,3 +92,27 @@ global blake_g_function: %mstore_kernel_general ADD %mstore_kernel_general +%endmacro + +%macro call_blake_g_function(a, b, c, d, x_idx, y_idx) + // stack: round, start + PUSH $y_idx + DUP2 + // stack: round, y_idx, round, start + %blake_permutation + // stack: s[y_idx], round, start + PUSH $x_idx + DUP3 + // stack: round, 2, s[y_idx], round, start + %blake_permutation + // stack: s[x_idx], s[y_idx], round, start + %stack (ss: 2, r, s) -> (ss, s, r, s) + // stack: s[x_idx], s[y_idx], start, round, start + PUSH $d + PUSH $c + PUSH $b + PUSH $a + // stack: a, b, c, d, s[x_idx], s[y_idx], start, round, start + %blake_g_function + // stack: round, start +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm b/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm deleted file mode 100644 index 7594eb81..00000000 --- a/evm/src/cpu/kernel/asm/hash/blake/store_pad.asm +++ /dev/null @@ -1,89 +0,0 @@ -global sha2: - %jump(sha2_store) - -global sha2_store: - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - DUP1 - // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - PUSH 0 - // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - %mstore_kernel_general - // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - PUSH 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest -store_loop: - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - DUP2 - // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - ISZERO - %jumpi(store_end) - // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - %stack (addr, counter, val) -> (addr, val, counter, addr) - // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest - %mstore_kernel_general - // stack: counter, addr, ... 
, x[num_bytes-1], retdest - %decrement - // stack: counter-1, addr, ... , x[num_bytes-1], retdest - SWAP1 - // stack: addr, counter-1, ... , x[num_bytes-1], retdest - %increment - // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest - %jump(store_loop) -store_end: - // stack: addr, counter, retdest - %pop2 - // stack: retdest - %jump(sha2_pad) - -// Precodition: input is in memory, starting at 0 of kernel general segment, of the form -// num_bytes, x[0], x[1], ..., x[num_bytes - 1] -// Postcodition: output is in memory, starting at 0, of the form -// num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] -global sha2_pad: - // stack: retdest - PUSH 0 - %mload_kernel_general - // stack: num_bytes, retdest - // STEP 1: append 1 - // insert 128 (= 1 << 7) at x[num_bytes+1] - // stack: num_bytes, retdest - PUSH 1 - PUSH 7 - SHL - // stack: 128, num_bytes, retdest - DUP2 - // stack: num_bytes, 128, num_bytes, retdest - %increment - // stack: num_bytes+1, 128, num_bytes, retdest - %mstore_kernel_general - // stack: num_bytes, retdest - // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - DUP1 - // stack: num_bytes, num_bytes, retdest - %add_const(8) - %div_const(64) - - %increment - // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest - // STEP 3: calculate length := num_bytes*8 - SWAP1 - // stack: num_bytes, num_blocks, retdest - PUSH 8 - MUL - // stack: length = num_bytes*8, num_blocks, retdest - // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - DUP2 - // stack: num_blocks, length, num_blocks, retdest - PUSH 64 - MUL - // stack: last_addr = num_blocks*64, length, num_blocks, retdest - %sha2_write_length - // stack: num_blocks, retdest - DUP1 - // stack: num_blocks, num_blocks, retdest - // STEP 5: write num_blocks to x[0] - PUSH 0 - %mstore_kernel_general - // stack: num_blocks, retdest - %message_schedule_addr_from_num_blocks - %jump(sha2_gen_all_message_schedules) From 6e782a1a1e1493234763795f10f4cf837a52e8f3 Mon 
Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 7 Nov 2022 16:26:49 -0800 Subject: [PATCH 08/44] Blake progress --- evm/src/cpu/kernel/aggregator.rs | 7 +++ .../kernel/asm/hash/blake/initial_state.asm | 16 +++++ evm/src/cpu/kernel/asm/hash/blake/iv.asm | 62 +++++++++++++++++++ evm/src/cpu/kernel/asm/hash/blake/store.asm | 39 ++++++++++++ 4 files changed, 124 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/hash/blake/initial_state.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/iv.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/store.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 48130237..cff0c644 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,6 +39,13 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/fields/fp6_mul.asm"), include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), + // include_str!("asm/hash/blake/compression.asm"), + // include_str!("asm/hash/blake/g_functions.asm"), + // include_str!("asm/hash/blake/initial_state.asm"), + // include_str!("asm/hash/blake/iv.asm"), + // include_str!("asm/hash/blake/ops.asm"), + // include_str!("asm/hash/blake/permutations.asm"), + // include_str!("asm/hash/blake/store.asm"), include_str!("asm/hash/ripemd/box.asm"), include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm new file mode 100644 index 00000000..3d2bccff --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm @@ -0,0 +1,16 @@ +global blake_initial_state: + // stack: retdest + %blake_iv(7) + %blake_iv(6) + %blake_iv(5) + %blake_iv(4) + %blake_iv(3) + %blake_iv(2) + %blake_iv(1) + // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 + %blake_iv(0) + XOR + // stack: IV_0 ^ 
params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + %stack () -> (0, 0) + // stack: c_0 = 0, c_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7, retdest \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/iv.asm b/evm/src/cpu/kernel/asm/hash/blake/iv.asm new file mode 100644 index 00000000..2ddcf970 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/iv.asm @@ -0,0 +1,62 @@ +global blake_iv_const: + // IV constants (big-endian) + + // IV_0 + BYTES 106, 9, 230, 103 + BYTES 243, 188, 201, 8 + + // IV_1 + BYTES 187, 103, 174, 133 + BYTES 132, 202, 167, 59 + + // IV_2 + BYTES 60, 110, 243, 114 + BYTES 254, 148, 248, 43 + + // IV_3 + BYTES 165, 79, 245, 58 + BYTES 95, 29, 54, 241 + + // IV_4 + BYTES 81, 14, 82, 127 + BYTES 173, 230, 130, 209 + + // IV_5 + BYTES 155, 5, 104, 140 + BYTES 43, 62, 108, 31 + + // IV_6 + BYTES 31, 131, 217, 171 + BYTES 251, 65, 189, 107 + + // IV_7 + BYTES 91, 224, 205, 25 + BYTES 19, 126, 33, 121 + +%macro blake_iv + // stack: i, ... + PUSH blake_iv_const + // stack: blake_iv_const, i, ... + SWAP1 + // stack: i, blake_iv_const, ... + %mul_const(2) + ADD + // stack: blake_iv_const + 2 * i, ... + DUP1 + // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... + %increment + // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... + %mload_kernel_code + SWAP1 + %increment + // stack: IV_i[32:], IV_i[:32], ... + %shl_const(32) + // stack: IV_i[32:] << 32, IV_i[:32], ... + ADD + // stack: IV_i, ... 
+%endmacro + +%macro blake_iv_i(i) + PUSH $i + %blake_iv +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/store.asm b/evm/src/cpu/kernel/asm/hash/blake/store.asm new file mode 100644 index 00000000..4862b53b --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/store.asm @@ -0,0 +1,39 @@ +global blake: + %jump(blake_store) + +global blake_store: + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %add_const(127) + %div_const(128) + // stack: num_blocks = ceil(num_bytes / 128), num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 0 + // stack: addr=0, num_blocks, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 1 + // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest +store_loop: + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + DUP2 + // stack: counter, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + ISZERO + %jumpi(store_end) + // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest + %stack (addr, counter, val) -> (addr, val, counter, addr) + // stack: addr, x[num_bytes-counter], counter, addr, ... , x[num_bytes-1], retdest + %mstore_kernel_general + // stack: counter, addr, ... , x[num_bytes-1], retdest + %decrement + // stack: counter-1, addr, ... , x[num_bytes-1], retdest + SWAP1 + // stack: addr, counter-1, ... , x[num_bytes-1], retdest + %increment + // stack: addr+1, counter-1, ... 
, x[num_bytes-1], retdest + %jump(store_loop) +store_end: + // stack: addr, counter, retdest + %pop2 + // stack: retdest + %jump(blake_pad) From d3e5feba836ec9ab0569e3bca0f55dd527c7dfea Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 9 Nov 2022 10:29:13 -0800 Subject: [PATCH 09/44] Blake progress --- evm/src/cpu/kernel/aggregator.rs | 6 +++--- evm/src/cpu/kernel/asm/hash/blake/compression.asm | 2 +- evm/src/cpu/kernel/asm/hash/blake/initial_state.asm | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index cff0c644..13ac61fe 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,9 +39,9 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/fields/fp6_mul.asm"), include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), - // include_str!("asm/hash/blake/compression.asm"), - // include_str!("asm/hash/blake/g_functions.asm"), - // include_str!("asm/hash/blake/initial_state.asm"), + include_str!("asm/hash/blake/compression.asm"), + include_str!("asm/hash/blake/g_functions.asm"), + include_str!("asm/hash/blake/initial_state.asm"), // include_str!("asm/hash/blake/iv.asm"), // include_str!("asm/hash/blake/ops.asm"), // include_str!("asm/hash/blake/permutations.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index c00c7554..c319e791 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -94,4 +94,4 @@ global blake_compression: %increment // stack: round + 1, start %endrep - + \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm index 3d2bccff..d65ea0db 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm @@ -1,5 
+1,4 @@ -global blake_initial_state: - // stack: retdest +%macro blae_initial_state %blake_iv(7) %blake_iv(6) %blake_iv(5) @@ -7,10 +6,11 @@ global blake_initial_state: %blake_iv(3) %blake_iv(2) %blake_iv(1) - // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 %blake_iv(0) XOR - // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %stack () -> (0, 0) - // stack: c_0 = 0, c_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7, retdest \ No newline at end of file + // stack: c_0 = 0, c_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 +%endmacro \ No newline at end of file From 609ed6c9845742bafe38abd7598f3ae1df51abb1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 9 Nov 2022 13:48:56 -0800 Subject: [PATCH 10/44] fixes --- evm/src/cpu/kernel/aggregator.rs | 8 ++++---- evm/src/cpu/kernel/asm/hash/blake/compression.asm | 9 +++------ evm/src/cpu/kernel/asm/hash/blake/permutations.asm | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 13ac61fe..01ba419b 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -42,10 +42,10 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/hash/blake/compression.asm"), include_str!("asm/hash/blake/g_functions.asm"), include_str!("asm/hash/blake/initial_state.asm"), - // include_str!("asm/hash/blake/iv.asm"), - // include_str!("asm/hash/blake/ops.asm"), - // include_str!("asm/hash/blake/permutations.asm"), - // include_str!("asm/hash/blake/store.asm"), + include_str!("asm/hash/blake/iv.asm"), + include_str!("asm/hash/blake/ops.asm"), + include_str!("asm/hash/blake/permutations.asm"), + include_str!("asm/hash/blake/store.asm"), include_str!("asm/hash/ripemd/box.asm"), 
include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index c319e791..7dc55a72 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,6 +1,3 @@ -// We put the message schedule in memory starting at 64 * num_blocks + 2. -%macro message_schedule_addr_from_num_blocks - %macro blake_internal_state_addr PUSH 0 // stack: 0 @@ -17,7 +14,7 @@ global blake_compression: // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 - %blake_compression_internal_state_addr + %blake_internal_state_addr // stack: start, h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 %rep 8 SWAP1 @@ -67,7 +64,7 @@ global blake_compression: POP POP // stack: m_0, ..., m_15 - %blake_compression_message_addr + %blake_message_addr // stack: addr, m_0, ..., m_15 %rep 16 SWAP1 @@ -76,7 +73,7 @@ global blake_compression: %increment %endrep // stack: (empty) - %blake_compression_internal_state_addr + %blake_internal_state_addr // stack: start PUSH 0 // stack: round=0, start diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm index b1997e2f..6f444c3b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ -169,6 +169,6 @@ permutation_9_constants: %mod_const(10) %mul_const(16) ADD - %add_const($i) + ADD %mload_kernel_code %endmacro From 5fab01b952a7769f7227ba9c669bd0786aa1dc99 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 9 Nov 2022 14:06:45 -0800 Subject: [PATCH 11/44] fix --- evm/src/cpu/kernel/asm/hash/blake/initial_state.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm index d65ea0db..4f1154ad 100644 --- 
a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm @@ -1,4 +1,4 @@ -%macro blae_initial_state +%macro blake_initial_state %blake_iv(7) %blake_iv(6) %blake_iv(5) @@ -13,4 +13,4 @@ // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %stack () -> (0, 0) // stack: c_0 = 0, c_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 -%endmacro \ No newline at end of file +%endmacro From 0cfe79021078ceedaa4fe8656ad2e36f7b8d9fac Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 9 Nov 2022 14:41:41 -0800 Subject: [PATCH 12/44] updates --- .../cpu/kernel/asm/hash/blake/compression.asm | 21 +++++++++++++++++++ .../kernel/asm/hash/blake/initial_state.asm | 16 -------------- evm/src/cpu/kernel/asm/hash/blake/store.asm | 2 +- 3 files changed, 22 insertions(+), 17 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/hash/blake/initial_state.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 7dc55a72..f06df538 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,3 +1,18 @@ +%macro blake_initial_state + %blake_iv(7) + %blake_iv(6) + %blake_iv(5) + %blake_iv(4) + %blake_iv(3) + %blake_iv(2) + %blake_iv(1) + // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 + PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 + %blake_iv(0) + XOR + // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 +%endmacro + %macro blake_internal_state_addr PUSH 0 // stack: 0 @@ -13,6 +28,12 @@ %endmacro global blake_compression: + %blake_initial_state + // stack: t_0, t_1, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 + %stack: () -> (0, 0, 0) + // stack: cur_block = 0, t_0 = 0, t_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 + + // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 %blake_internal_state_addr // stack: start, h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 
diff --git a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm b/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm deleted file mode 100644 index 4f1154ad..00000000 --- a/evm/src/cpu/kernel/asm/hash/blake/initial_state.asm +++ /dev/null @@ -1,16 +0,0 @@ -%macro blake_initial_state - %blake_iv(7) - %blake_iv(6) - %blake_iv(5) - %blake_iv(4) - %blake_iv(3) - %blake_iv(2) - %blake_iv(1) - // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 - PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake_iv(0) - XOR - // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 - %stack () -> (0, 0) - // stack: c_0 = 0, c_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 -%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/store.asm b/evm/src/cpu/kernel/asm/hash/blake/store.asm index 4862b53b..28cfa187 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/store.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/store.asm @@ -36,4 +36,4 @@ store_end: // stack: addr, counter, retdest %pop2 // stack: retdest - %jump(blake_pad) + %jump(blake_compression) From dd29ec1f46428406920bf0b4c26754b57a5c9615 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 13 Dec 2022 10:08:30 -0800 Subject: [PATCH 13/44] fixes and test infrastructure --- evm/Cargo.toml | 2 ++ evm/src/cpu/kernel/aggregator.rs | 1 - .../cpu/kernel/asm/hash/blake/compression.asm | 21 ++++++++++--------- .../kernel/asm/hash/blake/permutations.asm | 18 ++++++++-------- evm/src/cpu/kernel/tests/hash.rs | 13 ++++++++++++ 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/evm/Cargo.toml b/evm/Cargo.toml index e942a9f9..2d1e2850 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -6,6 +6,8 @@ edition = "2021" [dependencies] anyhow = "1.0.40" +blake = "2.0.2" +env_logger = "0.9.0" eth_trie_utils = "0.4.0" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 01ba419b..25e25119 100644 --- 
a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -41,7 +41,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/halt.asm"), include_str!("asm/hash/blake/compression.asm"), include_str!("asm/hash/blake/g_functions.asm"), - include_str!("asm/hash/blake/initial_state.asm"), include_str!("asm/hash/blake/iv.asm"), include_str!("asm/hash/blake/ops.asm"), include_str!("asm/hash/blake/permutations.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index f06df538..44b10ffb 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,14 +1,14 @@ %macro blake_initial_state - %blake_iv(7) - %blake_iv(6) - %blake_iv(5) - %blake_iv(4) - %blake_iv(3) - %blake_iv(2) - %blake_iv(1) + %blake_iv_i(7) + %blake_iv_i(6) + %blake_iv_i(5) + %blake_iv_i(4) + %blake_iv_i(3) + %blake_iv_i(2) + %blake_iv_i(1) // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake_iv(0) + %blake_iv_i(0) XOR // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %endmacro @@ -19,7 +19,8 @@ %mload_kernel_general // stack: num_blocks %mul_const(128) - // stack: num_bytes + %increment + // stack: num_bytes+1 %endmacro %macro blake_message_addr @@ -30,7 +31,7 @@ global blake_compression: %blake_initial_state // stack: t_0, t_1, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 - %stack: () -> (0, 0, 0) + %stack () -> (0, 0, 0) // stack: cur_block = 0, t_0 = 0, t_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm index 6f444c3b..da278ec2 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ -1,4 +1,4 @@ -permutation_1_constants: +global permutation_1_constants: BYTES 14 BYTES 10 BYTES 4 @@ -16,7 
+16,7 @@ permutation_1_constants: BYTES 5 BYTES 3 -permutation_2_constants: +global permutation_2_constants: BYTES 11 BYTES 8 BYTES 12 @@ -34,7 +34,7 @@ permutation_2_constants: BYTES 9 BYTES 4 -permutation_3_constants: +global permutation_3_constants: BYTES 7 BYTES 9 BYTES 3 @@ -52,7 +52,7 @@ permutation_3_constants: BYTES 15 BYTES 8 -permutation_4_constants: +global permutation_4_constants: BYTES 9 BYTES 0 BYTES 5 @@ -70,7 +70,7 @@ permutation_4_constants: BYTES 3 BYTES 13 -permutation_5_constants: +global permutation_5_constants: BYTES 2 BYTES 12 BYTES 6 @@ -88,7 +88,7 @@ permutation_5_constants: BYTES 1 BYTES 9 -permutation_6_constants: +global permutation_6_constants: BYTES 12 BYTES 5 BYTES 1 @@ -106,7 +106,7 @@ permutation_6_constants: BYTES 8 BYTES 11 -permutation_7_constants: +global permutation_7_constants: BYTES 13 BYTES 11 BYTES 7 @@ -124,7 +124,7 @@ permutation_7_constants: BYTES 2 BYTES 10 -permutation_8_constants: +global permutation_8_constants: BYTES 6 BYTES 15 BYTES 14 @@ -142,7 +142,7 @@ permutation_8_constants: BYTES 10 BYTES 5 -permutation_9_constants: +global permutation_9_constants: BYTES 10 BYTES 2 BYTES 8 diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index b7b2e5c2..52c8d9dd 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -1,6 +1,7 @@ use std::str::FromStr; use anyhow::Result; +use blake::{hash as blake_hash}; use ethereum_types::U256; use rand::{thread_rng, Rng}; use ripemd::{Digest, Ripemd160}; @@ -23,6 +24,13 @@ fn ripemd(input: Vec) -> U256 { U256::from(&hasher.finalize()[..]) } +/// Standard Blake implementation. +fn blake(input: Vec) -> U256 { + let mut result = [0; 32]; + blake_hash(256, &input, &mut result).unwrap(); + U256::from(result) +} + fn make_random_input() -> Vec { // Generate a random message, between 0 and 9999 bytes. 
let mut rng = thread_rng(); @@ -88,3 +96,8 @@ fn test_sha2() -> Result<()> { fn test_ripemd() -> Result<()> { test_hash("ripemd_stack", &ripemd) } + +#[test] +fn test_blake() -> Result<()> { + test_hash("blake", &blake) +} From 1367f9bcca32c0e0a42efde2ceda8760cf58caff Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 9 Nov 2022 16:37:18 -0800 Subject: [PATCH 14/44] fmt --- evm/src/cpu/kernel/constants/mod.rs | 9 ++++++--- evm/src/cpu/kernel/tests/hash.rs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index 723daac8..2125fa18 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -18,12 +18,15 @@ pub(crate) mod txn_fields; /// Constants that are accessible to our kernel assembly code. pub fn evm_constants() -> HashMap { let mut c = HashMap::new(); - - let hex_constants = MISC_CONSTANTS.iter().chain(EC_CONSTANTS.iter()).chain(HASH_CONSTANTS.iter()); + + let hex_constants = MISC_CONSTANTS + .iter() + .chain(EC_CONSTANTS.iter()) + .chain(HASH_CONSTANTS.iter()); for (name, value) in hex_constants { c.insert(name.clone().into(), U256::from_big_endian(value)); } - + for (name, value) in GAS_CONSTANTS { c.insert(name.into(), U256::from(value)); } diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 52c8d9dd..c26c2f88 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -1,7 +1,7 @@ use std::str::FromStr; use anyhow::Result; -use blake::{hash as blake_hash}; +use blake::hash as blake_hash; use ethereum_types::U256; use rand::{thread_rng, Rng}; use ripemd::{Digest, Ripemd160}; From 54a2e96473cc3291b156d4e1dd60dcb6703f8359 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 10 Nov 2022 15:24:08 -0800 Subject: [PATCH 15/44] progress --- evm/src/cpu/kernel/aggregator.rs | 1 + .../cpu/kernel/asm/hash/blake/compression.asm | 80 ++++++++++++++----- 
.../cpu/kernel/asm/hash/blake/g_functions.asm | 16 +++- 3 files changed, 74 insertions(+), 23 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 25e25119..5f9959fe 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -45,6 +45,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/hash/blake/ops.asm"), include_str!("asm/hash/blake/permutations.asm"), include_str!("asm/hash/blake/store.asm"), + include_str!("asm/hash/blake/util.asm"), include_str!("asm/hash/ripemd/box.asm"), include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 44b10ffb..ee9850e1 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,4 +1,4 @@ -%macro blake_initial_state +%macro blake_initial_hash_value %blake_iv_i(7) %blake_iv_i(6) %blake_iv_i(5) @@ -13,7 +13,7 @@ // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %endmacro -%macro blake_internal_state_addr +%macro blake_hash_value_addr PUSH 0 // stack: 0 %mload_kernel_general @@ -23,30 +23,81 @@ // stack: num_bytes+1 %endmacro +%macro blake_internal_state_addr + %blake_hash_value_addr + %add_const(8) +%endmacro + %macro blake_message_addr %blake_internal_state_addr %add_const(16) %endmacro global blake_compression: - %blake_initial_state - // stack: t_0, t_1, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 + // stack: retdest %stack () -> (0, 0, 0) - // stack: cur_block = 0, t_0 = 0, t_1 = 0, h_0, h_1, h_2, h_3, h_4, h_5, h_6, h_7 + // stack: cur_block = 0, t_0 = 0, t_1 = 0, retdest + // TODO: load %blake_initial_hash_value and store to blake_hash_value_addr - // stack: h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 +compression_loop: + // stack: cur_block, t_0, t_1, retdest + PUSH 0 + %mload_kernel_general + // 
stack: num_blocks, cur_block, t_0, t_1, retdest + %decrement + // stack: num_blocks - 1, cur_block, t_0, t_1, retdest + DUP2 + // stack: cur_block, num_blocks - 1, cur_block, t_0, t_1, retdest + EQ + // stack: is_last_block, cur_block, t_0, t_1, retdest + SWAP1 + // stack: cur_block, is_last_block, t_0, t_1, retdest + %mul_const(128) + %increment + // stack: cur_block_start_byte, is_last_block, t_0, t_1, retdest + %blake_message_addr + // stack: message_addr, cur_block_start_byte, is_last_block, t_0, t_1, retdest + %rep 16 + // stack: cur_message_addr, cur_block_byte, ... + DUP2 + // stack: cur_block_byte, cur_message_addr, cur_block_byte, ... + %mload_blake_word + // stack: m_i, cur_message_addr, cur_block_byte, ... + DUP2 + // stack: cur_message_addr, m_i, cur_message_addr, cur_block_byte, ... + %mstore_kernel_general + // stack: cur_message_addr, cur_block_byte, ... + %increment + // stack: cur_message_addr + 1, cur_block_byte, ... + SWAP1 + // stack: cur_block_byte, cur_message_addr + 1, ... + %add_const(64) + // stack: cur_block_byte + 64, cur_message_addr + 1, ... + SWAP1 + // stack: cur_message_addr + 1, cur_block_byte + 64, ... + %endrep + // stack: end_message_addr, end_block_start_byte, is_last_block, t_0, t_1, retdest + POP + POP + // stack: is_last_block, t_0, t_1, retdest + %mul_const(0xFFFFFFFF) + %stack (l, t0, t1) -> (t0, t1, l, 0) + // stack: t_0, t_1, invert_if_last_block, 0, retdest + // TODO: LOAD from %blake_hash_value_addr + // stack: h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest %blake_internal_state_addr - // stack: start, h_0, ..., h_7, t_0, t_1, f_0, f_1, m_0, ..., m_15 + // stack: start, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // First eight words of compression state: current state h_0, ..., h_7. 
%rep 8 SWAP1 DUP2 %mstore_kernel_general %increment %endrep - // stack: start + 8, t_0, t_1, f_0, f_1, m_0, ..., m_15 + // stack: start + 8, t_0, t_1, invert_if_last_block, 0, retdest PUSH 0 - // stack: 0, start + 8, t_0, t_1, f_0, f_1, m_0, ..., m_15 + // stack: 0, start + 8, t_0, t_1, invert_if_last_block, 0, retdest %rep 4 // stack: i, loc, ... DUP2 @@ -82,18 +133,9 @@ global blake_compression: SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep - // stack: 8, loc + 16, m_0, ..., m_15 + // stack: 8, loc + 16 POP POP - // stack: m_0, ..., m_15 - %blake_message_addr - // stack: addr, m_0, ..., m_15 - %rep 16 - SWAP1 - DUP2 - %mstore_kernel_general - %increment - %endrep // stack: (empty) %blake_internal_state_addr // stack: start diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index 5b3943bb..5feaa403 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -101,18 +101,26 @@ // stack: round, y_idx, round, start %blake_permutation // stack: s[y_idx], round, start + %blake_message_addr + ADD + %mload_blake_word + // stack: m[s[y_idx]], round, start PUSH $x_idx DUP3 - // stack: round, 2, s[y_idx], round, start + // stack: round, 2, m[s[y_idx]], round, start %blake_permutation - // stack: s[x_idx], s[y_idx], round, start + // stack: s[x_idx], m[s[y_idx]], round, start + %blake_message_addr + ADD + %mload_blake_word + // stack: m[s[x_idx]], m[s[y_idx]], round, start %stack (ss: 2, r, s) -> (ss, s, r, s) - // stack: s[x_idx], s[y_idx], start, round, start + // stack: m[s[x_idx]], m[s[y_idx]], start, round, start PUSH $d PUSH $c PUSH $b PUSH $a - // stack: a, b, c, d, s[x_idx], s[y_idx], start, round, start + // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start %blake_g_function // stack: round, start %endmacro \ No newline at end of file From a38b1fb37c8815069289be4749f477e60cf1ebab Mon Sep 17 00:00:00 2001 From: Nicholas 
Ward Date: Thu, 10 Nov 2022 15:40:44 -0800 Subject: [PATCH 16/44] progress --- evm/src/cpu/kernel/asm/hash/blake/compression.asm | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index ee9850e1..4b3fca2b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,3 +1,4 @@ +// Load the initial hash value (the IV, but with params XOR'd into the first word). %macro blake_initial_hash_value %blake_iv_i(7) %blake_iv_i(6) @@ -13,6 +14,7 @@ // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %endmacro +// Address where the working version of the hash value is stored. %macro blake_hash_value_addr PUSH 0 // stack: 0 @@ -23,11 +25,13 @@ // stack: num_bytes+1 %endmacro +// Address where the working version of the compression internal state is stored. %macro blake_internal_state_addr %blake_hash_value_addr %add_const(8) %endmacro +// Address where the current message block is stored. 
%macro blake_message_addr %blake_internal_state_addr %add_const(16) From df932544bda917f0c6fb586c042eb86b98c36938 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 14 Nov 2022 12:33:14 -0800 Subject: [PATCH 17/44] fixes and testing --- .../cpu/kernel/asm/hash/blake/compression.asm | 29 ++++++++++++++++--- evm/src/cpu/kernel/asm/hash/blake/iv.asm | 4 +-- evm/src/cpu/kernel/tests/hash.rs | 26 +++++++++++------ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 4b3fca2b..569c9555 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -41,9 +41,20 @@ global blake_compression: // stack: retdest %stack () -> (0, 0, 0) // stack: cur_block = 0, t_0 = 0, t_1 = 0, retdest - - // TODO: load %blake_initial_hash_value and store to blake_hash_value_addr - + %blake_initial_hash_value + // stack: h_0, ..., h_7, cur_block, t_0, t_1, retdest + %blake_hash_value_addr + STOP + // stack: addr, h_0, ..., h_7, cur_block, t_0, t_1, retdest + %rep 8 + DUP2 + DUP2 + %mstore_kernel_general + %increment + %endrep + // stack: addr, cur_block, t_0, t_1, retdest + POP + // stack: cur_block, t_0, t_1, retdest compression_loop: // stack: cur_block, t_0, t_1, retdest PUSH 0 @@ -88,7 +99,17 @@ compression_loop: %mul_const(0xFFFFFFFF) %stack (l, t0, t1) -> (t0, t1, l, 0) // stack: t_0, t_1, invert_if_last_block, 0, retdest - // TODO: LOAD from %blake_hash_value_addr + %blake_hash_value_addr + %rep 8 + // stack: addr, ... + DUP1 + // stack: addr, addr, ... + %mload_kernel_general + // stack: val, addr, ... + SWAP1 + // stack: addr, val, ... 
+ %increment + %endrep // stack: h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest %blake_internal_state_addr // stack: start, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake/iv.asm b/evm/src/cpu/kernel/asm/hash/blake/iv.asm index 2ddcf970..59c26071 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/iv.asm @@ -45,10 +45,10 @@ global blake_iv_const: DUP1 // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... %increment - // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... + // stack: blake_iv_const + 2 * i + 1, blake_iv_const + 2 * i, ... %mload_kernel_code SWAP1 - %increment + %mload_kernel_code // stack: IV_i[32:], IV_i[:32], ... %shl_const(32) // stack: IV_i[32:] << 32, IV_i[:32], ... diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index c26c2f88..0110d604 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -34,7 +34,7 @@ fn blake(input: Vec) -> U256 { fn make_random_input() -> Vec { // Generate a random message, between 0 and 9999 bytes. let mut rng = thread_rng(); - let num_bytes = rng.gen_range(0..10000); + let num_bytes = rng.gen_range(0..100); (0..num_bytes).map(|_| rng.gen()).collect() } @@ -59,30 +59,38 @@ fn make_input_stack(message: Vec) -> Vec { fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { // Make the input. let message_random = make_random_input(); - let message_custom = make_custom_input(); + // let message_custom = make_custom_input(); + + dbg!(message_random.clone()); // Hash the message using a standard implementation. 
- let expected_random = standard_implementation(message_random.clone()); - let expected_custom = standard_implementation(message_custom.clone()); + // let expected_random = standard_implementation(message_random.clone()); + // let expected_custom = standard_implementation(message_custom.clone()); // Load the message onto the stack. let initial_stack_random = make_input_stack(message_random); - let initial_stack_custom = make_input_stack(message_custom); + // let initial_stack_custom = make_input_stack(message_custom); + + dbg!(initial_stack_random.clone()); // Make the kernel. let kernel_function = KERNEL.global_labels[hash_fn_label]; + dbg!("HERE"); + // Run the kernel code. let result_random = run_interpreter(kernel_function, initial_stack_random)?; - let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; + // let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; + + dbg!(result_random.stack()); // Extract the final output. let actual_random = result_random.stack()[0]; - let actual_custom = result_custom.stack()[0]; + // let actual_custom = result_custom.stack()[0]; // Check that the result is correct. - assert_eq!(expected_random, actual_random); - assert_eq!(expected_custom, actual_custom); + // assert_eq!(expected_random, actual_random); + // assert_eq!(expected_custom, actual_custom); Ok(()) } From 772dc5c9e7711e6b033ff3801f9efaa952c9c79c Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 14 Nov 2022 16:48:33 -0800 Subject: [PATCH 18/44] util file --- evm/src/cpu/kernel/asm/hash/blake/util.asm | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 evm/src/cpu/kernel/asm/hash/blake/util.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm new file mode 100644 index 00000000..04e29ca9 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -0,0 +1,15 @@ +// Load a 64-bit word from kernel general memory. 
+%macro mload_blake_word + // stack: offset + DUP1 + %mload_kernel_general_u32 + // stack: hi, offset + %shl_const(32) + // stack: hi << 32, offset + SWAP1 + // stack: offset, hi << 32 + %add_const(4) + %mload_kernel_general_u32 + OR + // stack: (hi << 32) | lo +%endmacro From ebd606621fd1da99c3471697668e1f2d2e1db352 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 15 Nov 2022 14:46:45 -0800 Subject: [PATCH 19/44] progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 58 +++++++++++++++---- evm/src/cpu/kernel/tests/hash.rs | 2 +- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 569c9555..8903f8e3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -37,6 +37,25 @@ %add_const(16) %endmacro +%macro blake_generate_new_hash_value(i) + %blake_hash_value_addr + %add_const($i) + %mload_kernel_general + // stack: h_i, ... + %blake_internal_state_addr + %add_const($i) + %mload_kernel_general + // stack: v_i, h_i, ... + %blake_internal_state_addr + %add_const($i) + %add_const(8) + %mload_kernel_general + // stack: v_(i+8), v_i, h_i, ... + XOR + XOR + // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... +%endmacro + global blake_compression: // stack: retdest %stack () -> (0, 0, 0) @@ -44,10 +63,9 @@ global blake_compression: %blake_initial_hash_value // stack: h_0, ..., h_7, cur_block, t_0, t_1, retdest %blake_hash_value_addr - STOP // stack: addr, h_0, ..., h_7, cur_block, t_0, t_1, retdest %rep 8 - DUP2 + SWAP1 DUP2 %mstore_kernel_general %increment @@ -97,6 +115,7 @@ compression_loop: POP // stack: is_last_block, t_0, t_1, retdest %mul_const(0xFFFFFFFF) + // stack: invert_if_last_block, t_0, t_1, retdest %stack (l, t0, t1) -> (t0, t1, l, 0) // stack: t_0, t_1, invert_if_last_block, 0, retdest %blake_hash_value_addr @@ -110,6 +129,8 @@ compression_loop: // stack: addr, val, ... 
%increment %endrep + // stack: addr, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + POP // stack: h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest %blake_internal_state_addr // stack: start, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest @@ -140,8 +161,10 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1,... %endrep + // stack: 4, start + 12, t_0, t_1, invert_if_last_block, 0, retdest + // XOR the values (t_0, t_1, invert_if, 0) into the last four IV values. %rep 4 - // stack: i, loc, val, next_val, next_val,... + // stack: i, loc, val, next_val,... %stack (i, loc, val) -> (i, val, loc, i, loc) // stack: i, val, loc, i, loc, next_val,... %blake_iv @@ -158,16 +181,16 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep - // stack: 8, loc + 16 + // stack: 8, loc + 16, retdest POP POP - // stack: (empty) + // stack: retdest %blake_internal_state_addr - // stack: start + // stack: start, retdest PUSH 0 - // stack: round=0, start + // stack: round=0, start, retdest %rep 12 - // stack: round, start + // stack: round, start, retdest %call_blake_g_function(0, 4, 8, 12, 0, 1) %call_blake_g_function(1, 5, 9, 13, 2, 3) %call_blake_g_function(2, 6, 10, 14, 4, 5) @@ -176,8 +199,21 @@ compression_loop: %call_blake_g_function(1, 6, 11, 12, 10, 11) %call_blake_g_function(2, 7, 8, 13, 12, 13) %call_blake_g_function(3, 4, 9, 14, 14, 15) - // stack: round, start + // stack: round, start, retdest %increment - // stack: round + 1, start + // stack: round + 1, start, retdest %endrep - \ No newline at end of file + // stack: 12, start, retdest + POP + POP + // stack: retdest + %blake_generate_new_hash_value(7) + %blake_generate_new_hash_value(6) + %blake_generate_new_hash_value(5) + %blake_generate_new_hash_value(4) + %blake_generate_new_hash_value(3) + %blake_generate_new_hash_value(2) + %blake_generate_new_hash_value(1) + %blake_generate_new_hash_value(0) + // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + STOP \ 
No newline at end of file diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 0110d604..4a4cf42e 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -34,7 +34,7 @@ fn blake(input: Vec) -> U256 { fn make_random_input() -> Vec { // Generate a random message, between 0 and 9999 bytes. let mut rng = thread_rng(); - let num_bytes = rng.gen_range(0..100); + let num_bytes = rng.gen_range(0..25); (0..num_bytes).map(|_| rng.gen()).collect() } From 9774b74b90a5b7a14bcbd22ea57b24f94746db32 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 15 Nov 2022 15:36:15 -0800 Subject: [PATCH 20/44] Blake progress --- .../cpu/kernel/asm/hash/blake/compression.asm | 3 ++- evm/src/cpu/kernel/tests/hash.rs | 16 +++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 8903f8e3..a0303aa4 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -216,4 +216,5 @@ compression_loop: %blake_generate_new_hash_value(1) %blake_generate_new_hash_value(0) // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - STOP \ No newline at end of file + %stack (hash: 8, retdest) -> (retdest, hash) + JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 4a4cf42e..68efaa27 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -59,38 +59,36 @@ fn make_input_stack(message: Vec) -> Vec { fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { // Make the input. let message_random = make_random_input(); - // let message_custom = make_custom_input(); + let message_custom = make_custom_input(); dbg!(message_random.clone()); // Hash the message using a standard implementation. 
- // let expected_random = standard_implementation(message_random.clone()); - // let expected_custom = standard_implementation(message_custom.clone()); + let expected_random = standard_implementation(message_random.clone()); + let expected_custom = standard_implementation(message_custom.clone()); // Load the message onto the stack. let initial_stack_random = make_input_stack(message_random); - // let initial_stack_custom = make_input_stack(message_custom); + let initial_stack_custom = make_input_stack(message_custom); dbg!(initial_stack_random.clone()); // Make the kernel. let kernel_function = KERNEL.global_labels[hash_fn_label]; - dbg!("HERE"); - // Run the kernel code. let result_random = run_interpreter(kernel_function, initial_stack_random)?; - // let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; + let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; dbg!(result_random.stack()); // Extract the final output. let actual_random = result_random.stack()[0]; - // let actual_custom = result_custom.stack()[0]; + let actual_custom = result_custom.stack()[0]; // Check that the result is correct. 
// assert_eq!(expected_random, actual_random); - // assert_eq!(expected_custom, actual_custom); + assert_eq!(expected_custom, actual_custom); Ok(()) } From 5759fb7b3c346bb9f03c32c1a6be36323cf0c169 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 15 Nov 2022 15:40:56 -0800 Subject: [PATCH 21/44] concat --- .../cpu/kernel/asm/hash/blake/compression.asm | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index a0303aa4..d7be8706 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -216,5 +216,21 @@ compression_loop: %blake_generate_new_hash_value(1) %blake_generate_new_hash_value(0) // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - %stack (hash: 8, retdest) -> (retdest, hash) + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + %shl_const(32) + OR + // stack: hash, retdest + SWAP1 + // stack: retdest, hash JUMP \ No newline at end of file From d3a7201348febcdecc6d2d3620342b611049ab89 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 16 Nov 2022 14:23:15 -0800 Subject: [PATCH 22/44] fixes --- evm/Cargo.toml | 2 +- .../cpu/kernel/asm/hash/blake/compression.asm | 46 +++++++++---- evm/src/cpu/kernel/asm/hash/blake/iv.asm | 10 +-- evm/src/cpu/kernel/tests/hash.rs | 66 ++++++++++--------- 4 files changed, 75 insertions(+), 49 deletions(-) diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 2d1e2850..634aa563 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] anyhow = "1.0.40" -blake = "2.0.2" +blake2 = "0.10.5" env_logger = "0.9.0" eth_trie_utils = "0.4.0" ethereum-types = "0.14.0" diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index d7be8706..c61aecee 
100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -114,11 +114,12 @@ compression_loop: POP POP // stack: is_last_block, t_0, t_1, retdest - %mul_const(0xFFFFFFFF) + %mul_const(0xFFFFFFFFFFFFFFFF) // stack: invert_if_last_block, t_0, t_1, retdest %stack (l, t0, t1) -> (t0, t1, l, 0) // stack: t_0, t_1, invert_if_last_block, 0, retdest %blake_hash_value_addr + %add_const(7) %rep 8 // stack: addr, ... DUP1 @@ -127,7 +128,7 @@ compression_loop: // stack: val, addr, ... SWAP1 // stack: addr, val, ... - %increment + %decrement %endrep // stack: addr, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest POP @@ -181,6 +182,24 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep + + + + %blake_internal_state_addr + %add_const(15) + %rep 16 + // stack: addr, ... + DUP1 + // stack: addr, addr, ... + %mload_kernel_general + // stack: val, addr, ... + SWAP1 + // stack: addr, val, ... + %decrement + %endrep + POP + STOP + // stack: 8, loc + 16, retdest POP POP @@ -216,21 +235,22 @@ compression_loop: %blake_generate_new_hash_value(1) %blake_generate_new_hash_value(0) // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - %shl_const(32) + %shl_const(64) OR - %shl_const(32) + %shl_const(64) OR - %shl_const(32) + %shl_const(64) OR - %shl_const(32) + // stack: h_0' || h_1' || h_2' || h_3', h_4', h_5', h_6', h_7', retdest + %stack (first, second: 4) -> (second, first) + // stack: h_4', h_5', h_6', h_7', h_0' || h_1' || h_2' || h_3', retdest + %shl_const(64) OR - %shl_const(32) + %shl_const(64) OR - %shl_const(32) + %shl_const(64) OR - %shl_const(32) - OR - // stack: hash, retdest - SWAP1 - // stack: retdest, hash + // stack: hash_first = h_4' || h_5' || h_6' || h_7', hash_second = h_0' || h_1' || h_2' || h_3', retdest + SWAP2 + // stack: retdest, hash_first, hash_second JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/iv.asm 
b/evm/src/cpu/kernel/asm/hash/blake/iv.asm index 59c26071..e94f24b7 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/iv.asm @@ -39,20 +39,20 @@ global blake_iv_const: // stack: blake_iv_const, i, ... SWAP1 // stack: i, blake_iv_const, ... - %mul_const(2) + %mul_const(8) ADD // stack: blake_iv_const + 2 * i, ... DUP1 // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... - %increment + %add_const(4) // stack: blake_iv_const + 2 * i + 1, blake_iv_const + 2 * i, ... - %mload_kernel_code + %mload_kernel_code_u32 SWAP1 - %mload_kernel_code + %mload_kernel_code_u32 // stack: IV_i[32:], IV_i[:32], ... %shl_const(32) // stack: IV_i[32:] << 32, IV_i[:32], ... - ADD + OR // stack: IV_i, ... %endmacro diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 68efaa27..32853149 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -1,8 +1,8 @@ use std::str::FromStr; use anyhow::Result; -use blake::hash as blake_hash; -use ethereum_types::U256; +use blake2::Blake2b512; +use ethereum_types::{U256, U512}; use rand::{thread_rng, Rng}; use ripemd::{Digest, Ripemd160}; use sha2::Sha256; @@ -24,11 +24,11 @@ fn ripemd(input: Vec) -> U256 { U256::from(&hasher.finalize()[..]) } -/// Standard Blake implementation. -fn blake(input: Vec) -> U256 { - let mut result = [0; 32]; - blake_hash(256, &input, &mut result).unwrap(); - U256::from(result) +/// Standard Blake2b implementation. 
+fn blake2b(input: Vec) -> U512 { + let mut hasher = Blake2b512::new(); + hasher.update(input); + U512::from(&hasher.finalize()[..]) } fn make_random_input() -> Vec { @@ -41,9 +41,7 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, + 1, 2, 3 ] } @@ -56,54 +54,62 @@ fn make_input_stack(message: Vec) -> Vec { initial_stack } -fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { +fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { // Make the input. - let message_random = make_random_input(); + // let message_random = make_random_input(); let message_custom = make_custom_input(); - dbg!(message_random.clone()); + // dbg!(message_random.clone()); // Hash the message using a standard implementation. - let expected_random = standard_implementation(message_random.clone()); + // // let expected_random = standard_implementation(message_random.clone()); let expected_custom = standard_implementation(message_custom.clone()); + dbg!(expected_custom); + // Load the message onto the stack. - let initial_stack_random = make_input_stack(message_random); + // // let initial_stack_random = make_input_stack(message_random); let initial_stack_custom = make_input_stack(message_custom); - dbg!(initial_stack_random.clone()); + // dbg!(initial_stack_random.clone()); // Make the kernel. let kernel_function = KERNEL.global_labels[hash_fn_label]; // Run the kernel code. 
- let result_random = run_interpreter(kernel_function, initial_stack_random)?; + // // let result_random = run_interpreter(kernel_function, initial_stack_random)?; let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; - dbg!(result_random.stack()); + dbg!(result_custom.stack()); // Extract the final output. - let actual_random = result_random.stack()[0]; - let actual_custom = result_custom.stack()[0]; + // let actual_random = result_random.stack()[0]; + let actual_custom_first = result_custom.stack()[0]; + let actual_custom_second = result_custom.stack()[1]; + let mut actual_custom = U512::from(actual_custom_first); + actual_custom *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); + actual_custom += U512::from(actual_custom_second); + + dbg!(actual_custom); // Check that the result is correct. // assert_eq!(expected_random, actual_random); - assert_eq!(expected_custom, actual_custom); + // assert_eq!(expected_custom, actual_custom); Ok(()) } -#[test] -fn test_sha2() -> Result<()> { - test_hash("sha2", &sha2) -} +// #[test] +// fn test_sha2() -> Result<()> { +// test_hash("sha2", &sha2) +// } -#[test] -fn test_ripemd() -> Result<()> { - test_hash("ripemd_stack", &ripemd) -} +// #[test] +// fn test_ripemd() -> Result<()> { +// test_hash("ripemd_stack", &ripemd) +// } #[test] fn test_blake() -> Result<()> { - test_hash("blake", &blake) + test_hash("blake", &blake2b) } From 245e5faa6d341c1c095f356cf961ab4e338d4e17 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 29 Nov 2022 16:22:23 -0800 Subject: [PATCH 23/44] fixes galore --- .../cpu/kernel/asm/hash/blake/compression.asm | 175 ++++++++++++------ .../cpu/kernel/asm/hash/blake/g_functions.asm | 2 +- .../kernel/asm/hash/blake/permutations.asm | 24 ++- evm/src/cpu/kernel/asm/hash/blake/store.asm | 8 +- evm/src/cpu/kernel/asm/hash/blake/util.asm | 11 +- evm/src/cpu/kernel/tests/hash.rs | 95 ++++++---- 6 files changed, 221 insertions(+), 94 
deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index c61aecee..c0fdff9f 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -21,8 +21,8 @@ %mload_kernel_general // stack: num_blocks %mul_const(128) - %increment - // stack: num_bytes+1 + %add_const(2) + // stack: num_bytes+2 %endmacro // Address where the working version of the compression internal state is stored. @@ -56,41 +56,108 @@ // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... %endmacro +%macro invert_bytes_blake_word + // stack: word, ... + DUP1 + %and_const(0xff) + %shl_const(56) + SWAP1 + // stack: word, first_byte, ... + DUP1 + %shr_const(8) + %and_const(0xff) + %shl_const(48) + SWAP1 + // stack: word, second_byte, first_byte, ... + DUP1 + %shr_const(16) + %and_const(0xff) + %shl_const(40) + SWAP1 + DUP1 + %shr_const(24) + %and_const(0xff) + %shl_const(32) + SWAP1 + DUP1 + %shr_const(32) + %and_const(0xff) + %shl_const(24) + SWAP1 + DUP1 + %shr_const(40) + %and_const(0xff) + %shl_const(16) + SWAP1 + DUP1 + %shr_const(48) + %and_const(0xff) + %shl_const(8) + SWAP1 + %shr_const(56) + %and_const(0xff) + %rep 7 + OR + %endrep +%endmacro + global blake_compression: // stack: retdest - %stack () -> (0, 0, 0) - // stack: cur_block = 0, t_0 = 0, t_1 = 0, retdest + PUSH 0 + // stack: cur_block = 0, retdest %blake_initial_hash_value - // stack: h_0, ..., h_7, cur_block, t_0, t_1, retdest + // stack: h_0, ..., h_7, cur_block, retdest %blake_hash_value_addr - // stack: addr, h_0, ..., h_7, cur_block, t_0, t_1, retdest + // stack: addr, h_0, ..., h_7, cur_block, retdest %rep 8 SWAP1 DUP2 %mstore_kernel_general %increment %endrep - // stack: addr, cur_block, t_0, t_1, retdest + // stack: addr, cur_block, retdest POP - // stack: cur_block, t_0, t_1, retdest + // stack: cur_block, retdest compression_loop: - // stack: cur_block, t_0, t_1, retdest + // stack: cur_block, retdest 
PUSH 0 %mload_kernel_general - // stack: num_blocks, cur_block, t_0, t_1, retdest + // stack: num_blocks, cur_block, retdest %decrement - // stack: num_blocks - 1, cur_block, t_0, t_1, retdest + // stack: num_blocks - 1, cur_block, retdest DUP2 - // stack: cur_block, num_blocks - 1, cur_block, t_0, t_1, retdest + // stack: cur_block, num_blocks - 1, cur_block, retdest EQ - // stack: is_last_block, cur_block, t_0, t_1, retdest + // stack: is_last_block, cur_block, retdest SWAP1 - // stack: cur_block, is_last_block, t_0, t_1, retdest + // stack: cur_block, is_last_block, retdest + PUSH 1 + %mload_kernel_general + // stack: num_bytes, cur_block, is_last_block, retdest + DUP3 + // stack: is_last_block, num_bytes, cur_block, is_last_block, retdest + MUL + // stack: is_last_block * num_bytes, cur_block, is_last_block, retdest + DUP2 + // stack: cur_block, is_last_block * num_bytes, cur_block, is_last_block, retdest %mul_const(128) - %increment - // stack: cur_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + DUP4 + // stack: is_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + ISZERO + // stack: not_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + MUL + // stack: not_last_block * (cur_block * 128), is_last_block * num_bytes, cur_block, is_last_block, retdest + ADD + // stack: t = not_last_block * (cur_block * 128) + is_last_block * num_bytes, cur_block, is_last_block, retdest + SWAP1 + // stack: cur_block, t, is_last_block, retdest + // stack: cur_block, t, is_last_block, retdest + %mul_const(128) + %add_const(2) + // stack: cur_block_start_byte, t, is_last_block, retdest %blake_message_addr - // stack: message_addr, cur_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: message_addr, cur_block_start_byte, t, is_last_block, retdest %rep 16 // stack: cur_message_addr, cur_block_byte, 
... DUP2 @@ -105,19 +172,19 @@ compression_loop: // stack: cur_message_addr + 1, cur_block_byte, ... SWAP1 // stack: cur_block_byte, cur_message_addr + 1, ... - %add_const(64) - // stack: cur_block_byte + 64, cur_message_addr + 1, ... + %add_const(8) + // stack: cur_block_byte + 8, cur_message_addr + 1, ... SWAP1 - // stack: cur_message_addr + 1, cur_block_byte + 64, ... + // stack: cur_message_addr + 1, cur_block_byte + 8, ... %endrep - // stack: end_message_addr, end_block_start_byte, is_last_block, t_0, t_1, retdest + // stack: end_message_addr, end_block_start_byte, t, is_last_block, retdest POP POP - // stack: is_last_block, t_0, t_1, retdest + // stack: t, is_last_block, retdest + SWAP1 + // stack: is_last_block, t, retdest %mul_const(0xFFFFFFFFFFFFFFFF) - // stack: invert_if_last_block, t_0, t_1, retdest - %stack (l, t0, t1) -> (t0, t1, l, 0) - // stack: t_0, t_1, invert_if_last_block, 0, retdest + // stack: invert_if_last_block, t, retdest %blake_hash_value_addr %add_const(7) %rep 8 @@ -130,11 +197,11 @@ compression_loop: // stack: addr, val, ... %decrement %endrep - // stack: addr, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: addr, h_0, ..., h_7, invert_if_last_block, t, retdest POP - // stack: h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: h_0, ..., h_7, invert_if_last_block, t, retdest %blake_internal_state_addr - // stack: start, h_0, ..., h_7, t_0, t_1, invert_if_last_block, 0, retdest + // stack: start, h_0, ..., h_7, invert_if_last_block, t, retdest // First eight words of compression state: current state h_0, ..., h_7. %rep 8 SWAP1 @@ -142,9 +209,9 @@ compression_loop: %mstore_kernel_general %increment %endrep - // stack: start + 8, t_0, t_1, invert_if_last_block, 0, retdest + // stack: start + 8, invert_if_last_block, t, retdest PUSH 0 - // stack: 0, start + 8, t_0, t_1, invert_if_last_block, 0, retdest + // stack: 0, start + 8, invert_if_last_block, t, retdest %rep 4 // stack: i, loc, ... 
DUP2 @@ -162,8 +229,22 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1,... %endrep - // stack: 4, start + 12, t_0, t_1, invert_if_last_block, 0, retdest - // XOR the values (t_0, t_1, invert_if, 0) into the last four IV values. + // stack: 4, start + 12, invert_if_last_block, t, retdest + %stack (i, loc, inv, t) -> (t, t, i, loc, inv) + // stack: t, t, 4, start + 12, invert_if_last_block, retdest + %shr_const(64) + // stack: t >> 64, t, 4, start + 12, invert_if_last_block, retdest + SWAP1 + // stack: t, t >> 64, 4, start + 12, invert_if_last_block, retdest + PUSH 1 + %shl_const(64) + // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, retdest + SWAP1 + MOD + // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, retdest + %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) + // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, retdest + // XOR the values (t % 2**64, t >> 64, invert_if, 0) into the last four IV values. %rep 4 // stack: i, loc, val, next_val,... %stack (i, loc, val) -> (i, val, loc, i, loc) @@ -182,24 +263,6 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep - - - - %blake_internal_state_addr - %add_const(15) - %rep 16 - // stack: addr, ... - DUP1 - // stack: addr, addr, ... - %mload_kernel_general - // stack: val, addr, ... - SWAP1 - // stack: addr, val, ... 
- %decrement - %endrep - POP - STOP - // stack: 8, loc + 16, retdest POP POP @@ -227,13 +290,21 @@ compression_loop: POP // stack: retdest %blake_generate_new_hash_value(7) + %invert_bytes_blake_word %blake_generate_new_hash_value(6) + %invert_bytes_blake_word %blake_generate_new_hash_value(5) + %invert_bytes_blake_word %blake_generate_new_hash_value(4) + %invert_bytes_blake_word %blake_generate_new_hash_value(3) + %invert_bytes_blake_word %blake_generate_new_hash_value(2) + %invert_bytes_blake_word %blake_generate_new_hash_value(1) + %invert_bytes_blake_word %blake_generate_new_hash_value(0) + %invert_bytes_blake_word // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest %shl_const(64) OR @@ -250,7 +321,7 @@ compression_loop: OR %shl_const(64) OR - // stack: hash_first = h_4' || h_5' || h_6' || h_7', hash_second = h_0' || h_1' || h_2' || h_3', retdest - SWAP2 + // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest + %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index 5feaa403..93b61875 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -39,7 +39,7 @@ XOR %rotr_64(32) // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start - %stack (top: 3, vd) -> (top) + %stack (top: 4, vd) -> (top) // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start %stack (d, a, b, c) -> (c, d, a, b, d) // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm index da278ec2..452bb100 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/permutations.asm @@ 
-1,3 +1,21 @@ +global permutation_0_constants: + BYTES 0 + BYTES 1 + BYTES 2 + BYTES 3 + BYTES 4 + BYTES 5 + BYTES 6 + BYTES 7 + BYTES 8 + BYTES 9 + BYTES 10 + BYTES 11 + BYTES 12 + BYTES 13 + BYTES 14 + BYTES 15 + global permutation_1_constants: BYTES 14 BYTES 10 @@ -162,13 +180,15 @@ global permutation_9_constants: %macro blake_permutation // stack: round, i - PUSH permutation_1_constants - // stack: permutation_1_constants, round, i + PUSH permutation_0_constants + // stack: permutation_0_constants, round, i SWAP1 // stack: round, permutation_1_constants, i %mod_const(10) + // stack: round % 10, permutation_1_constants, i %mul_const(16) ADD + // stack: permutation_(round)_constants, i ADD %mload_kernel_code %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/store.asm b/evm/src/cpu/kernel/asm/hash/blake/store.asm index 28cfa187..448a854b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/store.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/store.asm @@ -12,8 +12,14 @@ global blake_store: // stack: addr=0, num_blocks, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + DUP1 + // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest PUSH 1 - // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest + // stack: 1, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + %mstore_kernel_general + // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest + PUSH 2 + // stack: addr=2, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest store_loop: // stack: addr, counter, x[num_bytes-counter], ... 
, x[num_bytes-1], retdest DUP2 diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm index 04e29ca9..61b731b6 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -2,14 +2,15 @@ %macro mload_blake_word // stack: offset DUP1 - %mload_kernel_general_u32 - // stack: hi, offset - %shl_const(32) - // stack: hi << 32, offset + %mload_kernel_general_u32_LE + // stack: lo, offset SWAP1 - // stack: offset, hi << 32 + // stack: offset, lo %add_const(4) %mload_kernel_general_u32 + // stack: hi, lo + %shl_const(32) + // stack: hi << 32, lo OR // stack: (hi << 32) | lo %endmacro diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 32853149..3481fae5 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -41,7 +41,9 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 1, 2, 3 + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, ] } @@ -54,62 +56,89 @@ fn make_input_stack(message: Vec) -> Vec { initial_stack } -fn test_hash(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { +fn test_hash_256(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { // Make the input. - // let message_random = make_random_input(); + let message_random = make_random_input(); let message_custom = make_custom_input(); - // dbg!(message_random.clone()); - // Hash the message using a standard implementation. 
- // // let expected_random = standard_implementation(message_random.clone()); + let expected_random = standard_implementation(message_random.clone()); let expected_custom = standard_implementation(message_custom.clone()); - dbg!(expected_custom); - // Load the message onto the stack. - // // let initial_stack_random = make_input_stack(message_random); + let initial_stack_random = make_input_stack(message_random); let initial_stack_custom = make_input_stack(message_custom); - // dbg!(initial_stack_random.clone()); - // Make the kernel. let kernel_function = KERNEL.global_labels[hash_fn_label]; // Run the kernel code. - // // let result_random = run_interpreter(kernel_function, initial_stack_random)?; + let result_random = run_interpreter(kernel_function, initial_stack_random)?; let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; - dbg!(result_custom.stack()); - // Extract the final output. - // let actual_random = result_random.stack()[0]; - let actual_custom_first = result_custom.stack()[0]; - let actual_custom_second = result_custom.stack()[1]; - let mut actual_custom = U512::from(actual_custom_first); - actual_custom *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); - actual_custom += U512::from(actual_custom_second); - - dbg!(actual_custom); + let actual_random = result_random.stack()[0]; + let actual_custom = result_custom.stack()[0]; // Check that the result is correct. 
- // assert_eq!(expected_random, actual_random); - // assert_eq!(expected_custom, actual_custom); + assert_eq!(expected_random, actual_random); + assert_eq!(expected_custom, actual_custom); Ok(()) } -// #[test] -// fn test_sha2() -> Result<()> { -// test_hash("sha2", &sha2) -// } +fn combine_u256s(hi: U256, lo: U256) -> U512 { + let mut result = U512::from(hi); + result *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); + result += U512::from(lo); + result +} -// #[test] -// fn test_ripemd() -> Result<()> { -// test_hash("ripemd_stack", &ripemd) -// } +fn test_hash_512(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { + // Make the input. + let message_random = make_random_input(); + let message_custom = make_custom_input(); + + // Hash the message using a standard implementation. + let expected_random = standard_implementation(message_random.clone()); + let expected_custom = standard_implementation(message_custom.clone()); + + // Load the message onto the stack. + let initial_stack_random = make_input_stack(message_random); + let initial_stack_custom = make_input_stack(message_custom); + + // Make the kernel. + let kernel_function = KERNEL.global_labels[hash_fn_label]; + + // Run the kernel code. + let result_random = run_interpreter(kernel_function, initial_stack_random)?; + let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; + + let random_stack = result_random.stack(); + let custom_stack = result_custom.stack(); + + // Extract the final output. + let actual_random = combine_u256s(random_stack[0], random_stack[1]); + let actual_custom = combine_u256s(custom_stack[0], custom_stack[1]); + + // Check that the result is correct. 
+ // assert_eq!(expected_random, actual_random); + assert_eq!(expected_custom, actual_custom); + + Ok(()) +} + +#[test] +fn test_sha2() -> Result<()> { + test_hash_256("sha2", &sha2) +} + +#[test] +fn test_ripemd() -> Result<()> { + test_hash_256("ripemd_stack", &ripemd) +} #[test] fn test_blake() -> Result<()> { - test_hash("blake", &blake2b) + test_hash_512("blake", &blake2b) } From 20169a5476c6b20ac1756e32f2aaff91ca5bdd27 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Sat, 19 Nov 2022 14:50:17 -0800 Subject: [PATCH 24/44] debugging --- .../cpu/kernel/asm/hash/blake/compression.asm | 33 ++++++++++++++----- evm/src/cpu/kernel/tests/hash.rs | 7 ++-- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index c0fdff9f..f3ac39e1 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -209,6 +209,7 @@ compression_loop: %mstore_kernel_general %increment %endrep + // stack: start + 8, invert_if_last_block, t, retdest PUSH 0 // stack: 0, start + 8, invert_if_last_block, t, retdest @@ -271,20 +272,36 @@ compression_loop: // stack: start, retdest PUSH 0 // stack: round=0, start, retdest - %rep 12 + %rep 1 // stack: round, start, retdest %call_blake_g_function(0, 4, 8, 12, 0, 1) - %call_blake_g_function(1, 5, 9, 13, 2, 3) - %call_blake_g_function(2, 6, 10, 14, 4, 5) - %call_blake_g_function(3, 7, 11, 15, 6, 7) - %call_blake_g_function(0, 5, 10, 15, 8, 9) - %call_blake_g_function(1, 6, 11, 12, 10, 11) - %call_blake_g_function(2, 7, 8, 13, 12, 13) - %call_blake_g_function(3, 4, 9, 14, 14, 15) + // %call_blake_g_function(1, 5, 9, 13, 2, 3) + // %call_blake_g_function(2, 6, 10, 14, 4, 5) + // %call_blake_g_function(3, 7, 11, 15, 6, 7) + // %call_blake_g_function(0, 5, 10, 15, 8, 9) + // %call_blake_g_function(1, 6, 11, 12, 10, 11) + // %call_blake_g_function(2, 7, 8, 13, 12, 13) + // 
%call_blake_g_function(3, 4, 9, 14, 14, 15) // stack: round, start, retdest %increment // stack: round + 1, start, retdest %endrep + + + %blake_internal_state_addr + %add_const(15) + %rep 16 + // stack: addr, ... + DUP1 + // stack: addr, addr, ... + %mload_kernel_general + // stack: val, addr, ... + SWAP1 + // stack: addr, val, ... + %decrement + %endrep + STOP + // stack: 12, start, retdest POP POP diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 3481fae5..0d9b8d18 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -41,9 +41,7 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, + 1, 2, 3, 4, 5, ] } @@ -117,6 +115,9 @@ fn test_hash_512(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) let random_stack = result_random.stack(); let custom_stack = result_custom.stack(); + dbg!(custom_stack); + // dbg!(result_custom.get_kernel_general_memory()); + // Extract the final output. 
let actual_random = combine_u256s(random_stack[0], random_stack[1]); let actual_custom = combine_u256s(custom_stack[0], custom_stack[1]); From 49504dde7db289e50999224c413a874421fa50cf Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 29 Nov 2022 16:12:58 -0800 Subject: [PATCH 25/44] fixes --- .../cpu/kernel/asm/hash/blake/compression.asm | 35 +++++-------------- .../cpu/kernel/asm/hash/blake/g_functions.asm | 4 +-- evm/src/cpu/kernel/asm/hash/blake/util.asm | 4 +-- evm/src/cpu/kernel/tests/hash.rs | 9 +++-- 4 files changed, 17 insertions(+), 35 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index f3ac39e1..286576c6 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -162,7 +162,7 @@ compression_loop: // stack: cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_block_byte, cur_message_addr, cur_block_byte, ... - %mload_blake_word + %mload_blake_word_from_bytes // stack: m_i, cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_message_addr, m_i, cur_message_addr, cur_block_byte, ... 
@@ -209,7 +209,6 @@ compression_loop: %mstore_kernel_general %increment %endrep - // stack: start + 8, invert_if_last_block, t, retdest PUSH 0 // stack: 0, start + 8, invert_if_last_block, t, retdest @@ -272,36 +271,20 @@ compression_loop: // stack: start, retdest PUSH 0 // stack: round=0, start, retdest - %rep 1 + %rep 12 // stack: round, start, retdest %call_blake_g_function(0, 4, 8, 12, 0, 1) - // %call_blake_g_function(1, 5, 9, 13, 2, 3) - // %call_blake_g_function(2, 6, 10, 14, 4, 5) - // %call_blake_g_function(3, 7, 11, 15, 6, 7) - // %call_blake_g_function(0, 5, 10, 15, 8, 9) - // %call_blake_g_function(1, 6, 11, 12, 10, 11) - // %call_blake_g_function(2, 7, 8, 13, 12, 13) - // %call_blake_g_function(3, 4, 9, 14, 14, 15) + %call_blake_g_function(1, 5, 9, 13, 2, 3) + %call_blake_g_function(2, 6, 10, 14, 4, 5) + %call_blake_g_function(3, 7, 11, 15, 6, 7) + %call_blake_g_function(0, 5, 10, 15, 8, 9) + %call_blake_g_function(1, 6, 11, 12, 10, 11) + %call_blake_g_function(2, 7, 8, 13, 12, 13) + %call_blake_g_function(3, 4, 9, 14, 14, 15) // stack: round, start, retdest %increment // stack: round + 1, start, retdest %endrep - - - %blake_internal_state_addr - %add_const(15) - %rep 16 - // stack: addr, ... - DUP1 - // stack: addr, addr, ... - %mload_kernel_general - // stack: val, addr, ... - SWAP1 - // stack: addr, val, ... 
- %decrement - %endrep - STOP - // stack: 12, start, retdest POP POP diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm index 93b61875..243efa14 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm @@ -103,7 +103,7 @@ // stack: s[y_idx], round, start %blake_message_addr ADD - %mload_blake_word + %mload_kernel_general // stack: m[s[y_idx]], round, start PUSH $x_idx DUP3 @@ -112,7 +112,7 @@ // stack: s[x_idx], m[s[y_idx]], round, start %blake_message_addr ADD - %mload_blake_word + %mload_kernel_general // stack: m[s[x_idx]], m[s[y_idx]], round, start %stack (ss: 2, r, s) -> (ss, s, r, s) // stack: m[s[x_idx]], m[s[y_idx]], start, round, start diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm index 61b731b6..916557f2 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -1,5 +1,5 @@ // Load a 64-bit word from kernel general memory. 
-%macro mload_blake_word +%macro mload_blake_word_from_bytes // stack: offset DUP1 %mload_kernel_general_u32_LE @@ -7,7 +7,7 @@ SWAP1 // stack: offset, lo %add_const(4) - %mload_kernel_general_u32 + %mload_kernel_general_u32_LE // stack: hi, lo %shl_const(32) // stack: hi << 32, lo diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 0d9b8d18..de06f760 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -41,7 +41,9 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 1, 2, 3, 4, 5, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, ] } @@ -115,15 +117,12 @@ fn test_hash_512(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) let random_stack = result_random.stack(); let custom_stack = result_custom.stack(); - dbg!(custom_stack); - // dbg!(result_custom.get_kernel_general_memory()); - // Extract the final output. let actual_random = combine_u256s(random_stack[0], random_stack[1]); let actual_custom = combine_u256s(custom_stack[0], custom_stack[1]); // Check that the result is correct. 
- // assert_eq!(expected_random, actual_random); + assert_eq!(expected_random, actual_random); assert_eq!(expected_custom, actual_custom); Ok(()) From 0d05a4bbbfc1675236f5dcb9fda029587ca93ffe Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 30 Nov 2022 17:46:11 -0800 Subject: [PATCH 26/44] FIX --- evm/src/cpu/kernel/aggregator.rs | 2 + .../cpu/kernel/asm/hash/blake/addresses.asm | 38 +++ .../cpu/kernel/asm/hash/blake/compression.asm | 235 +++++++----------- evm/src/cpu/kernel/asm/hash/blake/hash.asm | 18 ++ evm/src/cpu/kernel/asm/hash/blake/util.asm | 45 ++++ evm/src/cpu/kernel/interpreter.rs | 9 + evm/src/cpu/kernel/tests/hash.rs | 37 ++- 7 files changed, 234 insertions(+), 150 deletions(-) create mode 100644 evm/src/cpu/kernel/asm/hash/blake/addresses.asm create mode 100644 evm/src/cpu/kernel/asm/hash/blake/hash.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 5f9959fe..8b9cef7c 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,8 +39,10 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/fields/fp6_mul.asm"), include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), + include_str!("asm/hash/blake/addresses.asm"), include_str!("asm/hash/blake/compression.asm"), include_str!("asm/hash/blake/g_functions.asm"), + include_str!("asm/hash/blake/hash.asm"), include_str!("asm/hash/blake/iv.asm"), include_str!("asm/hash/blake/ops.asm"), include_str!("asm/hash/blake/permutations.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake/addresses.asm new file mode 100644 index 00000000..751ea4e9 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/addresses.asm @@ -0,0 +1,38 @@ +// Load the initial hash value (the IV, but with params XOR'd into the first word). 
+%macro blake_initial_hash_value + %blake_iv_i(7) + %blake_iv_i(6) + %blake_iv_i(5) + %blake_iv_i(4) + %blake_iv_i(3) + %blake_iv_i(2) + %blake_iv_i(1) + // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 + PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 + %blake_iv_i(0) + XOR + // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 +%endmacro + +// Address where the working version of the hash value is stored. +%macro blake_hash_value_addr + PUSH 0 + // stack: 0 + %mload_kernel_general + // stack: num_blocks + %mul_const(128) + %add_const(2) + // stack: num_bytes+2 +%endmacro + +// Address where the working version of the compression internal state is stored. +%macro blake_internal_state_addr + %blake_hash_value_addr + %add_const(8) +%endmacro + +// Address where the current message block is stored. +%macro blake_message_addr + %blake_internal_state_addr + %add_const(16) +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 286576c6..01ac0aa5 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -1,111 +1,9 @@ -// Load the initial hash value (the IV, but with params XOR'd into the first word). -%macro blake_initial_hash_value - %blake_iv_i(7) - %blake_iv_i(6) - %blake_iv_i(5) - %blake_iv_i(4) - %blake_iv_i(3) - %blake_iv_i(2) - %blake_iv_i(1) - // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 - PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake_iv_i(0) - XOR - // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 -%endmacro - -// Address where the working version of the hash value is stored. -%macro blake_hash_value_addr - PUSH 0 - // stack: 0 - %mload_kernel_general - // stack: num_blocks - %mul_const(128) - %add_const(2) - // stack: num_bytes+2 -%endmacro - -// Address where the working version of the compression internal state is stored. 
-%macro blake_internal_state_addr - %blake_hash_value_addr - %add_const(8) -%endmacro - -// Address where the current message block is stored. -%macro blake_message_addr - %blake_internal_state_addr - %add_const(16) -%endmacro - -%macro blake_generate_new_hash_value(i) - %blake_hash_value_addr - %add_const($i) - %mload_kernel_general - // stack: h_i, ... - %blake_internal_state_addr - %add_const($i) - %mload_kernel_general - // stack: v_i, h_i, ... - %blake_internal_state_addr - %add_const($i) - %add_const(8) - %mload_kernel_general - // stack: v_(i+8), v_i, h_i, ... - XOR - XOR - // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... -%endmacro - -%macro invert_bytes_blake_word - // stack: word, ... - DUP1 - %and_const(0xff) - %shl_const(56) - SWAP1 - // stack: word, first_byte, ... - DUP1 - %shr_const(8) - %and_const(0xff) - %shl_const(48) - SWAP1 - // stack: word, second_byte, first_byte, ... - DUP1 - %shr_const(16) - %and_const(0xff) - %shl_const(40) - SWAP1 - DUP1 - %shr_const(24) - %and_const(0xff) - %shl_const(32) - SWAP1 - DUP1 - %shr_const(32) - %and_const(0xff) - %shl_const(24) - SWAP1 - DUP1 - %shr_const(40) - %and_const(0xff) - %shl_const(16) - SWAP1 - DUP1 - %shr_const(48) - %and_const(0xff) - %shl_const(8) - SWAP1 - %shr_const(56) - %and_const(0xff) - %rep 7 - OR - %endrep -%endmacro - global blake_compression: // stack: retdest PUSH 0 // stack: cur_block = 0, retdest %blake_initial_hash_value +blake_compression_loop: // stack: h_0, ..., h_7, cur_block, retdest %blake_hash_value_addr // stack: addr, h_0, ..., h_7, cur_block, retdest @@ -118,8 +16,6 @@ global blake_compression: // stack: addr, cur_block, retdest POP // stack: cur_block, retdest -compression_loop: - // stack: cur_block, retdest PUSH 0 %mload_kernel_general // stack: num_blocks, cur_block, retdest @@ -140,24 +36,26 @@ compression_loop: // stack: is_last_block * num_bytes, cur_block, is_last_block, retdest DUP2 // stack: cur_block, is_last_block * num_bytes, cur_block, is_last_block, retdest + 
%increment %mul_const(128) - // stack: cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + // stack: (cur_block + 1) * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest DUP4 - // stack: is_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + // stack: is_last_block, (cur_block + 1) * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest ISZERO - // stack: not_last_block, cur_block * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest + // stack: not_last_block, (cur_block + 1) * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest MUL - // stack: not_last_block * (cur_block * 128), is_last_block * num_bytes, cur_block, is_last_block, retdest + // stack: not_last_block * ((cur_block + 1) * 128), is_last_block * num_bytes, cur_block, is_last_block, retdest ADD - // stack: t = not_last_block * (cur_block * 128) + is_last_block * num_bytes, cur_block, is_last_block, retdest + // stack: t = not_last_block * ((cur_block + 1) * 128) + is_last_block * num_bytes, cur_block, is_last_block, retdest SWAP1 // stack: cur_block, t, is_last_block, retdest - // stack: cur_block, t, is_last_block, retdest + DUP1 + // stack: cur_block, cur_block, t, is_last_block, retdest %mul_const(128) %add_const(2) - // stack: cur_block_start_byte, t, is_last_block, retdest + // stack: cur_block_start_byte, t, cur_block, is_last_block, retdest %blake_message_addr - // stack: message_addr, cur_block_start_byte, t, is_last_block, retdest + // stack: message_addr, cur_block_start_byte, t, cur_block, is_last_block, retdest %rep 16 // stack: cur_message_addr, cur_block_byte, ... DUP2 @@ -177,14 +75,16 @@ compression_loop: SWAP1 // stack: cur_message_addr + 1, cur_block_byte + 8, ... 
%endrep - // stack: end_message_addr, end_block_start_byte, t, is_last_block, retdest + // stack: end_message_addr, end_block_start_byte, t, cur_block, is_last_block, retdest POP POP - // stack: t, is_last_block, retdest + // stack: t, cur_block, is_last_block, retdest SWAP1 - // stack: is_last_block, t, retdest + // stack: cur_block, t, is_last_block, retdest + SWAP2 + // stack: is_last_block, t, cur_block, retdest %mul_const(0xFFFFFFFFFFFFFFFF) - // stack: invert_if_last_block, t, retdest + // stack: invert_if_last_block, t, cur_block, retdest %blake_hash_value_addr %add_const(7) %rep 8 @@ -197,11 +97,11 @@ compression_loop: // stack: addr, val, ... %decrement %endrep - // stack: addr, h_0, ..., h_7, invert_if_last_block, t, retdest + // stack: addr, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest POP - // stack: h_0, ..., h_7, invert_if_last_block, t, retdest + // stack: h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest %blake_internal_state_addr - // stack: start, h_0, ..., h_7, invert_if_last_block, t, retdest + // stack: start, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest // First eight words of compression state: current state h_0, ..., h_7. %rep 8 SWAP1 @@ -209,9 +109,9 @@ compression_loop: %mstore_kernel_general %increment %endrep - // stack: start + 8, invert_if_last_block, t, retdest + // stack: start + 8, invert_if_last_block, t, cur_block, retdest PUSH 0 - // stack: 0, start + 8, invert_if_last_block, t, retdest + // stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest %rep 4 // stack: i, loc, ... DUP2 @@ -229,21 +129,21 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1,... 
%endrep - // stack: 4, start + 12, invert_if_last_block, t, retdest - %stack (i, loc, inv, t) -> (t, t, i, loc, inv) - // stack: t, t, 4, start + 12, invert_if_last_block, retdest + // stack: 4, start + 12, invert_if_last_block, t, cur_block, retdest + %stack (i, loc, inv, last, t) -> (t, t, i, loc, inv, last) + // stack: t, t, 4, start + 12, invert_if_last_block, cur_block, retdest %shr_const(64) - // stack: t >> 64, t, 4, start + 12, invert_if_last_block, retdest + // stack: t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest SWAP1 - // stack: t, t >> 64, 4, start + 12, invert_if_last_block, retdest + // stack: t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest PUSH 1 %shl_const(64) - // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, retdest + // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest SWAP1 MOD - // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, retdest + // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) - // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, retdest + // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest // XOR the values (t % 2**64, t >> 64, invert_if, 0) into the last four IV values. %rep 4 // stack: i, loc, val, next_val,... @@ -263,16 +163,16 @@ compression_loop: SWAP1 // stack: i + 1, loc + 1, next_val,... 
%endrep - // stack: 8, loc + 16, retdest + // stack: 8, loc + 16, cur_block, retdest POP POP - // stack: retdest + // stack: cur_block, retdest %blake_internal_state_addr - // stack: start, retdest + // stack: start, cur_block, retdest PUSH 0 - // stack: round=0, start, retdest + // stack: round=0, start, cur_block, retdest %rep 12 - // stack: round, start, retdest + // stack: round, start, cur_block, retdest %call_blake_g_function(0, 4, 8, 12, 0, 1) %call_blake_g_function(1, 5, 9, 13, 2, 3) %call_blake_g_function(2, 6, 10, 14, 4, 5) @@ -281,39 +181,75 @@ compression_loop: %call_blake_g_function(1, 6, 11, 12, 10, 11) %call_blake_g_function(2, 7, 8, 13, 12, 13) %call_blake_g_function(3, 4, 9, 14, 14, 15) - // stack: round, start, retdest + // stack: round, start, cur_block, retdest %increment - // stack: round + 1, start, retdest + // stack: round + 1, start, cur_block, retdest %endrep - // stack: 12, start, retdest + // stack: 12, start, cur_block, retdest POP POP - // stack: retdest + // stack: cur_block, retdest %blake_generate_new_hash_value(7) - %invert_bytes_blake_word %blake_generate_new_hash_value(6) - %invert_bytes_blake_word %blake_generate_new_hash_value(5) - %invert_bytes_blake_word %blake_generate_new_hash_value(4) - %invert_bytes_blake_word %blake_generate_new_hash_value(3) - %invert_bytes_blake_word %blake_generate_new_hash_value(2) - %invert_bytes_blake_word %blake_generate_new_hash_value(1) - %invert_bytes_blake_word %blake_generate_new_hash_value(0) + // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest + DUP9 + // stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest + %increment + // stack: cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest + SWAP9 + // stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %increment + // stack: cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + PUSH 0 + 
%mload_kernel_general + // stack: num_blocks, cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + EQ + // stack: last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %jumpi(blake_compression_end) + %jump(blake_compression_loop) +blake_compression_end: + // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + PUSH 0 + // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + SWAP1 %invert_bytes_blake_word - // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + SWAP1 + SWAP2 + %invert_bytes_blake_word + SWAP2 + SWAP3 + %invert_bytes_blake_word + SWAP3 + SWAP4 + %invert_bytes_blake_word + SWAP4 + SWAP5 + %invert_bytes_blake_word + SWAP5 + SWAP6 + %invert_bytes_blake_word + SWAP6 + SWAP7 + %invert_bytes_blake_word + SWAP7 + SWAP8 + %invert_bytes_blake_word + SWAP8 + POP %shl_const(64) OR %shl_const(64) OR %shl_const(64) OR - // stack: h_0' || h_1' || h_2' || h_3', h_4', h_5', h_6', h_7', retdest - %stack (first, second: 4) -> (second, first) + // stack: h_0' || h_1' || h_2' || h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %stack (first, second: 4, cur) -> (second, first) // stack: h_4', h_5', h_6', h_7', h_0' || h_1' || h_2' || h_3', retdest %shl_const(64) OR @@ -324,4 +260,5 @@ compression_loop: // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second + STOP JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/hash.asm b/evm/src/cpu/kernel/asm/hash/blake/hash.asm new file mode 100644 index 00000000..8e3499f0 --- /dev/null +++ b/evm/src/cpu/kernel/asm/hash/blake/hash.asm @@ -0,0 +1,18 @@ +%macro blake_generate_new_hash_value(i) + %blake_hash_value_addr + %add_const($i) + %mload_kernel_general + // stack: h_i, ... 
+ %blake_internal_state_addr + %add_const($i) + %mload_kernel_general + // stack: v_i, h_i, ... + %blake_internal_state_addr + %add_const($i) + %add_const(8) + %mload_kernel_general + // stack: v_(i+8), v_i, h_i, ... + XOR + XOR + // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm index 916557f2..2ce54672 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -14,3 +14,48 @@ OR // stack: (hi << 32) | lo %endmacro + +%macro invert_bytes_blake_word + // stack: word, ... + DUP1 + %and_const(0xff) + %shl_const(56) + SWAP1 + // stack: word, first_byte, ... + DUP1 + %shr_const(8) + %and_const(0xff) + %shl_const(48) + SWAP1 + // stack: word, second_byte, first_byte, ... + DUP1 + %shr_const(16) + %and_const(0xff) + %shl_const(40) + SWAP1 + DUP1 + %shr_const(24) + %and_const(0xff) + %shl_const(32) + SWAP1 + DUP1 + %shr_const(32) + %and_const(0xff) + %shl_const(24) + SWAP1 + DUP1 + %shr_const(40) + %and_const(0xff) + %shl_const(16) + SWAP1 + DUP1 + %shr_const(48) + %and_const(0xff) + %shl_const(8) + SWAP1 + %shr_const(56) + %and_const(0xff) + %rep 7 + OR + %endrep +%endmacro diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index d0052415..da2019b9 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -167,6 +167,14 @@ impl<'a> Interpreter<'a> { .collect() } + pub(crate) fn get_kernel_general_memory(&self) -> Vec { + self.memory.context_memory[0].segments[Segment::KernelGeneral as usize] + .content + .iter() + .cloned() + .collect() + } + pub(crate) fn set_rlp_memory(&mut self, rlp: Vec) { self.generation_state.memory.contexts[0].segments[Segment::RlpRaw as usize].content = rlp.into_iter().map(U256::from).collect(); @@ -747,6 +755,7 @@ fn find_jumpdests(code: &[u8]) -> Vec { } offset += 1; } + dbg!(res.clone()); res } diff --git 
a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index de06f760..097c15fa 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -34,13 +34,48 @@ fn blake2b(input: Vec) -> U512 { fn make_random_input() -> Vec { // Generate a random message, between 0 and 9999 bytes. let mut rng = thread_rng(); - let num_bytes = rng.gen_range(0..25); + let num_bytes = rng.gen_range(0..10000); (0..num_bytes).map(|_| rng.gen()).collect() } fn make_custom_input() -> Vec { // Hardcode a custom message vec![ + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 201, 77, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, 201, 77, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 
143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, 201, 77, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, + 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, + 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, + 150, 31, 157, 229, 126, 206, 105, 37, 17, 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, 150, 31, 157, 229, 126, 206, 105, 37, 17, From c0dbeb42d95ed3cc2722bc5f157182b6f4e8e0b1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 30 Nov 2022 17:47:33 -0800 Subject: [PATCH 27/44] cleanup --- evm/src/cpu/kernel/interpreter.rs | 1 - evm/src/cpu/kernel/tests/hash.rs | 35 ------------------------------- 2 files changed, 36 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index da2019b9..248e1cfc 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -755,7 +755,6 @@ fn find_jumpdests(code: &[u8]) -> Vec { } offset += 1; } - dbg!(res.clone()); res } diff --git 
a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 097c15fa..c07eb042 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -41,41 +41,6 @@ fn make_random_input() -> Vec { fn make_custom_input() -> Vec { // Hardcode a custom message vec![ - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 201, 77, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, 201, 77, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 
215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, 201, 77, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, - 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, - 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, - 150, 31, 157, 229, 126, 206, 105, 37, 17, 86, 124, 206, 245, 74, 57, 250, 43, 60, 30, 254, 43, 143, 144, 242, 215, 13, 103, 237, 61, 90, 105, 123, 250, 189, 181, 110, 192, 227, 57, 145, 46, 221, 238, 7, 181, 146, 111, 209, 150, 31, 157, 229, 126, 206, 105, 37, 17, From 90726a58661231672eac9d2c8797cdcdf5c040b8 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 30 Nov 2022 17:47:41 -0800 Subject: [PATCH 28/44] fmt --- evm/src/cpu/kernel/tests/hash.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index c07eb042..91ec6d78 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -56,7 +56,10 @@ fn make_input_stack(message: Vec) -> Vec { initial_stack } -fn test_hash_256(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U256) -> Result<()> { +fn test_hash_256( + hash_fn_label: &str, + standard_implementation: &dyn Fn(Vec) -> U256, +) -> Result<()> { // Make the input. 
let message_random = make_random_input(); let message_custom = make_custom_input(); @@ -89,12 +92,18 @@ fn test_hash_256(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) fn combine_u256s(hi: U256, lo: U256) -> U512 { let mut result = U512::from(hi); - result *= U512::from_big_endian(&[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]); + result *= U512::from_big_endian(&[ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + ]); result += U512::from(lo); result } -fn test_hash_512(hash_fn_label: &str, standard_implementation: &dyn Fn(Vec) -> U512) -> Result<()> { +fn test_hash_512( + hash_fn_label: &str, + standard_implementation: &dyn Fn(Vec) -> U512, +) -> Result<()> { // Make the input. let message_random = make_random_input(); let message_custom = make_custom_input(); From 7a5a899b45c59867bf962da684f06d36247e308b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 30 Nov 2022 17:48:16 -0800 Subject: [PATCH 29/44] clippy --- evm/src/cpu/kernel/interpreter.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 248e1cfc..a0053108 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -169,10 +169,7 @@ impl<'a> Interpreter<'a> { pub(crate) fn get_kernel_general_memory(&self) -> Vec { self.memory.context_memory[0].segments[Segment::KernelGeneral as usize] - .content - .iter() - .cloned() - .collect() + .content.to_vec() } pub(crate) fn set_rlp_memory(&mut self, rlp: Vec) { From 7663848bbff228b04ef49af1165d3243f6cf2c52 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 30 Nov 2022 17:54:54 -0800 Subject: [PATCH 30/44] cleaned up hash tests --- evm/src/cpu/kernel/interpreter.rs | 3 +- evm/src/cpu/kernel/tests/hash.rs | 74 +++++++++++++++---------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs 
b/evm/src/cpu/kernel/interpreter.rs index a0053108..63c07993 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -169,7 +169,8 @@ impl<'a> Interpreter<'a> { pub(crate) fn get_kernel_general_memory(&self) -> Vec { self.memory.context_memory[0].segments[Segment::KernelGeneral as usize] - .content.to_vec() + .content + .to_vec() } pub(crate) fn set_rlp_memory(&mut self, rlp: Vec) { diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 91ec6d78..edab63ef 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -56,40 +56,6 @@ fn make_input_stack(message: Vec) -> Vec { initial_stack } -fn test_hash_256( - hash_fn_label: &str, - standard_implementation: &dyn Fn(Vec) -> U256, -) -> Result<()> { - // Make the input. - let message_random = make_random_input(); - let message_custom = make_custom_input(); - - // Hash the message using a standard implementation. - let expected_random = standard_implementation(message_random.clone()); - let expected_custom = standard_implementation(message_custom.clone()); - - // Load the message onto the stack. - let initial_stack_random = make_input_stack(message_random); - let initial_stack_custom = make_input_stack(message_custom); - - // Make the kernel. - let kernel_function = KERNEL.global_labels[hash_fn_label]; - - // Run the kernel code. - let result_random = run_interpreter(kernel_function, initial_stack_random)?; - let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; - - // Extract the final output. - let actual_random = result_random.stack()[0]; - let actual_custom = result_custom.stack()[0]; - - // Check that the result is correct. 
- assert_eq!(expected_random, actual_random); - assert_eq!(expected_custom, actual_custom); - - Ok(()) -} - fn combine_u256s(hi: U256, lo: U256) -> U512 { let mut result = U512::from(hi); result *= U512::from_big_endian(&[ @@ -100,10 +66,10 @@ fn combine_u256s(hi: U256, lo: U256) -> U512 { result } -fn test_hash_512( +fn prepare_test( hash_fn_label: &str, - standard_implementation: &dyn Fn(Vec) -> U512, -) -> Result<()> { + standard_implementation: &dyn Fn(Vec) -> T, +) -> Result<(T, T, Vec, Vec)> { // Make the input. let message_random = make_random_input(); let message_custom = make_custom_input(); @@ -123,8 +89,38 @@ fn test_hash_512( let result_random = run_interpreter(kernel_function, initial_stack_random)?; let result_custom = run_interpreter(kernel_function, initial_stack_custom)?; - let random_stack = result_random.stack(); - let custom_stack = result_custom.stack(); + Ok(( + expected_random, + expected_custom, + result_random.stack().to_vec(), + result_custom.stack().to_vec(), + )) +} + +fn test_hash_256( + hash_fn_label: &str, + standard_implementation: &dyn Fn(Vec) -> U256, +) -> Result<()> { + let (expected_random, expected_custom, random_stack, custom_stack) = + prepare_test(hash_fn_label, standard_implementation).unwrap(); + + // Extract the final output. + let actual_random = random_stack[0]; + let actual_custom = custom_stack[0]; + + // Check that the result is correct. + assert_eq!(expected_random, actual_random); + assert_eq!(expected_custom, actual_custom); + + Ok(()) +} + +fn test_hash_512( + hash_fn_label: &str, + standard_implementation: &dyn Fn(Vec) -> U512, +) -> Result<()> { + let (expected_random, expected_custom, random_stack, custom_stack) = + prepare_test(hash_fn_label, standard_implementation).unwrap(); // Extract the final output. 
let actual_random = combine_u256s(random_stack[0], random_stack[1]); From 2166a407ed60cfd08c1270b1d1032e83c4a3dae7 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Sat, 3 Dec 2022 17:29:22 -0800 Subject: [PATCH 31/44] minor memory access refactor --- evm/src/cpu/kernel/interpreter.rs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 63c07993..419b6bed 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -159,23 +159,35 @@ impl<'a> Interpreter<'a> { &mut self.generation_state.memory.contexts[0].segments[Segment::TrieData as usize].content } - pub(crate) fn get_rlp_memory(&self) -> Vec { - self.generation_state.memory.contexts[0].segments[Segment::RlpRaw as usize] + pub(crate) fn get_memory_segment(&self, segment: Segment) -> Vec { + self.generation_state.memory.contexts[0].segments[segment as usize] + .content + .to_vec() + } + + pub(crate) fn get_memory_segment_bytes(&self, segment: Segment) -> Vec { + self.generation_state.memory.contexts[0].segments[segment as usize] .content .iter() .map(|x| x.as_u32() as u8) .collect() } - pub(crate) fn get_kernel_general_memory(&self) -> Vec { - self.memory.context_memory[0].segments[Segment::KernelGeneral as usize] - .content - .to_vec() + pub(crate) fn get_rlp_memory(&self) -> Vec { + self.get_memory_segment_bytes(Segment::RlpRaw) + } + + pub(crate) fn set_memory_segment(&mut self, segment: Segment, memory: Vec) { + self.generation_state.memory.contexts[0].segments[segment as usize].content = memory; + } + + pub(crate) fn set_memory_segment_bytes(&mut self, segment: Segment, memory: Vec) { + self.generation_state.memory.contexts[0].segments[segment as usize].content = + memory.into_iter().map(U256::from).collect(); } pub(crate) fn set_rlp_memory(&mut self, rlp: Vec) { - self.generation_state.memory.contexts[0].segments[Segment::RlpRaw as usize].content = - 
rlp.into_iter().map(U256::from).collect(); + self.set_memory_segment_bytes(Segment::RlpRaw, rlp) } pub(crate) fn set_code(&mut self, context: usize, code: Vec) { From d30a95f7d5d855ec1bb44dfec76ce4db1b494421 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 1 Dec 2022 20:09:11 -0800 Subject: [PATCH 32/44] fixes --- .../cpu/kernel/asm/hash/blake/compression.asm | 9 ++- evm/src/cpu/kernel/asm/hash/blake/util.asm | 72 ++++++++----------- 2 files changed, 33 insertions(+), 48 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake/compression.asm index 01ac0aa5..eeb55458 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/compression.asm @@ -3,7 +3,7 @@ global blake_compression: PUSH 0 // stack: cur_block = 0, retdest %blake_initial_hash_value -blake_compression_loop: +compression_loop: // stack: h_0, ..., h_7, cur_block, retdest %blake_hash_value_addr // stack: addr, h_0, ..., h_7, cur_block, retdest @@ -211,9 +211,9 @@ blake_compression_loop: // stack: num_blocks, cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest EQ // stack: last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %jumpi(blake_compression_end) - %jump(blake_compression_loop) -blake_compression_end: + %jumpi(compression_end) + %jump(compression_loop) +compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest PUSH 0 // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest @@ -260,5 +260,4 @@ blake_compression_end: // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second - STOP JUMP \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake/util.asm index 
2ce54672..49b78c1b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake/util.asm @@ -15,47 +15,33 @@ // stack: (hi << 32) | lo %endmacro -%macro invert_bytes_blake_word - // stack: word, ... - DUP1 - %and_const(0xff) - %shl_const(56) - SWAP1 - // stack: word, first_byte, ... - DUP1 - %shr_const(8) - %and_const(0xff) - %shl_const(48) - SWAP1 - // stack: word, second_byte, first_byte, ... - DUP1 - %shr_const(16) - %and_const(0xff) - %shl_const(40) - SWAP1 - DUP1 - %shr_const(24) - %and_const(0xff) - %shl_const(32) - SWAP1 - DUP1 - %shr_const(32) - %and_const(0xff) - %shl_const(24) - SWAP1 - DUP1 - %shr_const(40) - %and_const(0xff) - %shl_const(16) - SWAP1 - DUP1 - %shr_const(48) - %and_const(0xff) - %shl_const(8) - SWAP1 - %shr_const(56) - %and_const(0xff) - %rep 7 - OR - %endrep +// Invert the order of the four bytes in a word. +%macro invert_four_byte_word + // stack: word + %mul_const(0x1000000010000000100) + %and_const(0xff0000ff00ff00000000ff0000) + %mod_const(0xffffffffffff) + // stack: word_inverted +%endmacro + +// Invert the order of the eight bytes in a Blake word. 
+%macro invert_bytes_blake_word + // stack: word + DUP1 + // stack: word, word + %and_const(0xffffffff) + // stack: word_lo, word + SWAP1 + // stack: word, word_lo + %shr_const(32) + // stack: word_hi, word_lo + %invert_four_byte_word + // stack: word_hi_inverted, word_lo + SWAP1 + // stack: word_lo, word_hi_inverted + %invert_four_byte_word + // stack: word_lo_inverted, word_hi_inverted + %shl_const(32) + OR + // stack: word_inverted %endmacro From 778aec627bcb15640d4ef6cbd321fc9741ddba99 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 1 Dec 2022 21:42:58 -0800 Subject: [PATCH 33/44] rename blake -> blake2b --- evm/src/cpu/kernel/aggregator.rs | 18 ++--- .../asm/hash/{blake => blake2b}/addresses.asm | 28 ++++---- .../hash/{blake => blake2b}/compression.asm | 68 +++++++++---------- .../hash/{blake => blake2b}/g_functions.asm | 14 ++-- .../asm/hash/{blake => blake2b}/hash.asm | 8 +-- .../kernel/asm/hash/{blake => blake2b}/iv.asm | 20 +++--- .../asm/hash/{blake => blake2b}/ops.asm | 0 .../hash/{blake => blake2b}/permutations.asm | 2 +- .../asm/hash/{blake => blake2b}/store.asm | 8 +-- .../asm/hash/{blake => blake2b}/util.asm | 4 +- evm/src/cpu/kernel/tests/hash.rs | 4 +- 11 files changed, 87 insertions(+), 87 deletions(-) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/addresses.asm (66%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/compression.asm (86%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/g_functions.asm (95%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/hash.asm (69%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/iv.asm (70%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/ops.asm (100%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/permutations.asm (98%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/store.asm (94%) rename evm/src/cpu/kernel/asm/hash/{blake => blake2b}/util.asm (94%) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 
8b9cef7c..9c5b944f 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,15 +39,15 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/fields/fp6_mul.asm"), include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), - include_str!("asm/hash/blake/addresses.asm"), - include_str!("asm/hash/blake/compression.asm"), - include_str!("asm/hash/blake/g_functions.asm"), - include_str!("asm/hash/blake/hash.asm"), - include_str!("asm/hash/blake/iv.asm"), - include_str!("asm/hash/blake/ops.asm"), - include_str!("asm/hash/blake/permutations.asm"), - include_str!("asm/hash/blake/store.asm"), - include_str!("asm/hash/blake/util.asm"), + include_str!("asm/hash/blake2b/addresses.asm"), + include_str!("asm/hash/blake2b/compression.asm"), + include_str!("asm/hash/blake2b/g_functions.asm"), + include_str!("asm/hash/blake2b/hash.asm"), + include_str!("asm/hash/blake2b/iv.asm"), + include_str!("asm/hash/blake2b/ops.asm"), + include_str!("asm/hash/blake2b/permutations.asm"), + include_str!("asm/hash/blake2b/store.asm"), + include_str!("asm/hash/blake2b/util.asm"), include_str!("asm/hash/ripemd/box.asm"), include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm similarity index 66% rename from evm/src/cpu/kernel/asm/hash/blake/addresses.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm index 751ea4e9..8372639c 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/addresses.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm @@ -1,21 +1,21 @@ // Load the initial hash value (the IV, but with params XOR'd into the first word). 
-%macro blake_initial_hash_value - %blake_iv_i(7) - %blake_iv_i(6) - %blake_iv_i(5) - %blake_iv_i(4) - %blake_iv_i(3) - %blake_iv_i(2) - %blake_iv_i(1) +%macro blake2b_initial_hash_value + %blake2b_iv_i(7) + %blake2b_iv_i(6) + %blake2b_iv_i(5) + %blake2b_iv_i(4) + %blake2b_iv_i(3) + %blake2b_iv_i(2) + %blake2b_iv_i(1) // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake_iv_i(0) + %blake2b_iv_i(0) XOR // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 %endmacro // Address where the working version of the hash value is stored. -%macro blake_hash_value_addr +%macro blake2b_hash_value_addr PUSH 0 // stack: 0 %mload_kernel_general @@ -26,13 +26,13 @@ %endmacro // Address where the working version of the compression internal state is stored. -%macro blake_internal_state_addr - %blake_hash_value_addr +%macro blake2b_internal_state_addr + %blake2b_hash_value_addr %add_const(8) %endmacro // Address where the current message block is stored. 
-%macro blake_message_addr - %blake_internal_state_addr +%macro blake2b_message_addr + %blake2b_internal_state_addr %add_const(16) %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm similarity index 86% rename from evm/src/cpu/kernel/asm/hash/blake/compression.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index eeb55458..39fd08cf 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -1,11 +1,11 @@ -global blake_compression: +global blake2b_compression: // stack: retdest PUSH 0 // stack: cur_block = 0, retdest - %blake_initial_hash_value + %blake2b_initial_hash_value compression_loop: // stack: h_0, ..., h_7, cur_block, retdest - %blake_hash_value_addr + %blake2b_hash_value_addr // stack: addr, h_0, ..., h_7, cur_block, retdest %rep 8 SWAP1 @@ -54,13 +54,13 @@ compression_loop: %mul_const(128) %add_const(2) // stack: cur_block_start_byte, t, cur_block, is_last_block, retdest - %blake_message_addr + %blake2b_message_addr // stack: message_addr, cur_block_start_byte, t, cur_block, is_last_block, retdest %rep 16 // stack: cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_block_byte, cur_message_addr, cur_block_byte, ... - %mload_blake_word_from_bytes + %mload_blake2b_word_from_bytes // stack: m_i, cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_message_addr, m_i, cur_message_addr, cur_block_byte, ... @@ -85,7 +85,7 @@ compression_loop: // stack: is_last_block, t, cur_block, retdest %mul_const(0xFFFFFFFFFFFFFFFF) // stack: invert_if_last_block, t, cur_block, retdest - %blake_hash_value_addr + %blake2b_hash_value_addr %add_const(7) %rep 8 // stack: addr, ... 
@@ -100,7 +100,7 @@ compression_loop: // stack: addr, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest POP // stack: h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest - %blake_internal_state_addr + %blake2b_internal_state_addr // stack: start, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest // First eight words of compression state: current state h_0, ..., h_7. %rep 8 @@ -117,7 +117,7 @@ compression_loop: DUP2 DUP2 // stack: i, loc, i, loc,... - %blake_iv + %blake2b_iv // stack: IV_i, loc, i, loc,... SWAP1 // stack: loc, IV_i, i, loc,... @@ -149,7 +149,7 @@ compression_loop: // stack: i, loc, val, next_val,... %stack (i, loc, val) -> (i, val, loc, i, loc) // stack: i, val, loc, i, loc, next_val,... - %blake_iv + %blake2b_iv // stack: IV_i, val, loc, i, loc, next_val,... XOR // stack: val ^ IV_i, loc, i, loc, next_val,... @@ -167,20 +167,20 @@ compression_loop: POP POP // stack: cur_block, retdest - %blake_internal_state_addr + %blake2b_internal_state_addr // stack: start, cur_block, retdest PUSH 0 // stack: round=0, start, cur_block, retdest %rep 12 // stack: round, start, cur_block, retdest - %call_blake_g_function(0, 4, 8, 12, 0, 1) - %call_blake_g_function(1, 5, 9, 13, 2, 3) - %call_blake_g_function(2, 6, 10, 14, 4, 5) - %call_blake_g_function(3, 7, 11, 15, 6, 7) - %call_blake_g_function(0, 5, 10, 15, 8, 9) - %call_blake_g_function(1, 6, 11, 12, 10, 11) - %call_blake_g_function(2, 7, 8, 13, 12, 13) - %call_blake_g_function(3, 4, 9, 14, 14, 15) + %call_blake2b_g_function(0, 4, 8, 12, 0, 1) + %call_blake2b_g_function(1, 5, 9, 13, 2, 3) + %call_blake2b_g_function(2, 6, 10, 14, 4, 5) + %call_blake2b_g_function(3, 7, 11, 15, 6, 7) + %call_blake2b_g_function(0, 5, 10, 15, 8, 9) + %call_blake2b_g_function(1, 6, 11, 12, 10, 11) + %call_blake2b_g_function(2, 7, 8, 13, 12, 13) + %call_blake2b_g_function(3, 4, 9, 14, 14, 15) // stack: round, start, cur_block, retdest %increment // stack: round + 1, start, cur_block, retdest @@ -189,14 +189,14 
@@ compression_loop: POP POP // stack: cur_block, retdest - %blake_generate_new_hash_value(7) - %blake_generate_new_hash_value(6) - %blake_generate_new_hash_value(5) - %blake_generate_new_hash_value(4) - %blake_generate_new_hash_value(3) - %blake_generate_new_hash_value(2) - %blake_generate_new_hash_value(1) - %blake_generate_new_hash_value(0) + %blake2b_generate_new_hash_value(7) + %blake2b_generate_new_hash_value(6) + %blake2b_generate_new_hash_value(5) + %blake2b_generate_new_hash_value(4) + %blake2b_generate_new_hash_value(3) + %blake2b_generate_new_hash_value(2) + %blake2b_generate_new_hash_value(1) + %blake2b_generate_new_hash_value(0) // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest DUP9 // stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest @@ -218,28 +218,28 @@ compression_end: PUSH 0 // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP1 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP1 SWAP2 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP2 SWAP3 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP3 SWAP4 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP4 SWAP5 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP5 SWAP6 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP6 SWAP7 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP7 SWAP8 - %invert_bytes_blake_word + %invert_bytes_blake2b_word SWAP8 POP %shl_const(64) diff --git a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm similarity index 95% rename from evm/src/cpu/kernel/asm/hash/blake/g_functions.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 243efa14..23844f5d 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -1,4 +1,4 @@ -%macro blake_g_function +%macro blake2b_g_function // 
Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. @@ -94,23 +94,23 @@ %mstore_kernel_general %endmacro -%macro call_blake_g_function(a, b, c, d, x_idx, y_idx) +%macro call_blake2b_g_function(a, b, c, d, x_idx, y_idx) // stack: round, start PUSH $y_idx DUP2 // stack: round, y_idx, round, start - %blake_permutation + %blake2b_permutation // stack: s[y_idx], round, start - %blake_message_addr + %blake2b_message_addr ADD %mload_kernel_general // stack: m[s[y_idx]], round, start PUSH $x_idx DUP3 // stack: round, 2, m[s[y_idx]], round, start - %blake_permutation + %blake2b_permutation // stack: s[x_idx], m[s[y_idx]], round, start - %blake_message_addr + %blake2b_message_addr ADD %mload_kernel_general // stack: m[s[x_idx]], m[s[y_idx]], round, start @@ -121,6 +121,6 @@ PUSH $b PUSH $a // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start - %blake_g_function + %blake2b_g_function // stack: round, start %endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm similarity index 69% rename from evm/src/cpu/kernel/asm/hash/blake/hash.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/hash.asm index 8e3499f0..712a97c0 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm @@ -1,13 +1,13 @@ -%macro blake_generate_new_hash_value(i) - %blake_hash_value_addr +%macro blake2b_generate_new_hash_value(i) + %blake2b_hash_value_addr %add_const($i) %mload_kernel_general // stack: h_i, ... - %blake_internal_state_addr + %blake2b_internal_state_addr %add_const($i) %mload_kernel_general // stack: v_i, h_i, ... 
- %blake_internal_state_addr + %blake2b_internal_state_addr %add_const($i) %add_const(8) %mload_kernel_general diff --git a/evm/src/cpu/kernel/asm/hash/blake/iv.asm b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm similarity index 70% rename from evm/src/cpu/kernel/asm/hash/blake/iv.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/iv.asm index e94f24b7..174afd33 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm @@ -1,4 +1,4 @@ -global blake_iv_const: +global blake2b_iv_const: // IV constants (big-endian) // IV_0 @@ -33,19 +33,19 @@ global blake_iv_const: BYTES 91, 224, 205, 25 BYTES 19, 126, 33, 121 -%macro blake_iv +%macro blake2b_iv // stack: i, ... - PUSH blake_iv_const - // stack: blake_iv_const, i, ... + PUSH blake2b_iv_const + // stack: blake2b_iv_const, i, ... SWAP1 - // stack: i, blake_iv_const, ... + // stack: i, blake2b_iv_const, ... %mul_const(8) ADD - // stack: blake_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i, ... DUP1 - // stack: blake_iv_const + 2 * i, blake_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i, blake2b_iv_const + 2 * i, ... %add_const(4) - // stack: blake_iv_const + 2 * i + 1, blake_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i + 1, blake2b_iv_const + 2 * i, ... %mload_kernel_code_u32 SWAP1 %mload_kernel_code_u32 @@ -56,7 +56,7 @@ global blake_iv_const: // stack: IV_i, ... 
%endmacro -%macro blake_iv_i(i) +%macro blake2b_iv_i(i) PUSH $i - %blake_iv + %blake2b_iv %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake/ops.asm b/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm similarity index 100% rename from evm/src/cpu/kernel/asm/hash/blake/ops.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/ops.asm diff --git a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm similarity index 98% rename from evm/src/cpu/kernel/asm/hash/blake/permutations.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index 452bb100..c5ee9ba4 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -178,7 +178,7 @@ global permutation_9_constants: BYTES 13 BYTES 0 -%macro blake_permutation +%macro blake2b_permutation // stack: round, i PUSH permutation_0_constants // stack: permutation_0_constants, round, i diff --git a/evm/src/cpu/kernel/asm/hash/blake/store.asm b/evm/src/cpu/kernel/asm/hash/blake2b/store.asm similarity index 94% rename from evm/src/cpu/kernel/asm/hash/blake/store.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/store.asm index 448a854b..0b2a9a7a 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/store.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/store.asm @@ -1,7 +1,7 @@ -global blake: - %jump(blake_store) +global blake2b: + %jump(blake2b_store) -global blake_store: +global blake2b_store: // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest DUP1 // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest @@ -42,4 +42,4 @@ store_end: // stack: addr, counter, retdest %pop2 // stack: retdest - %jump(blake_compression) + %jump(blake2b_compression) diff --git a/evm/src/cpu/kernel/asm/hash/blake/util.asm b/evm/src/cpu/kernel/asm/hash/blake2b/util.asm similarity index 94% rename from evm/src/cpu/kernel/asm/hash/blake/util.asm rename to evm/src/cpu/kernel/asm/hash/blake2b/util.asm index 
49b78c1b..7fdee98d 100644 --- a/evm/src/cpu/kernel/asm/hash/blake/util.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/util.asm @@ -1,5 +1,5 @@ // Load a 64-bit word from kernel general memory. -%macro mload_blake_word_from_bytes +%macro mload_blake2b_word_from_bytes // stack: offset DUP1 %mload_kernel_general_u32_LE @@ -25,7 +25,7 @@ %endmacro // Invert the order of the eight bytes in a Blake word. -%macro invert_bytes_blake_word +%macro invert_bytes_blake2b_word // stack: word DUP1 // stack: word, word diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index edab63ef..74cf4cd0 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -144,6 +144,6 @@ fn test_ripemd() -> Result<()> { } #[test] -fn test_blake() -> Result<()> { - test_hash_512("blake", &blake2b) +fn test_blake2b() -> Result<()> { + test_hash_512("blake2b", &blake2b) } From 50ffb9072a1601e3d6d7b481a699f4b5a25a14a4 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 1 Dec 2022 21:49:06 -0800 Subject: [PATCH 34/44] documentation --- .../kernel/asm/hash/blake2b/compression.asm | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 39fd08cf..8c428589 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -5,6 +5,8 @@ global blake2b_compression: %blake2b_initial_hash_value compression_loop: // stack: h_0, ..., h_7, cur_block, retdest + + // Store the hash values. 
%blake2b_hash_value_addr // stack: addr, h_0, ..., h_7, cur_block, retdest %rep 8 @@ -13,6 +15,7 @@ compression_loop: %mstore_kernel_general %increment %endrep + // stack: addr, cur_block, retdest POP // stack: cur_block, retdest @@ -30,6 +33,8 @@ compression_loop: PUSH 1 %mload_kernel_general // stack: num_bytes, cur_block, is_last_block, retdest + + // Calculate t counter value. DUP3 // stack: is_last_block, num_bytes, cur_block, is_last_block, retdest MUL @@ -54,6 +59,8 @@ compression_loop: %mul_const(128) %add_const(2) // stack: cur_block_start_byte, t, cur_block, is_last_block, retdest + + // Copy the message from the input space to the message working space. %blake2b_message_addr // stack: message_addr, cur_block_start_byte, t, cur_block, is_last_block, retdest %rep 16 @@ -100,9 +107,11 @@ compression_loop: // stack: addr, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest POP // stack: h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest + + // Store the initial 16 values of the internal state. %blake2b_internal_state_addr // stack: start, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest - // First eight words of compression state: current state h_0, ..., h_7. + // First eight words of internal state: current state h_0, ..., h_7. %rep 8 SWAP1 DUP2 @@ -171,6 +180,8 @@ compression_loop: // stack: start, cur_block, retdest PUSH 0 // stack: round=0, start, cur_block, retdest + + // Run 12 rounds of G functions. %rep 12 // stack: round, start, cur_block, retdest %call_blake2b_g_function(0, 4, 8, 12, 0, 1) @@ -188,6 +199,8 @@ compression_loop: // stack: 12, start, cur_block, retdest POP POP + + // Finalize hash value. 
// stack: cur_block, retdest %blake2b_generate_new_hash_value(7) %blake2b_generate_new_hash_value(6) @@ -217,6 +230,8 @@ compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest PUSH 0 // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + + // Invert the bytes of each hash value. SWAP1 %invert_bytes_blake2b_word SWAP1 @@ -242,6 +257,8 @@ compression_end: %invert_bytes_blake2b_word SWAP8 POP + + // Combine hash values. %shl_const(64) OR %shl_const(64) From fc14475541dad3489101153611d90a3d89f73834 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 2 Dec 2022 08:10:18 -0800 Subject: [PATCH 35/44] documentation --- evm/src/cpu/kernel/asm/hash/blake2b/compression.asm | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 8c428589..c821da19 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -111,7 +111,8 @@ compression_loop: // Store the initial 16 values of the internal state. %blake2b_internal_state_addr // stack: start, h_0, ..., h_7, invert_if_last_block, t, cur_block, retdest - // First eight words of internal state: current state h_0, ..., h_7. + + // First eight words of the internal state: current hash value h_0, ..., h_7. %rep 8 SWAP1 DUP2 @@ -119,6 +120,8 @@ compression_loop: %increment %endrep // stack: start + 8, invert_if_last_block, t, cur_block, retdest + + // Next four values of the internal state: first four IV values. 
PUSH 0 // stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest %rep 4 @@ -153,7 +156,9 @@ compression_loop: // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest - // XOR the values (t % 2**64, t >> 64, invert_if, 0) into the last four IV values. + + // Last four values of the internal state: last four IV values, XOR'd with + // the values (t % 2**64, t >> 64, invert_if, 0). %rep 4 // stack: i, loc, val, next_val,... %stack (i, loc, val) -> (i, val, loc, i, loc) @@ -277,4 +282,4 @@ compression_end: // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second - JUMP \ No newline at end of file + JUMP From 779c46c7a37f48a6d115c7cf9baa9991beed7cdf Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 2 Dec 2022 08:46:46 -0800 Subject: [PATCH 36/44] clippy fix --- evm/src/cpu/kernel/constants/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index 2125fa18..24c8ae6d 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -22,9 +22,10 @@ pub fn evm_constants() -> HashMap { let hex_constants = MISC_CONSTANTS .iter() .chain(EC_CONSTANTS.iter()) - .chain(HASH_CONSTANTS.iter()); + .chain(HASH_CONSTANTS.iter()) + .cloned(); for (name, value) in hex_constants { - c.insert(name.clone().into(), U256::from_big_endian(value)); + c.insert(name.into(), U256::from_big_endian(&value)); } for (name, value) in GAS_CONSTANTS { From f6af5240bd926d45053863ecf44c7fdff80cf189 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Sat, 3 Dec 2022 17:29:47 -0800 Subject: [PATCH 37/44] another clippy fix --- 
evm/src/cpu/kernel/interpreter.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 419b6bed..b1e84237 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -159,12 +159,6 @@ impl<'a> Interpreter<'a> { &mut self.generation_state.memory.contexts[0].segments[Segment::TrieData as usize].content } - pub(crate) fn get_memory_segment(&self, segment: Segment) -> Vec { - self.generation_state.memory.contexts[0].segments[segment as usize] - .content - .to_vec() - } - pub(crate) fn get_memory_segment_bytes(&self, segment: Segment) -> Vec { self.generation_state.memory.contexts[0].segments[segment as usize] .content @@ -177,10 +171,6 @@ impl<'a> Interpreter<'a> { self.get_memory_segment_bytes(Segment::RlpRaw) } - pub(crate) fn set_memory_segment(&mut self, segment: Segment, memory: Vec) { - self.generation_state.memory.contexts[0].segments[segment as usize].content = memory; - } - pub(crate) fn set_memory_segment_bytes(&mut self, segment: Segment, memory: Vec) { self.generation_state.memory.contexts[0].segments[segment as usize].content = memory.into_iter().map(U256::from).collect(); From 2e62ac1b63d1b047e1032ea1c8d9779b16b78ae4 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Dec 2022 10:10:07 -0800 Subject: [PATCH 38/44] cleanup --- evm/src/cpu/kernel/aggregator.rs | 8 -------- evm/src/cpu/kernel/constants/mod.rs | 14 +++++--------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 9c5b944f..a7c01cec 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -84,14 +84,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/mpt/storage/storage_read.asm"), include_str!("asm/mpt/storage/storage_write.asm"), include_str!("asm/mpt/util.asm"), - include_str!("asm/ripemd/box.asm"), - 
include_str!("asm/ripemd/compression.asm"), - include_str!("asm/ripemd/constants.asm"), - include_str!("asm/ripemd/functions.asm"), - include_str!("asm/ripemd/main.asm"), - include_str!("asm/ripemd/memory.asm"), - include_str!("asm/ripemd/update.asm"), - include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/decode.asm"), include_str!("asm/rlp/encode.asm"), include_str!("asm/rlp/encode_rlp_string.asm"), diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index 24c8ae6d..ef97ff5a 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -19,9 +19,8 @@ pub(crate) mod txn_fields; pub fn evm_constants() -> HashMap { let mut c = HashMap::new(); - let hex_constants = MISC_CONSTANTS + let hex_constants = EC_CONSTANTS .iter() - .chain(EC_CONSTANTS.iter()) .chain(HASH_CONSTANTS.iter()) .cloned(); for (name, value) in hex_constants { @@ -54,15 +53,12 @@ pub fn evm_constants() -> HashMap { c } -const MISC_CONSTANTS: [(&str, [u8; 32]); 1] = [ - // 2^64 +const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [ + // Hash of an empty string: keccak(b'').hex() ( - "BLAKE_WORD_SIZE", - hex!("0000000000000000000000000000000000000000000000010000000000000000"), + "EMPTY_STRING_HASH", + hex!("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"), ), -]; - -const HASH_CONSTANTS: [(&str, [u8; 32]); 1] = [ // Hash of an empty node: keccak(rlp.encode(b'')).hex() ( "EMPTY_NODE_HASH", From 29143fe5d34defb5553355b03eff7e01b25876c8 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Fri, 9 Dec 2022 10:14:48 -0800 Subject: [PATCH 39/44] fmt --- evm/src/cpu/kernel/constants/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index ef97ff5a..c9094338 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -19,10 +19,7 @@ pub(crate) mod txn_fields; pub fn evm_constants() -> HashMap { let 
mut c = HashMap::new(); - let hex_constants = EC_CONSTANTS - .iter() - .chain(HASH_CONSTANTS.iter()) - .cloned(); + let hex_constants = EC_CONSTANTS.iter().chain(HASH_CONSTANTS.iter()).cloned(); for (name, value) in hex_constants { c.insert(name.into(), U256::from_big_endian(&value)); } From 24d6627a62a3c84a72c429fe7b15d001fd6a873a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 12 Dec 2022 16:38:40 -0800 Subject: [PATCH 40/44] addressed comments --- evm/src/cpu/kernel/aggregator.rs | 1 - .../kernel/asm/hash/blake2b/compression.asm | 53 ++--- .../kernel/asm/hash/blake2b/g_functions.asm | 2 +- evm/src/cpu/kernel/asm/hash/blake2b/ops.asm | 4 - .../kernel/asm/hash/blake2b/permutations.asm | 200 ++++-------------- evm/src/cpu/kernel/asm/hash/blake2b/util.asm | 47 ---- evm/src/cpu/kernel/asm/memory/core.asm | 30 ++- evm/src/cpu/kernel/asm/util/basic_macros.asm | 57 +++++ evm/src/cpu/kernel/tests/hash.rs | 5 +- 9 files changed, 156 insertions(+), 243 deletions(-) delete mode 100644 evm/src/cpu/kernel/asm/hash/blake2b/util.asm diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index a7c01cec..f8d2860e 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -47,7 +47,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/hash/blake2b/ops.asm"), include_str!("asm/hash/blake2b/permutations.asm"), include_str!("asm/hash/blake2b/store.asm"), - include_str!("asm/hash/blake2b/util.asm"), include_str!("asm/hash/ripemd/box.asm"), include_str!("asm/hash/ripemd/compression.asm"), include_str!("asm/hash/ripemd/constants.asm"), diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index c821da19..e7b81072 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -67,7 +67,7 @@ compression_loop: // stack: cur_message_addr, cur_block_byte, ... 
DUP2 // stack: cur_block_byte, cur_message_addr, cur_block_byte, ... - %mload_blake2b_word_from_bytes + %mload_kernel_general_u64_LE // stack: m_i, cur_message_addr, cur_block_byte, ... DUP2 // stack: cur_message_addr, m_i, cur_message_addr, cur_block_byte, ... @@ -233,35 +233,40 @@ compression_loop: %jump(compression_loop) compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - PUSH 0 - // stack: dummy=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest // Invert the bytes of each hash value. + %reverse_bytes_u64_blake + // stack: h_0'', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP1 - %invert_bytes_blake2b_word - SWAP1 - SWAP2 - %invert_bytes_blake2b_word + // stack: h_1', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_1'', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP2 + // stack: h_2', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_2'', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP3 - %invert_bytes_blake2b_word - SWAP3 - SWAP4 - %invert_bytes_blake2b_word + // stack: h_3', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_3'', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP4 + // stack: h_4', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_4'', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest SWAP5 - %invert_bytes_blake2b_word - SWAP5 - SWAP6 - %invert_bytes_blake2b_word + // stack: h_5', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_5'', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest SWAP6 + // stack: h_6', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', 
cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_6'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', cur_block + 1, retdest SWAP7 - %invert_bytes_blake2b_word - SWAP7 - SWAP8 - %invert_bytes_blake2b_word - SWAP8 - POP + // stack: h_7', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest + %reverse_bytes_u64_blake + // stack: h_7'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest + %stack (h_7, h_s: 7) -> (h_s, h_7) + // stack: h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest // Combine hash values. %shl_const(64) @@ -270,16 +275,16 @@ compression_end: OR %shl_const(64) OR - // stack: h_0' || h_1' || h_2' || h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + // stack: h_0'' || h_1'' || h_2'' || h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest %stack (first, second: 4, cur) -> (second, first) - // stack: h_4', h_5', h_6', h_7', h_0' || h_1' || h_2' || h_3', retdest + // stack: h_4'', h_5'', h_6'', h_7'', h_0'' || h_1'' || h_2'' || h_3'', retdest %shl_const(64) OR %shl_const(64) OR %shl_const(64) OR - // stack: hash_second = h_4' || h_5' || h_6' || h_7', hash_first = h_0' || h_1' || h_2' || h_3', retdest + // stack: hash_second = h_4'' || h_5'' || h_6'' || h_7'', hash_first = h_0'' || h_1'' || h_2'' || h_3'', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second JUMP diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 23844f5d..11e879fc 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -123,4 +123,4 @@ // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start %blake2b_g_function // stack: round, start -%endmacro \ No newline at end of file +%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm b/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm index 
e587abef..2b40db7f 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/ops.asm @@ -1,7 +1,3 @@ -%macro as_u64 - %and_const(0xffffffffffffffff) -%endmacro - // 64-bit right rotation %macro rotr_64(rot) // stack: value diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index c5ee9ba4..d3d2b0e4 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -1,182 +1,62 @@ global permutation_0_constants: - BYTES 0 - BYTES 1 - BYTES 2 - BYTES 3 - BYTES 4 - BYTES 5 - BYTES 6 - BYTES 7 - BYTES 8 - BYTES 9 - BYTES 10 - BYTES 11 - BYTES 12 - BYTES 13 - BYTES 14 - BYTES 15 + BYTES 0, 1, 2, 3 + BYTES 4, 5, 6, 7 + BYTES 8, 9, 10, 11 + BYTES 12, 13, 14, 15 global permutation_1_constants: - BYTES 14 - BYTES 10 - BYTES 4 - BYTES 8 - BYTES 9 - BYTES 15 - BYTES 13 - BYTES 6 - BYTES 1 - BYTES 12 - BYTES 0 - BYTES 2 - BYTES 11 - BYTES 7 - BYTES 5 - BYTES 3 + BYTES 14, 10, 4, 8 + BYTES 9, 15, 13, 6 + BYTES 1, 12, 0, 2 + BYTES 11, 7, 5, 3 global permutation_2_constants: - BYTES 11 - BYTES 8 - BYTES 12 - BYTES 0 - BYTES 5 - BYTES 2 - BYTES 15 - BYTES 13 - BYTES 10 - BYTES 14 - BYTES 3 - BYTES 6 - BYTES 7 - BYTES 1 - BYTES 9 - BYTES 4 + BYTES 11, 8, 12, 0 + BYTES 5, 2, 15, 13 + BYTES 10, 4, 3, 6 + BYTES 7, 1, 9, 4 global permutation_3_constants: - BYTES 7 - BYTES 9 - BYTES 3 - BYTES 1 - BYTES 13 - BYTES 12 - BYTES 11 - BYTES 14 - BYTES 2 - BYTES 6 - BYTES 5 - BYTES 10 - BYTES 4 - BYTES 0 - BYTES 15 - BYTES 8 + BYTES 7, 9, 3, 1 + BYTES 13, 2, 11, 14 + BYTES 2, 6, 5, 10 + BYTES 4, 0, 15, 8 global permutation_4_constants: - BYTES 9 - BYTES 0 - BYTES 5 - BYTES 7 - BYTES 2 - BYTES 4 - BYTES 10 - BYTES 15 - BYTES 14 - BYTES 1 - BYTES 11 - BYTES 12 - BYTES 6 - BYTES 8 - BYTES 3 - BYTES 13 + BYTES 9, 0, 5, 7 + BYTES 2, 4, 10, 15 + BYTES 14, 1, 11, 12 + BYTES 6, 8, 3, 13 global permutation_5_constants: - BYTES 2 - 
BYTES 12 - BYTES 6 - BYTES 10 - BYTES 0 - BYTES 11 - BYTES 8 - BYTES 3 - BYTES 4 - BYTES 13 - BYTES 7 - BYTES 5 - BYTES 15 - BYTES 14 - BYTES 1 - BYTES 9 + BYTES 2, 2, 6, 10 + BYTES 0, 1, 8, 3 + BYTES 4, 3, 7, 5 + BYTES 15, 4, 1, 9 global permutation_6_constants: - BYTES 12 - BYTES 5 - BYTES 1 - BYTES 15 - BYTES 14 - BYTES 13 - BYTES 4 - BYTES 10 - BYTES 0 - BYTES 7 - BYTES 6 - BYTES 3 - BYTES 9 - BYTES 2 - BYTES 8 - BYTES 11 + BYTES 12, 5, 1, 15 + BYTES 14, 3, 4, 10 + BYTES 0, 7, 6, 3 + BYTES 9, 2, 8, 11 global permutation_7_constants: - BYTES 13 - BYTES 11 - BYTES 7 - BYTES 14 - BYTES 12 - BYTES 1 - BYTES 3 - BYTES 9 - BYTES 5 - BYTES 0 - BYTES 15 - BYTES 4 - BYTES 8 - BYTES 6 - BYTES 2 - BYTES 10 + BYTES 13, 11, 7, 14 + BYTES 12, 1, 3, 9 + BYTES 5, 0, 15, 4 + BYTES 8, 6, 2, 10 global permutation_8_constants: - BYTES 6 - BYTES 15 - BYTES 14 - BYTES 9 - BYTES 11 - BYTES 3 - BYTES 0 - BYTES 8 - BYTES 12 - BYTES 2 - BYTES 13 - BYTES 7 - BYTES 1 - BYTES 4 - BYTES 10 - BYTES 5 + BYTES 6, 15, 14, 9 + BYTES 11, 3, 0, 8 + BYTES 12, 2, 13, 7 + BYTES 1, 4, 10, 5 global permutation_9_constants: - BYTES 10 - BYTES 2 - BYTES 8 - BYTES 4 - BYTES 7 - BYTES 6 - BYTES 1 - BYTES 5 - BYTES 15 - BYTES 11 - BYTES 9 - BYTES 14 - BYTES 3 - BYTES 12 - BYTES 13 - BYTES 0 + BYTES 10, 2, 8, 4 + BYTES 7, 6, 1, 5 + BYTES 15, 11, 9, 14 + BYTES 3, 12, 13, 0 %macro blake2b_permutation // stack: round, i diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/util.asm b/evm/src/cpu/kernel/asm/hash/blake2b/util.asm deleted file mode 100644 index 7fdee98d..00000000 --- a/evm/src/cpu/kernel/asm/hash/blake2b/util.asm +++ /dev/null @@ -1,47 +0,0 @@ -// Load a 64-bit word from kernel general memory. 
-%macro mload_blake2b_word_from_bytes - // stack: offset - DUP1 - %mload_kernel_general_u32_LE - // stack: lo, offset - SWAP1 - // stack: offset, lo - %add_const(4) - %mload_kernel_general_u32_LE - // stack: hi, lo - %shl_const(32) - // stack: hi << 32, lo - OR - // stack: (hi << 32) | lo -%endmacro - -// Invert the order of the four bytes in a word. -%macro invert_four_byte_word - // stack: word - %mul_const(0x1000000010000000100) - %and_const(0xff0000ff00ff00000000ff0000) - %mod_const(0xffffffffffff) - // stack: word_inverted -%endmacro - -// Invert the order of the eight bytes in a Blake word. -%macro invert_bytes_blake2b_word - // stack: word - DUP1 - // stack: word, word - %and_const(0xffffffff) - // stack: word_lo, word - SWAP1 - // stack: word, word_lo - %shr_const(32) - // stack: word_hi, word_lo - %invert_four_byte_word - // stack: word_hi_inverted, word_lo - SWAP1 - // stack: word_lo, word_hi_inverted - %invert_four_byte_word - // stack: word_lo_inverted, word_hi_inverted - %shl_const(32) - OR - // stack: word_inverted -%endmacro diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index f6bb99b6..a979f930 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -97,7 +97,7 @@ // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 %endmacro -// Load from the kernel a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3) +// Load from the kernel a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3). %macro mload_kernel_u32_LE(segment) // stack: offset DUP1 @@ -123,6 +123,24 @@ // stack: c0 | (c1 << 8) | (c2 << 16) | (c3 << 24) %endmacro +// Load from the kernel a little-endian u64, consisting of 8 bytes +// (c_0, c_1, c_2, c_3, c_4, c_5, c_6, c_7). 
+%macro mload_kernel_u64_LE(segment) + // stack: offset + DUP1 + %mload_kernel_u32_LE($segment) + // stack: lo, offset + SWAP1 + // stack: offset, lo + %add_const(4) + %mload_kernel_u32_LE($segment) + // stack: hi, lo + %shl_const(32) + // stack: hi << 32, lo + OR + // stack: (hi << 32) | lo +%endmacro + // Load a u256 (big-endian) from the kernel. %macro mload_kernel_u256(segment) // stack: offset @@ -292,7 +310,7 @@ // stack: value %endmacro -// Load a little-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0), +// Load a little-endian u32, consisting of 4 bytes (c_0, c_1, c_2, c_3), // from kernel general memory. %macro mload_kernel_general_u32_LE // stack: offset @@ -300,6 +318,14 @@ // stack: value %endmacro +// Load a little-endian u64, consisting of 8 bytes +// (c_0, c_1, c_2, c_3, c_4, c_5, c_6, c_7), from kernel general memory. +%macro mload_kernel_general_u64_LE + // stack: offset + %mload_kernel_u64_LE(@SEGMENT_KERNEL_GENERAL) + // stack: value +%endmacro + // Load a u256 (big-endian) from kernel code. 
%macro mload_kernel_code_u256 // stack: offset diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 6ec13835..aa6b908e 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -240,6 +240,10 @@ %and_const(0xffffffff) %endmacro +%macro as_u64 + %and_const(0xffffffffffffffff) +%endmacro + %macro not_u32 // stack: x PUSH 0xffffffff @@ -310,3 +314,56 @@ OR // stack: dcba %endmacro + +%macro reverse_bytes_u64 + // stack: word + DUP1 + // stack: word, word + %and_const(0xffffffff) + // stack: word_lo, word + SWAP1 + // stack: word, word_lo + %shr_const(32) + // stack: word_hi, word_lo + %reverse_bytes_u32 + // stack: word_hi_inverted, word_lo + SWAP1 + // stack: word_lo, word_hi_inverted + %reverse_bytes_u32 + // stack: word_lo_inverted, word_hi_inverted + %shl_const(32) + OR + // stack: word_inverted +%endmacro + + +// Invert the order of the four bytes in a word. +%macro invert_four_byte_word + // stack: word + %mul_const(0x1000000010000000100) + %and_const(0xff0000ff00ff00000000ff0000) + %mod_const(0xffffffffffff) + // stack: word_inverted +%endmacro + +// Invert the order of the eight bytes in a Blake word. 
+%macro reverse_bytes_u64_blake + // stack: word + DUP1 + // stack: word, word + %and_const(0xffffffff) + // stack: word_lo, word + SWAP1 + // stack: word, word_lo + %shr_const(32) + // stack: word_hi, word_lo + %invert_four_byte_word + // stack: word_hi_inverted, word_lo + SWAP1 + // stack: word_lo, word_hi_inverted + %invert_four_byte_word + // stack: word_lo_inverted, word_hi_inverted + %shl_const(32) + OR + // stack: word_inverted +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index 74cf4cd0..b24317e0 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -58,10 +58,7 @@ fn make_input_stack(message: Vec) -> Vec { fn combine_u256s(hi: U256, lo: U256) -> U512 { let mut result = U512::from(hi); - result *= U512::from_big_endian(&[ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, - ]); + result <<= 256; result += U512::from(lo); result } From a564d7350cffe0eb3a43b4f210413948eafcfb1f Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 12 Dec 2022 17:29:56 -0800 Subject: [PATCH 41/44] fixes and addressed comments --- .../kernel/asm/hash/blake2b/compression.asm | 16 +++++----- .../kernel/asm/hash/blake2b/permutations.asm | 14 ++++---- evm/src/cpu/kernel/asm/util/basic_macros.asm | 32 ------------------- 3 files changed, 15 insertions(+), 47 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index e7b81072..3c700eb3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -235,35 +235,35 @@ compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest // Invert the bytes of each hash value. 
- %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_0'', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP1 // stack: h_1', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_1'', h_0'', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP2 // stack: h_2', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_2'', h_0'', h_1'', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP3 // stack: h_3', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_3'', h_0'', h_1'', h_2'', h_4', h_5', h_6', h_7', cur_block + 1, retdest SWAP4 // stack: h_4', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_4'', h_0'', h_1'', h_2'', h_3'', h_5', h_6', h_7', cur_block + 1, retdest SWAP5 // stack: h_5', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_5'', h_0'', h_1'', h_2'', h_3'', h_4'', h_6', h_7', cur_block + 1, retdest SWAP6 // stack: h_6', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_6'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_7', cur_block + 1, retdest SWAP7 // stack: h_7', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest - %reverse_bytes_u64_blake + %reverse_bytes_u64 // stack: h_7'', h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', cur_block + 1, retdest %stack (h_7, h_s: 7) -> (h_s, h_7) // stack: h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index d3d2b0e4..5277e611 100644 --- 
a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -13,12 +13,12 @@ global permutation_1_constants: global permutation_2_constants: BYTES 11, 8, 12, 0 BYTES 5, 2, 15, 13 - BYTES 10, 4, 3, 6 + BYTES 10, 14, 3, 6 BYTES 7, 1, 9, 4 global permutation_3_constants: BYTES 7, 9, 3, 1 - BYTES 13, 2, 11, 14 + BYTES 13, 12, 11, 14 BYTES 2, 6, 5, 10 BYTES 4, 0, 15, 8 @@ -29,14 +29,14 @@ global permutation_4_constants: BYTES 6, 8, 3, 13 global permutation_5_constants: - BYTES 2, 2, 6, 10 - BYTES 0, 1, 8, 3 - BYTES 4, 3, 7, 5 - BYTES 15, 4, 1, 9 + BYTES 2, 12, 6, 10 + BYTES 0, 11, 8, 3 + BYTES 4, 13, 7, 5 + BYTES 15, 14, 1, 9 global permutation_6_constants: BYTES 12, 5, 1, 15 - BYTES 14, 3, 4, 10 + BYTES 14, 13, 4, 10 BYTES 0, 7, 6, 3 BYTES 9, 2, 8, 11 diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index aa6b908e..2a7473fd 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -335,35 +335,3 @@ OR // stack: word_inverted %endmacro - - -// Invert the order of the four bytes in a word. -%macro invert_four_byte_word - // stack: word - %mul_const(0x1000000010000000100) - %and_const(0xff0000ff00ff00000000ff0000) - %mod_const(0xffffffffffff) - // stack: word_inverted -%endmacro - -// Invert the order of the eight bytes in a Blake word. 
-%macro reverse_bytes_u64_blake - // stack: word - DUP1 - // stack: word, word - %and_const(0xffffffff) - // stack: word_lo, word - SWAP1 - // stack: word, word_lo - %shr_const(32) - // stack: word_hi, word_lo - %invert_four_byte_word - // stack: word_hi_inverted, word_lo - SWAP1 - // stack: word_lo, word_hi_inverted - %invert_four_byte_word - // stack: word_lo_inverted, word_hi_inverted - %shl_const(32) - OR - // stack: word_inverted -%endmacro \ No newline at end of file From 53004867b3cb5cc3680de31ed262be6c87d277ec Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Mon, 12 Dec 2022 17:36:21 -0800 Subject: [PATCH 42/44] macro --- .../cpu/kernel/asm/hash/blake2b/compression.asm | 14 ++------------ evm/src/cpu/kernel/asm/util/basic_macros.asm | 10 ++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 3c700eb3..2b590d41 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -269,21 +269,11 @@ compression_end: // stack: h_0'', h_1'', h_2'', h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest // Combine hash values. 
- %shl_const(64) - OR - %shl_const(64) - OR - %shl_const(64) - OR + %u64s_to_u256 // stack: h_0'' || h_1'' || h_2'' || h_3'', h_4'', h_5'', h_6'', h_7'', cur_block + 1, retdest %stack (first, second: 4, cur) -> (second, first) // stack: h_4'', h_5'', h_6'', h_7'', h_0'' || h_1'' || h_2'' || h_3'', retdest - %shl_const(64) - OR - %shl_const(64) - OR - %shl_const(64) - OR + %u64s_to_u256 // stack: hash_second = h_4'' || h_5'' || h_6'' || h_7'', hash_first = h_0'' || h_1'' || h_2'' || h_3'', retdest %stack (second, first, ret) -> (ret, second, first) // stack: retdest, hash_first, hash_second diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 2a7473fd..c7da8c60 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -335,3 +335,13 @@ OR // stack: word_inverted %endmacro + +// Combine four big-endian u64s into a u256. +%macro u64s_to_u256 + // stack: a, b, c, d + %rep 3 + %shl_const(64) + OR + %endrep + // stack: a || b || c || d +%endmacro From f3937e9977c8dd9de926b572422936db5d78e2b6 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 13 Dec 2022 10:29:03 -0800 Subject: [PATCH 43/44] deps fix --- evm/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/evm/Cargo.toml b/evm/Cargo.toml index 634aa563..f6494d7a 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] anyhow = "1.0.40" blake2 = "0.10.5" -env_logger = "0.9.0" +env_logger = "0.10.0" eth_trie_utils = "0.4.0" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } @@ -35,7 +35,6 @@ jemallocator = "0.5.0" [dev-dependencies] criterion = "0.4.0" -env_logger = "0.10.0" hex = "0.4.3" ripemd = "0.1.3" sha2 = "0.10.6" From 6ab65800585a699658a8a8ad7ffdaa67539d6505 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 13 Dec 2022 14:42:45 -0800 Subject: [PATCH 44/44] block_size macro --- 
evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm | 7 ++++++- evm/src/cpu/kernel/asm/hash/blake2b/compression.asm | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm index 8372639c..9d65b9ed 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm @@ -20,7 +20,7 @@ // stack: 0 %mload_kernel_general // stack: num_blocks - %mul_const(128) + %block_size %add_const(2) // stack: num_bytes+2 %endmacro @@ -36,3 +36,8 @@ %blake2b_internal_state_addr %add_const(16) %endmacro + +// Block size is 128 bytes. +%macro block_size + %mul_const(128) +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 2b590d41..a25158d9 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -42,7 +42,7 @@ compression_loop: DUP2 // stack: cur_block, is_last_block * num_bytes, cur_block, is_last_block, retdest %increment - %mul_const(128) + %block_size // stack: (cur_block + 1) * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest DUP4 // stack: is_last_block, (cur_block + 1) * 128, is_last_block * num_bytes, cur_block, is_last_block, retdest @@ -56,7 +56,7 @@ compression_loop: // stack: cur_block, t, is_last_block, retdest DUP1 // stack: cur_block, cur_block, t, is_last_block, retdest - %mul_const(128) + %block_size %add_const(2) // stack: cur_block_start_byte, t, cur_block, is_last_block, retdest