diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index c11180e3..c967dc3c 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -39,6 +39,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/memory/metadata.asm"), include_str!("asm/memory/packing.asm"), include_str!("asm/memory/txn_fields.asm"), + include_str!("asm/ripemd/box.asm"), include_str!("asm/ripemd/compression.asm"), include_str!("asm/ripemd/constants.asm"), include_str!("asm/ripemd/functions.asm"), diff --git a/evm/src/cpu/kernel/asm/ripemd/box.asm b/evm/src/cpu/kernel/asm/ripemd/box.asm new file mode 100644 index 00000000..81e89c1f --- /dev/null +++ b/evm/src/cpu/kernel/asm/ripemd/box.asm @@ -0,0 +1,96 @@ +/// Note that we unpack STATE: 5 to a, b, c, d, e +/// All additions are u32 +/// +/// def box(a, b, c, d, e, F, K): +/// +/// box = get_box(sides, rounds, boxes) +/// a += F(b, c, d) +/// r = load(r)(box) +/// x = load_offset(r) +/// a += x + K +/// s = load(s)(box) +/// a = rol(s, a) +/// a += e +/// c = rol(10, c) +/// +/// return e, a, b, c, d, F, K + +global box: + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + PUSH pre_rol + DUP5 + DUP5 + DUP5 + DUP10 + // stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides, virt + JUMP +pre_rol: + // stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + %get_box + // stack: box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + DUP12 + DUP2 + %mload_kernel_code(r_data) + ADD + // stack: virt + r, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + %load_u32_from_block + // stack: x, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt + SWAP1 + SWAP2 + // stack: a, x, box, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + DUP8 + ADD + %as_u32 + // stack: a, box, b, c, d, e, F, K, boxes, rounds, sides, virt + PUSH mid_rol + SWAP2 + // stack: box, a, mid_rol, b, c, d, 
e, F, K, boxes, rounds, sides, virt + %mload_kernel_code(s_data) + // stack: s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt + %jump(rol) +mid_rol: + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + DUP5 + // stack: e, a, b, c, d, e, F, K, boxes, rounds, sides, virt + ADD + %as_u32 + // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt + %stack (a, b, c) -> (10, c, post_rol, a, b) + // stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides, virt + %jump(rol) +post_rol: + // stack: c, a, b, d, e, F, K, boxes , rounds, sides, virt + %stack (c, a, b, d, e, F, K, boxes) -> (boxes, 1, a, b, c, d, F, K, e) + // stack: boxes, 1, a, b, c, d, F, K, e, rounds, sides, virt + SUB + SWAP7 + // stack: e, a, b, c, d, F, K, boxes-1, rounds, sides, virt + %jump(round) + + +%macro get_round + // stack: sides, rounds + %mul_const(5) + PUSH 10 + SUB + SUB + // stack: 10 - 5*sides - rounds +%endmacro + +%macro get_box + // stack: ARGS: 7, boxes, rounds, sides + DUP10 + %mul_const(80) + DUP10 + %mul_const(16) + DUP10 + // stack: boxes , 16*rounds , 80*sides, ARGS: 7, boxes, rounds, sides + PUSH 176 + SUB + SUB + SUB + // stack: 176 - boxes - 16*rounds - 80*sides, ARGS: 7, boxes, rounds, sides +%endmacro diff --git a/evm/src/cpu/kernel/asm/ripemd/compression.asm b/evm/src/cpu/kernel/asm/ripemd/compression.asm index a1535e30..a83bf832 100644 --- a/evm/src/cpu/kernel/asm/ripemd/compression.asm +++ b/evm/src/cpu/kernel/asm/ripemd/compression.asm @@ -1,17 +1,16 @@ -/// _block is stored in memory and its address virt remains on the stack -/// Note that STATE takes up 5 stack slots -/// def compress(state, _block): +/// _block is stored in memory: its address virt stays on the stack +/// def compress(STATE: 5, _block): /// -/// stateL = state -/// stateL = loop(stateL) +/// STATEL = STATE +/// STATEL = loop(STATEL) /// -/// stateR = state -/// stateR = loop(stateR) +/// STATER = STATE +/// STATER = loop(STATER) /// -/// return mix(state, stateL, stateR) +/// 
return mix(STATER, STATEL, STATE) /// /// -/// def mix(stateR, stateL, state): +/// def mix(STATER, STATEL, STATE): /// return /// u32(s1 + l2 + r3), /// u32(s2 + l3 + r4), @@ -20,7 +19,7 @@ /// u32(s0 + l1 + r2) /// /// where si, li, ri, oi, VR, RD respectively denote -/// state[i], stateL[i], stateR[i], output[i], virt, retdest +/// STATE[i], STATEL[i], STATER[i], OUTPUT[i], virt, retdest global compress: // stack: STATE, virt, retdest @@ -97,10 +96,10 @@ mix: JUMP -/// def loop(STATE): +/// def loop(STATE: 5): /// while rounds: /// update_round_vars() -/// round(STATE, F, K, rounds, sides) +/// round(STATE: 5, F, K, rounds, sides) /// /// def update_round_vars(): /// F = load(F)(sides, rounds) @@ -141,7 +140,7 @@ update_round_vars: POP // stack: STATE, F', K', 16, rounds, sides, virt, retdest %jump(round) -round: +global round: // stack: STATE, F, K, boxes, rounds , sides, virt, retdest DUP8 // stack: boxes, STATE, F, K, boxes, rounds , sides, virt, retdest @@ -159,100 +158,3 @@ round: POP // stack: STATE, F, K, 16, rounds-1, sides, virt, retdest %jump(loop) - - -/// Note that we unpack STATE to a, b, c, d, e -/// All additions are u32 -/// -/// def box(a, b, c, d, e, F, K): -/// -/// box = get_box(sides, rounds, boxes) -/// a += F(b, c, d) -/// r = load(r)(box) -/// x = load_offset(r) -/// a += x + K -/// s = load(s)(box) -/// a = rol(s, a) -/// a += e -/// c = rol(10, c) -/// -/// return e, a, b, c, d, F, K - -box: - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - PUSH pre_rol - DUP5 - DUP5 - DUP5 - DUP10 - // stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides, virt - JUMP -pre_rol: - // stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - %get_box - // stack: box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - DUP12 - DUP2 - %mload_kernel_code(r_data) - ADD - // stack: virt + r, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - 
%load_u32_from_block - // stack: x, box, a, b, c, d, e, F, K, boxes, rounds, sides, virt - SWAP1 - SWAP2 - // stack: a, x, box, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - DUP8 - ADD - %as_u32 - // stack: a, box, b, c, d, e, F, K, boxes, rounds, sides, virt - PUSH mid_rol - SWAP2 - // stack: box, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt - %mload_kernel_code(s_data) - // stack: s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides, virt - %jump(rol) -mid_rol: - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - DUP5 - // stack: e, a, b, c, d, e, F, K, boxes, rounds, sides, virt - ADD - %as_u32 - // stack: a, b, c, d, e, F, K, boxes, rounds, sides, virt - %stack (a, b, c) -> (10, c, post_rol, a, b) - // stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides, virt - %jump(rol) -post_rol: - // stack: c, a, b, d, e, F, K, boxes , rounds, sides, virt - %stack (c, a, b, d, e, F, K, boxes) -> (boxes, 1, a, b, c, d, F, K, e) - // stack: boxes, 1, a, b, c, d, F, K, e, rounds, sides, virt - SUB - SWAP7 - // stack: e, a, b, c, d, F, K, boxes-1, rounds, sides, virt - %jump(round) - -%macro get_round - // stack: sides, rounds - %mul_const(5) - PUSH 10 - SUB - SUB - // stack: 10 - 5*sides - rounds -%endmacro - -%macro get_box - // stack: ARGS: 7, boxes, rounds, sides - DUP10 - %mul_const(80) - DUP10 - %mul_const(16) - DUP10 - // stack: boxes , 16*rounds , 80*sides, ARGS: 7, boxes, rounds, sides - PUSH 176 - SUB - SUB - SUB - // stack: 176 - boxes - 16*rounds - 80*sides, ARGS: 7, boxes, rounds, sides -%endmacro