diff --git a/evm/src/cpu/kernel/asm/ripemd/constants.asm b/evm/src/cpu/kernel/asm/ripemd/constants.asm
index ad20de9c..42997b7e 100644
--- a/evm/src/cpu/kernel/asm/ripemd/constants.asm
+++ b/evm/src/cpu/kernel/asm/ripemd/constants.asm
@@ -1,4 +1,10 @@
-// %macro load_K
+%macro load_K
+    // stack:            rnd
+    %mul_const(4)  push K_data  add
+    // stack: K_data + 4*rnd
+    %mload_kernel_code_u32
+    // stack:              K
+%endmacro
 
 K_data:
     // Left
@@ -14,20 +20,27 @@ K_data:
     BYTES 0x7A, 0x6D, 0x76, 0xE9
     BYTES 0x00, 0x00, 0x00, 0x00
 
 
-// %macro load_s
-s_data:
+%macro load_s
+    // stack:          box
+    push S_data  add
+    // stack: S_data + box
+    %mload_kernel_code
+    // stack:            s
+%endmacro
+
+S_data:
     // Left Round 1
     BYTES 11, 14, 15, 12
     BYTES 05, 08, 07, 09
     BYTES 11, 13, 14, 15
     BYTES 06, 07, 09, 08
-    // Left Round 2 
+    // Left Round 2
     BYTES 07, 06, 08, 13
     BYTES 11, 09, 07, 15
     BYTES 07, 12, 15, 09
     BYTES 11, 07, 13, 12
-    // Left Round 3 
+    // Left Round 3
     BYTES 11, 13, 06, 07
     BYTES 14, 09, 13, 15
     BYTES 14, 08, 13, 06
@@ -37,7 +50,7 @@ s_data:
     BYTES 14, 15, 09, 08
     BYTES 09, 14, 05, 06
     BYTES 08, 06, 05, 12
-    // Left Round 5 
+    // Left Round 5
     BYTES 09, 15, 05, 11
     BYTES 06, 08, 13, 12
     BYTES 05, 12, 13, 14
@@ -48,77 +61,83 @@ s_data:
     BYTES 13, 15, 15, 05
     BYTES 07, 07, 08, 11
     BYTES 14, 14, 12, 06
-    // Right Round 2 
+    // Right Round 2
     BYTES 09, 13, 15, 07
     BYTES 12, 08, 09, 11
     BYTES 07, 07, 12, 07
     BYTES 06, 15, 13, 11
-    // Right Round 3 
+    // Right Round 3
     BYTES 09, 07, 15, 11
     BYTES 08, 06, 06, 14
     BYTES 12, 13, 05, 14
     BYTES 13, 13, 07, 05
-    // Right Round 4 
+    // Right Round 4
     BYTES 15, 05, 08, 11
     BYTES 14, 14, 06, 14
     BYTES 06, 09, 12, 09
     BYTES 12, 05, 15, 08
-    // Right Round 5 
+    // Right Round 5
     BYTES 08, 05, 12, 09
     BYTES 12, 05, 14, 06
     BYTES 08, 13, 06, 05
     BYTES 15, 13, 11, 11
 
 
-// %macro load_r
+%macro load_r
+    // stack:          box
+    push R_data  add
+    // stack: R_data + box
+    %mload_kernel_code
+    // stack:            r
+%endmacro
 
-r_data:
-    // Left Round 0
+R_data:
+    // Left Round 1
     BYTES 00, 01, 02, 03
     BYTES 04, 05, 06, 07
     BYTES 08, 09, 10, 11
     BYTES 12, 13, 14, 15
-    // Left Round 1
+    // Left Round 2
     BYTES 07, 04, 13, 01
     BYTES 10, 06, 15, 03
     BYTES 12, 00, 09, 05
     BYTES 02, 14, 11, 08
-    // Left Round 2
+    // Left Round 3
     BYTES 03, 10, 14, 04
     BYTES 09, 15, 08, 01
     BYTES 02, 07, 00, 06
     BYTES 13, 11, 05, 12
-    // Left Round 3
+    // Left Round 4
     BYTES 01, 09, 11, 10
     BYTES 00, 08, 12, 04
     BYTES 13, 03, 07, 15
     BYTES 14, 05, 06, 02
-    // Left Round 4
+    // Left Round 5
     BYTES 04, 00, 05, 09
     BYTES 07, 12, 02, 10
     BYTES 14, 01, 03, 08
     BYTES 11, 06, 15, 13
-    // Right Round 0
+    // Right Round 1
     BYTES 05, 14, 07, 00
     BYTES 09, 02, 11, 04
     BYTES 13, 06, 15, 08
     BYTES 01, 10, 03, 12
-    // Right Round 1
+    // Right Round 2
     BYTES 06, 11, 03, 07
     BYTES 00, 13, 05, 10
     BYTES 14, 15, 08, 12
     BYTES 04, 09, 01, 02
-    // Right Round 2
+    // Right Round 3
     BYTES 15, 05, 01, 03
     BYTES 07, 14, 06, 09
     BYTES 11, 08, 12, 02
     BYTES 10, 00, 04, 13
-    // Right Round 3
+    // Right Round 4
     BYTES 08, 06, 04, 01
     BYTES 03, 11, 15, 00
     BYTES 05, 12, 02, 13
     BYTES 09, 07, 10, 14
-    // Right Round 4
+    // Right Round 5
     BYTES 12, 15, 10, 04
     BYTES 01, 05, 08, 07
     BYTES 06, 02, 13, 14
diff --git a/evm/src/cpu/kernel/asm/ripemd/hash.asm b/evm/src/cpu/kernel/asm/ripemd/hash.asm
index 2f7cf77d..85d12f0d 100644
--- a/evm/src/cpu/kernel/asm/ripemd/hash.asm
+++ b/evm/src/cpu/kernel/asm/ripemd/hash.asm
@@ -10,120 +10,35 @@
 ///     stateR = state
 ///     stateR = loop(stateR)
 ///
-///     state = mix(state, stateL, stateR)
+///     return mix(state, stateL, stateR)
+///
+///
+/// def mix(*stateR, *stateL, *state):
+///     return
+///         u32(state[1] + stateL[2] + stateR[3]),
+///         u32(state[2] + stateL[3] + stateR[4]),
+///         u32(state[3] + stateL[4] + stateR[0]),
+///         u32(state[4] + stateL[0] + stateR[1]),
+///         u32(state[0] + stateL[1] + stateR[2])
+///
+/// In mix, we denote state[i], stateL[i], stateR[i] by si, li, ri
 
 global hash:
     jumpdest
     // stack: *state, retdest
-    push switch push 5 push 16 push K0 push F0
-    // stack: F0, K0, 16, 5, switch, *state, retdest
-    dup10 dup10 dup10 dup10 dup10
-    // stack: *state, F0, K0, 16, 5, switch, *state, retdest
+    push switch  push 1  push 5  push 16  push 0  push 0
+    // stack:         0, 0, 16, 5, 1, switch, *state, retdest
+    dup11  dup11  dup11  dup11  dup11
+    // stack: *state, 0, 0, 16, 5, 1, switch, *state, retdest
     %jump(loop)
 switch:
     jumpdest
     // stack: *stateL, *state, retdest
-    push mix push 5 push 16
-    // stack: F0, K0, 16, 5, mix, *stateL, *state, retdest
-    dup15 dup15 dup15 dup15 dup15
-    // stack: *state, F0, K0, 16, 5, mix, *stateL, *state, retdest
+    push mix  push 0  push 5  push 16  push 0  push 0
+    // stack:         0, 0, 16, 5, 0, mix, *stateL, *state, retdest
+    dup16  dup16  dup16  dup16  dup16
+    // stack: *state, 0, 0, 16, 5, 0, mix, *stateL, *state, retdest
     %jump(loop)
-
-
-/// def loop(*state, Fj, Kj):
-///     while n:
-///         while i:
-///             R(*state, Fj, Kj)
-///             i -= 1
-///         i = 16
-///         n -= 1
-///         F = Fs[n]
-///         K = Ks[n]
-
-loop:
-    jumpdest
-    // stack: *state, Fj, Kj, 16, n, retdest
-    push 1 dup9 sub swap8
-    // stack: n, *state, Fj, Kj, 16, n-1, retdest
-    %jumpi(cycle)
-    // stack: *state, Fj, Kj, 16, -1, retdest
-    %stack (a, b, c, d, e, f, k, i, n, ret) -> (ret, a, b, c, d, e)
-    // stack: retdest, *state
-    jump
-cycle:
-    jumpdest
-    // stack: *state, Fj, Kj, i, n, retdest
-    push 1 dup9 sub swap8
-    // stack: i, *state, Fj, Kj, i-1, n, retdest
-    %jumpi(R)
-    // stack: *state, Fj, Kj, -1, n, retdest
-    swap5 pop push Fj swap5 ---------------------------------------------------------------------TODO
-    // stack: *state, Fj, Kj 16, n, retdest
-    swap6 pop push Kj swap6 ---------------------------------------------------------------------TODO
-    // stack: *state, Fj, Kj 16, n, retdest
-    swap7 pop push 16 swap7
-    // stack: *state, Fj, Kj 16, n, retdest
-    %jump(loop)
-
-/// Note that a, b, c, d, e represent *state
-///
-/// def R(a, b, c, d, e, Fj, Kj, _sj, _rj, _X):
-///     a = u32(ROL(sj, u32(Fj(b, c, d) + a + X[rj] + Kj)) + e)
-///     c = ROL(10, c)
-///     return e, a, b, c, d, Fj, Kj
-
-R:
-    jumpdest
-    // stack: a, b, c, d, e, Fj, Kj
-    push after_F dup5 dup5 dup5 dup10
-    // stack: Fj, b, c, d, after_F, a, b, c, d, e, Fj, Kj
-    jump
-after_F:
-    // stack: Fj(b, c, d), a, b, c, d, e, Fj, Kj
-    add
-    // stack: Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    push X[rj]---------------------------------------------------------------------TODO
-    // stack: X[rj], Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    add
-    // stack: X[rj] + Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    dup7
-    // stack: Kj, X[rj] + Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    add %u32
-    // stack: Kj + X[rj] + Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    push sj------------------------------------------------------------------------TODO
-    // stack: sj, Kj + X[rj] + Fj(b, c, d) + a, b, c, d, e, Fj, Kj
-    %jump(ROL)
-    // stack: ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, c, d, e, Fj, Kj
-    dup5
-    // stack: e, ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, c, d, e, Fj, Kj
-    add %u32
-    // stack: e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, c, d, e, Fj, Kj
-    swap1
-    // stack: b, e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), c, d, e, Fj, Kj
-    swap2
-    // stack: c, e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, d, e, Fj, Kj
-    push 10
-    // stack: 10, c, b, e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), d, e, Fj, Kj
-    %jump(ROL)
-    // stack: ROL(10, c), e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, d, e, Fj, Kj
-    swap4
-    // stack: d, e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, ROL(10,c), e, Fj, Kj
-    swap5
-    // stack: e, e + ROL(sj, Kj + X[rj] + Fj(b, c, d) + a), b, ROL(10, c), d, Fj, Kj
-    %jump(cycle)
-
-
-/// def mix(*stateR, *stateL, *state):
-///     return [
-///         u32(state[1] + stateL[2] + stateR[3]),
-///         u32(state[2] + stateL[3] + stateR[4]),
-///         u32(state[3] + stateL[4] + stateR[0]),
-///         u32(state[4] + stateL[0] + stateR[1]),
-///         u32(state[0] + stateL[1] + stateR[2])
-///     ]
-///
-/// Note that we denote state[i], stateL[i], stateR[i] by si, li, ri
-
 mix:
     jumpdest
     // stack: r0, r1, r2, r3, r4, l0, l1, l2, l3, l4, s0, s1, s2, s3, s4, retdest
@@ -166,3 +81,140 @@ mix:
     swap3
     // stack: retdest, s1+l2+r3, s2+l3+r4, s3+l4+r0, s4+l0+r1, s0+l1+r2
     jump
+
+
+/// def loop(*state):
+///     while rounds:
+///         update_round_vars()
+///         round(*state, F, K, rounds, sides)
+///
+/// def update_round_vars():
+///     F = load_F(sides, rounds)
+///     K = load_K(sides, rounds)
+///
+/// def round(*state, rounds, sides):
+///     while boxes:
+///         box(*state, F, K)
+///         boxes -= 1
+///     boxes   = 16
+///     rounds -= 1
+
+
+loop:
+    jumpdest
+    // stack:         *state, F, K, 16, rounds, sides, retdest
+    dup9
+    // stack: rounds, *state, F, K, 16, rounds, sides, retdest
+    %jumpi(update_round_vars)
+    // stack:         *state, F, K, 16,      0, sides, retdest
+    %stack (a, b, c, d, e, F, K, boxes, rounds, sides, retdest) -> (retdest, a, b, c, d, e)
+    // stack: retdest, *state
+    jump
+update_round_vars:
+    jumpdest
+    // stack:           *state, F , K , 16, rounds, sides, retdest
+    dup9  dup11  %get_round  dup1
+    // stack: rnd, rnd, *state, F , K , 16, rounds, sides, retdest
+    %load_F  swap7  pop
+    // stack:      rnd, *state, F', K , 16, rounds, sides, retdest
+    %load_K  swap7  pop
+    // stack:           *state, F', K', 16, rounds, sides, retdest
+    %jump(round)
+round:
+    jumpdest
+    // stack:        *state, F, K, boxes, rounds  , sides, retdest
+    dup8
+    // stack: boxes, *state, F, K, boxes, rounds  , sides, retdest
+    %jumpi(box)
+    // stack:        *state, F, K,     0, rounds  , sides, retdest
+    swap7  pop  push 16  swap7
+    // stack:        *state, F, K,    16, rounds  , sides, retdest
+    push 1  dup10  sub  swap9  pop
+    // stack:        *state, F, K,    16, rounds-1, sides, retdest
+    %jump(loop)
+
+
+/// Note that we unpack *state to a, b, c, d, e
+/// All additions are u32
+///
+/// def box(a, b, c, d, e, F, K):
+///
+///     box = get_box_index(sides, rounds, boxes)
+///     a  += F(b, c, d)
+///     r   = load_r(box)
+///     x   = load_block(r)
+///     a  += x + K
+///     s   = load_s(box)
+///     a   = rol(s, a)
+///     a  += e
+///     c   = rol(10, c)
+///
+///     return e, a, b, c, d, F, K
+
+
+box:
+    jumpdest
+    // stack:                      a, b, c, d, e, F, K, boxes, rounds, sides
+    push pre_rol  dup5  dup5  dup5  dup10
+    // stack: F, b, c, d, pre_rol, a, b, c, d, e, F, K, boxes, rounds, sides
+    jump
+pre_rol:
+    jumpdest
+    // stack: F(b, c, d), a, b, c, d, e, F, K, boxes, rounds, sides
+    add
+    // stack:             a, b, c, d, e, F, K, boxes, rounds, sides
+    %get_box
+    // stack:        box, a, b, c, d, e, F, K, boxes, rounds, sides
+    dup1  %load_r
+    // stack:     r, box, a, b, c, d, e, F, K, boxes, rounds, sides
+    %load_X  // TODO: load_X is still a placeholder for x = load_block(r)
+    // stack:     x, box, a, b, c, d, e, F, K, boxes, rounds, sides
+    swap1  swap2
+    // stack:     a, x, box, b, c, d, e, F, K, boxes, rounds, sides
+    add  dup8  add  %u32
+    // stack:        a, box, b, c, d, e, F, K, boxes, rounds, sides
+    push mid_rol  swap2
+    // stack: box, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides
+    %load_s
+    // stack:   s, a, mid_rol, b, c, d, e, F, K, boxes, rounds, sides
+    %jump(rol)
+mid_rol:
+    jumpdest
+    // stack:    a, b, c, d, e, F, K, boxes, rounds, sides
+    dup5
+    // stack: e, a, b, c, d, e, F, K, boxes, rounds, sides
+    add  %u32
+    // stack:    a, b, c, d, e, F, K, boxes, rounds, sides
+    %stack (a, b, c) -> (10, c, post_rol, a, b)
+    // stack: 10, c, post_rol, a, b, d, e, F, K, boxes, rounds, sides
+    %jump(rol)
+post_rol:
+    jumpdest
+    // stack: c, a, b, d, e, F, K, boxes  , rounds, sides
+    swap3
+    // stack: d, a, b, c, e, F, K, boxes  , rounds, sides
+    swap4
+    // stack: e, a, b, c, d, F, K, boxes  , rounds, sides
+    swap7  push 1  swap1  sub  swap7
+    // stack: e, a, b, c, d, F, K, boxes-1, rounds, sides
+    %jump(round)
+
+
+/// rnd = 10 - 5*sides - rounds selects the (F, K) pair for the current round:
+/// 0..4 while sides = 1 (left line), 5..9 while sides = 0 (right line)
+%macro get_round
+    // stack: sides, rounds
+    %mul_const(5)  push 10  sub  sub
+    // stack: 10 - 5*sides - rounds
+%endmacro
+
+
+/// box = 176 - boxes - 16*rounds - 80*sides is the byte offset into S_data / R_data:
+/// 0..79 while sides = 1 (left line), 80..159 while sides = 0 (right line)
+%macro get_box
+    // stack:                                    *7_args, boxes, rounds, sides
+    dup10  %mul_const(80)  dup10  %mul_const(16)  dup10
+    // stack:       boxes , 16*rounds , 80*sides, *7_args, boxes, rounds, sides
+    push 176  sub  sub  sub
+    // stack: 176 - boxes - 16*rounds - 80*sides, *7_args, boxes, rounds, sides
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/ripemd/subroutines.asm b/evm/src/cpu/kernel/asm/ripemd/subroutines.asm
index ab34a8d6..dab8c4bf 100644
--- a/evm/src/cpu/kernel/asm/ripemd/subroutines.asm
+++ b/evm/src/cpu/kernel/asm/ripemd/subroutines.asm
@@ -1,34 +1,61 @@
-/// def ROL(n, x):
+/// def rol(n, x):
 ///     return (u32(x << n)) | (x >> (32 - n))
 
-global ROL:
+global rol:
     jumpdest
-    // stack: n, x, retdest
-    swap1
-    // stack: x, n, retdest
-    dup1
-    // stack: x, x, n, retdest
-    dup3
-    // stack: n, x, x, n, retdest
-    push 32
-    // stack: 32, n, x, x, n, retdest
-    sub
-    // stack: 32-n, x, x, n, retdest
+    // stack:                        n, x, retdest
+    swap1  dup1  dup3
+    // stack:                  n, x, x, n, retdest
+    push 32  sub
+    // stack:               32-n, x, x, n, retdest
     shr
-    // stack: x >> (32-n), x, n, retdest
+    // stack:           x >> (32-n), x, n, retdest
     swap2
-    // stack: n, x, x >> (32-n), retdest
+    // stack:           n, x, x >> (32-n), retdest
     shl
-    // stack: x << n, x >> (32-n), retdest
-    push 0xffffffff
-    // stack: 0xffffffff, (x << n), x >> (32-n), retdest
-    and
-    // stack: (x << n) & 0xffffffff, x >> (32-n), retdest
+    // stack:         x << n, x >> (32-n), retdest
+    %u32
+    // stack:    u32(x << n), x >> (32-n), retdest
     or
-    // stack: ((x << n) & 0xffffffff) | (x >> (32-n)), retdest
-    swap1
-    // stack: retdest, ((x << n) & 0xffffffff) | (x >> (32-n))
-    jump
+    // stack: u32(x << n) | (x >> (32-n)), retdest
+    swap1  jump
+
+
+/// load_F: rnd -> F
+/// Accumulates (rnd == i) * F_i over i = 0..9, where F_i runs F0..F4 and then
+/// F4..F0, and finally drops rnd so that only the selected label is left.
+%macro load_F
+    // stack:      rnd
+    push 0
+    // stack: acc, rnd
+    %this_F(0,F0)
+    %this_F(1,F1)
+    %this_F(2,F2)
+    %this_F(3,F3)
+    %this_F(4,F4)
+    %this_F(5,F4)
+    %this_F(6,F3)
+    %this_F(7,F2)
+    %this_F(8,F1)
+    %this_F(9,F0)
+    // stack: F  , rnd
+    swap1  pop
+    // stack: F
+%endmacro
+
+
+/// this_F(i, F): acc, rnd -> acc + (rnd==i)*F, rnd
+%macro this_F(i, F)
+    // stack:                   acc, rnd
+    dup2
+    // stack:              rnd, acc, rnd
+    %eq_const($i)
+    // stack:         rnd==i  , acc, rnd
+    %mul_const($F)
+    // stack:        (rnd==i)*F, acc, rnd
+    add
+    // stack: acc + (rnd==i)*F, rnd
+%endmacro
 
 
 /// def F0(x, y, z):
@@ -36,14 +63,12 @@ global ROL:
 
 global F0:
     jumpdest
-    // stack: x, y, z, retdest
+    // stack: x    , y    , z, retdest
     xor
-    // stack: x ^ y, z, retdest
+    // stack: x ^ y      , z, retdest
     xor
     // stack: x ^ y ^ z, retdest
-    swap1
-    // stack: retdest, x ^ y ^ z
-    jump
+    swap1  jump
 
 
 /// def F1(x, y, z):
@@ -51,26 +76,24 @@ global F0:
 
 global F1:
     jumpdest
-    // stack: x, y, z, retdest
+    // stack:            x, y, z, retdest
     dup1
-    // stack: x, x, y, z, retdest
+    // stack:         x, x, y, z, retdest
     swap2
-    // stack: y, x, x, z, retdest
+    // stack:         y, x, x, z, retdest
     and
-    // stack: y & x, x, z, retdest
+    // stack:        y & x, x, z, retdest
     swap2
-    // stack: z, x, y & x, retdest
+    // stack:        z, x, y & x, retdest
     swap1
-    // stack: x, z, y & x, retdest
+    // stack:        x, z, y & x, retdest
     %not_32
-    // stack: ~x, z, y & x, retdest
+    // stack:       ~x, z, y & x, retdest
     and
-    // stack: ~x & z, y & x, retdest
+    // stack:      ~x & z, y & x, retdest
     or
     // stack: (~x & z) | (y & x), retdest
-    swap1
-    // stack: retdest, (~x & z) | (y & x)
-    jump
+    swap1  jump
 
 
 /// def F2(x, y, z):
@@ -78,18 +101,16 @@ global F1:
 
 global F2:
     jumpdest
-    // stack: x, y, z, retdest
+    // stack:      x, y, z, retdest
     swap1
-    // stack: y, x, z, retdest
+    // stack:      y, x, z, retdest
     %not_32
-    // stack: ~y, x, z, retdest
+    // stack:     ~y, x, z, retdest
     or
-    // stack: ~y | x, z, retdest
+    // stack:    ~y | x, z, retdest
     xor
     // stack: (~y | x) ^ z, retdest
-    swap1
-    // stack: retdest, (~y | x) ^ z
-    jump
+    swap1  jump
 
 
 /// def F3(x, y, z):
@@ -97,22 +118,20 @@ global F2:
 
 global F3:
     jumpdest
-    // stack: x, y, z, retdest
+    // stack:            x, y, z, retdest
     dup3
-    // stack: z, x, y, z, retdest
+    // stack:         z, x, y, z, retdest
     and
-    // stack: z & x, y, z, retdest
+    // stack:        z & x, y, z, retdest
     swap2
-    // stack: z, y, z & x, retdest
+    // stack:        z, y, z & x, retdest
     %not_32
-    // stack: ~z, y, z & x, retdest
+    // stack:       ~z, y, z & x, retdest
     and
-    // stack: ~z & y, z & x, retdest
+    // stack:      ~z & y, z & x, retdest
     or
     // stack: (~z & y) | (z & x), retdest
-    swap1
-    // stack: retdest, (~z & y) | (z & x)
-    jump
+    swap1  jump
 
 
 /// def F4(x, y, z):
@@ -120,15 +139,13 @@ global F3:
 
 global F4:
     jumpdest
-    // stack: x, y, z, retdest
+    // stack:      x, y, z, retdest
     swap2
-    // stack: z, y, x, retdest
+    // stack:      z, y, x, retdest
     %not_32
-    // stack: ~z, y, x, retdest
+    // stack:     ~z, y, x, retdest
     or
-    // stack: ~z | y, x, retdest
+    // stack:    ~z | y, x, retdest
     xor
     // stack: (~z | y) ^ x, retdest
-    swap1
-    // stack: retdest, (~z | y) ^ x
-    jump
+    swap1  jump