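// plonky2/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
//
// Stack macros for fp6 values. Throughout this file an fp6 value is handled
// as six consecutive stack words (x0, ..., x5) or six consecutive memory
// slots (ptr, ..., ptr + 5).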

// Read the six words at ptr, ptr + 1, ..., ptr + 5 and leave them on the stack.
//   in:  ptr
//   out: x0, x1, x2, x3, x4, x5      (x_k is the word loaded from ptr + k)
// The words are fetched in the order 4, 3, 2, 1, 5, 0: loading x5 second to
// last lets the single SWAP5 push x5 to the bottom and bring ptr back to the
// top at once, so the final load needs no address arithmetic.
// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6
    // stack:       ptr
    DUP1 %add_const(4)
    // stack: ind4, ptr
    %mload_kernel_general
    // stack:   x4, ptr
    DUP2 %add_const(3)
    // stack: ind3, x4, ptr
    %mload_kernel_general
    // stack:   x3, x4, ptr
    DUP3 %add_const(2)
    // stack: ind2, x3, x4, ptr
    %mload_kernel_general
    // stack:   x2, x3, x4, ptr
    DUP4 %add_const(1)
    // stack: ind1, x2, x3, x4, ptr
    %mload_kernel_general
    // stack:   x1, x2, x3, x4, ptr
    DUP5 %add_const(5)
    // stack: ind5, x1, x2, x3, x4, ptr
    %mload_kernel_general
    // stack:   x5, x1, x2, x3, x4, ptr
    SWAP5
    // stack: ind0, x1, x2, x3, x4, x5   (ind0 is ptr itself)
    %mload_kernel_general
    // stack:   x0, x1, x2, x3, x4, x5
%endmacro
// Read the six words at $ptr, $ptr + 1, ..., $ptr + 5 and leave them on the
// stack, where $ptr is a macro argument rather than a stack operand.
//   in:  (nothing consumed)
//   out: x0, x1, x2, x3, x4, x5      (x_k is the word loaded from $ptr + k)
// Words are loaded from the highest offset down so that x0 ends up on top.
// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6(ptr)
    // stack:
    PUSH $ptr %add_const(5)
    // stack: ind5
    %mload_kernel_general
    // stack:   x5
    PUSH $ptr %add_const(4)
    // stack: ind4, x5
    %mload_kernel_general
    // stack:   x4, x5
    PUSH $ptr %add_const(3)
    // stack: ind3, x4, x5
    %mload_kernel_general
    // stack:   x3, x4, x5
    PUSH $ptr %add_const(2)
    // stack: ind2, x3, x4, x5
    %mload_kernel_general
    // stack:   x2, x3, x4, x5
    PUSH $ptr %add_const(1)
    // stack: ind1, x2, x3, x4, x5
    %mload_kernel_general
    // stack:   x1, x2, x3, x4, x5
    PUSH $ptr
    // stack: ind0, x1, x2, x3, x4, x5
    %mload_kernel_general
    // stack:   x0, x1, x2, x3, x4, x5
%endmacro
// Write six stack words to ptr, ptr + 1, ..., ptr + 5.
//   in:  ptr, x0, x1, x2, x3, x4, x5
//   out: (empty)                     (x_k is stored at ptr + k)
// The opening SWAP5 trades ptr with x4 so that ptr sits below x0..x3 and can
// be re-read with DUP while those words are stored one by one; x4 is stored
// first because the swap left it on top.
// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro store_fp6
    // stack:  ptr, x0, x1, x2, x3, x4, x5
    SWAP5
    // stack:   x4, x0, x1, x2, x3, ptr, x5
    DUP6 %add_const(4)
    // stack: ind4, x4, x0, x1, x2, x3, ptr, x5
    %mstore_kernel_general
    // stack:       x0, x1, x2, x3, ptr, x5
    DUP5
    // stack: ind0, x0, x1, x2, x3, ptr, x5   (ind0 is ptr itself)
    %mstore_kernel_general
    // stack:       x1, x2, x3, ptr, x5
    DUP4 %add_const(1)
    // stack: ind1, x1, x2, x3, ptr, x5
    %mstore_kernel_general
    // stack:       x2, x3, ptr, x5
    DUP3 %add_const(2)
    // stack: ind2, x2, x3, ptr, x5
    %mstore_kernel_general
    // stack:       x3, ptr, x5
    DUP2 %add_const(3)
    // stack: ind3, x3, ptr, x5
    %mstore_kernel_general
    // stack:       ptr, x5
    %add_const(5)
    // stack: ind5, x5                        (ptr is consumed here, not copied)
    %mstore_kernel_general
    // stack:
%endmacro
// Write six stack words to $ptr, $ptr + 1, ..., $ptr + 5, where $ptr is a
// macro argument rather than a stack operand.
//   in:  x0, x1, x2, x3, x4, x5
//   out: (empty)                     (x_k is stored at $ptr + k)
// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro store_fp6(ptr)
    // stack:       x0, x1, x2, x3, x4, x5
    PUSH $ptr
    // stack: ind0, x0, x1, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x1, x2, x3, x4, x5
    PUSH $ptr %add_const(1)
    // stack: ind1, x1, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x2, x3, x4, x5
    PUSH $ptr %add_const(2)
    // stack: ind2, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x3, x4, x5
    PUSH $ptr %add_const(3)
    // stack: ind3, x3, x4, x5
    %mstore_kernel_general
    // stack:       x4, x5
    PUSH $ptr %add_const(4)
    // stack: ind4, x4, x5
    %mstore_kernel_general
    // stack:       x5
    PUSH $ptr %add_const(5)
    // stack: ind5, x5
    %mstore_kernel_general
    // stack:
%endmacro
// Store six stack words to $ptr..$ptr + 5 in "shifted" order:
//   x0, x1, x2, x3 are written to $ptr + 2, $ptr + 3, $ptr + 4, $ptr + 5;
//   the pair (x4, x5) is passed through %i9, and the two results are written
//   to $ptr + 1 (the word %i9 leaves on top, y5) and $ptr (the word below, y4).
//   in:  x0, x1, x2, x3, x4, x5
//   out: (empty)
// With %i9 as defined in this file, y4 = 9*x4 - x5 and y5 = 9*x5 + x4.
// NOTE(review): this looks like multiplication of an fp6 element by the
// generator of the cubic extension; confirm against the callers.
// cost: store (40) + i9 (9) = 49
%macro store_fp6_sh(ptr)
    // stack:       x0, x1, x2, x3, x4, x5
    PUSH $ptr %add_const(2)
    // stack: ind2, x0, x1, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x1, x2, x3, x4, x5
    PUSH $ptr %add_const(3)
    // stack: ind3, x1, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x2, x3, x4, x5
    PUSH $ptr %add_const(4)
    // stack: ind4, x2, x3, x4, x5
    %mstore_kernel_general
    // stack:       x3, x4, x5
    PUSH $ptr %add_const(5)
    // stack: ind5, x3, x4, x5
    %mstore_kernel_general
    // stack:       x4, x5
    %i9
    // stack:       y5, y4          (%i9 returns the two words in swapped order)
    PUSH $ptr %add_const(1)
    // stack: ind1, y5, y4
    %mstore_kernel_general
    // stack:       y4
    PUSH $ptr
    // stack: ind0, y4
    %mstore_kernel_general
    // stack:
%endmacro
// Map the stack pair (a, b) to (9b + a, 9a - b).
//   in:  a, b
//   out: 9b + a, 9a - b
// Reading the input as a + bi with i^2 = -1, the product (9 + i)(a + bi) is
// (9a - b) + (9b + a)i, so the outputs are that product's two coordinates.
// Note the order: for the output x + yi this macro leaves y on top and x
// below it (y, x), the reverse of the input convention.
// cost: 9
%macro i9
    // stack:          a, b
    DUP2
    // stack:       b, a, b
    DUP2
    // stack:   a , b, a, b
    PUSH 9 MULFP254
    // stack:  9a , b, a, b
    SUBFP254
    // stack: 9a - b,  a, b
    SWAP2
    // stack:  b , a, 9a - b
    PUSH 9 MULFP254
    // stack: 9b , a, 9a - b
    ADDFP254
    // stack: 9b + a, 9a - b
%endmacro
// Duplicate the six-word value on top of the stack.
//   in:  f: 6
//   out: f: 6, f: 6
// Each DUP6 copies the word that is six deep at that moment; because every
// copy pushes the remaining words one slot deeper, six identical DUP6s copy
// f5, f4, ..., f0 in turn and rebuild f in its original order.
// cost: 6
%macro dup_fp6_0
    // stack:       f: 6
    DUP6
    DUP6
    DUP6
    DUP6
    DUP6
    DUP6
    // stack: f: 6, f: 6
%endmacro
// Copy the six-word value that lies directly beneath another six-word value
// to the top of the stack.
//   in:  f: 6, g: 6
//   out: g: 6, f: 6, g: 6
// Six DUP12s copy g5, g4, ..., g0 in turn (each copy pushes the rest one slot
// deeper), so the copy of g is rebuilt in its original order.
// cost: 6
%macro dup_fp6_6
    // stack:       f: 6, g: 6
    DUP12
    DUP12
    DUP12
    DUP12
    DUP12
    DUP12
    // stack: g: 6, f: 6, g: 6
%endmacro
// Copy a six-word value that lies seven words down (beneath a six-word value
// and one extra word) to the top of the stack.
//   in:  f: 6, X, g: 6
//   out: g: 6, f: 6, X, g: 6
// DUP13 reaches one slot deeper than DUP12, which is what skips the single
// word X sitting between f and g. Six DUP13s copy g5, g4, ..., g0 in turn,
// so the copy of g is rebuilt in its original order.
// cost: 6
%macro dup_fp6_7
    // stack:       f: 6, X, g: 6
    DUP13
    DUP13
    DUP13
    DUP13
    DUP13
    DUP13
    // stack: g: 6, f: 6, X, g: 6
%endmacro
// Exchange two adjacent six-word values on the stack.
//   in:  f: 6, g: 6
//   out: g: 6, f: 6
// Word k of f is exchanged with word k of g. For k = 0 a single SWAP6 does
// it. For k >= 1 the pattern is SWAPk (bring f_k to the top), SWAP(k + 6)
// (exchange it with g_k), SWAPk (drop g_k into slot k and put the old top
// word back).
// cost: 16
%macro swap_fp6
    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
    SWAP6
    // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
    SWAP1
    SWAP7
    SWAP1
    // stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
    SWAP2
    SWAP8
    SWAP2
    // stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
    SWAP3
    SWAP9
    SWAP3
    // stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
    SWAP4
    SWAP10
    SWAP4
    // stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
    SWAP5
    SWAP11
    SWAP5
    // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
%endmacro
// Exchange two six-word values that are separated by one stack word, leaving
// that word where it is.
//   in:  f: 6, X, g: 6
//   out: g: 6, X, f: 6
// Same scheme as a plain six-word swap, but the partner of f_k is one slot
// deeper because of X: SWAP7 for k = 0, and SWAPk, SWAP(k + 7), SWAPk for
// k >= 1.
// cost: 16
%macro swap_fp6_hole
    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
    SWAP7
    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
    SWAP1
    SWAP8
    SWAP1
    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
    SWAP2
    SWAP9
    SWAP2
    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
    SWAP3
    SWAP10
    SWAP3
    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
    SWAP4
    SWAP11
    SWAP4
    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
    SWAP5
    SWAP12
    SWAP5
    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
%endmacro
// Word-wise sum of two six-word values: h_k = f_k + g_k (via ADDFP254).
//   in:  f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
//   out: h0, h1, h2, h3, h4, h5
// Each round handles one index k = 1..5 while f0 is used as a placeholder:
//   SWAP7    trades f0 (top) with g_k, putting g_k directly above f_k;
//   ADDFP254 replaces the two with h_k;
//   SWAP6    trades h_k with the parked f0, dropping h_k into its final slot.
// After five rounds only f0 and g0 are left on top and are added directly.
// cost: 16
%macro add_fp6
    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
    SWAP7
    // stack: g1, f1, f2, f3, f4, f5, g0, f0, g2, g3, g4, g5
    ADDFP254
    // stack: h1, f2, f3, f4, f5, g0, f0, g2, g3, g4, g5
    SWAP6
    // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
    SWAP7
    ADDFP254
    SWAP6
    // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
    SWAP7
    ADDFP254
    SWAP6
    // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
    SWAP7
    ADDFP254
    SWAP6
    // stack: f0, f5, g0, h1, h2, h3, h4, g5
    SWAP7
    ADDFP254
    SWAP6
    // stack: f0, g0, h1, h2, h3, h4, h5
    ADDFP254
    // stack: h0, h1, h2, h3, h4, h5
%endmacro
// *reversed argument subtraction* of two six-word values.
//   in:  f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
//   out: h0, h1, h2, h3, h4, h5
// Assuming SUBFP254 computes (top - second), as the stack notes in %i9
// indicate, the result is h_k = g_k - f_k: the value on top of the stack is
// subtracted from the value beneath it.
// Each round handles one index k = 1..5 while f0 is used as a placeholder:
//   SWAP7    trades f0 (top) with g_k, putting g_k directly above f_k;
//   SUBFP254 replaces the two with h_k;
//   SWAP6    trades h_k with the parked f0, dropping h_k into its final slot.
// The last pair arrives as (f0, g0), so one extra SWAP1 is needed to put g0
// on top before the final SUBFP254; that is the 17th instruction.
// cost: 17
%macro subr_fp6
    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
    SWAP7
    // stack: g1, f1, f2, f3, f4, f5, g0, f0, g2, g3, g4, g5
    SUBFP254
    // stack: h1, f2, f3, f4, f5, g0, f0, g2, g3, g4, g5
    SWAP6
    // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
    SWAP7
    SUBFP254
    SWAP6
    // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
    SWAP7
    SUBFP254
    SWAP6
    // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
    SWAP7
    SUBFP254
    SWAP6
    // stack: f0, f5, g0, h1, h2, h3, h4, g5
    SWAP7
    SUBFP254
    SWAP6
    // stack: f0, g0, h1, h2, h3, h4, h5
    SWAP1
    // stack: g0, f0, h1, h2, h3, h4, h5
    SUBFP254
    // stack: h0, h1, h2, h3, h4, h5
%endmacro