207 lines
4.4 KiB
NASM
Raw Normal View History

2022-10-07 15:41:46 -07:00
// cost: 6 loads + 6 offsets + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6(offset)
// stack:
PUSH $offset
%add_const(5)
%mload_kernel_general
// stack: x5
PUSH $offset
%add_const(4)
%mload_kernel_general
// stack: x4, x5
PUSH $offset
%add_const(3)
%mload_kernel_general
// stack: x3, x4, x5
PUSH $offset
%add_const(2)
%mload_kernel_general
// stack: x2, x3, x4, x5
PUSH $offset
%add_const(1)
%mload_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $offset
%mload_kernel_general
// stack: x0, x1, x2, x3, x4, x5
%endmacro
// cost: 40
%macro store_fp6(offset)
// stack: x0, x1, x2, x3, x4, x5
PUSH $offset
%mstore_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $offset
%add_const(1)
%mstore_kernel_general
// stack: x2, x3, x4, x5
PUSH $offset
%add_const(2)
%mstore_kernel_general
// stack: x3, x4, x5
PUSH $offset
%add_const(3)
%mstore_kernel_general
// stack: x4, x5
PUSH $offset
%add_const(4)
%mstore_kernel_general
// stack: x5
PUSH $offset
%add_const(5)
%mstore_kernel_general
// stack:
%endmacro
2022-10-07 18:13:32 -07:00
// cost: 49
%macro store_fp6_sh(offset)
// stack: x0, x1, x2, x3, x4, x5
PUSH $offset
%add_const(2)
%mstore_kernel_general
// stack: x1, x2, x3, x4, x5
PUSH $offset
%add_const(3)
%mstore_kernel_general
// stack: x2, x3, x4, x5
PUSH $offset
%add_const(4)
%mstore_kernel_general
// stack: x3, x4, x5
PUSH $offset
%add_const(5)
%mstore_kernel_general
// stack: x4, x5
%i9
// stack: y5, y4
PUSH $offset
%add_const(1)
%mstore_kernel_general
// stack: y4
PUSH $offset
%mstore_kernel_general
// stack:
%endmacro
2022-10-07 15:41:46 -07:00
// cost: 6
%macro dup1_fp6
// stack: F: 6
DUP6
DUP6
DUP6
DUP6
DUP6
DUP6
// stack: F: 6, F: 6
%endmacro
// cost: 6
%macro dup2_fp6
// stack: F: 6, G: 6
DUP12
DUP12
DUP12
DUP12
DUP12
DUP12
// stack: G: 6, F: 6, G: 6
%endmacro
// cost: 16
%macro swap_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP6
// stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
SWAP1
SWAP7
SWAP1
// stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
SWAP2
SWAP8
SWAP2
// stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
SWAP3
SWAP9
SWAP3
// stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
SWAP4
SWAP10
SWAP4
// stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
SWAP5
SWAP11
SWAP5
// stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
%endmacro
// cost: 16
%macro add_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP7
ADD
SWAP6
// stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
SWAP7
ADD
SWAP6
// stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
SWAP7
ADD
SWAP6
// stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
SWAP7
ADD
SWAP6
// stack: f0, f5, g0, h1, h2, h3, h4, g5
SWAP7
ADD
SWAP6
// stack: f0, g0, h1, h2, h3, h4, h5
ADD
// stack: h0, h1, h2, h3, h4, h5
%endmacro
// *backwards order subtraction* cost: 16
%macro bus_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
SWAP7
SUB
SWAP6
// stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5
SWAP7
SUB
SWAP6
// stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5
SWAP7
SUB
SWAP6
// stack: f0, f4, f5, g0, h1, h2, h3, g4, g5
SWAP7
SUB
SWAP6
// stack: f0, f5, g0, h1, h2, h3, h4, g5
SWAP7
SUB
SWAP6
// stack: f0, g0, h1, h2, h3, h4, h5
SUB
// stack: h0, h1, h2, h3, h4, h5
%endmacro
2022-10-07 18:13:32 -07:00
// cost: 9; note this returns y, x for x + yi
%macro i9
// stack: a , b
DUP2
DUP2
// stack: a , b, a , b
%mul_const(9)
SUB
// stack: 9a - b, a , b
SWAP2
// stack: b , a, 9a - b
%mul_const(9)
ADD
// stack: 9b + a, 9a - b
2022-10-07 15:41:46 -07:00
%endmacro