sparse mul finished

This commit is contained in:
Dmitry Vagner 2022-11-09 16:17:07 -08:00
parent 215e9a6e04
commit 5d2a9b3d53
4 changed files with 483 additions and 77 deletions

View File

@ -1,3 +1,7 @@
%macro offset_fp6
// Advances the pointer on top of the stack by 6 words, i.e. maps ind -> ind' = ind + 6.
// The fp12 routines in this commit use it to step from the first fp6 half of an
// element to the second half.
// stack: ptr
%add_const(6)
// stack: ptr + 6
%endmacro
// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro load_fp6
// stack: ptr
@ -87,6 +91,51 @@
// stack:
%endmacro
// cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53
%macro store_fp6_double
// Writes the doubled fp6 element (2*x0, ..., 2*x5) to ptr, ..., ptr+5 and consumes
// ptr together with all six coefficients. Each coefficient is doubled with
// PUSH 2 MULFP254 right before it is stored.
// ptr is first moved below the coefficients so every coefficient can be consumed
// from the top of the stack; x5 comes up first as a side effect of that swap.
// stack: ptr, x0, x1, x2, x3, x4, x5
SWAP6
// stack: x5, x0, x1, x2, x3, x4, ptr
PUSH 2 MULFP254
// stack: 2*x5, x0, x1, x2, x3, x4, ptr
DUP7 %add_const(5)
// stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
%mstore_kernel_general
// stack: x0, x1, x2, x3, x4, ptr
PUSH 2 MULFP254
// stack: 2*x0, x1, x2, x3, x4, ptr
DUP6
// stack: ind0, 2*x0, x1, x2, x3, x4, ptr
%mstore_kernel_general
// stack: x1, x2, x3, x4, ptr
PUSH 2 MULFP254
// stack: 2*x1, x2, x3, x4, ptr
DUP5 %add_const(1)
// stack: ind1, 2*x1, x2, x3, x4, ptr
%mstore_kernel_general
// stack: x2, x3, x4, ptr
PUSH 2 MULFP254
// stack: 2*x2, x3, x4, ptr
DUP4 %add_const(2)
// stack: ind2, 2*x2, x3, x4, ptr
%mstore_kernel_general
// stack: x3, x4, ptr
PUSH 2 MULFP254
// stack: 2*x3, x4, ptr
DUP3 %add_const(3)
// stack: ind3, 2*x3, x4, ptr
%mstore_kernel_general
// stack: x4, ptr
PUSH 2 MULFP254
// stack: 2*x4, ptr
// last coefficient: ptr is no longer needed afterwards, so it is consumed instead of duplicated
SWAP1
// stack: ptr, 2*x4
%add_const(4)
// stack: ind4, 2*x4
%mstore_kernel_general
// stack:
%endmacro
// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
%macro store_fp6(ptr)
// stack: x0, x1, x2, x3, x4, x5
@ -175,31 +224,55 @@
DUP6
DUP6
DUP6
// stack: f: 6, g: 6
// stack: f: 6, f: 6
%endmacro
// cost: 6
%macro dup_fp6_2
// Copies the fp6 element f that sits below two other stack words (X: 2) to the top.
// f5 is at depth 8; every DUP8 pushes one word, so the next coefficient
// (f4, f3, ...) is again found at depth 8.
// stack: X: 2, f: 6
DUP8
DUP8
DUP8
DUP8
DUP8
DUP8
// stack: f: 6, X: 2, f: 6
%endmacro
// cost: 6
%macro dup_fp6_6
// Copies the fp6 element f that sits below six other stack words (X: 6) to the top.
// f5 is at depth 12; every DUP12 pushes one word, so the next coefficient
// is again found at depth 12.
// stack: X: 6, f: 6
DUP12
DUP12
DUP12
DUP12
DUP12
DUP12
// stack: f: 6, X: 6, f: 6
%endmacro
// cost: 6
%macro dup_fp6_7
// Copies the fp6 element f that sits below seven other stack words (X: 7) to the top.
// f5 is at depth 13; every DUP13 pushes one word, so the next coefficient
// is again found at depth 13.
// stack: X: 7, f: 6
DUP13
DUP13
DUP13
DUP13
DUP13
DUP13
// stack: f: 6, X: 7, f: 6
%endmacro
// cost: 6
%macro dup_fp6_8
// Copies the fp6 element f that sits below eight other stack words (X: 8) to the top.
// f5 is at depth 14; every DUP14 pushes one word, so the next coefficient
// is again found at depth 14.
// stack: X: 8, f: 6
DUP14
DUP14
DUP14
DUP14
DUP14
DUP14
// stack: f: 6, X: 8, f: 6
%endmacro
// cost: 16
@ -231,7 +304,7 @@
// cost: 16
// swap two fp6 elements with a stack term separating them
// (f: 6, x, g: 6) -> (g: 6, x, f: 6)
// (f: 6, X, g: 6) -> (g: 6, X, f: 6)
%macro swap_fp6_hole
// stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
SWAP7
@ -258,6 +331,35 @@
// stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
%endmacro
// cost: 16
// swap two fp6 elements with two stack terms separating them
// (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
%macro swap_fp6_hole_2
// Exchanges two fp6 elements that are separated by two stack words (X: 2);
// the two separating words keep their position.
// f_i is at depth i+1 and g_i at depth i+9, so each pair is exchanged by bringing
// f_i to the top (SWAPi), swapping it with g_i (SWAP(i+8)) and putting g_i back (SWAPi).
// stack: f0, f1, f2, f3, f4, f5, X: 2, g0, g1, g2, g3, g4, g5
SWAP8
// stack: g0, f1, f2, f3, f4, f5, X: 2, f0, g1, g2, g3, g4, g5
SWAP1
SWAP9
SWAP1
// stack: g0, g1, f2, f3, f4, f5, X: 2, f0, f1, g2, g3, g4, g5
SWAP2
SWAP10
SWAP2
// stack: g0, g1, g2, f3, f4, f5, X: 2, f0, f1, f2, g3, g4, g5
SWAP3
SWAP11
SWAP3
// stack: g0, g1, g2, g3, f4, f5, X: 2, f0, f1, f2, f3, g4, g5
SWAP4
SWAP12
SWAP4
// stack: g0, g1, g2, g3, g4, f5, X: 2, f0, f1, f2, f3, f4, g5
SWAP5
SWAP13
SWAP5
// stack: g0, g1, g2, g3, g4, g5, X: 2, f0, f1, f2, f3, f4, f5
%endmacro
// cost: 16
%macro add_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
@ -285,6 +387,37 @@
// stack: h0, h1, h2, h3, h4, h5
%endmacro
// cost: 18
// add two fp6 elements with a to-be-popped stack term separating them
// (f: 6, X, g: 6) -> (f + g: 6)
%macro add_fp6_hole
// Coefficient-wise sum h_i = f_i + g_i of two fp6 elements that are separated by a
// single stack word X; X is discarded.
// Each round: SWAP8 exchanges f0 (top) with the next g_i, ADDFP254 folds g_i into the
// f_i now directly below it, and SWAP7 parks the sum h_i in the g region while
// bringing f0 back to the top. f0 itself is added last, after X has been popped.
// stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
SWAP8
ADDFP254
SWAP7
// stack: f0, f2, f3, f4, f5, X, g0, h1, g2, g3, g4, g5
SWAP8
ADDFP254
SWAP7
// stack: f0, f3, f4, f5, X, g0, h1, h2, g3, g4, g5
SWAP8
ADDFP254
SWAP7
// stack: f0, f4, f5, X, g0, h1, h2, h3, g4, g5
SWAP8
ADDFP254
SWAP7
// stack: f0, f5, X, g0, h1, h2, h3, h4, g5
SWAP8
ADDFP254
SWAP7
// stack: f0, X, g0, h1, h2, h3, h4, h5
// drop the separating word, then finish with h0 = f0 + g0
SWAP1
POP
ADDFP254
// stack: h0, h1, h2, h3, h4, h5
%endmacro
// *reversed argument subtraction* cost: 17
%macro subr_fp6
// stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5

View File

@ -1,31 +1,31 @@
/// Note: uncomment this to test
/// global test_mul_Fp12:
/// // stack: f, in0 , f', g, in1 , g', in1, out, in0, out
/// global test_mul_fp12:
/// // stack: f, inA , f', g, inB , g', inB, out, inA, out
/// DUP7
/// // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out
/// // stack: inA, f, inA , f', g, inB , g', inB, out, inA, out
/// %store_fp6
/// // stack: in0 , f', g, in1 , g', in1, out, in0, out
/// %add_const(6)
/// // stack: in0', f', g, in1 , g', in1, out, in0, out
/// // stack: inA , f', g, inB , g', inB, out, inA, out
/// %offset_fp6
/// // stack: inA', f', g, inB , g', inB, out, inA, out
/// %store_fp6
/// // stack: g, in1 , g', in1, out, in0, out
/// // stack: g, inB , g', inB, out, inA, out
/// DUP7
/// // stack: in1, g, in1 , g', in1, out, in0, out
/// // stack: inB, g, inB , g', inB, out, inA, out
/// %store_fp6
/// // stack: in1 , g', in1, out, in0, out
/// %add_const(6)
/// // stack: in1', g', in1, out, in0, out
/// // stack: inB , g', inB, out, inA, out
/// %offset_fp6
/// // stack: inB', g', inB, out, inA, out
/// %store_fp6
/// // stack: in1, out, in0, out
/// // stack: inB, out, inA, out
/// PUSH ret_stack
/// // stack: ret_stack, in1, out, in0, out
/// // stack: ret_stack, inB, out, inA, out
/// SWAP3
/// // stack: in0, in1, out, ret_stack, out
/// %jump(mul_Fp12)
/// // stack: inA, inB, out, ret_stack, out
/// %jump(mul_fp12)
/// ret_stack:
/// // stack: out
/// DUP1 %add_const(6)
/// DUP1 %offset_fp6
/// // stack: out', out
/// %load_fp6
/// // stack: h', out
@ -56,9 +56,9 @@
/// DUP | 6
/// PUSH | 6
/// POP | 2
/// JUMP | 1
/// JUMP | 6
///
/// TOTAL: 1196
/// TOTAL: 1201
/// inputs:
/// F = f + f'z
@ -71,96 +71,365 @@
/// h' = (f+f')(g+g') - fg - f'g'
///
/// memory pointers [ind' = ind+6]
/// {inA: f, inA': f', inB: g, inB': g', out: h, out': h'}
///
/// f, f', g, g' consist of six elements on the stack
// Karatsuba-style fp12 multiplication H = F * G with F = f + f'z, G = g + g'z:
//     h  = fg + sh(f'g')
//     h' = (f+f')(g+g') - fg - f'g'
// Reads f, f' from inA, inA' and g, g' from inB, inB'; writes h, h' to out, out'
// (ind' = ind + 6). Expects the return address directly below `out` on the stack;
// consumes inA, inB, out and jumps to it.
// Kernel-general cells 0..17 are used as scratch: {0: sh(f'g'), 6: f'g', 12: fg}.
// `mul_Fp12` is the previous spelling of the label, kept as an alias of the same entry point.
global mul_Fp12:
global mul_fp12:
    // stack: inA, inB, out
    DUP1 %offset_fp6
    // stack: inA', inA, inB, out
    %load_fp6
    // stack: f', inA, inB, out
    DUP8 %offset_fp6
    // stack: inB', f', inA, inB, out
    %load_fp6
    // stack: g', f', inA, inB, out
    PUSH ret_1
    // stack: ret_1, g', f', inA, inB, out
    %dup_fp6_7
    // stack: f', ret_1, g', f', inA, inB, out
    %dup_fp6_7
    // stack: g', f', ret_1, g', f', inA, inB, out
    %jump(mul_fp6)
ret_1:
    // stack: f'g', g', f', inA, inB, out
    %dup_fp6_0
    // stack: f'g', f'g', g', f', inA, inB, out
    %store_fp6_sh(0)
    // stack: f'g', g', f', inA, inB, out  {0: sh(f'g')}
    %store_fp6(6)
    // stack: g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
    DUP13
    // stack: inA, g', f', inA, inB, out
    DUP15
    // stack: inB, inA, g', f', inA, inB, out
    %load_fp6
    // stack: g, inA, g', f', inA, inB, out
    %swap_fp6_hole
    // stack: g', inA, g, f', inA, inB, out
    %dup_fp6_7
    // stack: g, g', inA, g, f', inA, inB, out
    %add_fp6
    // stack: g+g', inA, g, f', inA, inB, out
    %swap_fp6_hole
    // stack: g, inA, g+g', f', inA, inB, out
    PUSH ret_2
    // stack: ret_2, g, inA, g+g', f', inA, inB, out
    SWAP7
    // stack: inA, g, ret_2, g+g', f', inA, inB, out
    %load_fp6
    // stack: f, g, ret_2, g+g', f', inA, inB, out
    %jump(mul_fp6)
ret_2:
    // stack: fg, g+g', f', inA, inB, out
    %store_fp6(12)
    // stack: g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
    %swap_fp6
    // stack: f', g+g', inA, inB, out
    PUSH ret_3
    // stack: ret_3, f', g+g', inA, inB, out
    SWAP13
    // stack: inA, f', g+g', ret_3, inB, out
    %load_fp6
    // stack: f, f', g+g', ret_3, inB, out
    %add_fp6
    // stack: f+f', g+g', ret_3, inB, out
    %jump(mul_fp6)
ret_3:
    // stack: (f+f')(g+g'), inB, out
    %load_fp6(12)
    // stack: fg, (f+f')(g+g'), inB, out
    %swap_fp6
    // stack: (f+f')(g+g'), fg, inB, out
    %dup_fp6_6
    // stack: fg, (f+f')(g+g'), fg, inB, out
    %load_fp6(6)
    // stack: f'g', fg, (f+f')(g+g'), fg, inB, out
    %add_fp6
    // stack: f'g'+fg, (f+f')(g+g'), fg, inB, out
    %subr_fp6
    // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out
    DUP14 %offset_fp6
    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out
    %store_fp6
    // stack: fg, inB, out
    %load_fp6(0)
    // stack: sh(f'g'), fg, inB, out
    %add_fp6
    // stack: sh(f'g') + fg, inB, out
    DUP8
    // stack: out, sh(f'g') + fg, inB, out
    %store_fp6
    // stack: inB, out
    %pop2
    JUMP
/// fp6 functions:
/// fn | num | ops | cost
/// -------------------------
/// load | 2 | 40 | 80
/// store | 2 | 40 | 80
/// dup | 2 | 6 | 12
/// swap | 2 | 16 | 32
/// add | 1 | 16 | 16
/// mul | 1 | 157 | 157
/// sq | 2 | |
/// dbl | 1 | 13 | 13
///
/// lone stack operations:
/// op | num
/// ------------
/// ADD | 3
/// SWAP | 4
/// DUP | 5
/// PUSH | 6
/// POP | 3
/// JUMP | 4
///
/// TOTAL:
/// input:
/// F = f + f'z
///
/// output:
/// H = h + h'z = FF
///
/// h = ff + sh(f'f')
/// h' = 2ff'
///
/// memory pointers [ind' = ind+6]
/// {inp: f, inp': f', out: h, out': h'}
///
/// f, f' consist of six elements on the stack
// fp12 squaring H = F * F with F = f + f'z:
//     h  = ff + sh(f'f')
//     h' = 2ff'
// Reads f, f' from inp, inp'; writes h, h' to out, out' (ind' = ind + 6).
// Expects the return address directly below `out`; consumes inp, out and jumps to it.
// Kernel-general cells 0..5 are used as scratch for the shift of f'f', the same
// cells mul_fp12 uses for sh(f'g').
global square_fp12:
    // stack: inp, out
    // f is read from inp itself; offsetting here would load f' twice and never read f
    DUP1
    // stack: inp, inp, out
    %load_fp6
    // stack: f, inp, out
    PUSH post_sq2
    // stack: post_sq2, f, inp, out
    SWAP7
    // stack: inp, f, post_sq2, out
    PUSH post_sq1
    // stack: post_sq1, inp, f, post_sq2, out
    %dup_fp6_2
    // stack: f, post_sq1, inp, f, post_sq2, out
    DUP16 %offset_fp6
    // stack: out', f, post_sq1, inp, f, post_sq2, out
    PUSH post_mul
    // stack: post_mul, out', f, post_sq1, inp, f, post_sq2, out
    DUP10 %offset_fp6
    // stack: inp', post_mul, out', f, post_sq1, inp, f, post_sq2, out
    %load_fp6
    // stack: f', post_mul, out', f, post_sq1, inp, f, post_sq2, out
    %swap_fp6_hole_2
    // stack: f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
    %dup_fp6_8
    // stack: f', f, post_mul, out', f', post_sq1, inp, f, post_sq2, out
    %jump(mul_fp6)
post_mul:
    // stack: f'f, out', f', post_sq1, inp, f, post_sq2, out
    DUP7
    // stack: out', f'f, out', f', post_sq1, inp, f, post_sq2, out
    %store_fp6_double
    // stack: out', f', post_sq1, inp, f, post_sq2, out  {out': 2ff'}
    POP
    // stack: f', post_sq1, inp, f, post_sq2, out
    %jump(square_fp6)
post_sq1:
    // stack: f'f', inp, f, post_sq2, out
    // h needs sh(f'f'), not f'f'; shift via a store/load round trip through scratch
    %store_fp6_sh(0)
    // stack: inp, f, post_sq2, out  {0: sh(f'f')}
    %load_fp6(0)
    // stack: sh(f'f'), inp, f, post_sq2, out
    %swap_fp6_hole
    // stack: f, inp, sh(f'f'), post_sq2, out
    // exchange inp and post_sq2 so the return label sits directly below f
    SWAP6 SWAP13 SWAP6
    // stack: f, post_sq2, sh(f'f'), inp, out
    %jump(square_fp6)
post_sq2:
    // stack: ff, sh(f'f'), inp, out
    %add_fp6
    // stack: ff + sh(f'f'), inp, out
    DUP8
    // stack: out, ff + sh(f'f'), inp, out
    %store_fp6
    // stack: inp, out
    %pop2
    JUMP
/// fp6 functions:
/// fn | num | ops | cost
/// -------------------------
/// load | 2 | 40 | 80
/// store | 2 | 40 | 80
/// dup | 2 | 6 | 12
/// swap | 2 | 16 | 32
/// add | 1 | 16 | 16
/// mul | 1 | 157 | 157
/// sq | 2 | |
/// dbl | 1 | 13 | 13
///
/// lone stack operations:
/// op | num
/// ------------
/// ADD | 3
/// SWAP | 4
/// DUP | 5
/// PUSH | 6
/// POP | 3
/// JUMP | 4
///
/// TOTAL:
/// input:
/// F = f + f'z
/// G = g0 + (G1)t + (G2)tz
///
/// output:
/// H = h + h'z = FG
/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
///
/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f')
/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
///
/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
///
/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element
// Sparse fp12 multiplication H = F * G with F = f + f'z and G = g0 + (G1)t + (G2)tz:
//     h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f')
//     h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
// Reads f, f' from inA, inA'; g0 from inB, G1 from inB + 1, G2 from inB + 3;
// writes h, h' to out, out' (ind' = ind + 6).
// Expects the return address directly below `out`; consumes inA, inB, out and jumps to it.
//
// All operands for both halves are laid out on the stack up front. In the second
// half the copies of `inB` and `out` left between the fp6 operands are consumed as
// the "hole" of %swap_fp6_hole / %add_fp6_hole rather than popped beforehand.
global mul_fp12_sparse:
    // stack: inA, inB, out
    DUP1 %offset_fp6
    // stack: inA', inA, inB, out
    %load_fp6
    // stack: f', inA, inB, out
    DUP8
    // stack: inB, f', inA, inB, out
    DUP8
    // stack: inA, inB, f', inA, inB, out
    %load_fp6
    // stack: f, inB, f', inA, inB, out
    DUP16
    // stack: out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack: f', out, f, inB, f', inA, inB, out
    DUP14
    // stack: inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack: f, inB, f', out, f, inB, f', inA, inB, out
    DUP7
    // stack: inB, f, inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_8
    // stack: f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %dup_fp6_7
    // stack: f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out

    // ---- first half: h = g0 * f + G2 * sh2(f') + G1 * sh(f) ----
    DUP13
    // stack: inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mload_kernel_general
    // stack: g0, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp_fp12
    // stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
    %swap_fp6
    // stack: f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    DUP13 %add_const(3)
    // stack: inB2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %load_fp2
    // stack: G2, f', g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp2_fp12_sh2
    // stack: G2 * sh2(f'), g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %add_fp6
    // stack: G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
    %swap_fp6_hole
    // stack: f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    DUP7 %add_const(1)
    // stack: inB1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %load_fp2
    // stack: G1, f, inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
    %add_fp6_hole
    // stack: h, inB, f', out, f, inB, f', inA, inB, out
    DUP14
    // stack: out, h, inB, f', out, f, inB, f', inA, inB, out
    %store_fp6
    // stack: inB, f', out, f, inB, f', inA, inB, out

    // ---- second half: h' = g0 * f' + G2 * sh(f) + G1 * sh(f') ----
    // inB is already on top and `out` sits between f' and f, so popping two words
    // here would discard a limb of f'; instead inB is consumed by the load and
    // `out` is used (and dropped) as the hole of the next swap/add pair.
    %mload_kernel_general
    // stack: g0, f', out, f, inB, f', inA, inB, out
    %mul_fp_fp12
    // stack: g0 * f', out, f, inB, f', inA, inB, out
    %swap_fp6_hole
    // stack: f, out, g0 * f', inB, f', inA, inB, out
    DUP14 %add_const(3)
    // stack: inB2, f, out, g0 * f', inB, f', inA, inB, out
    %load_fp2
    // stack: G2, f, out, g0 * f', inB, f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack: G2 * sh(f), out, g0 * f', inB, f', inA, inB, out
    %add_fp6_hole
    // stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out
    %swap_fp6_hole
    // stack: f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    DUP7 %add_const(1)
    // stack: inB1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    %load_fp2
    // stack: G1, f', inB, G2 * sh(f) + g0 * f', inA, inB, out
    %mul_fp2_fp12_sh
    // stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
    %add_fp6_hole
    // stack: h', inA, inB, out
    DUP9 %offset_fp6
    // stack: out', h', inA, inB, out
    %store_fp6
    // stack: inA, inB, out
    %pop3
    // return to the caller; without this the routine falls through past its end
    JUMP
/// global mul_fp12_sparse_fast:
/// // stack: inA, inB, out
/// DUP2
/// // stack: inB, inA, inB, out
/// %load_fp12_sparse
/// // stack: g0, G1, G1', inA, inB, out
/// DUP6 %offset_fp6
/// // stack: inA', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// DUP12
/// // stack: inA, f', g0, G1, G1', inA, inB, out
/// %load_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh2
/// // stack: (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// DUP26
/// // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f, f', g0, G1, G1', inA, inB, out
/// %semiclone_mul_fp2_fp6_sh
/// // stack: (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp2_fp6_sh
/// // stack: (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %clone_mul_fp_fp6
/// // stack: (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %add_fp6
/// // stack: (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// DUP20 offset_fp6
/// // stack: out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out
/// %store_fp6
/// // stack: f', g0, G1, G1', inA, inB, out
/// %pop14

View File

@ -256,3 +256,7 @@ global mul_fp6:
// stack: retdest, e0, e0_, e1, e1_, e2, e2_
JUMP
global square_fp6: